From: Nhat Pham <nphamcs@gmail.com>
Subject: zsmalloc: implement writeback mechanism for zsmalloc
Date: Mon, 28 Nov 2022 11:16:15 -0800

This commit adds a writeback mechanism to zsmalloc, analogous to the one
in the zbud allocator.  Zsmalloc determines the coldest zspage (i.e., the
least recently used) in the pool and attempts to write back all of its
stored compressed objects via the pool's evict handler.
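
For context, a rough sketch of how a zpool client (zswap is the current
in-tree user) is expected to drive this mechanism: it registers an evict
callback when creating the pool and later calls zpool_shrink().  The
helpers my_evict(), my_writeback_store() and my_shrink_one() below are
illustrative placeholders, not part of this patch; only the zpool_*()
calls are the existing API:

  #include <linux/errno.h>
  #include <linux/gfp.h>
  #include <linux/zpool.h>

  /*
   * Placeholder for the client's own write-out path (zswap would
   * decompress the object and write the page to swap here).
   */
  static int my_writeback_store(void *src);

  /*
   * Evict callback: write the compressed object out, then free its
   * handle.  zsmalloc defers freeing handles of objects freed while
   * their zspage is under reclaim and releases them when it is done
   * with the zspage.
   */
  static int my_evict(struct zpool *pool, unsigned long handle)
  {
          void *src = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
          int err = my_writeback_store(src);

          zpool_unmap_handle(pool, handle);
          if (err)
                  return err;

          zpool_free(pool, handle);       /* ends up in zs_free() */
          return 0;
  }

  static const struct zpool_ops my_ops = {
          .evict = my_evict,
  };

  static int my_shrink_one(void)
  {
          unsigned int reclaimed;
          struct zpool *zp;

          zp = zpool_create_pool("zsmalloc", "example", GFP_KERNEL, &my_ops);
          if (!zp)
                  return -ENOMEM;

          /*
           * Ask the pool to reclaim one page: zsmalloc picks the
           * coldest zspage from its LRU and evicts its objects.
           */
          return zpool_shrink(zp, 1, &reclaimed);
  }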

Link: https://lkml.kernel.org/r/20221128191616.1261026-7-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/zsmalloc.c |  194 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 183 insertions(+), 11 deletions(-)

--- a/mm/zsmalloc.c~zsmalloc-implement-writeback-mechanism-for-zsmalloc
+++ a/mm/zsmalloc.c
@@ -271,12 +271,13 @@ struct zspage {
 #ifdef CONFIG_ZPOOL
         /* links the zspage to the lru list in the pool */
         struct list_head lru;
+        bool under_reclaim;
+        /* list of unfreed handles whose objects have been reclaimed */
+        unsigned long *deferred_handles;
 #endif

         struct zs_pool *pool;
-#ifdef CONFIG_COMPACTION
         rwlock_t lock;
-#endif
 };

 struct mapping_area {
@@ -297,10 +298,11 @@ static bool ZsHugePage(struct zspage *zs
         return zspage->huge;
 }

-#ifdef CONFIG_COMPACTION
 static void migrate_lock_init(struct zspage *zspage);
 static void migrate_read_lock(struct zspage *zspage);
 static void migrate_read_unlock(struct zspage *zspage);
+
+#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage);
 static void migrate_write_lock_nested(struct zspage *zspage);
 static void migrate_write_unlock(struct zspage *zspage);
@@ -308,9 +310,6 @@ static void kick_deferred_free(struct zs
 static void init_deferred_free(struct zs_pool *pool);
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
 #else
-static void migrate_lock_init(struct zspage *zspage) {}
-static void migrate_read_lock(struct zspage *zspage) {}
-static void migrate_read_unlock(struct zspage *zspage) {}
 static void migrate_write_lock(struct zspage *zspage) {}
 static void migrate_write_lock_nested(struct zspage *zspage) {}
 static void migrate_write_unlock(struct zspage *zspage) {}
@@ -413,6 +412,27 @@ static void zs_zpool_free(void *pool, un
         zs_free(pool, handle);
 }

+static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
+
+static int zs_zpool_shrink(void *pool, unsigned int pages,
+                        unsigned int *reclaimed)
+{
+        unsigned int total = 0;
+        int ret = -EINVAL;
+
+        while (total < pages) {
+                ret = zs_reclaim_page(pool, 8);
+                if (ret < 0)
+                        break;
+                total++;
+        }
+
+        if (reclaimed)
+                *reclaimed = total;
+
+        return ret;
+}
+
 static void *zs_zpool_map(void *pool, unsigned long handle,
                         enum zpool_mapmode mm)
 {
@@ -451,6 +471,7 @@ static struct zpool_driver zs_zpool_driv
         .malloc_support_movable = true,
         .malloc = zs_zpool_malloc,
         .free = zs_zpool_free,
+        .shrink = zs_zpool_shrink,
         .map = zs_zpool_map,
         .unmap = zs_zpool_unmap,
         .total_size = zs_zpool_total_size,
@@ -924,6 +945,25 @@ unlock:
         return 0;
 }

+#ifdef CONFIG_ZPOOL
+/*
+ * Free all the deferred handles whose objects are freed in zs_free.
+ */
+static void free_handles(struct zs_pool *pool, struct zspage *zspage)
+{
+        unsigned long handle = (unsigned long)zspage->deferred_handles;
+
+        while (handle) {
+                unsigned long nxt_handle = handle_to_obj(handle);
+
+                cache_free_handle(pool, handle);
+                handle = nxt_handle;
+        }
+}
+#else
+static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {}
+#endif
+
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
                                 struct zspage *zspage)
 {
@@ -938,6 +978,9 @@ static void __free_zspage(struct zs_pool
         VM_BUG_ON(get_zspage_inuse(zspage));
         VM_BUG_ON(fg != ZS_EMPTY);

+        /* Free all deferred handles from zs_free */
+        free_handles(pool, zspage);
+
         next = page = get_first_page(zspage);
         do {
                 VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -1023,6 +1066,8 @@ static void init_zspage(struct size_clas

 #ifdef CONFIG_ZPOOL
         INIT_LIST_HEAD(&zspage->lru);
+        zspage->under_reclaim = false;
+        zspage->deferred_handles = NULL;
 #endif

         set_freeobj(zspage, 0);
@@ -1572,12 +1617,26 @@ void zs_free(struct zs_pool *pool, unsig

         obj_free(class->size, obj);
         class_stat_dec(class, OBJ_USED, 1);
+
+#ifdef CONFIG_ZPOOL
+        if (zspage->under_reclaim) {
+                /*
+                 * Reclaim needs the handles during writeback. It'll free
+                 * them along with the zspage when it's done with them.
+                 *
+                 * Record current deferred handle at the memory location
+                 * whose address is given by handle.
+                 */
+                record_obj(handle, (unsigned long)zspage->deferred_handles);
+                zspage->deferred_handles = (unsigned long *)handle;
+                spin_unlock(&pool->lock);
+                return;
+        }
+#endif
         fullness = fix_fullness_group(class, zspage);
-        if (fullness != ZS_EMPTY)
-                goto out;
+        if (fullness == ZS_EMPTY)
+                free_zspage(pool, class, zspage);

-        free_zspage(pool, class, zspage);
-out:
         spin_unlock(&pool->lock);
         cache_free_handle(pool, handle);
 }
@@ -1777,7 +1836,7 @@ static enum fullness_group putback_zspag
         return fullness;
 }

-#ifdef CONFIG_COMPACTION
+#if defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION)
 /*
  * To prevent zspage destroy during migration, zspage freeing should
  * hold locks of all pages in the zspage.
@@ -1819,6 +1878,24 @@ static void lock_zspage(struct zspage *z
         }
         migrate_read_unlock(zspage);
 }
+#endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */
+
+#ifdef CONFIG_ZPOOL
+/*
+ * Unlocks all the pages of the zspage.
+ *
+ * pool->lock must be held before this function is called
+ * to prevent the underlying pages from migrating.
+ */
+static void unlock_zspage(struct zspage *zspage)
+{
+        struct page *page = get_first_page(zspage);
+
+        do {
+                unlock_page(page);
+        } while ((page = get_next_page(page)) != NULL);
+}
+#endif /* CONFIG_ZPOOL */

 static void migrate_lock_init(struct zspage *zspage)
 {
@@ -1835,6 +1912,7 @@ static void migrate_read_unlock(struct z
         read_unlock(&zspage->lock);
 }

+#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage)
 {
         write_lock(&zspage->lock);
@@ -2399,6 +2477,100 @@ void zs_destroy_pool(struct zs_pool *poo
 }
 EXPORT_SYMBOL_GPL(zs_destroy_pool);

+#ifdef CONFIG_ZPOOL
+static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
+{
+        int i, obj_idx, ret = 0;
+        unsigned long handle;
+        struct zspage *zspage;
+        struct page *page;
+        enum fullness_group fullness;
+
+        /* Lock LRU and fullness list */
+        spin_lock(&pool->lock);
+        if (list_empty(&pool->lru)) {
+                spin_unlock(&pool->lock);
+                return -EINVAL;
+        }
+
+        for (i = 0; i < retries; i++) {
+                struct size_class *class;
+
+                zspage = list_last_entry(&pool->lru, struct zspage, lru);
+                list_del(&zspage->lru);
+
+                /* zs_free may free objects, but not the zspage and handles */
+                zspage->under_reclaim = true;
+
+                class = zspage_class(pool, zspage);
+                fullness = get_fullness_group(class, zspage);
+
+                /* Lock out object allocations and object compaction */
+                remove_zspage(class, zspage, fullness);
+
+                spin_unlock(&pool->lock);
+                cond_resched();
+
+                /* Lock backing pages into place */
+                lock_zspage(zspage);
+
+                obj_idx = 0;
+                page = get_first_page(zspage);
+                while (1) {
+                        handle = find_alloced_obj(class, page, &obj_idx);
+                        if (!handle) {
+                                page = get_next_page(page);
+                                if (!page)
+                                        break;
+                                obj_idx = 0;
+                                continue;
+                        }
+
+                        /*
+                         * This will write the object and call zs_free.
+                         *
+                         * zs_free will free the object, but the
+                         * under_reclaim flag prevents it from freeing
+                         * the zspage altogether. This is necessary so
+                         * that we can continue working with the
+                         * zspage potentially after the last object
+                         * has been freed.
+                         */
+                        ret = pool->zpool_ops->evict(pool->zpool, handle);
+                        if (ret)
+                                goto next;
+
+                        obj_idx++;
+                }
+
+next:
+                /* For freeing the zspage, or putting it back in the pool and LRU list. */
+                spin_lock(&pool->lock);
+                zspage->under_reclaim = false;
+
+                if (!get_zspage_inuse(zspage)) {
+                        /*
+                         * Fullness went stale as zs_free() won't touch it
+                         * while the page is removed from the pool. Fix it
+                         * up for the check in __free_zspage().
+                         */
+                        zspage->fullness = ZS_EMPTY;
+
+                        __free_zspage(pool, class, zspage);
+                        spin_unlock(&pool->lock);
+                        return 0;
+                }
+
+                putback_zspage(class, zspage);
+                list_add(&zspage->lru, &pool->lru);
+                unlock_zspage(zspage);
+        }
+
+        spin_unlock(&pool->lock);
+        return -EAGAIN;
+}
+#endif /* CONFIG_ZPOOL */
+
 static int __init zs_init(void)
 {
         int ret;
_