| From: Yosry Ahmed <yosryahmed@google.com> |
| Subject: mm: zswap: multiple zpools support |
| Date: Tue, 20 Jun 2023 19:46:44 +0000 |
| |
| Support using multiple zpools of the same type in zswap, for concurrency |
| purposes. This commit uses a fixed number of 32 zpools, a value that was |
| determined empirically. It can later be changed or made into a config |
| option if needed. |
| |
| On a setup with zswap and zsmalloc, comparing a single zpool to 32 zpools |
| shows improvements in the zsmalloc lock contention, especially on the swap |
| out path. |
| |
| The following shows the perf analysis of the swapout path when 10 |
| workloads are simultaneously reclaiming and refaulting tmpfs pages. There |
| are some improvements on the swap-in path as well, but less significant. |
| |
| 1 zpool: |
| |
| |--28.99%--zswap_frontswap_store |
| | |
| <snip> |
| | |
| |--8.98%--zpool_map_handle |
| | | |
| | --8.98%--zs_zpool_map |
| | | |
| | --8.95%--zs_map_object |
| | | |
| | --8.38%--_raw_spin_lock |
| | | |
| | --7.39%--queued_spin_lock_slowpath |
| | |
| |--8.82%--zpool_malloc |
| | | |
| | --8.82%--zs_zpool_malloc |
| | | |
| | --8.80%--zs_malloc |
| | | |
| | |--7.21%--_raw_spin_lock |
| | | | |
| | | --6.81%--queued_spin_lock_slowpath |
| <snip> |
| |
| 32 zpools: |
| |
| |--16.73%--zswap_frontswap_store |
| | |
| <snip> |
| | |
| |--1.81%--zpool_malloc |
| | | |
| | --1.81%--zs_zpool_malloc |
| | | |
| | --1.79%--zs_malloc |
| | | |
| | --0.73%--obj_malloc |
| | |
| |--1.06%--zswap_update_total_size |
| | |
| |--0.59%--zpool_map_handle |
| | | |
| | --0.59%--zs_zpool_map |
| | | |
| | --0.57%--zs_map_object |
| | | |
| | --0.51%--_raw_spin_lock |
| <snip> |
| |
| Link: https://lkml.kernel.org/r/20230620194644.3142384-1-yosryahmed@google.com |
| Signed-off-by: Yosry Ahmed <yosryahmed@google.com> |
| Suggested-by: Yu Zhao <yuzhao@google.com> |
| Acked-by: Chris Li (Google) <chrisl@kernel.org> |
| Reviewed-by: Nhat Pham <nphamcs@gmail.com> |
| Tested-by: Nhat Pham <nphamcs@gmail.com> |
| Cc: Dan Streetman <ddstreet@ieee.org> |
| Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
| Cc: Seth Jennings <sjenning@redhat.com> |
| Cc: Vitaly Wool <vitaly.wool@konsulko.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/zswap.c | 83 +++++++++++++++++++++++++++++++++------------------ |
| 1 file changed, 55 insertions(+), 28 deletions(-) |
| |
| --- a/mm/zswap.c~mm-zswap-multiple-zpools-support |
| +++ a/mm/zswap.c |
| @@ -142,6 +142,9 @@ static bool zswap_exclusive_loads_enable |
| CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON); |
| module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644); |
| |
| +/* Number of zpools in zswap_pool (empirically determined for scalability) */ |
| +#define ZSWAP_NR_ZPOOLS 32 |
| + |
| /********************************* |
| * data structures |
| **********************************/ |
| @@ -161,7 +164,7 @@ struct crypto_acomp_ctx { |
| * needs to be verified that it's still valid in the tree. |
| */ |
| struct zswap_pool { |
| - struct zpool *zpool; |
| + struct zpool *zpools[ZSWAP_NR_ZPOOLS]; |
| struct crypto_acomp_ctx __percpu *acomp_ctx; |
| struct kref kref; |
| struct list_head list; |
| @@ -248,7 +251,7 @@ static bool zswap_has_pool; |
| |
| #define zswap_pool_debug(msg, p) \ |
| pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ |
| - zpool_get_type((p)->zpool)) |
| + zpool_get_type((p)->zpools[0])) |
| |
| static int zswap_writeback_entry(struct zswap_entry *entry, |
| struct zswap_tree *tree); |
| @@ -272,11 +275,13 @@ static void zswap_update_total_size(void |
| { |
| struct zswap_pool *pool; |
| u64 total = 0; |
| + int i; |
| |
| rcu_read_lock(); |
| |
| list_for_each_entry_rcu(pool, &zswap_pools, list) |
| - total += zpool_get_total_size(pool->zpool); |
| + for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) |
| + total += zpool_get_total_size(pool->zpools[i]); |
| |
| rcu_read_unlock(); |
| |
| @@ -365,6 +370,16 @@ static bool zswap_rb_erase(struct rb_roo |
| return false; |
| } |
| |
| +static struct zpool *zswap_find_zpool(struct zswap_entry *entry) |
| +{ |
| + int i = 0; |
| + |
| + if (ZSWAP_NR_ZPOOLS > 1) |
| + i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS)); |
| + |
| + return entry->pool->zpools[i]; |
| +} |
| + |
| /* |
| * Carries out the common pattern of freeing and entry's zpool allocation, |
| * freeing the entry itself, and decrementing the number of stored pages. |
| @@ -381,7 +396,7 @@ static void zswap_free_entry(struct zswa |
| spin_lock(&entry->pool->lru_lock); |
| list_del(&entry->lru); |
| spin_unlock(&entry->pool->lru_lock); |
| - zpool_free(entry->pool->zpool, entry->handle); |
| + zpool_free(zswap_find_zpool(entry), entry->handle); |
| zswap_pool_put(entry->pool); |
| } |
| zswap_entry_cache_free(entry); |
| @@ -590,7 +605,8 @@ static struct zswap_pool *zswap_pool_fin |
| list_for_each_entry_rcu(pool, &zswap_pools, list) { |
| if (strcmp(pool->tfm_name, compressor)) |
| continue; |
| - if (strcmp(zpool_get_type(pool->zpool), type)) |
| + /* all zpools share the same type */ |
| + if (strcmp(zpool_get_type(pool->zpools[0]), type)) |
| continue; |
| /* if we can't get it, it's about to be destroyed */ |
| if (!zswap_pool_get(pool)) |
| @@ -695,6 +711,7 @@ static void shrink_worker(struct work_st |
| |
| static struct zswap_pool *zswap_pool_create(char *type, char *compressor) |
| { |
| + int i; |
| struct zswap_pool *pool; |
| char name[38]; /* 'zswap' + 32 char (max) num + \0 */ |
| gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; |
| @@ -715,15 +732,18 @@ static struct zswap_pool *zswap_pool_cre |
| if (!pool) |
| return NULL; |
| |
| - /* unique name for each pool specifically required by zsmalloc */ |
| - snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); |
| - |
| - pool->zpool = zpool_create_pool(type, name, gfp); |
| - if (!pool->zpool) { |
| - pr_err("%s zpool not available\n", type); |
| - goto error; |
| + for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { |
| + /* unique name for each pool specifically required by zsmalloc */ |
| + snprintf(name, 38, "zswap%x", |
| + atomic_inc_return(&zswap_pools_count)); |
| + |
| + pool->zpools[i] = zpool_create_pool(type, name, gfp); |
| + if (!pool->zpools[i]) { |
| + pr_err("%s zpool not available\n", type); |
| + goto error; |
| + } |
| } |
| - pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); |
| + pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); |
| |
| strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); |
| |
| @@ -755,8 +775,8 @@ static struct zswap_pool *zswap_pool_cre |
| error: |
| if (pool->acomp_ctx) |
| free_percpu(pool->acomp_ctx); |
| - if (pool->zpool) |
| - zpool_destroy_pool(pool->zpool); |
| + while (i--) |
| + zpool_destroy_pool(pool->zpools[i]); |
| kfree(pool); |
| return NULL; |
| } |
| @@ -805,11 +825,14 @@ static struct zswap_pool *__zswap_pool_c |
| |
| static void zswap_pool_destroy(struct zswap_pool *pool) |
| { |
| + int i; |
| + |
| zswap_pool_debug("destroying", pool); |
| |
| cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); |
| free_percpu(pool->acomp_ctx); |
| - zpool_destroy_pool(pool->zpool); |
| + for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) |
| + zpool_destroy_pool(pool->zpools[i]); |
| kfree(pool); |
| } |
| |
| @@ -1073,7 +1096,7 @@ static int zswap_writeback_entry(struct |
| struct page *page; |
| struct scatterlist input, output; |
| struct crypto_acomp_ctx *acomp_ctx; |
| - struct zpool *pool = entry->pool->zpool; |
| + struct zpool *pool = zswap_find_zpool(entry); |
| |
| u8 *src, *tmp = NULL; |
| unsigned int dlen; |
| @@ -1214,6 +1237,7 @@ static int zswap_frontswap_store(unsigne |
| struct crypto_acomp_ctx *acomp_ctx; |
| struct obj_cgroup *objcg = NULL; |
| struct zswap_pool *pool; |
| + struct zpool *zpool; |
| int ret; |
| unsigned int dlen = PAGE_SIZE; |
| unsigned long handle, value; |
| @@ -1324,10 +1348,11 @@ static int zswap_frontswap_store(unsigne |
| } |
| |
| /* store */ |
| + zpool = zswap_find_zpool(entry); |
| gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; |
| - if (zpool_malloc_support_movable(entry->pool->zpool)) |
| + if (zpool_malloc_support_movable(zpool)) |
| gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; |
| - ret = zpool_malloc(entry->pool->zpool, dlen, gfp, &handle); |
| + ret = zpool_malloc(zpool, dlen, gfp, &handle); |
| if (ret == -ENOSPC) { |
| zswap_reject_compress_poor++; |
| goto put_dstmem; |
| @@ -1336,9 +1361,9 @@ static int zswap_frontswap_store(unsigne |
| zswap_reject_alloc_fail++; |
| goto put_dstmem; |
| } |
| - buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO); |
| + buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO); |
| memcpy(buf, dst, dlen); |
| - zpool_unmap_handle(entry->pool->zpool, handle); |
| + zpool_unmap_handle(zpool, handle); |
| mutex_unlock(acomp_ctx->mutex); |
| |
| /* populate entry */ |
| @@ -1409,6 +1434,7 @@ static int zswap_frontswap_load(unsigned |
| struct scatterlist input, output; |
| struct crypto_acomp_ctx *acomp_ctx; |
| u8 *src, *dst, *tmp; |
| + struct zpool *zpool; |
| unsigned int dlen; |
| int ret; |
| |
| @@ -1430,7 +1456,8 @@ static int zswap_frontswap_load(unsigned |
| goto stats; |
| } |
| |
| - if (!zpool_can_sleep_mapped(entry->pool->zpool)) { |
| + zpool = zswap_find_zpool(entry); |
| + if (!zpool_can_sleep_mapped(zpool)) { |
| tmp = kmalloc(entry->length, GFP_KERNEL); |
| if (!tmp) { |
| ret = -ENOMEM; |
| @@ -1440,12 +1467,12 @@ static int zswap_frontswap_load(unsigned |
| |
| /* decompress */ |
| dlen = PAGE_SIZE; |
| - src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO); |
| + src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); |
| |
| - if (!zpool_can_sleep_mapped(entry->pool->zpool)) { |
| + if (!zpool_can_sleep_mapped(zpool)) { |
| memcpy(tmp, src, entry->length); |
| src = tmp; |
| - zpool_unmap_handle(entry->pool->zpool, entry->handle); |
| + zpool_unmap_handle(zpool, entry->handle); |
| } |
| |
| acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); |
| @@ -1457,8 +1484,8 @@ static int zswap_frontswap_load(unsigned |
| ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); |
| mutex_unlock(acomp_ctx->mutex); |
| |
| - if (zpool_can_sleep_mapped(entry->pool->zpool)) |
| - zpool_unmap_handle(entry->pool->zpool, entry->handle); |
| + if (zpool_can_sleep_mapped(zpool)) |
| + zpool_unmap_handle(zpool, entry->handle); |
| else |
| kfree(tmp); |
| |
| @@ -1619,7 +1646,7 @@ static int zswap_setup(void) |
| pool = __zswap_pool_create_fallback(); |
| if (pool) { |
| pr_info("loaded using pool %s/%s\n", pool->tfm_name, |
| - zpool_get_type(pool->zpool)); |
| + zpool_get_type(pool->zpools[0])); |
| list_add(&pool->list, &zswap_pools); |
| zswap_has_pool = true; |
| } else { |
| _ |