| From: Chengming Zhou <zhouchengming@bytedance.com> |
| Subject: mm/zswap: optimize and cleanup the invalidation of duplicate entry |
| Date: Fri, 9 Feb 2024 04:41:12 +0000 |
| |
| We may encounter a duplicate entry in zswap_store():
| |
| 1. A swap slot freed to the per-cpu swap cache doesn't invalidate its
|    zswap entry, and the slot may then get reused. This case has been fixed.
| |
| 2. In !exclusive load mode, a swapped-in folio leaves its zswap entry
|    on the tree and may later be swapped out again. This mode has been removed.
| |
| 3. A folio can be dirtied again after zswap_store(), so it needs to be
|    stored with zswap_store() again. This case must be handled correctly.
| |
| So we must invalidate any old duplicate entry before inserting the new
| one; this doesn't actually have to be done at the beginning of
| zswap_store().
| |
| The benefit is that we don't need to lock the tree twice on the normal
| store success path. Also clean up the insertion loop while we are here.
| |
| Note that we still need to invalidate the old duplicate entry when the
| store fails or zswap is disabled; otherwise, the new data in the swapfile
| could be overwritten by the old data in the zswap pool during LRU writeback.
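| 
| For clarity, a condensed sketch of the resulting zswap_store() flow is
| shown below. It only summarizes the diff that follows; compression,
| error paths and reclaim details are elided:
| 
| 	bool zswap_store(struct folio *folio)
| 	{
| 		...
| 		if (!zswap_enabled)
| 			goto check_old;
| 
| 		/* compress and allocate the new entry, then map it */
| 		spin_lock(&tree->lock);
| 		/*
| 		 * The folio may have been dirtied again, invalidate the
| 		 * possibly stale entry before inserting the new entry.
| 		 */
| 		if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
| 			zswap_invalidate_entry(tree, dupentry);
| 			WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
| 		}
| 		spin_unlock(&tree->lock);
| 		return true;
| 
| 	check_old:
| 		/* store failed or zswap disabled: drop any stale entry */
| 		spin_lock(&tree->lock);
| 		entry = zswap_rb_search(&tree->rbroot, offset);
| 		if (entry)
| 			zswap_invalidate_entry(tree, entry);
| 		spin_unlock(&tree->lock);
| 		return false;
| 	}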
| |
| Link: https://lkml.kernel.org/r/20240209044112.3883835-1-chengming.zhou@linux.dev |
| Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com> |
| Acked-by: Johannes Weiner <hannes@cmpxchg.org> |
| Acked-by: Yosry Ahmed <yosryahmed@google.com> |
| Acked-by: Chris Li <chrisl@kernel.org> |
| Acked-by: Nhat Pham <nphamcs@gmail.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/zswap.c | 34 ++++++++++++++++------------------ |
| 1 file changed, 16 insertions(+), 18 deletions(-) |
| |
| --- a/mm/zswap.c~mm-zswap-optimize-and-cleanup-the-invalidation-of-duplicate-entry |
| +++ a/mm/zswap.c |
| @@ -1517,19 +1517,8 @@ bool zswap_store(struct folio *folio) |
| if (folio_test_large(folio)) |
| return false; |
| |
| - /* |
| - * If this is a duplicate, it must be removed before attempting to store |
| - * it, otherwise, if the store fails the old page won't be removed from |
| - * the tree, and it might be written back overriding the new data. |
| - */ |
| - spin_lock(&tree->lock); |
| - entry = zswap_rb_search(&tree->rbroot, offset); |
| - if (entry) |
| - zswap_invalidate_entry(tree, entry); |
| - spin_unlock(&tree->lock); |
| - |
| if (!zswap_enabled) |
| - return false; |
| + goto check_old; |
| |
| objcg = get_obj_cgroup_from_folio(folio); |
| if (objcg && !obj_cgroup_may_zswap(objcg)) { |
| @@ -1609,14 +1598,12 @@ insert_entry: |
| /* map */ |
| spin_lock(&tree->lock); |
| /* |
| - * A duplicate entry should have been removed at the beginning of this |
| - * function. Since the swap entry should be pinned, if a duplicate is |
| - * found again here it means that something went wrong in the swap |
| - * cache. |
| + * The folio may have been dirtied again, invalidate the |
| + * possibly stale entry before inserting the new entry. |
| */ |
| - while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { |
| - WARN_ON(1); |
| + if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { |
| zswap_invalidate_entry(tree, dupentry); |
| + WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry)); |
| } |
| if (entry->length) { |
| INIT_LIST_HEAD(&entry->lru); |
| @@ -1639,6 +1626,17 @@ freepage: |
| reject: |
| if (objcg) |
| obj_cgroup_put(objcg); |
| +check_old: |
| + /* |
| + * If the zswap store fails or zswap is disabled, we must invalidate the |
| + * possibly stale entry which was previously stored at this offset. |
| + * Otherwise, writeback could overwrite the new data in the swapfile. |
| + */ |
| + spin_lock(&tree->lock); |
| + entry = zswap_rb_search(&tree->rbroot, offset); |
| + if (entry) |
| + zswap_invalidate_entry(tree, entry); |
| + spin_unlock(&tree->lock); |
| return false; |
| |
| shrink: |
| _ |