| From 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a Mon Sep 17 00:00:00 2001 |
| From: Qu Wenruo <wqu@suse.com> |
| Date: Tue, 15 Mar 2022 19:28:05 +0800 |
| Subject: btrfs: avoid defragging extents whose next extents are not targets |
| |
| From: Qu Wenruo <wqu@suse.com> |
| |
| commit 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a upstream. |
| |
| [BUG] |
| There is a report that autodefrag is defragging single sector, which |
| is completely waste of IO, and no help for defragging: |
| |
| btrfs-cleaner-808 defrag_one_locked_range: root=256 ino=651122 start=0 len=4096 |
| |
| [CAUSE] |
| In defrag_collect_targets(), we check if the current range (A) can be merged |
| with next one (B). |
| |
| If mergeable, we will add range A into target for defrag. |
| |
| However there is a catch for autodefrag, when checking mergeability |
| against range B, we intentionally pass 0 as @newer_than, hoping to get a |
| higher chance to merge with the next extent. |
| |
| But in the next iteration, range B will looked up by defrag_lookup_extent(), |
| with non-zero @newer_than. |
| |
| And if range B is not really newer, it will rejected directly, causing |
| only range A being defragged, while we expect to defrag both range A and |
| B. |
| |
| [FIX] |
| Since the root cause is the difference in check condition of |
| defrag_check_next_extent() and defrag_collect_targets(), we fix it by: |
| |
| 1. Pass @newer_than to defrag_check_next_extent() |
| 2. Pass @extent_thresh to defrag_check_next_extent() |
| |
| This makes the check between defrag_collect_targets() and |
| defrag_check_next_extent() more consistent. |
| |
| While there is still some minor difference, the remaining checks are |
| focus on runtime flags like writeback/delalloc, which are mostly |
| transient and safe to be checked only in defrag_collect_targets(). |
| |
| Link: https://github.com/btrfs/linux/issues/423#issuecomment-1066981856 |
| CC: stable@vger.kernel.org # 5.16+ |
| Reviewed-by: Filipe Manana <fdmanana@suse.com> |
| Signed-off-by: Qu Wenruo <wqu@suse.com> |
| Signed-off-by: David Sterba <dsterba@suse.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| fs/btrfs/ioctl.c | 20 ++++++++++++++------ |
| 1 file changed, 14 insertions(+), 6 deletions(-) |
| |
| --- a/fs/btrfs/ioctl.c |
| +++ b/fs/btrfs/ioctl.c |
| @@ -1189,7 +1189,7 @@ static u32 get_extent_max_capacity(const |
| } |
| |
| static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, |
| - bool locked) |
| + u32 extent_thresh, u64 newer_than, bool locked) |
| { |
| struct extent_map *next; |
| bool ret = false; |
| @@ -1199,11 +1199,12 @@ static bool defrag_check_next_extent(str |
| return false; |
| |
| /* |
| - * We want to check if the next extent can be merged with the current |
| - * one, which can be an extent created in a past generation, so we pass |
| - * a minimum generation of 0 to defrag_lookup_extent(). |
| + * Here we need to pass @newer_then when checking the next extent, or |
| + * we will hit a case we mark current extent for defrag, but the next |
| + * one will not be a target. |
| + * This will just cause extra IO without really reducing the fragments. |
| */ |
| - next = defrag_lookup_extent(inode, em->start + em->len, 0, locked); |
| + next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked); |
| /* No more em or hole */ |
| if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) |
| goto out; |
| @@ -1215,6 +1216,13 @@ static bool defrag_check_next_extent(str |
| */ |
| if (next->len >= get_extent_max_capacity(em)) |
| goto out; |
| + /* Skip older extent */ |
| + if (next->generation < newer_than) |
| + goto out; |
| + /* Also check extent size */ |
| + if (next->len >= extent_thresh) |
| + goto out; |
| + |
| ret = true; |
| out: |
| free_extent_map(next); |
| @@ -1420,7 +1428,7 @@ static int defrag_collect_targets(struct |
| goto next; |
| |
| next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, |
| - locked); |
| + extent_thresh, newer_than, locked); |
| if (!next_mergeable) { |
| struct defrag_target_range *last; |
| |