| From 333995d1bd68d9976fc87c58672b41be370c341e Mon Sep 17 00:00:00 2001 |
| From: Filipe Manana <fdmanana@suse.com> |
| Date: Mon, 9 Mar 2020 12:41:05 +0000 |
| Subject: [PATCH] btrfs: fix missing file extent item for hole after ranged |
| fsync |
| |
| commit 95418ed1d10774cd9a49af6f39e216c1256f1eeb upstream. |
| |
| When doing a fast fsync for a range that starts at an offset greater than |
| zero, we can end up with a log that when replayed causes the respective |
| inode to miss a file extent item representing a hole if we are not using the |
| NO_HOLES feature. This is because for fast fsyncs we don't log any extents |
| that cover a range different from the one requested in the fsync. |
| |
| Example scenario to trigger it: |
| |
| $ mkfs.btrfs -O ^no-holes -f /dev/sdd |
| $ mount /dev/sdd /mnt |
| |
| # Create a file with a single 256K extent and fsync it to clear the full |
| # sync bit in the inode - we want the msync below to trigger a fast fsync. |
| $ xfs_io -f -c "pwrite -S 0xab 0 256K" -c "fsync" /mnt/foo |
| |
| # Force a transaction commit and wipe out the log tree. |
| $ sync |
| |
| # Dirty 768K of data, increasing the file size to 1M, and flush only |
| # the range from 256K to 512K without updating the log tree |
| # (sync_file_range() does not trigger fsync, it only starts writeback |
| # and waits for it to finish). |
| |
| $ xfs_io -c "pwrite -S 0xcd 256K 768K" /mnt/foo |
| $ xfs_io -c "sync_range -abw 256K 256K" /mnt/foo |
| |
| # Now dirty the range from 768K to 1M again and sync that range. |
| $ xfs_io -c "mmap -w 768K 256K" \ |
| -c "mwrite -S 0xef 768K 256K" \ |
| -c "msync -s 768K 256K" \ |
| -c "munmap" \ |
| /mnt/foo |
| |
| <power fail> |
| |
| # Mount to replay the log. |
| $ mount /dev/sdd /mnt |
| $ umount /mnt |
| |
| $ btrfs check /dev/sdd |
| Opening filesystem to check... |
| Checking filesystem on /dev/sdd |
| UUID: 482fb574-b288-478e-a190-a9c44a78fca6 |
| [1/7] checking root items |
| [2/7] checking extents |
| [3/7] checking free space cache |
| [4/7] checking fs roots |
| root 5 inode 257 errors 100, file extent discount |
| Found file extent holes: |
| start: 262144, len: 524288 |
| ERROR: errors found in fs roots |
| found 720896 bytes used, error(s) found |
| total csum bytes: 512 |
| total tree bytes: 131072 |
| total fs tree bytes: 32768 |
| total extent tree bytes: 16384 |
| btree space waste bytes: 123514 |
| file data blocks allocated: 589824 |
| referenced 589824 |
| |
| Fix this issue by setting the range to full (0 to LLONG_MAX) when the |
| NO_HOLES feature is not enabled. This results in extra work being done |
| but it gives the guarantee we don't end up with missing holes after |
| replaying the log. |
| |
| CC: stable@vger.kernel.org # 4.19+ |
| Reviewed-by: Josef Bacik <josef@toxicpanda.com> |
| Signed-off-by: Filipe Manana <fdmanana@suse.com> |
| Signed-off-by: David Sterba <dsterba@suse.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c |
| index 95c3b7b22e06..c5a0ca8e4cf8 100644 |
| --- a/fs/btrfs/file.c |
| +++ b/fs/btrfs/file.c |
| @@ -2082,6 +2082,16 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
| btrfs_init_log_ctx(&ctx, inode); |
| |
| /* |
| + * Set the range to full if the NO_HOLES feature is not enabled. |
| + * This is to avoid missing file extent items representing holes after |
| + * replaying the log. |
| + */ |
| + if (!btrfs_fs_incompat(fs_info, NO_HOLES)) { |
| + start = 0; |
| + end = LLONG_MAX; |
| + } |
| + |
| + /* |
| * We write the dirty pages in the range and wait until they complete |
| * out of the ->i_mutex. If so, we can flush the dirty pages by |
| * multi-task, and make the performance up. See |
| -- |
| 2.7.4 |
| |