| #! /bin/bash |
| # SPDX-License-Identifier: GPL-2.0 |
| # Copyright (C) 2021 SUSE Linux Products GmbH. All Rights Reserved. |
| # |
| # FSQA Test No. 239 |
| # |
| # Test a particular scenario where we fsync a directory, then move one of its |
| # child directories into another directory and then finally sync the log |
| # trees by fsyncing any other inode. We want to check that after a power failure |
| # we are able to mount the filesystem and that the moved directory exists only |
| # as a child of the directory we moved it into. |
| # |
| # Pull in the shared test preamble, then register this test by calling |
| # _begin_fstest with the arguments "auto", "quick" and "log" (presumably the |
| # test groups this test belongs to - confirm against common/preamble). |
| . ./common/preamble |
| _begin_fstest auto quick log |
| |
| # Cleanup handler that replaces the default one: it runs _cleanup_flakey, |
| # changes the working directory to / and then removes every file matching |
| # $tmp.* (errors from missing files are suppressed by "rm -f"). |
| _cleanup() { |
| _cleanup_flakey |
| cd / |
| rm -f ${tmp}.* |
| } |
| |
| . ./common/filter |
| . ./common/dmflakey |
| |
| # Prerequisites: going by the helper names, a scratch device and the device |
| # mapper "flakey" target must be available for this test to run. |
| _require_scratch |
| _require_dm_target flakey |
| |
| # The test requires a very specific layout of keys and items in the fs/subvolume |
| # btree to trigger a bug. So we want to make sure that on whatever platform we |
| # are, we have the same leaf/node size. |
| # |
| # Currently in btrfs the node/leaf size can not be smaller than the page |
| # size (but it can be greater than the page size). So use the largest |
| # supported node/leaf size (64K). |
| # |
| # Both stdout and stderr of mkfs are appended to $seqres.full. |
| _scratch_mkfs "-n 65536" >>$seqres.full 2>&1 |
| # This requirement is checked against $SCRATCH_DEV only after it was formatted |
| # above. |
| _require_metadata_journaling $SCRATCH_DEV |
| # The filesystem is used through the flakey setup from here on, which is what |
| # later allows simulating a power failure with _flakey_drop_and_remount. |
| _init_flakey |
| _mount_flakey |
| |
| # "testdir" is inode 257. |
| mkdir $SCRATCH_MNT/testdir |
| # Give "testdir" an explicit 755 mode (presumably so that the chmod to 700 done |
| # later is guaranteed to be a real change to its inode item - confirm). |
| chmod 755 $SCRATCH_MNT/testdir |
| |
| # Create several empty files to have the directory "testdir" with its items |
| # spread over several leaves (7 in this case). Redirecting the output of |
| # "echo -n" creates each file without writing any data to it. |
| for ((i = 1; i <= 1200; i++)); do |
| echo -n > $SCRATCH_MNT/testdir/file$i |
| done |
| |
| # Create our test directory "dira", inode number 1458, which gets all its items |
| # in leaf 7. |
| # |
| # The BTRFS_DIR_ITEM_KEY item for inode 257 ("testdir") that points to the entry |
| # named "dira" is in leaf 2, while the BTRFS_DIR_INDEX_KEY item that points to |
| # that entry is in leaf 3. |
| # |
| # For this particular filesystem node size (64K), file count and file names, we |
| # end up with the directory entry items from inode 257 in leaves 2 and 3, as |
| # previously mentioned - what matters for triggering the bug exercised by this |
| # test case is that those items are not placed in leaf 1, they must be placed in |
| # a leaf different from the one containing the inode item for inode 257. |
| # |
| # The corresponding BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY items for the |
| # parent inode (257) are the following: |
| # |
| # item 460 key (257 DIR_ITEM 3724298081) itemoff 48344 itemsize 34 |
| # location key (1458 INODE_ITEM 0) type DIR |
| # transid 6 data_len 0 name_len 4 |
| # name: dira |
| # |
| # and: |
| # |
| # item 771 key (257 DIR_INDEX 1202) itemoff 36673 itemsize 34 |
| # location key (1458 INODE_ITEM 0) type DIR |
| # transid 6 data_len 0 name_len 4 |
| # name: dira |
| # |
| mkdir $SCRATCH_MNT/testdir/dira |
| |
| # This fsync is for zoned mode. In zoned mode, we use a dedicated block group |
| # for the tree-log. That block group is created on-demand or assigned to a |
| # metadata block group if there is none. On the first mount of a file system, we |
| # need to create one because there is only one metadata block group available |
| # for the regular metadata. That creation of a new block group forces tree-log |
| # to be a full commit on that transaction, which prevents logging "testdir" and |
| # "dira" and screws up the result. |
| # |
| # Calling fsync here will create the dedicated block group, and let them be |
| # logged. |
| $XFS_IO_PROG -c "fsync" $SCRATCH_MNT |
| |
| # Make sure everything done so far is durably persisted, so that the steps |
| # below start from a state that is fully on disk. |
| sync |
| |
| # Now make a change to inode 257 ("testdir") that does not result in COWing |
| # leaves 2 and 3 - the leaves that contain the directory items pointing to |
| # inode 1458 (directory "dira"). |
| # |
| # Changing permissions, the owner/group, updating or adding a xattr, etc, will |
| # not change (COW) leaves 2 and 3. So for the sake of simplicity change the |
| # permissions of inode 257, which results in updating its inode item and |
| # therefore changes (COWs) only leaf 1. |
| # |
| chmod 700 $SCRATCH_MNT/testdir |
| |
| # Now fsync directory inode 257. |
| # |
| # Since only the first leaf was changed/COWed, we log the inode item of inode 257 |
| # and only the entries found in the first leaf, all with a key of type |
| # BTRFS_DIR_ITEM_KEY, and no keys of type BTRFS_DIR_INDEX_KEY, because they sort |
| # after the former type and none exist in the first leaf. |
| # |
| # We also log 3 items that represent ranges for dir items and dir indexes for |
| # which the log is authoritative: |
| # |
| # 1) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is |
| # authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset in the |
| # range [0, 2285968570] (the offset here is the crc32c of the dentry's |
| # name). The value 2285968570 corresponds to the offset of the first key |
| # of leaf 2 (which is of type BTRFS_DIR_ITEM_KEY); |
| # |
| # 2) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is |
| # authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset in the |
| # range [4293818216, (u64)-1] (the offset here is the crc32c of the dentry's |
| # name). The value 4293818216 corresponds to the offset of the highest key |
| # of type BTRFS_DIR_ITEM_KEY plus 1 (4293818215 + 1), which is located in |
| # leaf 2; |
| # |
| # 3) a key of type BTRFS_DIR_LOG_INDEX_KEY, with an offset of 1203, which |
| # indicates the log is authoritative for all keys of type BTRFS_DIR_INDEX_KEY |
| # that have an offset in the range [1203, (u64)-1]. The value 1203 corresponds |
| # to the offset of the last key of type BTRFS_DIR_INDEX_KEY plus 1 (1202 + 1), |
| # which is located in leaf 3. |
| # |
| # Also, because "testdir" is a directory and inode 1458 ("dira") is a child |
| # directory, we log inode 1458 too. |
| # |
| $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/testdir |
| |
| # Now move "dira", inode 1458, to be a child of the root directory (inode 256). |
| # |
| # Because this inode was previously logged, when "testdir" was fsynced, the |
| # log is updated so that the old inode reference, referring to inode 257 as |
| # the parent, is deleted and the new inode reference, referring to inode 256 |
| # as the parent, is added to the log. |
| # |
| # The destination is the mount point itself, so "dira" keeps its name and ends |
| # up directly under the filesystem's root. |
| mv $SCRATCH_MNT/testdir/dira $SCRATCH_MNT/ |
| |
| # Now change some file and fsync it. This guarantees that the log changes made |
| # by the previous move/rename operation are persisted. No special modification |
| # is needed: any change to any file followed by an fsync syncs the log. |
| # |
| # Here we write 64K of the byte 0xab at offset 0 of "file1" and fsync it; the |
| # output of xfs_io is appended to $seqres.full. |
| $XFS_IO_PROG -c "pwrite -S 0xab 0 64K" \ |
| -c "fsync" \ |
| $SCRATCH_MNT/testdir/file1 >>$seqres.full |
| |
| # Simulate a power failure and then mount again the filesystem to replay the log |
| # tree. We want to verify that we are able to mount the filesystem, meaning log |
| # replay was successful, and that directory inode 1458 ("dira") only has inode |
| # 256 (the filesystem's root) as its parent (and is no longer a child of inode |
| # 257). |
| # |
| # It used to happen that during log replay we would end up having inode 1458 |
| # (directory "dira") with 2 hard links, being a child of inode 257 ("testdir") |
| # and inode 256 (the filesystem's root). This resulted in the tree checker |
| # detecting the issue and causing the mount operation to fail (with -EIO). |
| # |
| # This happened because in the log we have the new name/parent for inode 1458, |
| # which results in adding the new dentry with inode 256 as the parent, but the |
| # previous dentry, under inode 257 was never removed - this is because the |
| # ranges for dir items and dir indexes of inode 257 for which the log is |
| # authoritative do not include the old dir item and dir index for the dentry |
| # of inode 257 referring to inode 1458: |
| # |
| # - for dir items, the log is authoritative for the ranges [0, 2285968570] and |
| # [4293818216, (u64)-1]. The dir item at inode 257 pointing to inode 1458 has |
| # a key of (257 DIR_ITEM 3724298081), as previously mentioned, so the dir item |
| # is not deleted when the log replay procedure processes the authoritative |
| # ranges, as 3724298081 is outside both ranges; |
| # |
| # - for dir indexes, the log is authoritative for the range [1203, (u64)-1], and |
| # the dir index item of inode 257 pointing to inode 1458 has a key of |
| # (257 DIR_INDEX 1202), as previously mentioned, so the dir index item is not |
| # deleted when the log replay procedure processes the authoritative range. |
| # |
| _flakey_drop_and_remount |
| |
| # After log replay "dira" must exist only directly under the mount point: |
| # report it if the old location is still a directory or if the new location |
| # is not one. |
| if [ -d $SCRATCH_MNT/testdir/dira ]; then |
| echo "/testdir/dira still exists" |
| fi |
| if ! [ -d $SCRATCH_MNT/dira ]; then |
| echo "/dira does not exists" |
| fi |
| |
| # For the sake of completeness, also show that the data written to file1 was |
| # not lost: print a header line (passed through _filter_scratch) followed by |
| # a dump of the file as hex bytes with decimal offsets. |
| echo "File $SCRATCH_MNT/testdir/file1 data:" | _filter_scratch |
| od -A d -t x1 $SCRATCH_MNT/testdir/file1 |
| |
| # All checks are done, unmount the filesystem from the flakey setup. |
| _unmount_flakey |
| |
| # Set status to 0 right before exiting (presumably this is what the test |
| # harness reads to tell the test ran to completion - confirm in |
| # common/preamble). |
| status=0 |
| exit |