tests/btrfs/239 - pub/scm/linux/kernel/git/jlayton/xfstests-dev - Git at Google

 #! /bin/bash
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2021 SUSE Linux Products GmbH. All Rights Reserved.
 #
 # FSQA Test No. 239
 #
 # Test a particular scenario where we fsync a directory, then move one of its
 # children directories into another directory and then finally sync the log
 # trees by fsyncing any other inode. We want to check that after a power failure
 # we are able to mount the filesystem and that the moved directory exists only
 # as a child of the directory we moved it into.
 #
 # Load the common test preamble, then start the test. The arguments passed to
 # _begin_fstest ("auto quick log") are presumably the test groups this case
 # belongs to -- confirm against common/preamble.
 . ./common/preamble
 _begin_fstest auto quick log

 # Override the default cleanup function.
 #
 # Runs the flakey teardown helper first, then moves to "/" and finally deletes
 # this test's temporary files (everything matching "$tmp.*").
 _cleanup()
 {
 	_cleanup_flakey
 	cd /
 	# Quote $tmp so a prefix containing whitespace or glob characters is not
 	# word-split, and skip the removal when $tmp is empty: after the "cd /"
 	# above, a bare ".*" pattern would match the hidden entries of the root
 	# directory.
 	if [ -n "$tmp" ]; then
 		rm -f -- "$tmp".*
 	fi
 }

 # Source the helper libraries used below (e.g. _filter_scratch and the
 # *_flakey functions are presumably provided by these two files).
 . ./common/filter
 . ./common/dmflakey

 # Preconditions: a scratch device and the device-mapper "flakey" target.
 _require_scratch
 _require_dm_target flakey

 # The test requires a very specific layout of keys and items in the fs/subvolume
 # btree to trigger a bug. So we want to make sure that on whatever platform we
 # are, we have the same leaf/node size.
 #
 # Currently in btrfs the node/leaf size cannot be smaller than the page
 # size (but it can be greater than the page size). So use the largest
 # supported node/leaf size (64K).
 #
 # The mkfs output (stdout and stderr) is appended to $seqres.full.
 _scratch_mkfs "-n 65536" >>$seqres.full 2>&1
 _require_metadata_journaling $SCRATCH_DEV
 # Presumably sets up the flakey device on top of the scratch device and mounts
 # it at $SCRATCH_MNT -- confirm against common/dmflakey.
 _init_flakey
 _mount_flakey

 # "testdir" is inode 257. It starts out with mode 755; the mode is changed
 # again later in the test.
 mkdir $SCRATCH_MNT/testdir
 chmod 755 $SCRATCH_MNT/testdir

 # Create several empty files (file1 ... file1200) to have the directory
 # "testdir" with its items spread over several leaves (7 in this case).
 # The file count and the file names determine the resulting btree layout, so
 # they must not be changed.
 for ((i = 1; i <= 1200; i++)); do
 	echo -n > $SCRATCH_MNT/testdir/file$i
 done

 # Create our test directory "dira", inode number 1458, which gets all its items
 # in leaf 7.
 #
 # The BTRFS_DIR_ITEM_KEY item for inode 257 ("testdir") that points to the entry
 # named "dira" is in leaf 2, while the BTRFS_DIR_INDEX_KEY item that points to
 # that entry is in leaf 3.
 #
 # For this particular filesystem node size (64K), file count and file names, we
 # end up with the directory entry items from inode 257 in leaves 2 and 3, as
 # previously mentioned - what matters for triggering the bug exercised by this
 # test case is that those items are not placed in leaf 1, they must be placed in
 # a leaf different from the one containing the inode item for inode 257.
 #
 # The corresponding BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY items for the
 # parent inode (257) are the following:
 #
 #           item 460 key (257 DIR_ITEM 3724298081) itemoff 48344 itemsize 34
 #                location key (1458 INODE_ITEM 0) type DIR
 #                transid 6 data_len 0 name_len 4
 #                name: dira
 #
 # and:
 #
 #           item 771 key (257 DIR_INDEX 1202) itemoff 36673 itemsize 34
 #                location key (1458 INODE_ITEM 0) type DIR
 #                transid 6 data_len 0 name_len 4
 #                name: dira
 #
 mkdir $SCRATCH_MNT/testdir/dira

 # This fsync is for zoned mode. In zoned mode, we use a dedicated block
 # group for the tree-log. That block group is created on demand or assigned to a
 # metadata block group if there is none. On the first mount of a file system, we
 # need to create one because there is only one metadata block group available
 # for the regular metadata. That creation of a new block group forces the
 # tree-log to be a full commit on that transaction, which prevents logging
 # "testdir" and "dira" and screws up the result.
 #
 # Calling fsync here will create the dedicated block group, and let them be
 # logged.
 $XFS_IO_PROG -c "fsync" $SCRATCH_MNT

 # Make sure everything done so far is durably persisted.
 sync

 # Now do a change to inode 257 ("testdir") that does not result in COWing leaves
 # 2 and 3 - the leaves that contain the directory items pointing to inode 1458
 # (directory "dira").
 #
 # Changing permissions, the owner/group, updating or adding an xattr, etc, will
 # not change (COW) leaves 2 and 3. So for the sake of simplicity change the
 # permissions of inode 257, which results in updating its inode item and
 # therefore changes (COWs) only leaf 1.
 #
 chmod 700 $SCRATCH_MNT/testdir

 # Now fsync directory inode 257.
 #
 # Since only the first leaf was changed/COWed, we log the inode item of inode 257
 # and only the entries found in the first leaf, all with a key of type
 # BTRFS_DIR_ITEM_KEY, and no keys of type BTRFS_DIR_INDEX_KEY, because they sort
 # after the former type and none exist in the first leaf.
 #
 # We also log 3 items that represent ranges for dir items and dir indexes for
 # which the log is authoritative:
 #
 # 1) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
 #    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset in the
 #    range [0, 2285968570] (the offset here is the crc32c of the dentry's
 #    name). The value 2285968570 corresponds to the offset of the first key
 #    of leaf 2 (which is of type BTRFS_DIR_ITEM_KEY);
 #
 # 2) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
 #    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset in the
 #    range [4293818216, (u64)-1] (the offset here is the crc32c of the dentry's
 #    name). The value 4293818216 corresponds to the offset of the highest key
 #    of type BTRFS_DIR_ITEM_KEY plus 1 (4293818215 + 1), which is located in
 #    leaf 2;
 #
 # 3) a key of type BTRFS_DIR_LOG_INDEX_KEY, with an offset of 1203, which
 #    indicates the log is authoritative for all keys of type BTRFS_DIR_INDEX_KEY
 #    that have an offset in the range [1203, (u64)-1]. The value 1203 corresponds
 #    to the offset of the last key of type BTRFS_DIR_INDEX_KEY plus 1 (1202 + 1),
 #    which is located in leaf 3.
 #
 # Also, because "testdir" is a directory and inode 1458 ("dira") is a child
 # directory, we log inode 1458 too.
 #
 $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/testdir

 # Now move "dira", inode 1458, to be a child of the root directory (inode 256).
 #
 # Because this inode was previously logged, when "testdir" was fsynced, the
 # log is updated so that the old inode reference, referring to inode 257 as
 # the parent, is deleted and the new inode reference, referring to inode 256
 # as the parent, is added to the log.
 #
 mv $SCRATCH_MNT/testdir/dira $SCRATCH_MNT/

 # Now change some file and fsync it. This guarantees the log changes made by
 # the previous move/rename operation are persisted. We do not need to do any
 # special modification to the file, just any change to any file and sync the
 # log. Here 64K of 0xab bytes are written at offset 0 of "file1"; the xfs_io
 # output is appended to $seqres.full.
 $XFS_IO_PROG -c "pwrite -S 0xab 0 64K" \
 	     -c "fsync" \
 	     $SCRATCH_MNT/testdir/file1 >>$seqres.full

 # Simulate a power failure and then mount the filesystem again to replay the log
 # tree. We want to verify that we are able to mount the filesystem, meaning log
 # replay was successful, and that directory inode 1458 ("dira") only has inode
 # 256 (the filesystem's root) as its parent (and is no longer a child of inode
 # 257).
 #
 # It used to happen that during log replay we would end up having inode 1458
 # (directory "dira") with 2 hard links, being a child of inode 257 ("testdir")
 # and inode 256 (the filesystem's root). This resulted in the tree checker
 # detecting the issue and causing the mount operation to fail (with -EIO).
 #
 # This happened because in the log we have the new name/parent for inode 1458,
 # which results in adding the new dentry with inode 256 as the parent, but the
 # previous dentry, under inode 257, was never removed - this is because the
 # ranges for dir items and dir indexes of inode 257 for which the log is
 # authoritative do not include the old dir item and dir index for the dentry
 # of inode 257 referring to inode 1458:
 #
 # - for dir items, the log is authoritative for the ranges [0, 2285968570] and
 #   [4293818216, (u64)-1]. The dir item at inode 257 pointing to inode 1458 has
 #   a key of (257 DIR_ITEM 3724298081), as previously mentioned, so the dir item
 #   is not deleted when the log replay procedure processes the authoritative
 #   ranges, as 3724298081 is outside both ranges;
 #
 # - for dir indexes, the log is authoritative for the range [1203, (u64)-1], and
 #   the dir index item of inode 257 pointing to inode 1458 has a key of
 #   (257 DIR_INDEX 1202), as previously mentioned, so the dir index item is not
 #   deleted when the log replay procedure processes the authoritative range.
 #
 _flakey_drop_and_remount

 # Post-replay checks: "dira" must no longer be found under "testdir" and must
 # be present directly under the scratch mount point. Nothing is printed when
 # both conditions hold.
 if [ -d $SCRATCH_MNT/testdir/dira ]; then
 	echo "/testdir/dira still exists"
 fi
 if ! [ -d $SCRATCH_MNT/dira ]; then
 	echo "/dira does not exists"
 fi

 # While at it also check that the data we wrote was not lost, just for the sake
 # of completeness. The header line goes through _filter_scratch (presumably to
 # hide the actual scratch mount path); od then dumps the file as one-byte hex
 # values with decimal offsets.
 echo "File $SCRATCH_MNT/testdir/file1 data:" | _filter_scratch
 od -A d -t x1 $SCRATCH_MNT/testdir/file1

 _unmount_flakey

 # Record success before exiting; "status" is presumably read by the harness'
 # exit handling -- confirm against common/preamble.
 status=0
 exit
	#! /bin/bash
	# SPDX-License-Identifier: GPL-2.0
	# Copyright (C) 2021 SUSE Linux Products GmbH. All Rights Reserved.
	#
	# FSQA Test No. 239
	#
	# Test a particular scenario where we fsync a directory, then move one of its
	# children directories into another directory and then finally sync the log
	# trees by fsyncing any other inode. We want to check that after a power failure
	# we are able to mount the filesystem and that the moved directory exists only
	# as a child of the directory we moved it into.
	#
	# Load the common test preamble, then start the test. The arguments passed to
	# _begin_fstest ("auto quick log") are presumably the test groups this case
	# belongs to -- confirm against common/preamble.
	. ./common/preamble
	_begin_fstest auto quick log

	# Override the default cleanup function.
	#
	# Runs the flakey teardown helper first, then moves to "/" and finally deletes
	# this test's temporary files (everything matching "$tmp.*").
	_cleanup()
	{
		_cleanup_flakey
		cd /
		# Quote $tmp so a prefix containing whitespace or glob characters is not
		# word-split, and skip the removal when $tmp is empty: after the "cd /"
		# above, a bare ".*" pattern would match the hidden entries of the root
		# directory.
		if [ -n "$tmp" ]; then
			rm -f -- "$tmp".*
		fi
	}

	# Source the helper libraries used below (e.g. _filter_scratch and the
	# *_flakey functions are presumably provided by these two files).
	. ./common/filter
	. ./common/dmflakey

	# Preconditions: a scratch device and the device-mapper "flakey" target.
	_require_scratch
	_require_dm_target flakey

	# The test requires a very specific layout of keys and items in the fs/subvolume
	# btree to trigger a bug. So we want to make sure that on whatever platform we
	# are, we have the same leaf/node size.
	#
	# Currently in btrfs the node/leaf size cannot be smaller than the page
	# size (but it can be greater than the page size). So use the largest
	# supported node/leaf size (64K).
	#
	# The mkfs output (stdout and stderr) is appended to $seqres.full.
	_scratch_mkfs "-n 65536" >>$seqres.full 2>&1
	_require_metadata_journaling $SCRATCH_DEV
	# Presumably sets up the flakey device on top of the scratch device and mounts
	# it at $SCRATCH_MNT -- confirm against common/dmflakey.
	_init_flakey
	_mount_flakey

	# "testdir" is inode 257. It starts out with mode 755; the mode is changed
	# again later in the test.
	mkdir $SCRATCH_MNT/testdir
	chmod 755 $SCRATCH_MNT/testdir

	# Create several empty files (file1 ... file1200) to have the directory
	# "testdir" with its items spread over several leaves (7 in this case).
	# The file count and the file names determine the resulting btree layout, so
	# they must not be changed.
	for ((i = 1; i <= 1200; i++)); do
	echo -n > $SCRATCH_MNT/testdir/file$i
	done

	# Create our test directory "dira", inode number 1458, which gets all its items
	# in leaf 7.
	#
	# The BTRFS_DIR_ITEM_KEY item for inode 257 ("testdir") that points to the entry
	# named "dira" is in leaf 2, while the BTRFS_DIR_INDEX_KEY item that points to
	# that entry is in leaf 3.
	#
	# For this particular filesystem node size (64K), file count and file names, we
	# end up with the directory entry items from inode 257 in leaves 2 and 3, as
	# previously mentioned - what matters for triggering the bug exercised by this
	# test case is that those items are not placed in leaf 1, they must be placed in
	# a leaf different from the one containing the inode item for inode 257.
	#
	# The corresponding BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY items for the
	# parent inode (257) are the following:
	#
	#           item 460 key (257 DIR_ITEM 3724298081) itemoff 48344 itemsize 34
	#                location key (1458 INODE_ITEM 0) type DIR
	#                transid 6 data_len 0 name_len 4
	#                name: dira
	#
	# and:
	#
	#           item 771 key (257 DIR_INDEX 1202) itemoff 36673 itemsize 34
	#                location key (1458 INODE_ITEM 0) type DIR
	#                transid 6 data_len 0 name_len 4
	#                name: dira
	#
	mkdir $SCRATCH_MNT/testdir/dira

	# This fsync is for zoned mode. In zoned mode, we use a dedicated block
	# group for the tree-log. That block group is created on demand or assigned to a
	# metadata block group if there is none. On the first mount of a file system, we
	# need to create one because there is only one metadata block group available
	# for the regular metadata. That creation of a new block group forces the
	# tree-log to be a full commit on that transaction, which prevents logging
	# "testdir" and "dira" and screws up the result.
	#
	# Calling fsync here will create the dedicated block group, and let them be
	# logged.
	$XFS_IO_PROG -c "fsync" $SCRATCH_MNT

	# Make sure everything done so far is durably persisted.
	sync

	# Now do a change to inode 257 ("testdir") that does not result in COWing leaves
	# 2 and 3 - the leaves that contain the directory items pointing to inode 1458
	# (directory "dira").
	#
	# Changing permissions, the owner/group, updating or adding an xattr, etc, will
	# not change (COW) leaves 2 and 3. So for the sake of simplicity change the
	# permissions of inode 257, which results in updating its inode item and
	# therefore changes (COWs) only leaf 1.
	#
	chmod 700 $SCRATCH_MNT/testdir

	# Now fsync directory inode 257.
	#
	# Since only the first leaf was changed/COWed, we log the inode item of inode 257
	# and only the entries found in the first leaf, all with a key of type
	# BTRFS_DIR_ITEM_KEY, and no keys of type BTRFS_DIR_INDEX_KEY, because they sort
	# after the former type and none exist in the first leaf.
	#
	# We also log 3 items that represent ranges for dir items and dir indexes for
	# which the log is authoritative:
	#
	# 1) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
	#    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset in the
	#    range [0, 2285968570] (the offset here is the crc32c of the dentry's
	#    name). The value 2285968570 corresponds to the offset of the first key
	#    of leaf 2 (which is of type BTRFS_DIR_ITEM_KEY);
	#
	# 2) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
	#    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset in the
	#    range [4293818216, (u64)-1] (the offset here is the crc32c of the dentry's
	#    name). The value 4293818216 corresponds to the offset of the highest key
	#    of type BTRFS_DIR_ITEM_KEY plus 1 (4293818215 + 1), which is located in
	#    leaf 2;
	#
	# 3) a key of type BTRFS_DIR_LOG_INDEX_KEY, with an offset of 1203, which
	#    indicates the log is authoritative for all keys of type BTRFS_DIR_INDEX_KEY
	#    that have an offset in the range [1203, (u64)-1]. The value 1203 corresponds
	#    to the offset of the last key of type BTRFS_DIR_INDEX_KEY plus 1 (1202 + 1),
	#    which is located in leaf 3.
	#
	# Also, because "testdir" is a directory and inode 1458 ("dira") is a child
	# directory, we log inode 1458 too.
	#
	$XFS_IO_PROG -c "fsync" $SCRATCH_MNT/testdir

	# Now move "dira", inode 1458, to be a child of the root directory (inode 256).
	#
	# Because this inode was previously logged, when "testdir" was fsynced, the
	# log is updated so that the old inode reference, referring to inode 257 as
	# the parent, is deleted and the new inode reference, referring to inode 256
	# as the parent, is added to the log.
	#
	mv $SCRATCH_MNT/testdir/dira $SCRATCH_MNT/

	# Now change some file and fsync it. This guarantees the log changes made by
	# the previous move/rename operation are persisted. We do not need to do any
	# special modification to the file, just any change to any file and sync the
	# log. Here 64K of 0xab bytes are written at offset 0 of "file1"; the xfs_io
	# output is appended to $seqres.full.
	$XFS_IO_PROG -c "pwrite -S 0xab 0 64K" \
	-c "fsync" \
	$SCRATCH_MNT/testdir/file1 >>$seqres.full

	# Simulate a power failure and then mount the filesystem again to replay the log
	# tree. We want to verify that we are able to mount the filesystem, meaning log
	# replay was successful, and that directory inode 1458 ("dira") only has inode
	# 256 (the filesystem's root) as its parent (and is no longer a child of inode
	# 257).
	#
	# It used to happen that during log replay we would end up having inode 1458
	# (directory "dira") with 2 hard links, being a child of inode 257 ("testdir")
	# and inode 256 (the filesystem's root). This resulted in the tree checker
	# detecting the issue and causing the mount operation to fail (with -EIO).
	#
	# This happened because in the log we have the new name/parent for inode 1458,
	# which results in adding the new dentry with inode 256 as the parent, but the
	# previous dentry, under inode 257, was never removed - this is because the
	# ranges for dir items and dir indexes of inode 257 for which the log is
	# authoritative do not include the old dir item and dir index for the dentry
	# of inode 257 referring to inode 1458:
	#
	# - for dir items, the log is authoritative for the ranges [0, 2285968570] and
	#   [4293818216, (u64)-1]. The dir item at inode 257 pointing to inode 1458 has
	#   a key of (257 DIR_ITEM 3724298081), as previously mentioned, so the dir item
	#   is not deleted when the log replay procedure processes the authoritative
	#   ranges, as 3724298081 is outside both ranges;
	#
	# - for dir indexes, the log is authoritative for the range [1203, (u64)-1], and
	#   the dir index item of inode 257 pointing to inode 1458 has a key of
	#   (257 DIR_INDEX 1202), as previously mentioned, so the dir index item is not
	#   deleted when the log replay procedure processes the authoritative range.
	#
	_flakey_drop_and_remount

	# Post-replay checks: "dira" must no longer be found under "testdir" and must
	# be present directly under the scratch mount point. Nothing is printed when
	# both conditions hold.
	#
	# The "||" must be a real shell operator: written as "\|\|" it is handed to
	# "[" as a literal argument, the test errors out and the message is never
	# printed.
	[ -d $SCRATCH_MNT/testdir/dira ] && echo "/testdir/dira still exists"
	[ -d $SCRATCH_MNT/dira ] || echo "/dira does not exists"

	# While at it also check that the data we wrote was not lost, just for the sake
	# of completeness. The header line goes through _filter_scratch (presumably to
	# hide the actual scratch mount path); od then dumps the file as one-byte hex
	# values with decimal offsets.
	#
	# The "|" must be a real pipe: written as "\|" echo would print a literal
	# "| _filter_scratch" and the header would never be filtered.
	echo "File $SCRATCH_MNT/testdir/file1 data:" | _filter_scratch
	od -A d -t x1 $SCRATCH_MNT/testdir/file1

	_unmount_flakey

	# Record success before exiting; "status" is presumably read by the harness'
	# exit handling -- confirm against common/preamble.
	status=0
	exit