#! /bin/bash
# FSQA Test No. 038
#
# This test was motivated by btrfs issues, but it's generic enough as it
# doesn't use any btrfs specific features.
#
# Stress btrfs' block group allocation and deallocation while running fstrim in
# parallel. Part of the goal is also to get data block groups deallocated so
# that new metadata block groups, using the same physical device space ranges,
# get allocated while fstrim is running. This caused several issues ranging
# from invalid memory accesses, kernel crashes, metadata or data corruption,
# free space cache inconsistencies, free space leaks and memory leaks.
#
# These issues were fixed by the following btrfs linux kernel patches:
#
# Btrfs: fix invalid block group rbtree access after bg is removed
# Btrfs: fix crash caused by block group removal
# Btrfs: fix freeing used extents after removing empty block group
# Btrfs: fix race between fs trimming and block group remove/allocation
# Btrfs: fix race between writing free space cache and trimming
# Btrfs: make btrfs_abort_transaction consider existence of new block groups
# Btrfs: fix memory leak after block remove + trimming
# Btrfs: fix fs mapping extent map leak
# Btrfs: fix unprotected deletion from pending_chunks list
#
# The issues were found on a qemu/kvm guest with 4 virtual CPUs, 4GB of RAM and
# scsi-hd devices with discard support enabled (that means hole punching in the
# disk's image file is performed by the host).
#
#-----------------------------------------------------------------------
#
# Copyright (C) 2014 SUSE Linux Products GmbH. All Rights Reserved.
# Author: Filipe Manana <fdmanana@suse.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#-----------------------------------------------------------------------
#
# Test identity: the test is named after this script, and its results path
# lives under $RESULT_DIR.
seq=$(basename $0)
seqres=$RESULT_DIR/$seq

# Scratch path for temporary data, and the exit status reported by the trap.
tmp=/tmp/$$
status=1	# failure is the default!

echo "QA output created by $seq"

# On shell exit (0) and on signals 1, 2, 3 and 15: run _cleanup, then exit
# with whatever $status holds at that moment (single quotes defer expansion).
trap '_cleanup; exit $status' 0 1 2 3 15
# Cleanup hook invoked from the exit/signal trap: force-remove whatever
# lives at $tmp (a file or a whole directory tree). A missing path is not
# treated as an error.
_cleanup() {
    rm -r -f $tmp
}
# get standard environment, filters and checks
. ./common/rc
. ./common/filter
# real QA test starts here
# Declare where this test applies and what it needs. These helpers are not
# defined in this file; presumably they come from the files sourced above and
# skip the test when a requirement is not met -- confirm against common/rc.
_supported_fs generic
_supported_os Linux
_require_scratch
# The test body drives xfs_io with "falloc", so require that sub-command.
_require_xfs_io_command "falloc"
# Start from an empty full log; later steps append to $seqres.full.
rm -f $seqres.full
# Endlessly allocate and then release 1G of data space in a single file, with
# the goal of constantly creating and deleting 1 block group. The point is to
# race with the fstrim loop that runs in parallel.
#
# Never returns on its own: run it in the background and stop it with SIGTERM.
#
# Arguments: $1 - name of the file to use, relative to $SCRATCH_MNT
fallocate_loop()
{
    local target=$SCRATCH_MNT/$1

    # On SIGTERM, wait for the running subcommand before exiting so that
    # the mountpoint is not busy when we try to unmount it.
    trap 'wait; exit' TERM

    while :; do
        $XFS_IO_PROG -f -c "falloc -k 0 1G" $target &> /dev/null
        sleep 3
        $XFS_IO_PROG -c "truncate 0" $target &> /dev/null
        sleep 3
    done
}
# Run $FSTRIM_PROG against $SCRATCH_MNT back to back, forever.
#
# Never returns on its own: run it in the background and stop it with SIGTERM.
trim_loop()
{
    # On SIGTERM, wait for the running subcommand before exiting so that
    # the mountpoint is not busy when we try to unmount it.
    trap 'wait; exit' TERM

    for (( ; ; )); do
        $FSTRIM_PROG $SCRATCH_MNT
    done
}
# Create a bunch of small files that get their single extent inlined in the
# btree, so that we consume a lot of metadata space and get a chance of a
# data block group getting deleted and reused for metadata later. Sometimes
# the creation of all these files succeeds, other times we get ENOSPC failures
# at some point - this depends on how fast the btrfs' cleaner kthread is
# notified about empty block groups, how fast it deletes them and how fast
# the fallocate calls happen. So we don't really care if they all succeed or
# not, the goal is just to keep metadata space usage growing while data block
# groups are deleted.
#
# Creating 200,000 files sequentially is really slow, so speed it up a bit
# by doing it concurrently with 4 threads in 4 separate directories.
# Number of files each of the 4 writers creates; scales with LOAD_FACTOR.
nr_files=$((50000 * LOAD_FACTOR))

# Create directories 0..3 under $SCRATCH_MNT and, concurrently in each one,
# write files named <prefix>_1 .. <prefix>_$nr_files (3900 bytes of 0xaa
# each). A writer stops at its first failed write and records the failing
# file in $seqres.full; failures are tolerated, not reported to the caller.
# Returns once all 4 writers have finished.
#
# Globals:   SCRATCH_MNT, XFS_IO_PROG, nr_files, seqres (read)
#            create_pids (written: PIDs of the 4 background writers)
# Arguments: $1 - file name prefix
create_files()
{
    local prefix=$1
    # Keep the directory counter local so a caller's own "n" is not clobbered.
    local n

    for ((n = 0; n < 4; n++)); do
        mkdir "$SCRATCH_MNT/$n"
        (
            # On SIGTERM, let any running subcommand finish before exiting.
            trap "wait; exit" SIGTERM

            # $XFS_IO_PROG is deliberately left unquoted: it may carry
            # extra arguments in addition to the program name.
            for ((i = 1; i <= nr_files; i++)); do
                if ! $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 3900" \
                    "$SCRATCH_MNT/$n/${prefix}_$i" &> /dev/null; then
                    echo "Failed creating file $n/${prefix}_$i" >>"$seqres.full"
                    break
                fi
            done
        ) &
        create_pids[$n]=$!
    done

    wait "${create_pids[@]}"
}
# Make and mount a fresh scratch filesystem, then check the preconditions
# that can only be evaluated on the mounted filesystem.
_scratch_mkfs >>$seqres.full 2>&1
_scratch_mount
_require_fs_space $SCRATCH_MNT $((10 * 1024 * 1024))
_require_batched_discard $SCRATCH_MNT

# Start the background racers: 4 trim loops and 1 fallocate loop per unit of
# LOAD_FACTOR. Their PIDs are collected so they can be stopped later.
for ((i = 0; i < 4 * $LOAD_FACTOR; i++)); do
    trim_loop &
    trim_pids+=($!)
done

for ((i = 0; i < $LOAD_FACTOR; i++)); do
    fallocate_loop "falloc_file_$i" &
    fallocate_pids+=($!)
done

# Foreground workload; returns once all the file writers are done.
create_files "foobar"

# Stop the background loops and wait for every child to exit before the
# script ends.
kill ${fallocate_pids[@]}
kill ${trim_pids[@]}
wait

# The fstests framework will now check for fs consistency with fsck.
# The trimming was racy and caused some btree nodes to get full of zeroes on
# disk, which obviously caused fs metadata corruption. The race often led
# to missing free space entries in a block group's free space cache too.
echo "Silence is golden"

# Mark success; the exit trap reports $status.
status=0
exit