| #! /bin/bash |
| # FSQA Test No. 038 |
| # |
| # This test was motivated by btrfs issues, but it's generic enough as it |
| # doesn't use any btrfs specific features. |
| # |
| # Stress btrfs' block group allocation and deallocation while running fstrim in |
| # parallel. Part of the goal is also to get data block groups deallocated so |
| # that new metadata block groups, using the same physical device space ranges, |
| # get allocated while fstrim is running. This caused several issues ranging |
| # from invalid memory accesses, kernel crashes, metadata or data corruption, |
| # free space cache inconsistencies, free space leaks and memory leaks. |
| # |
| # These issues were fixed by the following btrfs linux kernel patches: |
| # |
| # Btrfs: fix invalid block group rbtree access after bg is removed |
| # Btrfs: fix crash caused by block group removal |
| # Btrfs: fix freeing used extents after removing empty block group |
| # Btrfs: fix race between fs trimming and block group remove/allocation |
| # Btrfs: fix race between writing free space cache and trimming |
| # Btrfs: make btrfs_abort_transaction consider existence of new block groups |
| # Btrfs: fix memory leak after block remove + trimming |
| # Btrfs: fix fs mapping extent map leak |
| # Btrfs: fix unprotected deletion from pending_chunks list |
| # |
| # The issues were found on a qemu/kvm guest with 4 virtual CPUs, 4GB of RAM and |
| # scsi-hd devices with discard support enabled (that means hole punching in the |
| # disk's image file is performed by the host). |
| # |
| #----------------------------------------------------------------------- |
| # |
| # Copyright (C) 2014 SUSE Linux Products GmbH. All Rights Reserved. |
| # Author: Filipe Manana <fdmanana@suse.com> |
| # |
| # This program is free software; you can redistribute it and/or |
| # modify it under the terms of the GNU General Public License as |
| # published by the Free Software Foundation. |
| # |
| # This program is distributed in the hope that it would be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program; if not, write the Free Software Foundation, |
| # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| #----------------------------------------------------------------------- |
| # |
| |
# Test identity and where its results go. RESULT_DIR is not set anywhere in
# this file; presumably the test harness exports it - confirm.
seq=$(basename "$0")
seqres=$RESULT_DIR/$seq
echo "QA output created by $seq"

# Per-process temporary path; _cleanup removes it.
tmp=/tmp/$$
status=1	# failure is the default!

# Run _cleanup and exit with $status on normal exit (0) and on signals
# 1 2 3 15 (HUP, INT, QUIT, TERM). The dollar sign is escaped so that
# $status is expanded when the trap fires, not when it is installed.
trap "_cleanup; exit \$status" 0 1 2 3 15
| |
# Remove this test's temporary path ($tmp), recursively and without
# complaining if it does not exist.
_cleanup()
{
	# Quote the path and end option parsing so that a value containing
	# whitespace or starting with a dash is removed as a single operand.
	rm -fr -- "$tmp"
}
| |
# get standard environment, filters and checks
. ./common/rc
. ./common/filter

# real QA test starts here
# The helpers below are not defined in this file; presumably they come from
# the files sourced above and skip the test when a requirement is unmet -
# confirm against common/rc.
_supported_fs generic
_supported_os Linux
_require_scratch
_require_xfs_io_command "falloc"

# Start from an empty detailed log; later steps append to it. The path is
# quoted so a result directory containing whitespace stays one operand.
rm -f -- "$seqres.full"
| |
# Keep allocating and deallocating 1G of data space with the goal of creating
# and deleting 1 block group constantly. The intention is to race with the
# fstrim loop below.
#
# Arguments: $1 - name of the file, relative to $SCRATCH_MNT, to operate on
# Globals:   SCRATCH_MNT, XFS_IO_PROG (read)
# Loops forever; it only ends when the process receives SIGTERM.
fallocate_loop()
{
	local name=$1
	local target="$SCRATCH_MNT/$name"

	# Wait for running subcommand before exiting so that
	# mountpoint is not busy when we try to unmount it
	trap "wait; exit" SIGTERM

	while true; do
		# $XFS_IO_PROG is left unquoted on purpose so that a value
		# carrying extra options still splits into words; the target
		# path is quoted so it always stays a single argument.
		# Output and exit status of both commands are ignored.
		$XFS_IO_PROG -f -c "falloc -k 0 1G" "$target" &> /dev/null
		sleep 3
		$XFS_IO_PROG -c "truncate 0" "$target" &> /dev/null
		sleep 3
	done
}
| |
# Run $FSTRIM_PROG on $SCRATCH_MNT back to back, forever. The loop only ends
# when the process receives SIGTERM.
#
# Globals: FSTRIM_PROG, SCRATCH_MNT (read)
trim_loop()
{
	# Wait for running subcommand before exiting so that
	# mountpoint is not busy when we try to unmount it
	trap "wait; exit" SIGTERM

	while true; do
		# The mount point is quoted so it is always passed as exactly
		# one argument; the exit status is ignored.
		$FSTRIM_PROG "$SCRATCH_MNT"
	done
}
| |
# Create a bunch of small files that get their single extent inlined in the
# btree, so that we consume a lot of metadata space and get a chance of a
# data block group getting deleted and reused for metadata later. Sometimes
# the creation of all these files succeeds; other times we get ENOSPC failures
# at some point - this depends on how fast the btrfs' cleaner kthread is
# notified about empty block groups, how fast it deletes them and how fast
# the fallocate calls happen. So we don't really care if they all succeed or
# not, the goal is just to keep metadata space usage growing while data block
# groups are deleted.
#
# Creating 200,000 files sequentially is really slow, so speed it up a bit
# by doing it concurrently with 4 threads in 4 separate directories.
nr_files=$((50000 * LOAD_FACTOR))

# Arguments: $1 - file name prefix; files are named <prefix>_<i>, i = 1..nr_files
# Globals:   SCRATCH_MNT, XFS_IO_PROG, nr_files, seqres (read)
#            create_pids (written: PIDs of the 4 background writers)
# Returns once all 4 writers have finished.
create_files()
{
	local prefix=$1
	local n		# keep the directory counter out of the global scope

	for ((n = 0; n < 4; n++)); do
		mkdir "$SCRATCH_MNT/$n"
		(
			trap "wait; exit" SIGTERM

			# i lives only in this subshell, so it cannot clobber
			# a counter of the same name in the caller.
			for ((i = 1; i <= nr_files; i++)); do
				# The first failed write is logged and ends
				# this writer; it is not treated as an error.
				if ! $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 3900" \
					"$SCRATCH_MNT/$n/${prefix}_$i" &> /dev/null; then
					echo "Failed creating file $n/${prefix}_$i" >>"$seqres.full"
					break
				fi
			done
		) &
		create_pids[$n]=$!
	done

	wait "${create_pids[@]}"
}
| |
# Make and mount a fresh scratch filesystem; mkfs output goes to the
# detailed log.
_scratch_mkfs >>"$seqres.full" 2>&1
_scratch_mount
# NOTE(review): the unit of the size argument is not visible here -
# presumably KB, i.e. 10GB; confirm against common/rc.
_require_fs_space "$SCRATCH_MNT" $((10 * 1024 * 1024))
_require_batched_discard "$SCRATCH_MNT"

# Start 4 * LOAD_FACTOR background trimmers and remember their PIDs.
for ((i = 0; i < 4 * LOAD_FACTOR; i++)); do
	trim_loop &
	trim_pids[$i]=$!
done

# Start LOAD_FACTOR background allocate/truncate loops, one file each.
for ((i = 0; i < 1 * LOAD_FACTOR; i++)); do
	fallocate_loop "falloc_file_$i" &
	fallocate_pids[$i]=$!
done

# Runs in the foreground; returns when all file writers are done.
create_files "foobar"

# Stop the background loops and wait for all of them to exit.
kill "${fallocate_pids[@]}"
kill "${trim_pids[@]}"
wait

# The fstests framework will now check for fs consistency with fsck.
# The trimming was racy and caused some btree nodes to get full of zeroes on
# disk, which obviously caused fs metadata corruption. The race often led
# to missing free space entries in a block group's free space cache too.

echo "Silence is golden"
status=0
exit