#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2019 SUSE Linux Products GmbH. All Rights Reserved.
#
# FSQA Test No. 187
#
# Stress send running in parallel with balance and deduplication against files
# that belong to the snapshots used by send. The goal is to verify that these
# operations running in parallel do not lead to send crashing (trigger assertion
# failures and BUG_ONs), or send finding an inconsistent snapshot that leads to
# a failure (reported in dmesg/syslog). The test needs big trees (snapshots)
# with large differences between the parent and send snapshots in order to hit
# such issues with a good probability.
#
. ./common/preamble
_begin_fstest auto send dedupe clone balance

# Helper libraries used by this test.
. ./common/attr
. ./common/filter
. ./common/reflink

# Skip the test unless the scratch filesystem supports dedupe and xattrs.
_require_scratch_dedupe
_require_attrs

# We need at least 8GB of free space on $SCRATCH_DEV.
_require_scratch_size $((8 * 1024 * 1024))

# Start from a freshly made and mounted scratch filesystem; mkfs output is
# only kept in the full log.
_scratch_mkfs >>"$seqres.full" 2>&1
_scratch_mount

# Pick one random regular file from each of the two snapshots and issue a
# dedupe request for the first 64K between them. Which of the two files is the
# source and which is the destination is decided at random.
#
# Globals: SCRATCH_MNT (read), XFS_IO_PROG (read)
dedupe_two_files()
{
	# This function is run as a background job by dedupe_files_loop(). On
	# SIGTERM wait for any children and then leave.
	trap "wait; exit" SIGTERM

	local f1 f2 tmp

	f1=$(find "$SCRATCH_MNT/snap1" -type f | shuf -n 1)
	f2=$(find "$SCRATCH_MNT/snap2" -type f | shuf -n 1)

	# Swap source and destination half of the time.
	if (( RANDOM % 2 )); then
		tmp=$f1
		f1=$f2
		f2=$tmp
	fi

	# Ignore errors from dedupe. We just want to test for crashes and
	# deadlocks.
	# XFS_IO_PROG is intentionally left unquoted so that it may carry
	# extra options.
	$XFS_IO_PROG -r -c "dedupe $f1 0 0 64K" "$f2" &> /dev/null
}

# Keep launching batches of 5 parallel dedupe_two_files() jobs, waiting for
# each batch to finish, until SIGTERM is received.
dedupe_files_loop()
{
	local stop=0
	local i

	# Avoid executing 'wait' inside the trap, because when we receive
	# SIGTERM we might be already executing the wait command in the while
	# loop below. When that is the case, bash 5.0+ with debug enabled prints
	# a warning message that makes the test fail due to a mismatch with the
	# golden output. That warning message is the following:
	#
	# warning: wait_for: recursively setting old_sigint_handler to wait_sigint_handler: running_trap = 16
	#
	trap "stop=1" SIGTERM

	while (( stop == 0 )); do
		for ((i = 1; i <= 5; i++)); do
			dedupe_two_files &
		done
		wait
	done
}

# Run metadata balances on the scratch filesystem forever, pausing 0 to 2
# seconds between runs. The loop only ends when SIGTERM is received.
#
# Globals: SCRATCH_MNT (read)
balance_loop()
{
	trap "wait; exit" SIGTERM

	while true; do
		# Balance only metadata block groups, since this makes it
		# easier to hit problems (crashes and errors) in send.
		# Ignore errors from balance. We just want to test for crashes
		# and deadlocks.
		_run_btrfs_balance_start -f -m "$SCRATCH_MNT" &> /dev/null
		sleep $((RANDOM % 3))
	done
}

# Run a full send of snapshot snap1 a given number of times, discarding the
# send stream and pausing 0 to 2 seconds after each run.
#
# Arguments: $1 - number of send operations to run
# Globals:   BTRFS_UTIL_PROG (read), SCRATCH_MNT (read)
full_send_loop()
{
	trap "wait; exit" SIGTERM

	local count=$1
	local i

	for ((i = 1; i <= count; i++)); do
		# Ignore errors from send. We will check for errors later in
		# dmesg/syslog.
		$BTRFS_UTIL_PROG send -f /dev/null \
			"$SCRATCH_MNT/snap1" &> /dev/null
		sleep $((RANDOM % 3))
	done
}

# Run an incremental send of snapshot snap2, using snap1 as the parent, a
# given number of times. The send stream is discarded and there is a pause of
# 0 to 2 seconds after each run.
#
# Arguments: $1 - number of send operations to run
# Globals:   BTRFS_UTIL_PROG (read), SCRATCH_MNT (read)
inc_send_loop()
{
	trap "wait; exit" SIGTERM

	local count=$1
	local i

	for ((i = 1; i <= count; i++)); do
		# Ignore errors from send. We will check for errors later in
		# dmesg/syslog.
		$BTRFS_UTIL_PROG send -f /dev/null \
			-p "$SCRATCH_MNT/snap1" "$SCRATCH_MNT/snap2" &> /dev/null
		sleep $((RANDOM % 3))
	done
}

# Create a range of 64K files named file_<N> at the root of the scratch mount,
# with N going from offset + 1 to offset + count.
#
# Arguments: $1 - number of files to create
#            $2 - offset added to the loop counter to form the file number
# Globals:   XFS_IO_PROG (read), SCRATCH_MNT (read)
write_files_loop()
{
	local count=$1
	local offset=$2
	local i

	for ((i = 1; i <= count; i++)); do
		$XFS_IO_PROG -f -c "pwrite -S 0xea 0 64K" \
			"$SCRATCH_MNT/file_$((i + offset))" >/dev/null
	done
}

# Set the xattr 'user.x1' to $xattr_value on a range of files named file_<N>
# at the root of the scratch mount, with N going from offset + 1 to
# offset + count.
#
# Arguments: $1 - number of files to update
#            $2 - offset added to the loop counter to form the file number
# Globals:   SETFATTR_PROG (read), SCRATCH_MNT (read), xattr_value (read)
set_xattrs_loop()
{
	local count=$1
	local offset=$2
	local i

	for ((i = 1; i <= count; i++)); do
		$SETFATTR_PROG -n 'user.x1' -v "$xattr_value" \
			"$SCRATCH_MNT/file_$((i + offset))"
	done
}

# Number of files created before the first snapshot. Must be divisible by 4.
nr_initial_files=40000
# Number of files created after the first snapshot. Must be divisible by 4.
nr_more_files=40000

# Create initial files. The work is split across 4 background workers, each
# one writing its own consecutive range of file numbers.
step=$((nr_initial_files / 4))
for ((n = 0; n < 4; n++)); do
	offset=$((step * n))
	write_files_loop "$step" "$offset" &
	create_pids[n]=$!
done
wait "${create_pids[@]}"

# First read-only snapshot: the parent for the incremental send and the
# source for the full send.
_btrfs subvolume snapshot -r "$SCRATCH_MNT" "$SCRATCH_MNT/snap1"

# Add some more files, so that there are substantial differences between the
# two test snapshots used for an incremental send later.

# Create more files. File numbers continue after the initial ones and the
# work is again split across 4 background workers.
step=$((nr_more_files / 4))
for ((n = 0; n < 4; n++)); do
	offset=$((nr_initial_files + step * n))
	write_files_loop "$step" "$offset" &
	create_pids[n]=$!
done
wait "${create_pids[@]}"

# Add some xattrs to all files, so that every leaf and node of the fs tree is
# COWed. Adding more files only adds leaves and nodes to the tree's right
# side, since inode numbers are based on a counter and form the first part
# (objectid) of btree keys (we are only modifying the right most leaf of the
# tree). Use large values for the xattrs to quickly increase the height of the
# tree.
#
# The value is a string of 3800 'X' characters. It is built with the printf
# builtin (3800 spaces, then each space replaced by 'X') so that no external
# command has to be forked.
printf -v xattr_value '%3800s' ''
xattr_value=${xattr_value// /X}

# Split the work into 4 workers working on consecutive ranges to avoid contention
# on the same leaves as much as possible.
step=$(((nr_more_files + nr_initial_files) / 4))
for ((n = 0; n < 4; n++)); do
	offset=$((step * n))
	set_xattrs_loop "$step" "$offset" &
	setxattr_pids[n]=$!
done
wait "${setxattr_pids[@]}"

# Second read-only snapshot: the one sent incrementally against snap1.
_btrfs subvolume snapshot -r "$SCRATCH_MNT" "$SCRATCH_MNT/snap2"

# Start the two bounded send loops and the two unbounded stress loops (dedupe
# and balance), all of them as background jobs.
full_send_loop 5 &
full_send_pid=$!

inc_send_loop 10 &
inc_send_pid=$!

dedupe_files_loop &
dedupe_pid=$!

balance_loop &
balance_pid=$!

# The send loops finish on their own after their fixed number of iterations.
wait "$full_send_pid"
wait "$inc_send_pid"

# The dedupe and balance loops only stop when they receive SIGTERM.
kill "$dedupe_pid"
wait "$dedupe_pid"

kill "$balance_pid"
wait "$balance_pid"

# Check for error messages that happen due to an inconsistent snapshot caused
# by deduplication and balance running in parallel with send, causing btree
# nodes and leaves to disappear and get reused while send is using them.
#
# Example messages:
#
# BTRFS error (device sdc): did not find backref in send_root. inode=63292, \
# offset=0, disk_byte=5228134400 found extent=5228134400
#
# BTRFS error (device sdc): parent transid verify failed on 32243712 wanted 24 \
# found 27
#
# Any matching line is printed to the test's output.
# NOTE(review): the '?' after '.*' was presumably meant as a non-greedy
# marker, but extended regular expressions have no lazy quantifiers, so this
# behaves like a plain '.*'. It still matches the messages above.
_dmesg_since_test_start | grep -E -e '\bBTRFS error \(device .*?\):'

status=0
exit