| #! /bin/bash |
| # SPDX-License-Identifier: GPL-2.0 |
| # Copyright (c) 2021 Oracle, Inc. All Rights Reserved. |
| # |
| # FS QA Test No. 648 |
| # |
| # Test nested log recovery with repeated (simulated) disk failures. We kick |
| # off fsstress on a loopback filesystem mounted on the scratch fs, then switch |
| # out the underlying scratch device with dm-error to see what happens when the |
| # disk goes down. Having taken down both fses in this manner, remount them and |
| # repeat. This test simulates VM hosts crashing to try to shake out CoW bugs |
| # in writeback on the host that cause VM guests to fail to recover. |
| # |
| # Source the shared preamble, then call _begin_fstest for this test. |
| # NOTE(review): the _begin_fstest arguments look like test group names -- |
| # confirm against common/preamble. |
| . ./common/preamble |
| _begin_fstest shutdown auto log metadata eio recoveryloop |
| |
| # Undo what this test set up: stop fsstress, unmount and remove the loop |
| # mount point (if one was created), run the dm-error unmount and cleanup |
| # helpers, and delete this test's temporary files. |
| _cleanup() |
| { |
| _kill_fsstress |
| # $loopmnt is only assigned once setup has reached the loop mount point. |
| if [ -n "$loopmnt" ]; then |
| # Errors are discarded: the loop fs may not be mounted at this point. |
| _unmount "$loopmnt" 2>/dev/null |
| # Quoted so the recursive remove always targets exactly one path. |
| rm -r -f "$loopmnt" |
| fi |
| _dmerror_unmount |
| _dmerror_cleanup |
| cd / |
| # Only the glob suffix is left unquoted so it still expands. |
| rm -f "$tmp".* |
| } |
| |
| # Import common functions. |
| . ./common/dmerror |
| . ./common/reflink |
| |
| # Modify as appropriate. |
| |
| # Prerequisite checks. |
| # NOTE(review): presumably each _require_* helper skips the test when its |
| # feature is unavailable -- confirm against the common/ helpers. |
| _require_scratch_reflink |
| _require_cp_reflink |
| _require_dm_target error |
| _require_loop |
| |
| # NOTE(review): presumably the only line of expected output -- confirm |
| # against the test's .out file. |
| echo "Silence is golden." |
| |
| # Format the scratch device (mkfs output goes to $seqres.full), then set up |
| # the dm-error device and mount the scratch fs through it. |
| _scratch_mkfs >> $seqres.full 2>&1 |
| _require_metadata_journaling $SCRATCH_DEV |
| _dmerror_init |
| _dmerror_mount |
| |
| # Create a fs image consuming 1/3 of the scratch fs |
| scratch_freesp_bytes=$(_get_available_space $SCRATCH_MNT) |
| loopimg_bytes=$((scratch_freesp_bytes / 3)) |
| |
| # The image is a file on the scratch fs, sized with truncate and then |
| # formatted with _mkfs_dev. |
| loopimg=$SCRATCH_MNT/testfs |
| truncate -s $loopimg_bytes $loopimg |
| _mkfs_dev $loopimg |
| |
| # Mount point for the loop image; _cleanup unmounts and removes it. |
| loopmnt=$tmp.mount |
| mkdir -p $loopmnt |
| |
| # Flag files used to coordinate with snap_loop_fs: |
| # scratch_aliveflag - snap_loop_fs keeps looping while this file exists |
| # snap_aliveflag    - exists for as long as snap_loop_fs is running |
| scratch_aliveflag=$tmp.runsnap |
| snap_aliveflag=$tmp.snapping |
| |
| # Background snapshotter. While $scratch_aliveflag exists, replace |
| # $loopimg.a with a fresh reflink copy of $loopimg once per second. |
| # $snap_aliveflag exists for the lifetime of this function so the caller |
| # can tell when it has finished. |
| snap_loop_fs() { |
| local snapshot=$loopimg.a |
| |
| touch "$snap_aliveflag" |
| until [ ! -e "$scratch_aliveflag" ]; do |
| rm -f $snapshot |
| _cp_reflink $loopimg $snapshot |
| sleep 1 |
| done |
| rm -f "$snap_aliveflag" |
| } |
| |
| # Main soak loop. Each pass mounts the loop image, runs fsstress on it while |
| # snap_loop_fs reflinks the image in the background, loads the dm-error |
| # table under the scratch fs, then unmounts both filesystems, reloads the |
| # working table and remounts the scratch fs. |
| while _soak_loop_running $((25 * TIME_FACTOR)); do |
| # Start the background snapshotter; it loops until the flag is removed. |
| touch $scratch_aliveflag |
| snap_loop_fs >> $seqres.full 2>&1 & |
| |
| if ! _mount $loopimg $loopmnt -o loop; then |
| rm -f $scratch_aliveflag |
| # Name the dump after the current iteration, matching the failure |
| # message below. |
| _metadump_dev $loopimg $seqres.loop.$SOAK_LOOPIDX.md |
| _fail "iteration $SOAK_LOOPIDX loopimg mount failed" |
| break |
| fi |
| |
| _run_fsstress_bg -d "$loopmnt" -n 999999 -p "$((LOAD_FACTOR * 4))" |
| |
| # purposely include 0 second sleeps to test shutdown immediately after |
| # recovery |
| sleep $((RANDOM % (3 * TIME_FACTOR) )) |
| rm -f $scratch_aliveflag |
| |
| # This test aims to simulate sudden disk failure, which means that we |
| # do not want to quiesce the filesystem or otherwise give it a chance |
| # to flush its logs. Therefore we want to call dmsetup with the |
| # --nolockfs parameter; to make this happen we must call the load |
| # error table helper *without* 'lockfs'. |
| _dmerror_load_error_table |
| |
| _kill_fsstress |
| # Wait up to 10 seconds for snap_loop_fs to remove its flag file. |
| for ((j = 0; j < 10; j++)); do |
| test -e "$snap_aliveflag" || break |
| sleep 1 |
| done |
| |
| # Mount again to replay log after loading working table, so we have a |
| # consistent fs after test. |
| _unmount $loopmnt |
| unmount_failed=1 |
| # The dm-error device must be unmounted here, or all later testing will |
| # crash. Retry up to 100 times, one second apart, before giving up. |
| for ((j = 0; j < 100; j++)); do |
| sleep 1 |
| if _dmerror_unmount > $tmp.unmount.err 2>&1; then |
| unmount_failed=0 |
| break |
| fi |
| done |
| if [ $unmount_failed -ne 0 ]; then |
| # Show the output of the last failed unmount attempt. |
| cat $tmp.unmount.err |
| _fail "iteration $SOAK_LOOPIDX scratch unmount failed" |
| fi |
| _dmerror_load_working_table |
| if ! _dmerror_mount; then |
| # Name the dump after the current iteration, matching the failure |
| # message below. |
| _metadump_dev $DMERROR_DEV $seqres.scratch.$SOAK_LOOPIDX.md |
| _fail "iteration $SOAK_LOOPIDX scratch mount failed" |
| fi |
| done |
| |
| # Final check of the loop image: if the image file still exists, try to |
| # mount it one more time, then run _check_scratch_fs on it. |
| if test -f "$loopimg"; then |
| if ! _mount $loopimg $loopmnt -o loop; then |
| # NOTE(review): the mount that failed here is the loop image, yet the |
| # dump and the message refer to the scratch device -- confirm intent. |
| _metadump_dev $DMERROR_DEV $seqres.scratch.final.md |
| echo "final scratch mount failed" |
| else |
| _unmount $loopmnt &> /dev/null |
| fi |
| # SCRATCH_RTDEV and SCRATCH_LOGDEV are set to empty for this call only. |
| SCRATCH_RTDEV= SCRATCH_LOGDEV= _check_scratch_fs $loopimg |
| fi |
| |
| # success, all done; let the test harness check the scratch fs |
| status=0 |
| exit |