#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2021 Oracle, Inc.  All Rights Reserved.
#
# FS QA Test No. 648
#
# Test nested log recovery with repeated (simulated) disk failures.  We kick
# off fsstress on a loopback filesystem mounted on the scratch fs, then switch
# out the underlying scratch device with dm-error to see what happens when the
# disk goes down.  Having taken down both fses in this manner, remount them and
# repeat.  This test simulates VM hosts crashing to try to shake out CoW bugs
# in writeback on the host that cause VM guests to fail to recover.
#
|  | . ./common/preamble | 
|  | _begin_fstest shutdown auto log metadata eio recoveryloop | 
|  |  | 
# Test teardown: kill any fsstress processes, reap background jobs, release
# and remove the loop mount point (when one has been set), delete the
# $tmp.* scratch files, and finally call the dm-error unmount and cleanup
# helpers.
_cleanup()
{
	cd /
	$KILLALL_PROG -9 fsstress &> /dev/null
	wait

	# $loopmnt stays empty until the test has picked its loop mount point,
	# so an early exit has nothing to unmount or remove here.
	test -z "$loopmnt" || {
		$UMOUNT_PROG $loopmnt 2> /dev/null
		rm -rf $loopmnt
	}

	rm -f $tmp.*
	_dmerror_unmount
	_dmerror_cleanup
}
|  |  | 
|  | # Import common functions. | 
|  | . ./common/dmerror | 
|  | . ./common/reflink | 
|  |  | 
|  | # Modify as appropriate. | 
|  |  | 
# Prerequisites checked before the test does any work.
_require_scratch_reflink
_require_cp_reflink
_require_dm_target error
_require_command "$KILLALL_PROG" "killall"
_require_loop

echo "Silence is golden."

# Format the scratch device, then bring it up via the dm-error helpers.
_scratch_mkfs >> $seqres.full 2>&1
_require_metadata_journaling $SCRATCH_DEV
_dmerror_init
_dmerror_mount

# Create a fs image consuming 1/3 of the scratch fs
scratch_freesp_bytes=$(_get_available_space $SCRATCH_MNT)
loopimg_bytes=$(( scratch_freesp_bytes / 3 ))

loopimg="${SCRATCH_MNT}/testfs"
truncate -s $loopimg_bytes $loopimg
_mkfs_dev $loopimg

# Mount point for the loop image.  _cleanup only touches it once this
# variable is set.
loopmnt="${tmp}.mount"
mkdir -p $loopmnt

# Flag files used to coordinate with snap_loop_fs: the first tells the
# snapshot worker to keep running, the second exists while it is running.
scratch_aliveflag="${tmp}.runsnap"
snap_aliveflag="${tmp}.snapping"
|  |  | 
# Background snapshot worker.  For as long as the $scratch_aliveflag file
# exists, replace $loopimg.a with a fresh _cp_reflink copy of $loopimg,
# pausing one second between copies.  The $snap_aliveflag file is created on
# entry and removed on exit, so callers can poll it to see whether the worker
# is still running.
snap_loop_fs() {
	touch "$snap_aliveflag"
	until [ ! -e "$scratch_aliveflag" ]; do
		rm -f ${loopimg}.a
		_cp_reflink ${loopimg} ${loopimg}.a
		sleep 1
	done
	rm -f "$snap_aliveflag"
}
|  |  | 
# fsstress command line: runs against the loop mount with 4 * LOAD_FACTOR
# processes and a very large operation count.  The loop below kills it rather
# than waiting for it to finish.
fsstress=($FSSTRESS_PROG $FSSTRESS_AVOID -d "$loopmnt" -n 999999 -p "$((LOAD_FACTOR * 4))")

# One iteration: start the snapshot worker, loop-mount the image, run fsstress
# for a short random time, switch the scratch device to the error table, kill
# fsstress, unmount both filesystems, switch back to the working table and
# remount the scratch fs.
while _soak_loop_running $((25 * TIME_FACTOR)); do
	touch $scratch_aliveflag
	snap_loop_fs >> $seqres.full 2>&1 &

	if ! _mount $loopimg $loopmnt -o loop; then
		rm -f $scratch_aliveflag
		# Name the dump after the current soak iteration so that it
		# matches the failure message below.
		_metadump_dev $loopimg $seqres.loop.$SOAK_LOOPIDX.md
		_fail "iteration $SOAK_LOOPIDX loopimg mount failed"
		break
	fi

	("${fsstress[@]}" >> $seqres.full &) > /dev/null 2>&1

	# purposely include 0 second sleeps to test shutdown immediately after
	# recovery
	sleep $((RANDOM % (3 * TIME_FACTOR) ))
	rm -f $scratch_aliveflag

	# This test aims to simulate sudden disk failure, which means that we
	# do not want to quiesce the filesystem or otherwise give it a chance
	# to flush its logs.  Therefore we want to call dmsetup with the
	# --nolockfs parameter; to make this happen we must call the load
	# error table helper *without* 'lockfs'.
	_dmerror_load_error_table

	# Keep killing until no fsstress process shows up in the process list.
	while ps -e | grep fsstress > /dev/null 2>&1; do
		$KILLALL_PROG -9 fsstress > /dev/null 2>&1
		wait > /dev/null 2>&1
	done

	# Give the snapshot worker up to ten seconds to notice that its alive
	# flag is gone and to remove its own running flag.
	for ((j = 0; j < 10; j++)); do
		test -e "$snap_aliveflag" || break
		sleep 1
	done

	# Unmount everything, then mount again after loading the working table
	# so that the log is replayed and we have a consistent fs after the
	# test.
	$UMOUNT_PROG $loopmnt

	# The dm-error mount must be unmounted here, otherwise all later
	# testing breaks.  Retry up to 100 times, one second apart, before
	# giving up.
	unmount_ok=0
	for ((j = 0; j < 100; j++)); do
		sleep 1
		if _dmerror_unmount > $tmp.unmount.err 2>&1; then
			unmount_ok=1
			break
		fi
	done
	if ((unmount_ok == 0)); then
		# Show the output of the last failed attempt.
		cat $tmp.unmount.err
		_fail "iteration $SOAK_LOOPIDX scratch unmount failed"
	fi

	_dmerror_load_working_table
	if ! _dmerror_mount; then
		# Same naming rule as above: tag the dump with the iteration.
		_metadump_dev $DMERROR_DEV $seqres.scratch.$SOAK_LOOPIDX.md
		_fail "iteration $SOAK_LOOPIDX scratch mount failed"
	fi
done
|  |  | 
# Make sure the fs image file is ok
if [[ -f "$loopimg" ]]; then
	if ! _mount $loopimg $loopmnt -o loop; then
		# NOTE(review): the mount that failed here is the loop image, yet
		# the dump target is $DMERROR_DEV and the message says "scratch".
		# Confirm whether that is intentional.
		_metadump_dev $DMERROR_DEV $seqres.scratch.final.md
		echo "final scratch mount failed"
	else
		# The mount worked, so just release the loop mount again.
		$UMOUNT_PROG $loopmnt > /dev/null 2>&1
	fi
	# Check the image itself, with the scratch realtime and log device
	# settings cleared for this one call.
	SCRATCH_RTDEV= SCRATCH_LOGDEV= _check_scratch_fs $loopimg
fi

# success, all done; let the test harness check the scratch fs
status=0
exit