# tests/generic/019 - pub/scm/linux/kernel/git/brauner/xfstests-dev - Git at Google

 #! /bin/bash
 # SPDX-License-Identifier: GPL-2.0

 #
 # FSQA Test No. generic/019
 #
 # Run fsstress and fio(dio/aio and mmap) and simulate disk failure
 # check filesystem consistency at the end.
 #
 # Pull in the shared test preamble and declare this test (the words after
 # _begin_fstest are presumably the test's group tags -- confirm in
 # common/preamble).
 . ./common/preamble
 _begin_fstest aio dangerous enospc rw stress recoveryloop

 # Path of the fio job file generated further down. It lives under $tmp so
 # that the "rm -r -f $tmp.*" in _cleanup removes it.
 fio_config=$tmp.fio

 # Import common functions.
 . ./common/filter
 # Prerequisite checks. The helpers are defined in the sourced common files,
 # not in this script; presumably each one skips the test when its
 # requirement is not met.
 _supported_fs generic
 _require_scratch
 _require_block_device $SCRATCH_DEV
 _require_fail_make_request

 # sysfs directory of the scratch device; its make-it-fail attribute is
 # written by start_fail_scratch_dev / stop_fail_scratch_dev.
 SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`

 # Arm the global fail_make_request fault-injection knobs: probability 100,
 # times 9999999, verbose 0. The per-device make-it-fail switch is toggled
 # separately by start_fail_scratch_dev / stop_fail_scratch_dev.
 # Globals: DEBUGFS_MNT (read)
 # Outputs: one progress line on stdout
 allow_fail_make_request()
 {
     echo "Allow global fail_make_request feature"
     echo 100 > "$DEBUGFS_MNT/fail_make_request/probability"
     echo 9999999 > "$DEBUGFS_MNT/fail_make_request/times"
     # Go through $DEBUGFS_MNT like the other knobs instead of assuming
     # debugfs is mounted at /sys/kernel/debug.
     echo 0 > "$DEBUGFS_MNT/fail_make_request/verbose"
 }

 # Disarm the global fail_make_request knobs by writing 0 to both
 # probability and times.
 # Globals: DEBUGFS_MNT (read)
 # Outputs: one progress line on stdout
 disallow_fail_make_request()
 {
     echo "Disallow global fail_make_request feature"
     # Quoted so a mount point containing whitespace does not turn the
     # redirection into an "ambiguous redirect" error.
     echo 0 > "$DEBUGFS_MNT/fail_make_request/probability"
     echo 0 > "$DEBUGFS_MNT/fail_make_request/times"
 }

 # Switch fault injection on for the scratch device by writing 1 to its
 # make-it-fail attribute, and log the equivalent command to $seqres.full.
 # Globals: SYSFS_BDEV, seqres (read)
 # Outputs: one progress line on stdout
 start_fail_scratch_dev()
 {
     echo "Force SCRATCH_DEV device failure"
     # Redirect targets are quoted so paths with whitespace still work.
     echo " echo 1 > $SYSFS_BDEV/make-it-fail" >> "$seqres.full"
     echo 1 > "$SYSFS_BDEV/make-it-fail"
 }

 # Switch fault injection off for the scratch device by writing 0 to its
 # make-it-fail attribute, and log the equivalent command to $seqres.full.
 # Globals: SYSFS_BDEV, seqres (read)
 # Outputs: one progress line on stdout
 stop_fail_scratch_dev()
 {
     echo "Make SCRATCH_DEV device operable again"
     # Redirect targets are quoted so paths with whitespace still work.
     echo " echo 0 > $SYSFS_BDEV/make-it-fail" >> "$seqres.full"
     echo 0 > "$SYSFS_BDEV/make-it-fail"
 }

 # Override the default cleanup function: signal any still-running
 # fsstress/fio workers, disarm fault injection, and delete this test's
 # temporary files.
 # Globals: fs_pid, fio_pid, tmp (read)
 _cleanup()
 {
 	# Either PID may be unset (worker never started, or already reaped by
 	# _workout), so they are expanded unquoted on purpose and kill is
 	# skipped when there is nothing to signal.
 	if [ -n "$fs_pid$fio_pid" ]; then
 		kill $fs_pid $fio_pid &> /dev/null
 	fi
 	disallow_fail_make_request
 	cd /
 	# With an empty $tmp the glob would degrade to ".*" evaluated in "/",
 	# so only remove files when the prefix is known.
 	if [ -n "$tmp" ]; then
 		rm -r -f -- "$tmp".*
 	fi
 }

 # Workload sizing: the run time grows with TIME_FACTOR, the job count with
 # LOAD_FACTOR, and FILE_SIZE is the value reported by "blockdev --getsz"
 # for the scratch device multiplied by 512.
 RUN_TIME=$(( 20 + 10 * $TIME_FACTOR ))
 NUM_JOBS=$(( 4 * LOAD_FACTOR ))
 BLK_DEV_SIZE=$(blockdev --getsz $SCRATCH_DEV)
 FILE_SIZE=$(( BLK_DEV_SIZE * 512 ))

 # Core dumps from fio or fsstress must not count as a test failure.
 ulimit -c 0

 # Generate the fio job file and make sure fio can handle it.
 # The here-document body and its terminator deliberately start in column 0:
 # with "<<EOF" an indented "EOF" line is not recognised as the terminator,
 # and the shell would swallow the remainder of the script into the job file.
 cat >"$fio_config" <<EOF
###########
# $seq test's fio activity
# Filenames derived from jobsname and jobid like follows:
# ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
[global]
ioengine=libaio
bs=4k
directory=${SCRATCH_MNT}
filesize=${FILE_SIZE}
size=9999T
continue_on_error=write
ignore_error=EIO,ENOSPC:EIO
error_dump=0

[stress_dio_aio_activity]
create_on_open=1
fallocate=none
iodepth=128*${LOAD_FACTOR}
direct=1
buffered=0
numjobs=${NUM_JOBS}
rw=randwrite
runtime=40+${RUN_TIME}
time_based

[stress_mmap_activity]
ioengine=mmap
create_on_open=0
fallocate=1
fdatasync=40960
filesize=8M
size=9999T
numjobs=${NUM_JOBS}
rw=randwrite
runtime=40+${RUN_TIME}
time_based

EOF

 _require_fio "$fio_config"

 # Switch the sync-type fsstress operations off so the I/O load stays higher;
 # the extra flags are appended to whatever FSSTRESS_AVOID already holds.
 FSSTRESS_AVOID+=" -ffsync=0 -fsync=0 -ffdatasync=0 -f setattr=1"

 # Run the test scenario: start fsstress and fio in the background on the
 # scratch filesystem, fail the scratch device underneath them, check that an
 # fsync-backed write now fails, then unmount, restore the device and do one
 # mount/unmount cycle.
 # Globals: reads SCRATCH_MNT, FSSTRESS_AVOID, FSSTRESS_PROG, FIO_PROG,
 #          fio_config, RUN_TIME, seqres; sets out and args; sets fs_pid and
 #          fio_pid while the workers run and unsets them once reaped.
 # Returns: status of the final "run_check _scratch_unmount".
 _workout()
 {
 	# fsstress working directory on the scratch filesystem, unique per shell PID
 	out=$SCRATCH_MNT/fsstress.$$
 	# -n999999999: presumably an op count large enough that fsstress keeps
 	# running until it is killed below -- confirm against fsstress usage.
 	args=`_scale_fsstress_args -p 1 -n999999999 -f setattr=0 $FSSTRESS_AVOID -d $out`
 	echo ""
 	echo "Start fsstress.."
 	echo ""
 	echo "fsstress $args" >> $seqres.full
 	$FSSTRESS_PROG $args > /dev/null 2>&1 &
 	# Remembered so that _cleanup can kill the worker on an early exit
 	fs_pid=$!
 	echo "Start fio.."
 	# Keep a copy of the job file in the full log
 	cat $fio_config >>  $seqres.full
 	$FIO_PROG $fio_config >> $seqres.full 2>&1 &
 	fio_pid=$!

 	# Let the workload run for a while, then force device failure
 	sleep $RUN_TIME
 	start_fail_scratch_dev
 	# After the device enters the failed state the filesystem may not know
 	# about it yet, so a buffered write(2) may still succeed, but any
 	# integrity operation (sync, fsync, fdatasync, direct I/O) should fail.
 	# Hence a *successful* dd with conv=fsync is the error case here.
 	dd if=/dev/zero of=$SCRATCH_MNT/touch_failed_filesystem count=1 bs=4k conv=fsync \
 	    >> $seqres.full 2>&1 && \
 	    _fail "failed: still able to perform integrity fsync on $SCRATCH_MNT"

 	# Only fsstress is killed; fio is simply waited for.
 	kill $fs_pid &> /dev/null
 	wait $fs_pid
 	wait $fio_pid
 	# Both workers are reaped; clear the PIDs so _cleanup has nothing to kill.
 	unset fs_pid
 	unset fio_pid

 	# We expect that a broken filesystem can still be unmounted
 	run_check _scratch_unmount
 	# Once the filesystem is unmounted nothing can write to the block device,
 	# so it is now safe to bring the device back to its normal state
 	stop_fail_scratch_dev

 	# To check that the filesystem can recover its journal on mount(2),
 	# perform a mount/umount cycle; after that all errors should be fixed
 	_scratch_mount
 	run_check _scratch_unmount
 }

 # real QA test starts here

 # Create the scratch filesystem; mkfs output goes to the full log only and a
 # failure aborts the test via _fail.
 _scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed"
 _scratch_mount
 # Arm the global fault-injection knobs before the workload starts; the
 # device itself is only failed later, from inside _workout.
 allow_fail_make_request
 _workout
 # Store _workout's exit status in $status (presumably consumed by the test
 # harness on exit -- confirm in common/preamble).
 status=$?
 exit
	#! /bin/bash
	# SPDX-License-Identifier: GPL-2.0

	#
	# FSQA Test No. generic/019
	#
	# Run fsstress and fio(dio/aio and mmap) and simulate disk failure
	# check filesystem consistency at the end.
	#
	# Pull in the shared test preamble and declare this test (the words after
	# _begin_fstest are presumably the test's group tags -- confirm in
	# common/preamble).
	. ./common/preamble
	_begin_fstest aio dangerous enospc rw stress recoveryloop

	# Path of the fio job file generated further down. It lives under $tmp so
	# that the "rm -r -f $tmp.*" in _cleanup removes it.
	fio_config=$tmp.fio

	# Import common functions.
	. ./common/filter
	# Prerequisite checks. The helpers are defined in the sourced common files,
	# not in this script; presumably each one skips the test when its
	# requirement is not met.
	_supported_fs generic
	_require_scratch
	_require_block_device $SCRATCH_DEV
	_require_fail_make_request

	# sysfs directory of the scratch device; its make-it-fail attribute is
	# written by start_fail_scratch_dev / stop_fail_scratch_dev.
	SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`

	# Arm the global fail_make_request fault-injection knobs: probability 100,
	# times 9999999, verbose 0. The per-device make-it-fail switch is toggled
	# separately by start_fail_scratch_dev / stop_fail_scratch_dev.
	# Globals: DEBUGFS_MNT (read)
	# Outputs: one progress line on stdout
	allow_fail_make_request()
	{
		echo "Allow global fail_make_request feature"
		echo 100 > "$DEBUGFS_MNT/fail_make_request/probability"
		echo 9999999 > "$DEBUGFS_MNT/fail_make_request/times"
		# Go through $DEBUGFS_MNT like the other knobs instead of assuming
		# debugfs is mounted at /sys/kernel/debug.
		echo 0 > "$DEBUGFS_MNT/fail_make_request/verbose"
	}

	# Disarm the global fail_make_request knobs by writing 0 to both
	# probability and times.
	# Globals: DEBUGFS_MNT (read)
	# Outputs: one progress line on stdout
	disallow_fail_make_request()
	{
		echo "Disallow global fail_make_request feature"
		# Quoted so a mount point containing whitespace does not turn the
		# redirection into an "ambiguous redirect" error.
		echo 0 > "$DEBUGFS_MNT/fail_make_request/probability"
		echo 0 > "$DEBUGFS_MNT/fail_make_request/times"
	}

	# Switch fault injection on for the scratch device by writing 1 to its
	# make-it-fail attribute, and log the equivalent command to $seqres.full.
	# Globals: SYSFS_BDEV, seqres (read)
	# Outputs: one progress line on stdout
	start_fail_scratch_dev()
	{
		echo "Force SCRATCH_DEV device failure"
		# Redirect targets are quoted so paths with whitespace still work.
		echo " echo 1 > $SYSFS_BDEV/make-it-fail" >> "$seqres.full"
		echo 1 > "$SYSFS_BDEV/make-it-fail"
	}

	# Switch fault injection off for the scratch device by writing 0 to its
	# make-it-fail attribute, and log the equivalent command to $seqres.full.
	# Globals: SYSFS_BDEV, seqres (read)
	# Outputs: one progress line on stdout
	stop_fail_scratch_dev()
	{
		echo "Make SCRATCH_DEV device operable again"
		# Redirect targets are quoted so paths with whitespace still work.
		echo " echo 0 > $SYSFS_BDEV/make-it-fail" >> "$seqres.full"
		echo 0 > "$SYSFS_BDEV/make-it-fail"
	}

	# Override the default cleanup function: signal any still-running
	# fsstress/fio workers, disarm fault injection, and delete this test's
	# temporary files.
	# Globals: fs_pid, fio_pid, tmp (read)
	_cleanup()
	{
		# Either PID may be unset (worker never started, or already reaped by
		# _workout), so they are expanded unquoted on purpose and kill is
		# skipped when there is nothing to signal.
		if [ -n "$fs_pid$fio_pid" ]; then
			kill $fs_pid $fio_pid &> /dev/null
		fi
		disallow_fail_make_request
		cd /
		# With an empty $tmp the glob would degrade to ".*" evaluated in "/",
		# so only remove files when the prefix is known.
		if [ -n "$tmp" ]; then
			rm -r -f -- "$tmp".*
		fi
	}

	# Workload sizing: the run time grows with TIME_FACTOR, the job count with
	# LOAD_FACTOR, and FILE_SIZE is the value reported by "blockdev --getsz"
	# for the scratch device multiplied by 512.
	RUN_TIME=$(( 20 + 10 * $TIME_FACTOR ))
	NUM_JOBS=$(( 4 * LOAD_FACTOR ))
	BLK_DEV_SIZE=$(blockdev --getsz $SCRATCH_DEV)
	FILE_SIZE=$(( BLK_DEV_SIZE * 512 ))

	# Core dumps from fio or fsstress must not count as a test failure.
	ulimit -c 0

	# Generate the fio job file and make sure fio can handle it.
	# The here-document body and its terminator deliberately start in column 0:
	# with "<<EOF" an indented "EOF" line is not recognised as the terminator,
	# and the shell would swallow the remainder of the script into the job file.
	cat >"$fio_config" <<EOF
###########
# $seq test's fio activity
# Filenames derived from jobsname and jobid like follows:
# ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
[global]
ioengine=libaio
bs=4k
directory=${SCRATCH_MNT}
filesize=${FILE_SIZE}
size=9999T
continue_on_error=write
ignore_error=EIO,ENOSPC:EIO
error_dump=0

[stress_dio_aio_activity]
create_on_open=1
fallocate=none
iodepth=128*${LOAD_FACTOR}
direct=1
buffered=0
numjobs=${NUM_JOBS}
rw=randwrite
runtime=40+${RUN_TIME}
time_based

[stress_mmap_activity]
ioengine=mmap
create_on_open=0
fallocate=1
fdatasync=40960
filesize=8M
size=9999T
numjobs=${NUM_JOBS}
rw=randwrite
runtime=40+${RUN_TIME}
time_based

EOF

	_require_fio "$fio_config"

	# Switch the sync-type fsstress operations off so the I/O load stays higher;
	# the extra flags are appended to whatever FSSTRESS_AVOID already holds.
	FSSTRESS_AVOID+=" -ffsync=0 -fsync=0 -ffdatasync=0 -f setattr=1"

	# Run the test scenario: start fsstress and fio in the background on the
	# scratch filesystem, fail the scratch device underneath them, check that an
	# fsync-backed write now fails, then unmount, restore the device and do one
	# mount/unmount cycle.
	# Globals: reads SCRATCH_MNT, FSSTRESS_AVOID, FSSTRESS_PROG, FIO_PROG,
	#          fio_config, RUN_TIME, seqres; sets out and args; sets fs_pid and
	#          fio_pid while the workers run and unsets them once reaped.
	# Returns: status of the final "run_check _scratch_unmount".
	_workout()
	{
	# fsstress working directory on the scratch filesystem, unique per shell PID
	out=$SCRATCH_MNT/fsstress.$$
	# -n999999999: presumably an op count large enough that fsstress keeps
	# running until it is killed below -- confirm against fsstress usage.
	args=`_scale_fsstress_args -p 1 -n999999999 -f setattr=0 $FSSTRESS_AVOID -d $out`
	echo ""
	echo "Start fsstress.."
	echo ""
	echo "fsstress $args" >> $seqres.full
	$FSSTRESS_PROG $args > /dev/null 2>&1 &
	# Remembered so that _cleanup can kill the worker on an early exit
	fs_pid=$!
	echo "Start fio.."
	# Keep a copy of the job file in the full log
	cat $fio_config >> $seqres.full
	$FIO_PROG $fio_config >> $seqres.full 2>&1 &
	fio_pid=$!

	# Let the workload run for a while, then force device failure
	sleep $RUN_TIME
	start_fail_scratch_dev
	# After the device enters the failed state the filesystem may not know
	# about it yet, so a buffered write(2) may still succeed, but any
	# integrity operation (sync, fsync, fdatasync, direct I/O) should fail.
	# Hence a *successful* dd with conv=fsync is the error case here.
	dd if=/dev/zero of=$SCRATCH_MNT/touch_failed_filesystem count=1 bs=4k conv=fsync \
	>> $seqres.full 2>&1 && \
	_fail "failed: still able to perform integrity fsync on $SCRATCH_MNT"

	# Only fsstress is killed; fio is simply waited for.
	kill $fs_pid &> /dev/null
	wait $fs_pid
	wait $fio_pid
	# Both workers are reaped; clear the PIDs so _cleanup has nothing to kill.
	unset fs_pid
	unset fio_pid

	# We expect that a broken filesystem can still be unmounted
	run_check _scratch_unmount
	# Once the filesystem is unmounted nothing can write to the block device,
	# so it is now safe to bring the device back to its normal state
	stop_fail_scratch_dev

	# To check that the filesystem can recover its journal on mount(2),
	# perform a mount/umount cycle; after that all errors should be fixed
	_scratch_mount
	run_check _scratch_unmount
	}

	# real QA test starts here

	# Create the scratch filesystem; mkfs output goes to the full log only.
	# The operator must be a real "||": written as "\|\|" it is passed to
	# _scratch_mkfs as two literal arguments and _fail stops being the
	# error branch.
	_scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed"
	_scratch_mount
	# Arm the global fault-injection knobs before the workload starts; the
	# device itself is only failed later, from inside _workout.
	allow_fail_make_request
	_workout
	# Store _workout's exit status in $status (presumably consumed by the test
	# harness on exit -- confirm in common/preamble).
	status=$?
	exit