blob: 1105253cf109ad941e2725f9a62ee64e336293cf [file] [log] [blame]
#! /bin/bash
# FS QA Test 157
#
# This test reproduces a bug in the raid6 reconstruction process that ends in
# a read failure when data is corrupted on two disks in the same horizontal
# stripe, e.g. due to bitrot.
#
# The bug happens when
# a) all disks are good to read,
# b) there is corrupted data on two disks in the same horizontal stripe due to
# something like bitrot,
# c) when rebuilding data after crc fails, btrfs is not able to tell whether
# other copies are good or corrupted because btrfs doesn't have crc for
# unallocated blocks.
#
# The kernel fixes are
# Btrfs: do not merge rbios if their fail stripe index are not identical
# Btrfs: make raid6 rebuild retry more
#
#-----------------------------------------------------------------------
# Copyright (c) 2017 Oracle. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#-----------------------------------------------------------------------
#
# Harness bookkeeping for this test run.
status=1	# pessimistic default; only flipped to 0 at the very end of the test
tmp=/tmp/$$	# per-process prefix for temporary files
here=$(pwd)
seq=$(basename $0)
seqres=$RESULT_DIR/$seq
echo "QA output created by $seq"

# Run _cleanup and leave with $status on normal exit as well as on
# HUP, INT, QUIT and TERM ($status is expanded when the trap fires).
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM
# Cleanup handler: change to / and delete every file whose name starts
# with "$tmp.".
_cleanup()
{
	cd /
	# An empty $tmp would turn the pattern into ".*", which is evaluated in
	# / after the cd above -- skip the removal in that case.
	if [ -n "$tmp" ]; then
		# Quote the prefix so whitespace or glob characters in it are
		# taken literally; the trailing .* stays unquoted so it globs.
		rm -f "$tmp".*
	fi
}
# get standard environment, filters and checks
. ./common/rc
. ./common/filter

# remove previous $seqres.full before test; quoted so a results path that
# contains whitespace is removed as a single file name
rm -f "$seqres.full"

# real QA test starts here
# NOTE(review): the helpers below come from the sourced common files;
# presumably each one skips the test when its requirement is not met --
# confirm in common/rc.
_supported_fs btrfs
_supported_os Linux
# the test builds its filesystem on 4 scratch pool devices
_require_scratch_dev_pool 4
# the stripe lookup functions parse "btrfs inspect-internal dump-tree"
_require_btrfs_command inspect-internal dump-tree
# Print the physical offset of stripe 0 of the data chunk.
#
# Dumps tree 3 (the chunk tree) of $SCRATCH_DEV, keeps the line mentioning
# " DATA" or "RAID6" plus the 10 lines after it, and prints field 6 of every
# "stripe <index> devid <id> offset <offset>" line whose index is 0.
#
# Globals: BTRFS_UTIL_PROG, AWK_PROG, SCRATCH_DEV (read)
# Outputs: one offset per matching line, on stdout
get_physical_stripe0()
{
	# The index is compared with ==, not a regex: /0/ would also select
	# stripes 10, 20, ...
	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" | \
		grep " DATA\|RAID6" -A 10 | \
		$AWK_PROG '($1 ~ /stripe/ && $3 ~ /devid/ && $2 == 0) { print $6 }'
}
# Print the physical offset of stripe 1 of the data chunk.
#
# Dumps tree 3 (the chunk tree) of $SCRATCH_DEV, keeps the line mentioning
# " DATA" or "RAID6" plus the 10 lines after it, and prints field 6 of every
# "stripe <index> devid <id> offset <offset>" line whose index is 1.
#
# Globals: BTRFS_UTIL_PROG, AWK_PROG, SCRATCH_DEV (read)
# Outputs: one offset per matching line, on stdout
get_physical_stripe1()
{
	# The index is compared with ==, not a regex: /1/ would also select
	# stripes 10, 11, 12, ...
	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" | \
		grep " DATA\|RAID6" -A 10 | \
		$AWK_PROG '($1 ~ /stripe/ && $3 ~ /devid/ && $2 == 1) { print $6 }'
}
_scratch_dev_pool_get 4

# Print the devid recorded for stripe index $1 in the chunk tree dump, using
# the same chunk filter as get_physical_stripe0/get_physical_stripe1.
get_stripe_devid()
{
	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" | \
		grep " DATA\|RAID6" -A 10 | \
		$AWK_PROG -v idx="$1" \
			'($1 == "stripe" && $3 == "devid" && $2 == idx) { print $4 }'
}

# Print the $1-th entry of SCRATCH_DEV_POOL.
# NOTE(review): using a devid as the position assumes mkfs hands out devids
# 1..N in SCRATCH_DEV_POOL order -- confirm against _scratch_pool_mkfs.
get_pool_device()
{
	# SCRATCH_DEV_POOL is deliberately unquoted: it is a whitespace
	# separated list of device paths
	echo $SCRATCH_DEV_POOL | $AWK_PROG -v n="$1" '{ print $n }'
}

# step 1: create a raid6 btrfs and create a 128K file
echo "step 1......mkfs.btrfs" >>"$seqres.full"
mkfs_opts="-d raid6 -b 1G"
# $mkfs_opts is deliberately unquoted so it splits into separate options
_scratch_pool_mkfs $mkfs_opts >>"$seqres.full" 2>&1

# -o nospace_cache makes sure data is written to the start position of the data
# chunk
_scratch_mount -o nospace_cache

# [0,64K) is written to stripe 0 and [64K, 128K) is written to stripe 1
$XFS_IO_PROG -f -d -c "pwrite -S 0xaa 0 128K" -c "fsync" \
	"$SCRATCH_MNT/foobar" | _filter_xfs_io

_scratch_unmount

# Look up where stripe 0 and stripe 1 really live instead of assuming a fixed
# stripe-to-device order.
stripe_0=$(get_physical_stripe0)
stripe_1=$(get_physical_stripe1)
devid_0=$(get_stripe_devid 0)
devid_1=$(get_stripe_devid 1)

# The values below become raw write targets on block devices, so refuse
# anything that is not exactly one decimal number (empty output, several
# lines, stray text).
for parsed in "$stripe_0" "$stripe_1" "$devid_0" "$devid_1"; do
	case "$parsed" in
	""|*[!0-9]*)
		_fail "cannot parse stripe 0/1 of the data chunk from the chunk tree"
		;;
	esac
done

dev_0=$(get_pool_device "$devid_0")
dev_1=$(get_pool_device "$devid_1")
if [ -z "$dev_0" ] || [ -z "$dev_1" ]; then
	_fail "devid $devid_0 or $devid_1 has no matching device in SCRATCH_DEV_POOL"
fi

# step 2: corrupt the 1st and 2nd stripe (stripe 0 and 1)
echo "step 2......simulate bitrot at offset $stripe_0 of device_${devid_0}($dev_0) and offset $stripe_1 of device_${devid_1}($dev_1)" >>"$seqres.full"

$XFS_IO_PROG -f -d -c "pwrite -S 0xbb $stripe_0 64K" "$dev_0" | _filter_xfs_io
$XFS_IO_PROG -f -d -c "pwrite -S 0xbb $stripe_1 64K" "$dev_1" | _filter_xfs_io

# step 3: read foobar to repair the bitrot
echo "step 3......repair the bitrot" >>"$seqres.full"
_scratch_mount -o nospace_cache

# read the 2nd stripe, i.e. [64K, 128K), to trigger repair
od -x -j 64K "$SCRATCH_MNT/foobar"

_scratch_dev_pool_put

# success, all done
status=0
exit