Btrfs: add a recovery_errors mount option to limp along with a corrupted FS
When btrfs isn't able to read a tree root, it generally aborts the
mount. This patch adds a recovery_errors mount option
(mount -o recovery_errors) that forces it to try to continue anyway.
Right now it only deals with corrupt csum roots, but it will later
gain support for corrupt extent allocation tree roots. When the csum
root cannot be read in this mode, the FS is forced readonly and all
data csums are ignored.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a..c2b0c98 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -113,7 +113,8 @@
u32 csum;
u32 *cb_sum = &cb->sums;
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+ btrfs_test_opt(root, NODATASUM))
return 0;
for (i = 0; i < cb->nr_pages; i++) {
@@ -577,8 +578,13 @@
u64 em_start;
struct extent_map *em;
int ret = -ENOMEM;
+ int skip_sum = 0;
u32 *sums;
+ if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+ btrfs_test_opt(root, NODATASUM))
+ skip_sum = 1;
+
tree = &BTRFS_I(inode)->io_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
@@ -670,7 +676,7 @@
*/
atomic_inc(&cb->pending_bios);
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ if (!skip_sum) {
ret = btrfs_lookup_bio_sums(root, inode,
comp_bio, sums);
BUG_ON(ret);
@@ -698,7 +704,7 @@
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
BUG_ON(ret);
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ if (!skip_sum) {
ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
BUG_ON(ret);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ba59f..660364d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1410,6 +1410,7 @@
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
#define BTRFS_MOUNT_RECOVERY (1 << 18)
+#define BTRFS_MOUNT_RECOVERY_ERR (1 << 19)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b6a5c0d..5b9d380 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1773,11 +1773,14 @@
btrfs_set_backup_dev_root_level(root_backup,
btrfs_header_level(info->dev_root->node));
- btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start);
- btrfs_set_backup_csum_root_gen(root_backup,
+ if (info->csum_root->node) {
+ btrfs_set_backup_csum_root(root_backup,
+ info->csum_root->node->start);
+ btrfs_set_backup_csum_root_gen(root_backup,
btrfs_header_generation(info->csum_root->node));
- btrfs_set_backup_csum_root_level(root_backup,
+ btrfs_set_backup_csum_root_level(root_backup,
btrfs_header_level(info->csum_root->node));
+ }
btrfs_set_backup_total_bytes(root_backup,
btrfs_super_total_bytes(info->super_copy));
@@ -2306,8 +2309,25 @@
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_CSUM_TREE_OBJECTID, csum_root);
- if (ret)
- goto recovery_tree_root;
+ if (ret) {
+ if (!btrfs_test_opt(tree_root, RECOVERY_ERR))
+ goto recovery_tree_root;
+
+ /* don't use the log in recovery mode, it won't be valid */
+ btrfs_set_super_log_root(disk_super, 0);
+
+ /* we can't trust the free space cache either */
+ btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
+
+ /* we must set ourselves readonly */
+ sb->s_flags |= MS_RDONLY;
+
+ /* the crc tree is dead, don't use it */
+ btrfs_set_opt(fs_info->mount_opt, NODATASUM);
+
+ printk("btrfs failed to read the csum root, forcing readonly\n");
+ ret = 0;
+ }
csum_root->track_dirty = 1;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c7fb3a4..1ae703b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -167,6 +167,9 @@
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ if (!root->fs_info->csum_root->node)
+ return -EIO;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -550,6 +553,8 @@
int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
root = root->fs_info->csum_root;
+ if (!root->node)
+ return -EIO;
path = btrfs_alloc_path();
if (!path)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e16215f..c182bf4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1479,6 +1479,10 @@
int ret = 0;
int skip_sum;
+ /*
+ * even if we're mounted nodatasum, once the inode has sums we have
+ * to keep summing it for writes.
+ */
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (btrfs_is_free_space_inode(root, inode))
@@ -1488,10 +1492,11 @@
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
+ /* for reads, we can skip the crc if we're mounted NODATASUM */
if (bio_flags & EXTENT_BIO_COMPRESSED) {
return btrfs_submit_compressed_read(inode, bio,
mirror_num, bio_flags);
- } else if (!skip_sum) {
+ } else if (!skip_sum && !btrfs_test_opt(root, NODATASUM)) {
ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
if (ret)
return ret;
@@ -1849,6 +1854,9 @@
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
goto good;
+ if (btrfs_test_opt(root, NODATASUM))
+ goto good;
+
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
@@ -5580,10 +5588,15 @@
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start;
u32 *private = dip->csums;
+ int check_sums = 1;
+
+ if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+ btrfs_test_opt(root, NODATASUM))
+ check_sums = 0;
start = dip->logical_offset;
do {
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ if (check_sums) {
struct page *page = bvec->bv_page;
char *kaddr;
u32 csum = ~(u32)0;
@@ -5949,6 +5962,8 @@
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ if (!write)
+ skip_sum |= btrfs_test_opt(root, NODATASUM);
dip = kmalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8bd9d6d..a95ebb9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -164,7 +164,8 @@
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
- Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err,
+ Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_recovery_errors,
+ Opt_err,
};
static match_table_t tokens = {
@@ -199,6 +200,7 @@
{Opt_inode_cache, "inode_cache"},
{Opt_no_space_cache, "nospace_cache"},
{Opt_recovery, "recovery"},
+ {Opt_recovery_errors, "recovery_errors"},
{Opt_err, NULL},
};
@@ -397,6 +399,10 @@
printk(KERN_INFO "btrfs: enabling auto recovery");
btrfs_set_opt(info->mount_opt, RECOVERY);
break;
+ case Opt_recovery_errors:
+ printk(KERN_INFO "btrfs: enabling recovery failure mode");
+ btrfs_set_opt(info->mount_opt, RECOVERY_ERR);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);