blob: 9aaf38147e7bcbc94e48e7df0e0b079f3986e159 [file] [log] [blame]
From 761543eb4d86d1ce4ce7e6200fbb15972da5147d Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 15 Feb 2020 16:40:37 -0500
Subject: [PATCH] ext4: fix potential race between online resizing and write
operations
commit 1d0c3924a92e69bfa91163bda83c12a994b4d106 upstream.
During an online resize an array of pointers to buffer heads gets
replaced so it can get enlarged. If there is a racing block
allocation or deallocation which uses the old array, and the old array
has gotten reused this can lead to a GPF or some other random kernel
memory getting modified.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443
Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu
Reported-by: Suraj Jitindar Singh <surajjs@amazon.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e5d6ee61ff48..f9645de9d04c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh_p;
if (block_group >= ngroups) {
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
@@ -280,7 +281,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- if (!sbi->s_group_desc[group_desc]) {
+ bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
+ /*
+ * sbi_array_rcu_deref returns with rcu unlocked, this is ok since
+ * the pointer being dereferenced won't be dereferenced again. By
+ * looking at the usage in add_new_gdb() the value isn't modified,
+ * just the pointer, and so it remains valid.
+ */
+ if (!bh_p) {
ext4_error(sb, "Group descriptor not loaded - "
"block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
@@ -288,10 +296,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
}
desc = (struct ext4_group_desc *)(
- (__u8 *)sbi->s_group_desc[group_desc]->b_data +
+ (__u8 *)bh_p->b_data +
offset * EXT4_DESC_SIZE(sb));
if (bh)
- *bh = sbi->s_group_desc[group_desc];
+ *bh = bh_p;
return desc;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 67b3c9218abe..39a69089a40c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1379,7 +1379,7 @@ struct ext4_sb_info {
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
- struct buffer_head **s_group_desc;
+ struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
@@ -2074,6 +2074,23 @@ struct dx_hash_info
/*
+ * Returns: sbi->field[index]
+ * Used to access an array element from the following sbi fields which require
+ * rcu protection to avoid dereferencing an invalid pointer due to reassignment
+ * - s_group_desc
+ * - s_group_info
+ * - s_flex_group
+ */
+#define sbi_array_rcu_deref(sbi, field, index) \
+({ \
+ typeof(*((sbi)->field)) _v; \
+ rcu_read_lock(); \
+ _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
+ rcu_read_unlock(); \
+ _v; \
+})
+
+/*
* Control parameters used by ext4_htree_next_block
*/
#define HASH_NB_ALWAYS 1
@@ -2635,6 +2652,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
extern bool ext4_empty_dir(struct inode *inode);
/* resize.c */
+extern void ext4_kvfree_array_rcu(void *to_free);
extern int ext4_group_add(struct super_block *sb,
struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c0e9aef376a7..a5285b4b72fd 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -17,6 +17,33 @@
#include "ext4_jbd2.h"
+struct ext4_rcu_ptr {
+ struct rcu_head rcu;
+ void *ptr;
+};
+
+static void ext4_rcu_ptr_callback(struct rcu_head *head)
+{
+ struct ext4_rcu_ptr *ptr;
+
+ ptr = container_of(head, struct ext4_rcu_ptr, rcu);
+ kvfree(ptr->ptr);
+ kfree(ptr);
+}
+
+void ext4_kvfree_array_rcu(void *to_free)
+{
+ struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
+
+ if (ptr) {
+ ptr->ptr = to_free;
+ call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
+ return;
+ }
+ synchronize_rcu();
+ kvfree(to_free);
+}
+
int ext4_resize_begin(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -560,8 +587,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
brelse(gdb);
goto out;
}
- memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
- gdb->b_size);
+ memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
+ s_group_desc, j)->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
@@ -879,13 +906,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
}
brelse(dind);
- o_group_desc = EXT4_SB(sb)->s_group_desc;
+ rcu_read_lock();
+ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+ rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
- EXT4_SB(sb)->s_group_desc = n_group_desc;
+ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
- kvfree(o_group_desc);
+ ext4_kvfree_array_rcu(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
@@ -929,9 +958,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
- o_group_desc = EXT4_SB(sb)->s_group_desc;
+ rcu_read_lock();
+ o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
+ rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
BUFFER_TRACE(gdb_bh, "get_write_access");
@@ -942,9 +973,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
- EXT4_SB(sb)->s_group_desc = n_group_desc;
+ rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
- kvfree(o_group_desc);
+ ext4_kvfree_array_rcu(o_group_desc);
return err;
}
@@ -1210,7 +1241,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
+ gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
@@ -1292,7 +1324,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
/*
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
*/
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
@@ -1519,7 +1551,8 @@ static int ext4_flex_group_add(struct super_block *sb,
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
- gdb_bh = sbi->s_group_desc[gdb_num];
+ gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
+ gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8b5c44347c4a..52e1d03dbfaf 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -970,6 +970,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head **group_desc;
int aborted = 0;
int i, err;
@@ -1000,9 +1001,12 @@ static void ext4_put_super(struct super_block *sb)
if (!sb_rdonly(sb))
ext4_commit_super(sb, 1);
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -3575,7 +3579,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
char *orig_data = kstrdup(data, GFP_KERNEL);
- struct buffer_head *bh;
+ struct buffer_head *bh, **group_desc;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
ext4_fsblk_t block;
@@ -4228,9 +4232,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
- sbi->s_group_desc = kvmalloc_array(db_count,
- sizeof(struct buffer_head *),
- GFP_KERNEL);
+ rcu_assign_pointer(sbi->s_group_desc,
+ kvmalloc_array(db_count,
+ sizeof(struct buffer_head *),
+ GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
@@ -4246,14 +4251,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
for (i = 0; i < db_count; i++) {
+ struct buffer_head *bh;
+
block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
- if (!sbi->s_group_desc[i]) {
+ bh = sb_bread_unmovable(sb, block);
+ if (!bh) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_desc)[i] = bh;
+ rcu_read_unlock();
}
sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
@@ -4654,9 +4664,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
--
2.7.4