| From 9a65d460e2411c1f950768a5aeba178e30d9c3e8 Mon Sep 17 00:00:00 2001 |
| From: Eric Biggers <ebiggers@google.com> |
| Date: Wed, 19 Feb 2020 10:30:47 -0800 |
| Subject: [PATCH] ext4: fix race between writepages and enabling |
| EXT4_EXTENTS_FL |
| |
| commit cb85f4d23f794e24127f3e562cb3b54b0803f456 upstream. |
| |
| If EXT4_EXTENTS_FL is set on an inode while ext4_writepages() is running |
| on it, the following warning in ext4_add_complete_io() can be hit: |
| |
| WARNING: CPU: 1 PID: 0 at fs/ext4/page-io.c:234 ext4_put_io_end_defer+0xf0/0x120 |
| |
| Here's a minimal reproducer (not 100% reliable) (root isn't required): |
| |
| while true; do |
| sync |
| done & |
| while true; do |
| rm -f file |
| touch file |
| chattr -e file |
| echo X >> file |
| chattr +e file |
| done |
| |
| The problem is that in ext4_writepages(), ext4_should_dioread_nolock() |
| (which only returns true on extent-based files) is checked once to set |
| the number of reserved journal credits, and also again later to select |
| the flags for ext4_map_blocks() and copy the reserved journal handle to |
| ext4_io_end::handle. But if EXT4_EXTENTS_FL is being concurrently set, |
| the first check can see dioread_nolock disabled while the later one can |
| see it enabled, causing the reserved handle to unexpectedly be NULL. |
| |
| Since changing EXT4_EXTENTS_FL is uncommon, and there may be other races |
| related to doing so as well, fix this by synchronizing changes to |
| EXT4_EXTENTS_FL with ext4_writepages() via the existing |
| s_writepages_rwsem (previously called s_journal_flag_rwsem). |
| |
| This was originally reported by syzbot without a reproducer at |
| https://syzkaller.appspot.com/bug?extid=2202a584a00fffd19fbf, |
| but now that dioread_nolock is the default I also started seeing this |
| when running syzkaller locally. |
| |
| Link: https://lore.kernel.org/r/20200219183047.47417-3-ebiggers@kernel.org |
| Reported-by: syzbot+2202a584a00fffd19fbf@syzkaller.appspotmail.com |
| Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io") |
| Signed-off-by: Eric Biggers <ebiggers@google.com> |
| Signed-off-by: Theodore Ts'o <tytso@mit.edu> |
| Reviewed-by: Jan Kara <jack@suse.cz> |
| Cc: stable@kernel.org |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h |
| index 05e103b25b5f..490e34ad927c 100644 |
| --- a/fs/ext4/ext4.h |
| +++ b/fs/ext4/ext4.h |
| @@ -1531,7 +1531,10 @@ struct ext4_sb_info { |
| struct ratelimit_state s_warning_ratelimit_state; |
| struct ratelimit_state s_msg_ratelimit_state; |
| |
| - /* Barrier between changing inodes' journal flags and writepages ops. */ |
| + /* |
| + * Barrier between writepages ops and changing any inode's JOURNAL_DATA |
| + * or EXTENTS flag. |
| + */ |
| struct percpu_rw_semaphore s_writepages_rwsem; |
| struct dax_device *s_daxdev; |
| }; |
| diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c |
| index b1e4d359f73b..be4ee3dcc5cf 100644 |
| --- a/fs/ext4/migrate.c |
| +++ b/fs/ext4/migrate.c |
| @@ -427,6 +427,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) |
| |
| int ext4_ext_migrate(struct inode *inode) |
| { |
| + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| handle_t *handle; |
| int retval = 0, i; |
| __le32 *i_data; |
| @@ -451,6 +452,8 @@ int ext4_ext_migrate(struct inode *inode) |
| */ |
| return retval; |
| |
| + percpu_down_write(&sbi->s_writepages_rwsem); |
| + |
| /* |
| * Worst case we can touch the allocation bitmaps, a bgd |
| * block, and a block to link in the orphan list. We do need |
| @@ -461,7 +464,7 @@ int ext4_ext_migrate(struct inode *inode) |
| |
| if (IS_ERR(handle)) { |
| retval = PTR_ERR(handle); |
| - return retval; |
| + goto out_unlock; |
| } |
| goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * |
| EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; |
| @@ -472,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode) |
| if (IS_ERR(tmp_inode)) { |
| retval = PTR_ERR(tmp_inode); |
| ext4_journal_stop(handle); |
| - return retval; |
| + goto out_unlock; |
| } |
| i_size_write(tmp_inode, i_size_read(inode)); |
| /* |
| @@ -514,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode) |
| */ |
| ext4_orphan_del(NULL, tmp_inode); |
| retval = PTR_ERR(handle); |
| - goto out; |
| + goto out_tmp_inode; |
| } |
| |
| ei = EXT4_I(inode); |
| @@ -595,10 +598,11 @@ int ext4_ext_migrate(struct inode *inode) |
| /* Reset the extent details */ |
| ext4_ext_tree_init(handle, tmp_inode); |
| ext4_journal_stop(handle); |
| -out: |
| +out_tmp_inode: |
| unlock_new_inode(tmp_inode); |
| iput(tmp_inode); |
| - |
| +out_unlock: |
| + percpu_up_write(&sbi->s_writepages_rwsem); |
| return retval; |
| } |
| |
| @@ -608,7 +612,8 @@ int ext4_ext_migrate(struct inode *inode) |
| int ext4_ind_migrate(struct inode *inode) |
| { |
| struct ext4_extent_header *eh; |
| - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
| + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| + struct ext4_super_block *es = sbi->s_es; |
| struct ext4_inode_info *ei = EXT4_I(inode); |
| struct ext4_extent *ex; |
| unsigned int i, len; |
| @@ -632,9 +637,13 @@ int ext4_ind_migrate(struct inode *inode) |
| if (test_opt(inode->i_sb, DELALLOC)) |
| ext4_alloc_da_blocks(inode); |
| |
| + percpu_down_write(&sbi->s_writepages_rwsem); |
| + |
| handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); |
| - if (IS_ERR(handle)) |
| - return PTR_ERR(handle); |
| + if (IS_ERR(handle)) { |
| + ret = PTR_ERR(handle); |
| + goto out_unlock; |
| + } |
| |
| down_write(&EXT4_I(inode)->i_data_sem); |
| ret = ext4_ext_check_inode(inode); |
| @@ -669,5 +678,7 @@ int ext4_ind_migrate(struct inode *inode) |
| errout: |
| ext4_journal_stop(handle); |
| up_write(&EXT4_I(inode)->i_data_sem); |
| +out_unlock: |
| + percpu_up_write(&sbi->s_writepages_rwsem); |
| return ret; |
| } |
| -- |
| 2.7.4 |
| |