| From 8a0e56d34fcae7f78baa08bb54bc98173cd33293 Mon Sep 17 00:00:00 2001 |
| From: Theodore Ts'o <tytso@mit.edu> |
| Date: Wed, 11 Jan 2017 21:50:46 -0500 |
| Subject: [PATCH] ext4: fix deadlock between inline_data and |
| ext4_expand_extra_isize_ea() |
| |
| commit c755e251357a0cee0679081f08c3f4ba797a8009 upstream. |
| |
| The xattr_sem deadlock problems fixed in commit 2e81a4eeedca: "ext4: |
| avoid deadlock when expanding inode size" didn't include the use of |
| xattr_sem in fs/ext4/inline.c. With the addition of project quota |
| which added a new extra inode field, this exposed deadlocks in the |
| inline_data code similar to the ones fixed by 2e81a4eeedca. |
| |
| The deadlock can be reproduced via: |
| |
| dmesg -n 7 |
| mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768 |
| mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc |
| mkdir /vdc/a |
| umount /vdc |
| mount -t ext4 /dev/vdc /vdc |
| echo foo > /vdc/a/foo |
| |
| and looks like this: |
| |
| [ 11.158815] |
| [ 11.160276] ============================================= |
| [ 11.161960] [ INFO: possible recursive locking detected ] |
| [ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G W |
| [ 11.161960] --------------------------------------------- |
| [ 11.161960] bash/2519 is trying to acquire lock: |
| [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd |
| [ 11.161960] |
| [ 11.161960] but task is already holding lock: |
| [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152 |
| [ 11.161960] |
| [ 11.161960] other info that might help us debug this: |
| [ 11.161960] Possible unsafe locking scenario: |
| [ 11.161960] |
| [ 11.161960] CPU0 |
| [ 11.161960] ---- |
| [ 11.161960] lock(&ei->xattr_sem); |
| [ 11.161960] lock(&ei->xattr_sem); |
| [ 11.161960] |
| [ 11.161960] *** DEADLOCK *** |
| [ 11.161960] |
| [ 11.161960] May be due to missing lock nesting notation |
| [ 11.161960] |
| [ 11.161960] 4 locks held by bash/2519: |
| [ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e |
| [ 11.161960] #1: (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a |
| [ 11.161960] #2: (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622 |
| [ 11.161960] #3: (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152 |
| [ 11.161960] |
| [ 11.161960] stack backtrace: |
| [ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf #160 |
| [ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014 |
| [ 11.161960] Call Trace: |
| [ 11.161960] dump_stack+0x72/0xa3 |
| [ 11.161960] __lock_acquire+0xb7c/0xcb9 |
| [ 11.161960] ? kvm_clock_read+0x1f/0x29 |
| [ 11.161960] ? __lock_is_held+0x36/0x66 |
| [ 11.161960] ? __lock_is_held+0x36/0x66 |
| [ 11.161960] lock_acquire+0x106/0x18a |
| [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd |
| [ 11.161960] down_write+0x39/0x72 |
| [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd |
| [ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd |
| [ 11.161960] ? _raw_read_unlock+0x22/0x2c |
| [ 11.161960] ? jbd2_journal_extend+0x1e2/0x262 |
| [ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60 |
| [ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d |
| [ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 |
| [ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 |
| [ 11.161960] ext4_try_add_inline_entry+0x69/0x152 |
| [ 11.161960] ext4_add_entry+0xa3/0x848 |
| [ 11.161960] ? __brelse+0x14/0x2f |
| [ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f |
| [ 11.161960] ext4_add_nondir+0x17/0x5b |
| [ 11.161960] ext4_create+0xcf/0x133 |
| [ 11.161960] ? ext4_mknod+0x12f/0x12f |
| [ 11.161960] lookup_open+0x39e/0x3fb |
| [ 11.161960] ? __wake_up+0x1a/0x40 |
| [ 11.161960] ? lock_acquire+0x11e/0x18a |
| [ 11.161960] path_openat+0x35c/0x67a |
| [ 11.161960] ? sched_clock_cpu+0xd7/0xf2 |
| [ 11.161960] do_filp_open+0x36/0x7c |
| [ 11.161960] ? _raw_spin_unlock+0x22/0x2c |
| [ 11.161960] ? __alloc_fd+0x169/0x173 |
| [ 11.161960] do_sys_open+0x59/0xcc |
| [ 11.161960] SyS_open+0x1d/0x1f |
| [ 11.161960] do_int80_syscall_32+0x4f/0x61 |
| [ 11.161960] entry_INT80_32+0x2f/0x2f |
| [ 11.161960] EIP: 0xb76ad469 |
| [ 11.161960] EFLAGS: 00000286 CPU: 0 |
| [ 11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6 |
| [ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0 |
| [ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b |
| |
| Cc: stable@vger.kernel.org # 3.10 (requires 2e81a4eeedca as a prereq) |
| Reported-by: George Spelvin <linux@sciencehorizons.net> |
| Signed-off-by: Theodore Ts'o <tytso@mit.edu> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c |
| index d8ca4b9f9dd6..028329721bbe 100644 |
| --- a/fs/ext4/inline.c |
| +++ b/fs/ext4/inline.c |
| @@ -376,7 +376,7 @@ out: |
| static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, |
| unsigned int len) |
| { |
| - int ret, size; |
| + int ret, size, no_expand; |
| struct ext4_inode_info *ei = EXT4_I(inode); |
| |
| if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) |
| @@ -386,15 +386,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, |
| if (size < len) |
| return -ENOSPC; |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| |
| if (ei->i_inline_off) |
| ret = ext4_update_inline_data(handle, inode, len); |
| else |
| ret = ext4_create_inline_data(handle, inode, len); |
| |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| - |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| return ret; |
| } |
| |
| @@ -523,7 +522,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, |
| struct inode *inode, |
| unsigned flags) |
| { |
| - int ret, needed_blocks; |
| + int ret, needed_blocks, no_expand; |
| handle_t *handle = NULL; |
| int retries = 0, sem_held = 0; |
| struct page *page = NULL; |
| @@ -563,7 +562,7 @@ retry: |
| goto out; |
| } |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| sem_held = 1; |
| /* If some one has already done this for us, just exit. */ |
| if (!ext4_has_inline_data(inode)) { |
| @@ -600,7 +599,7 @@ retry: |
| put_page(page); |
| page = NULL; |
| ext4_orphan_add(handle, inode); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| sem_held = 0; |
| ext4_journal_stop(handle); |
| handle = NULL; |
| @@ -626,7 +625,7 @@ out: |
| put_page(page); |
| } |
| if (sem_held) |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| if (handle) |
| ext4_journal_stop(handle); |
| brelse(iloc.bh); |
| @@ -719,7 +718,7 @@ convert: |
| int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, |
| unsigned copied, struct page *page) |
| { |
| - int ret; |
| + int ret, no_expand; |
| void *kaddr; |
| struct ext4_iloc iloc; |
| |
| @@ -737,7 +736,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, |
| goto out; |
| } |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| BUG_ON(!ext4_has_inline_data(inode)); |
| |
| kaddr = kmap_atomic(page); |
| @@ -747,7 +746,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, |
| /* clear page dirty so that writepages wouldn't work for us. */ |
| ClearPageDirty(page); |
| |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| brelse(iloc.bh); |
| out: |
| return copied; |
| @@ -758,7 +757,7 @@ ext4_journalled_write_inline_data(struct inode *inode, |
| unsigned len, |
| struct page *page) |
| { |
| - int ret; |
| + int ret, no_expand; |
| void *kaddr; |
| struct ext4_iloc iloc; |
| |
| @@ -768,11 +767,11 @@ ext4_journalled_write_inline_data(struct inode *inode, |
| return NULL; |
| } |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| kaddr = kmap_atomic(page); |
| ext4_write_inline_data(inode, &iloc, kaddr, 0, len); |
| kunmap_atomic(kaddr); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| |
| return iloc.bh; |
| } |
| @@ -1249,7 +1248,7 @@ out: |
| int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, |
| struct inode *dir, struct inode *inode) |
| { |
| - int ret, inline_size; |
| + int ret, inline_size, no_expand; |
| void *inline_start; |
| struct ext4_iloc iloc; |
| |
| @@ -1257,7 +1256,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, |
| if (ret) |
| return ret; |
| |
| - down_write(&EXT4_I(dir)->xattr_sem); |
| + ext4_write_lock_xattr(dir, &no_expand); |
| if (!ext4_has_inline_data(dir)) |
| goto out; |
| |
| @@ -1303,7 +1302,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, |
| |
| out: |
| ext4_mark_inode_dirty(handle, dir); |
| - up_write(&EXT4_I(dir)->xattr_sem); |
| + ext4_write_unlock_xattr(dir, &no_expand); |
| brelse(iloc.bh); |
| return ret; |
| } |
| @@ -1663,7 +1662,7 @@ int ext4_delete_inline_entry(handle_t *handle, |
| struct buffer_head *bh, |
| int *has_inline_data) |
| { |
| - int err, inline_size; |
| + int err, inline_size, no_expand; |
| struct ext4_iloc iloc; |
| void *inline_start; |
| |
| @@ -1671,7 +1670,7 @@ int ext4_delete_inline_entry(handle_t *handle, |
| if (err) |
| return err; |
| |
| - down_write(&EXT4_I(dir)->xattr_sem); |
| + ext4_write_lock_xattr(dir, &no_expand); |
| if (!ext4_has_inline_data(dir)) { |
| *has_inline_data = 0; |
| goto out; |
| @@ -1705,7 +1704,7 @@ int ext4_delete_inline_entry(handle_t *handle, |
| |
| ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); |
| out: |
| - up_write(&EXT4_I(dir)->xattr_sem); |
| + ext4_write_unlock_xattr(dir, &no_expand); |
| brelse(iloc.bh); |
| if (err != -ENOENT) |
| ext4_std_error(dir->i_sb, err); |
| @@ -1804,11 +1803,11 @@ out: |
| |
| int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) |
| { |
| - int ret; |
| + int ret, no_expand; |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| ret = ext4_destroy_inline_data_nolock(handle, inode); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| |
| return ret; |
| } |
| @@ -1893,7 +1892,7 @@ out: |
| void ext4_inline_data_truncate(struct inode *inode, int *has_inline) |
| { |
| handle_t *handle; |
| - int inline_size, value_len, needed_blocks; |
| + int inline_size, value_len, needed_blocks, no_expand; |
| size_t i_size; |
| void *value = NULL; |
| struct ext4_xattr_ibody_find is = { |
| @@ -1910,7 +1909,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) |
| if (IS_ERR(handle)) |
| return; |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| if (!ext4_has_inline_data(inode)) { |
| *has_inline = 0; |
| ext4_journal_stop(handle); |
| @@ -1968,7 +1967,7 @@ out_error: |
| up_write(&EXT4_I(inode)->i_data_sem); |
| out: |
| brelse(is.iloc.bh); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| kfree(value); |
| if (inode->i_nlink) |
| ext4_orphan_del(handle, inode); |
| @@ -1984,7 +1983,7 @@ out: |
| |
| int ext4_convert_inline_data(struct inode *inode) |
| { |
| - int error, needed_blocks; |
| + int error, needed_blocks, no_expand; |
| handle_t *handle; |
| struct ext4_iloc iloc; |
| |
| @@ -2006,15 +2005,10 @@ int ext4_convert_inline_data(struct inode *inode) |
| goto out_free; |
| } |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| - if (!ext4_has_inline_data(inode)) { |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| - goto out; |
| - } |
| - |
| - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| -out: |
| + ext4_write_lock_xattr(inode, &no_expand); |
| + if (ext4_has_inline_data(inode)) |
| + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| ext4_journal_stop(handle); |
| out_free: |
| brelse(iloc.bh); |
| diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c |
| index 2eb935ca5d9e..78f778719635 100644 |
| --- a/fs/ext4/xattr.c |
| +++ b/fs/ext4/xattr.c |
| @@ -1179,16 +1179,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, |
| struct ext4_xattr_block_find bs = { |
| .s = { .not_found = -ENODATA, }, |
| }; |
| - unsigned long no_expand; |
| + int no_expand; |
| int error; |
| |
| if (!name) |
| return -EINVAL; |
| if (strlen(name) > 255) |
| return -ERANGE; |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| + ext4_write_lock_xattr(inode, &no_expand); |
| |
| error = ext4_reserve_inode_write(handle, inode, &is.iloc); |
| if (error) |
| @@ -1256,7 +1254,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, |
| ext4_xattr_update_super_block(handle, inode->i_sb); |
| inode->i_ctime = ext4_current_time(inode); |
| if (!value) |
| - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| + no_expand = 0; |
| error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); |
| /* |
| * The bh is consumed by ext4_mark_iloc_dirty, even with |
| @@ -1270,9 +1268,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, |
| cleanup: |
| brelse(is.iloc.bh); |
| brelse(bs.bh); |
| - if (no_expand == 0) |
| - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| return error; |
| } |
| |
| @@ -1356,12 +1352,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
| int error = 0, tried_min_extra_isize = 0; |
| int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); |
| int isize_diff; /* How much do we need to grow i_extra_isize */ |
| + int no_expand; |
| + |
| + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) |
| + return 0; |
| |
| - down_write(&EXT4_I(inode)->xattr_sem); |
| - /* |
| - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty |
| - */ |
| - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| retry: |
| isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; |
| if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
| @@ -1555,8 +1550,7 @@ retry: |
| } |
| brelse(bh); |
| out: |
| - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| return 0; |
| |
| cleanup: |
| @@ -1568,10 +1562,10 @@ cleanup: |
| kfree(bs); |
| brelse(bh); |
| /* |
| - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode |
| - * size expansion failed. |
| + * Inode size expansion failed; don't try again |
| */ |
| - up_write(&EXT4_I(inode)->xattr_sem); |
| + no_expand = 1; |
| + ext4_write_unlock_xattr(inode, &no_expand); |
| return error; |
| } |
| |
| diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h |
| index a92e783fa057..099c8b670ef5 100644 |
| --- a/fs/ext4/xattr.h |
| +++ b/fs/ext4/xattr.h |
| @@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler; |
| |
| #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" |
| |
| +/* |
| + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. |
| + * The first is to signal that there the inline xattrs and data are |
| + * taking up so much space that we might as well not keep trying to |
| + * expand it. The second is that xattr_sem is taken for writing, so |
| + * we shouldn't try to recurse into the inode expansion. For this |
| + * second case, we need to make sure that we take save and restore the |
| + * NO_EXPAND state flag appropriately. |
| + */ |
| +static inline void ext4_write_lock_xattr(struct inode *inode, int *save) |
| +{ |
| + down_write(&EXT4_I(inode)->xattr_sem); |
| + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| +} |
| + |
| +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) |
| +{ |
| + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) |
| + return 0; |
| + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| + return 1; |
| +} |
| + |
| +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) |
| +{ |
| + if (*save == 0) |
| + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); |
| + up_write(&EXT4_I(inode)->xattr_sem); |
| +} |
| + |
| extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); |
| |
| extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); |
| -- |
| 2.12.0 |
| |