| From 8c0bec2151a47906bf779c6715a10ce04453ab77 Mon Sep 17 00:00:00 2001 |
| From: Jiaying Zhang <jiayingz@google.com> |
| Date: Wed, 31 Aug 2011 11:50:51 -0400 |
| Subject: ext4: remove i_mutex lock in ext4_evict_inode to fix lockdep complaining |
| |
| From: Jiaying Zhang <jiayingz@google.com> |
| |
| commit 8c0bec2151a47906bf779c6715a10ce04453ab77 upstream. |
| |
| The i_mutex lock and flush_completed_IO() added by commit 2581fdc810 |
| in ext4_evict_inode() causes lockdep complaining about potential |
| deadlock in several places. In most/all of these LOCKDEP complaints |
| it looks like it's a false positive, since many of the potential |
| circular locking cases can't take place by the time the |
| ext4_evict_inode() is called; but since at the very least it may mask |
| real problems, we need to address this. |
| |
| This change removes the flush_completed_IO() and i_mutex lock in |
| ext4_evict_inode(). Instead, we take a different approach to resolve |
| the software lockup that commit 2581fdc810 intends to fix. Rather |
| than having ext4-dio-unwritten thread wait for grabing the i_mutex |
| lock of an inode, we use mutex_trylock() instead, and simply requeue |
| the work item if we fail to grab the inode's i_mutex lock. |
| |
| This should speed up work queue processing in general and also |
| prevents the following deadlock scenario: During page fault, |
| shrink_icache_memory is called that in turn evicts another inode B. |
| Inode B has some pending io_end work so it calls ext4_ioend_wait() |
| that waits for inode B's i_ioend_count to become zero. However, inode |
| B's ioend work was queued behind some of inode A's ioend work on the |
| same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten |
| thread on that cpu is processing inode A's ioend work, it tries to |
| grab inode A's i_mutex lock. Since the i_mutex lock of inode A is |
| still hold before the page fault happened, we enter a deadlock. |
| |
| Signed-off-by: Jiaying Zhang <jiayingz@google.com> |
| Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| fs/ext4/ext4.h | 1 + |
| fs/ext4/inode.c | 3 --- |
| fs/ext4/page-io.c | 18 +++++++++++++++++- |
| 3 files changed, 18 insertions(+), 4 deletions(-) |
| |
| --- a/fs/ext4/ext4.h |
| +++ b/fs/ext4/ext4.h |
| @@ -175,6 +175,7 @@ struct mpage_da_data { |
| */ |
| #define EXT4_IO_END_UNWRITTEN 0x0001 |
| #define EXT4_IO_END_ERROR 0x0002 |
| +#define EXT4_IO_END_QUEUED 0x0004 |
| |
| struct ext4_io_page { |
| struct page *p_page; |
| --- a/fs/ext4/inode.c |
| +++ b/fs/ext4/inode.c |
| @@ -190,9 +190,6 @@ void ext4_evict_inode(struct inode *inod |
| |
| trace_ext4_evict_inode(inode); |
| |
| - mutex_lock(&inode->i_mutex); |
| - ext4_flush_completed_IO(inode); |
| - mutex_unlock(&inode->i_mutex); |
| ext4_ioend_wait(inode); |
| |
| if (inode->i_nlink) { |
| --- a/fs/ext4/page-io.c |
| +++ b/fs/ext4/page-io.c |
| @@ -142,7 +142,23 @@ static void ext4_end_io_work(struct work |
| unsigned long flags; |
| int ret; |
| |
| - mutex_lock(&inode->i_mutex); |
| + if (!mutex_trylock(&inode->i_mutex)) { |
| + /* |
| + * Requeue the work instead of waiting so that the work |
| + * items queued after this can be processed. |
| + */ |
| + queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); |
| + /* |
| + * To prevent the ext4-dio-unwritten thread from keeping |
| + * requeueing end_io requests and occupying cpu for too long, |
| + * yield the cpu if it sees an end_io request that has already |
| + * been requeued. |
| + */ |
| + if (io->flag & EXT4_IO_END_QUEUED) |
| + yield(); |
| + io->flag |= EXT4_IO_END_QUEUED; |
| + return; |
| + } |
| ret = ext4_end_io_nolock(io); |
| if (ret < 0) { |
| mutex_unlock(&inode->i_mutex); |