| From ed960ac61e12e298d1cdcd52b21b9ee07d36755c Mon Sep 17 00:00:00 2001 |
| From: Miklos Szeredi <mszeredi@suse.cz> |
| Date: Tue, 7 Apr 2009 16:25:02 +0000 |
| Subject: splice: fix deadlock in splicing to file |
| |
| From: Miklos Szeredi <mszeredi@suse.cz> |
| |
| upstream commit: 7bfac9ecf0585962fe13584f5cf526d8c8e76f17 |
| |
| There's a possible deadlock in generic_file_splice_write(), |
| splice_from_pipe() and ocfs2_file_splice_write(): |
| |
| - task A calls generic_file_splice_write() |
| - this calls inode_double_lock(), which locks i_mutex on both |
| pipe->inode and target inode |
| - ordering depends on inode pointers, can happen that pipe->inode is |
| locked first |
| - __splice_from_pipe() needs more data, calls pipe_wait() |
| - this releases lock on pipe->inode, goes to interruptible sleep |
| - task B calls generic_file_splice_write(), similarly to the first |
| - this locks pipe->inode, then tries to lock inode, but that is |
| already held by task A |
| - task A is interrupted, it tries to lock pipe->inode, but fails, as |
| it is already held by task B |
| - ABBA deadlock |
| |
| Fix this by explicitly ordering locks: the outer lock must be on |
| target inode and the inner lock (which is later unlocked and relocked) |
| must be on pipe->inode. This is OK, pipe inodes and target inodes |
| form two nonoverlapping sets, generic_file_splice_write() and friends |
| are not called with a target which is a pipe. |
| |
| Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> |
| Acked-by: Mark Fasheh <mfasheh@suse.com> |
| Acked-by: Jens Axboe <jens.axboe@oracle.com> |
| Cc: stable@kernel.org |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Chris Wright <chrisw@sous-sol.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| --- |
| fs/ocfs2/file.c | 8 ++++++-- |
| fs/splice.c | 25 ++++++++++++++++++++----- |
| 2 files changed, 26 insertions(+), 7 deletions(-) |
| |
| --- a/fs/ocfs2/file.c |
| +++ b/fs/ocfs2/file.c |
| @@ -2089,7 +2089,7 @@ static ssize_t ocfs2_file_splice_write(s |
| out->f_path.dentry->d_name.len, |
| out->f_path.dentry->d_name.name); |
| |
| - inode_double_lock(inode, pipe->inode); |
| + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
| |
| ret = ocfs2_rw_lock(inode, 1); |
| if (ret < 0) { |
| @@ -2104,12 +2104,16 @@ static ssize_t ocfs2_file_splice_write(s |
| goto out_unlock; |
| } |
| |
| + if (pipe->inode) |
| + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); |
| ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); |
| + if (pipe->inode) |
| + mutex_unlock(&pipe->inode->i_mutex); |
| |
| out_unlock: |
| ocfs2_rw_unlock(inode, 1); |
| out: |
| - inode_double_unlock(inode, pipe->inode); |
| + mutex_unlock(&inode->i_mutex); |
| |
| mlog_exit(ret); |
| return ret; |
| --- a/fs/splice.c |
| +++ b/fs/splice.c |
| @@ -735,10 +735,19 @@ ssize_t splice_from_pipe(struct pipe_ino |
| * ->commit_write. Most of the time, these expect i_mutex to |
| * be held. Since this may result in an ABBA deadlock with |
| * pipe->inode, we have to order lock acquiry here. |
| + * |
| + * Outer lock must be inode->i_mutex, as pipe_wait() will |
| + * release and reacquire pipe->inode->i_mutex, AND inode must |
| + * never be a pipe. |
| */ |
| - inode_double_lock(inode, pipe->inode); |
| + WARN_ON(S_ISFIFO(inode->i_mode)); |
| + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
| + if (pipe->inode) |
| + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); |
| ret = __splice_from_pipe(pipe, &sd, actor); |
| - inode_double_unlock(inode, pipe->inode); |
| + if (pipe->inode) |
| + mutex_unlock(&pipe->inode->i_mutex); |
| + mutex_unlock(&inode->i_mutex); |
| |
| return ret; |
| } |
| @@ -829,11 +838,17 @@ generic_file_splice_write(struct pipe_in |
| }; |
| ssize_t ret; |
| |
| - inode_double_lock(inode, pipe->inode); |
| + WARN_ON(S_ISFIFO(inode->i_mode)); |
| + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
| ret = file_remove_suid(out); |
| - if (likely(!ret)) |
| + if (likely(!ret)) { |
| + if (pipe->inode) |
| + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); |
| ret = __splice_from_pipe(pipe, &sd, pipe_to_file); |
| - inode_double_unlock(inode, pipe->inode); |
| + if (pipe->inode) |
| + mutex_unlock(&pipe->inode->i_mutex); |
| + } |
| + mutex_unlock(&inode->i_mutex); |
| if (ret > 0) { |
| unsigned long nr_pages; |
| |