| From: Jan Kara <jack@suse.com> |
| Date: Mon, 7 Dec 2015 14:31:11 -0500 |
| Subject: ext4: fix races between buffered IO and collapse / insert range |
| |
| commit 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 upstream. |
| |
| Current code implementing FALLOC_FL_COLLAPSE_RANGE and |
| FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page |
| faults. If a buffered write or a write via mmap manages to squeeze in |
| between filemap_write_and_wait_range() and truncate_pagecache() in the |
| fallocate implementations, the written data is simply discarded by |
| truncate_pagecache() although it should have been shifted. |
| |
| Fix the problem by moving the filemap_write_and_wait_range() call inside |
| the region protected by i_mutex and i_mmap_sem. That way we are protected |
| against races with both buffered writes and page faults. |
| |
| Signed-off-by: Jan Kara <jack@suse.com> |
| Signed-off-by: Theodore Ts'o <tytso@mit.edu> |
| [bwh: Backported to 3.16: drop changes in ext4_insert_range()] |
| Signed-off-by: Ben Hutchings <ben@decadent.org.uk> |
| --- |
| --- a/fs/ext4/extents.c |
| +++ b/fs/ext4/extents.c |
| @@ -5453,21 +5453,7 @@ int ext4_collapse_range(struct inode *in |
| return ret; |
| } |
| |
| - /* |
| - * Need to round down offset to be aligned with page size boundary |
| - * for page size > block size. |
| - */ |
| - ioffset = round_down(offset, PAGE_SIZE); |
| - |
| - /* Write out all dirty pages */ |
| - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, |
| - LLONG_MAX); |
| - if (ret) |
| - return ret; |
| - |
| - /* Take mutex lock */ |
| mutex_lock(&inode->i_mutex); |
| - |
| /* |
| * There is no need to overlap collapse range with EOF, in which case |
| * it is effectively a truncate operation |
| @@ -5492,6 +5478,27 @@ int ext4_collapse_range(struct inode *in |
| * page cache. |
| */ |
| down_write(&EXT4_I(inode)->i_mmap_sem); |
| + /* |
| + * Need to round down offset to be aligned with page size boundary |
| + * for page size > block size. |
| + */ |
| + ioffset = round_down(offset, PAGE_SIZE); |
| + /* |
| + * Write tail of the last page before removed range since it will get |
| + * removed from the page cache below. |
| + */ |
| + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); |
| + if (ret) |
| + goto out_mmap; |
| + /* |
| + * Write data that will be shifted to preserve them when discarding |
| + * page cache below. We are also protected from pages becoming dirty |
| + * by i_mmap_sem. |
| + */ |
| + ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, |
| + LLONG_MAX); |
| + if (ret) |
| + goto out_mmap; |
| truncate_pagecache(inode, ioffset); |
| |
| credits = ext4_writepage_trans_blocks(inode); |