| From 08012d2b3ed6cbe8759c47eeb32c81ca52a4c9c9 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Tue, 31 Dec 2019 12:11:49 -0600 |
| Subject: ext4: fix deadlock allocating crypto bounce page from mempool |
| |
| From: Eric Biggers <ebiggers@google.com> |
| |
| [ Upstream commit 547c556f4db7c09447ecf5f833ab6aaae0c5ab58 ] |
| |
| ext4_writepages() on an encrypted file has to encrypt the data, but it |
| can't modify the pagecache pages in-place, so it encrypts the data into |
| bounce pages and writes those instead. All bounce pages are allocated |
| from a mempool using GFP_NOFS. |
| |
| This is not a correct use of a mempool, and it can deadlock. This is |
| because GFP_NOFS includes __GFP_DIRECT_RECLAIM, which enables the "never |
| fail" mode for mempool_alloc() where a failed allocation will fall back |
| to waiting for one of the preallocated elements in the pool. |
| |
| But since this mode is used for all of a bio's pages and not just the |
| first, it can deadlock waiting for pages already in the bio to be freed; |
| those pages are not freed until the bio is submitted and completes. |
| |
| This deadlock can be reproduced by patching mempool_alloc() to pretend |
| that pool->alloc() always fails (so that it always falls back to the |
| preallocations), and then creating an encrypted file of size > 128 KiB. |
| |
| Fix it by only using GFP_NOFS for the first page in the bio. For |
| subsequent pages just use GFP_NOWAIT, and if any of those fail, just |
| submit the bio and start a new one. |
| |
| This will need to be fixed in f2fs too, but that's less straightforward. |
| |
| Fixes: c9af28fdd449 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM") |
| Cc: stable@vger.kernel.org |
| Signed-off-by: Eric Biggers <ebiggers@google.com> |
| Link: https://lore.kernel.org/r/20191231181149.47619-1-ebiggers@kernel.org |
| Signed-off-by: Theodore Ts'o <tytso@mit.edu> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| fs/ext4/page-io.c | 19 ++++++++++++++----- |
| 1 file changed, 14 insertions(+), 5 deletions(-) |
| |
| diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c |
| index db7590178dfcf..9cc79b7b0df11 100644 |
| --- a/fs/ext4/page-io.c |
| +++ b/fs/ext4/page-io.c |
| @@ -481,17 +481,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io, |
| nr_to_submit) { |
| gfp_t gfp_flags = GFP_NOFS; |
| |
| + /* |
| + * Since bounce page allocation uses a mempool, we can only use |
| + * a waiting mask (i.e. request guaranteed allocation) on the |
| + * first page of the bio. Otherwise it can deadlock. |
| + */ |
| + if (io->io_bio) |
| + gfp_flags = GFP_NOWAIT | __GFP_NOWARN; |
| retry_encrypt: |
| data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, |
| page->index, gfp_flags); |
| if (IS_ERR(data_page)) { |
| ret = PTR_ERR(data_page); |
| - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { |
| - if (io->io_bio) { |
| + if (ret == -ENOMEM && |
| + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { |
| + gfp_flags = GFP_NOFS; |
| + if (io->io_bio) |
| ext4_io_submit(io); |
| - congestion_wait(BLK_RW_ASYNC, HZ/50); |
| - } |
| - gfp_flags |= __GFP_NOFAIL; |
| + else |
| + gfp_flags |= __GFP_NOFAIL; |
| + congestion_wait(BLK_RW_ASYNC, HZ/50); |
| goto retry_encrypt; |
| } |
| data_page = NULL; |
| -- |
| 2.20.1 |
| |