| From: Yafang Shao <laoar.shao@gmail.com> |
| Subject: mm: allow read-ahead with IOCB_NOWAIT set |
| Date: Tue, 20 Aug 2024 10:26:39 +0800 |
| |
| Readahead support for IOCB_NOWAIT was introduced in commit 2e85abf053b9 |
| ("mm: allow read-ahead with IOCB_NOWAIT set"). However, this |
| implementation broke the semantics of IOCB_NOWAIT by potentially causing |
| it to wait on I/O during memory reclamation. This behavior was later |
| modified in commit efa8480a8316 ("fs: RWF_NOWAIT should imply IOCB_NOIO"). |
| |
| To resolve the blocking issue during memory reclamation, we can use |
| memalloc_noio_{save,restore} to ensure non-blocking behavior. This change |
| restores the original functionality, allowing preadv2(RWF_NOWAIT) to |
| trigger readahead if the file content is not present in the page cache. |
| |
| While this process may trigger direct memory reclamation, the |
| __GFP_NORETRY flag is set in the readahead GFP flags, ensuring it won't |
| block. |
| |
| A use case for this change is when we want preadv2(2) to trigger |
| readahead if the data is not in the page cache, but without waiting for |
| certain filesystem locks, like xfs_ilock. A simple example is as follows: |
| |
| retry: |
| if (preadv2(fd, iovec, cnt, offset, RWF_NOWAIT) < 0) { |
| do_other_work(); |
| goto retry; |
| } |
| |
| Link: https://lore.gnuweeb.org/io-uring/20200624164127.GP21350@casper.infradead.org/ |
| Link: https://lkml.kernel.org/r/20240820022639.89562-1-laoar.shao@gmail.com |
| Signed-off-by: Yafang Shao <laoar.shao@gmail.com> |
| Cc: Jens Axboe <axboe@kernel.dk> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Cc: Dave Chinner <david@fromorbit.com> |
| Cc: Jan Kara <jack@suse.cz> |
| Cc: Christian Brauner <brauner@kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/fs.h | 1 - |
| mm/filemap.c | 6 ++++++ |
| 2 files changed, 6 insertions(+), 1 deletion(-) |
| |
| --- a/include/linux/fs.h~mm-allow-read-ahead-with-iocb_nowait-set |
| +++ a/include/linux/fs.h |
| @@ -3461,7 +3461,6 @@ static inline int kiocb_set_rw_flags(str |
| if (flags & RWF_NOWAIT) { |
| if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) |
| return -EOPNOTSUPP; |
| - kiocb_flags |= IOCB_NOIO; |
| } |
| if (flags & RWF_ATOMIC) { |
| if (rw_type != WRITE) |
| --- a/mm/filemap.c~mm-allow-read-ahead-with-iocb_nowait-set |
| +++ a/mm/filemap.c |
| @@ -46,6 +46,7 @@ |
| #include <linux/pipe_fs_i.h> |
| #include <linux/splice.h> |
| #include <linux/rcupdate_wait.h> |
| +#include <linux/sched/mm.h> |
| #include <asm/pgalloc.h> |
| #include <asm/tlbflush.h> |
| #include "internal.h" |
| @@ -2519,6 +2520,7 @@ static int filemap_get_pages(struct kioc |
| pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; |
| pgoff_t last_index; |
| struct folio *folio; |
| + unsigned int flags; |
| int err = 0; |
| |
| /* "last_index" is the index of the page beyond the end of the read */ |
| @@ -2531,8 +2533,12 @@ retry: |
| if (!folio_batch_count(fbatch)) { |
| if (iocb->ki_flags & IOCB_NOIO) |
| return -EAGAIN; |
| + if (iocb->ki_flags & IOCB_NOWAIT) |
| + flags = memalloc_noio_save(); |
| page_cache_sync_readahead(mapping, ra, filp, index, |
| last_index - index); |
| + if (iocb->ki_flags & IOCB_NOWAIT) |
| + memalloc_noio_restore(flags); |
| filemap_get_read_batch(mapping, index, last_index - 1, fbatch); |
| } |
| if (!folio_batch_count(fbatch)) { |
| _ |