| From: Baolin Wang <baolin.wang@linux.alibaba.com> |
| Subject: mm: shmem: improve the tmpfs large folio read performance |
| Date: Fri, 18 Oct 2024 11:00:28 +0800 |
| |
| tmpfs already supports PMD-sized large folios, but the tmpfs read
| operation still copies data at PAGE_SIZE granularity, which is
| inefficient.  Change tmpfs to copy data at folio granularity, which
| improves read performance, and convert the read path to folio-based
| functions.
| |
| Moreover, if a large folio contains a hwpoisoned subpage, the read will
| still fall back to copying at page granularity.
| |
| Using 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, I see
| about a 20% performance improvement, and no regression with bs=4k.
| Before the patch: |
| READ: bw=10.0GiB/s |
| |
| After the patch: |
| READ: bw=12.0GiB/s |
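| 
| For reference, the measurement can be reproduced with a setup along the
| following lines; the mount point, file name and the remaining fio flags
| are illustrative assumptions, only bs=64k (and bs=4k for the regression
| check) comes from the description above.  Mounting with huge=always makes
| shmem back the file with PMD-sized (2M) folios:
| 
|   mount -t tmpfs -o huge=always,size=2G tmpfs /mnt/tmpfs
|   fio --name=seqread --filename=/mnt/tmpfs/testfile --rw=read \
|       --bs=64k --size=1g --ioengine=psync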
| |
| Link: https://lkml.kernel.org/r/2129a21a5b9f77d3bb7ddec152c009ce7c5653c4.1729218573.git.baolin.wang@linux.alibaba.com |
| Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> |
| Reviewed-by: Yang Shi <shy828301@gmail.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: Hugh Dickins <hughd@google.com> |
| Cc: Kefeng Wang <wangkefeng.wang@huawei.com> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/shmem.c | 34 ++++++++++++++++++++++++---------- |
| 1 file changed, 24 insertions(+), 10 deletions(-) |
| |
| --- a/mm/shmem.c~mm-shmem-improve-the-tmpfs-large-folio-read-performance |
| +++ a/mm/shmem.c |
| @@ -3094,13 +3094,13 @@ static ssize_t shmem_file_read_iter(stru |
| int error = 0; |
| ssize_t retval = 0; |
| |
| - offset = iocb->ki_pos & ~PAGE_MASK; |
| - |
| for (;;) { |
| struct folio *folio = NULL; |
| struct page *page = NULL; |
| unsigned long nr, ret; |
| loff_t end_offset, i_size = i_size_read(inode); |
| + bool fallback_page_copy = false; |
| + size_t fsize; |
| |
| if (unlikely(iocb->ki_pos >= i_size)) |
| break; |
| @@ -3121,6 +3121,10 @@ static ssize_t shmem_file_read_iter(stru |
| error = -EIO; |
| break; |
| } |
| + |
| + if (folio_test_large(folio) && |
| + folio_test_has_hwpoisoned(folio)) |
| + fallback_page_copy = true; |
| } |
| |
| /* |
| @@ -3134,7 +3138,12 @@ static ssize_t shmem_file_read_iter(stru |
| break; |
| } |
| end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count); |
| - nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset); |
| + if (folio && likely(!fallback_page_copy)) |
| + fsize = folio_size(folio); |
| + else |
| + fsize = PAGE_SIZE; |
| + offset = iocb->ki_pos & (fsize - 1); |
| + nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset); |
| |
| if (folio) { |
| /* |
| @@ -3142,10 +3151,15 @@ static ssize_t shmem_file_read_iter(stru |
| * virtual addresses, take care about potential aliasing |
| * before reading the page on the kernel side. |
| */ |
| - if (mapping_writably_mapped(mapping)) |
| - flush_dcache_page(page); |
| + if (mapping_writably_mapped(mapping)) { |
| + if (likely(!fallback_page_copy)) |
| + flush_dcache_folio(folio); |
| + else |
| + flush_dcache_page(page); |
| + } |
| + |
| /* |
| - * Mark the page accessed if we read the beginning. |
| + * Mark the folio accessed if we read the beginning. |
| */ |
| if (!offset) |
| folio_mark_accessed(folio); |
| @@ -3153,9 +3167,11 @@ static ssize_t shmem_file_read_iter(stru |
| * Ok, we have the page, and it's up-to-date, so |
| * now we can copy it to user space... |
| */ |
| - ret = copy_page_to_iter(page, offset, nr, to); |
| + if (likely(!fallback_page_copy)) |
| + ret = copy_folio_to_iter(folio, offset, nr, to); |
| + else |
| + ret = copy_page_to_iter(page, offset, nr, to); |
| folio_put(folio); |
| - |
| } else if (user_backed_iter(to)) { |
| /* |
| * Copy to user tends to be so well optimized, but |
| @@ -3173,8 +3189,6 @@ static ssize_t shmem_file_read_iter(stru |
| } |
| |
| retval += ret; |
| - offset += ret; |
| - offset &= ~PAGE_MASK; |
| iocb->ki_pos += ret; |
| |
| if (!iov_iter_count(to)) |
| _ |