| From: Baolin Wang <baolin.wang@linux.alibaba.com> |
| Subject: mm: shmem: improve the tmpfs large folio read performance |
| Date: Fri, 18 Oct 2024 11:00:28 +0800 |
| |
| tmpfs already supports PMD-sized large folios, but the tmpfs read
| operation still copies data at PAGE_SIZE granularity, which is
| inefficient.  Change tmpfs to copy data at folio granularity, which
| improves read performance, and convert the read path to folio-based
| functions.
| |
| Moreover, if a large folio contains a hwpoisoned subpage, the read will
| still fall back to copying at page granularity.
| |
| Using 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, I see
| about a 20% performance improvement, and no regression with bs=4k.
| Before the patch: |
| READ: bw=10.0GiB/s |
| |
| After the patch: |
| READ: bw=12.0GiB/s |
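| 
| For reference, the measurement can be reproduced with a setup along the
| following lines; the mount point, file name and the remaining fio flags
| are illustrative assumptions, only bs=64k (and bs=4k for the regression
| check) comes from the description above.  Mounting with huge=always makes
| shmem back the file with PMD-sized (2M) folios:
| 
|   mount -t tmpfs -o huge=always,size=2G tmpfs /mnt/tmpfs
|   fio --name=seqread --filename=/mnt/tmpfs/testfile --rw=read \
|       --bs=64k --size=1g --ioengine=psync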
| |
| Link: https://lkml.kernel.org/r/2129a21a5b9f77d3bb7ddec152c009ce7c5653c4.1729218573.git.baolin.wang@linux.alibaba.com |
| Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> |
| Reviewed-by: Yang Shi <shy828301@gmail.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: Hugh Dickins <hughd@google.com> |
| Cc: Kefeng Wang <wangkefeng.wang@huawei.com> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/shmem.c | 34 ++++++++++++++++++++++++---------- |
| 1 file changed, 24 insertions(+), 10 deletions(-) |
| |
| --- a/mm/shmem.c~mm-shmem-improve-the-tmpfs-large-folio-read-performance |
| +++ a/mm/shmem.c |
| @@ -3094,13 +3094,13 @@ static ssize_t shmem_file_read_iter(stru |
| int error = 0; |
| ssize_t retval = 0; |
| |
| - offset = iocb->ki_pos & ~PAGE_MASK; |
| - |
| for (;;) { |
| struct folio *folio = NULL; |
| struct page *page = NULL; |
| unsigned long nr, ret; |
| loff_t end_offset, i_size = i_size_read(inode); |
| + bool fallback_page_copy = false; |
| + size_t fsize; |
| |
| if (unlikely(iocb->ki_pos >= i_size)) |
| break; |
| @@ -3121,6 +3121,10 @@ static ssize_t shmem_file_read_iter(stru |
| error = -EIO; |
| break; |
| } |
| + |
| + if (folio_test_large(folio) && |
| + folio_test_has_hwpoisoned(folio)) |
| + fallback_page_copy = true; |
| } |
| |
| /* |
| @@ -3134,7 +3138,12 @@ static ssize_t shmem_file_read_iter(stru |
| break; |
| } |
| end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count); |
| - nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset); |
| + if (folio && likely(!fallback_page_copy)) |
| + fsize = folio_size(folio); |
| + else |
| + fsize = PAGE_SIZE; |
| + offset = iocb->ki_pos & (fsize - 1); |
| + nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset); |
| |
| if (folio) { |
| /* |
| @@ -3142,10 +3151,15 @@ static ssize_t shmem_file_read_iter(stru |
| * virtual addresses, take care about potential aliasing |
| * before reading the page on the kernel side. |
| */ |
| - if (mapping_writably_mapped(mapping)) |
| - flush_dcache_page(page); |
| + if (mapping_writably_mapped(mapping)) { |
| + if (likely(!fallback_page_copy)) |
| + flush_dcache_folio(folio); |
| + else |
| + flush_dcache_page(page); |
| + } |
| + |
| /* |
| - * Mark the page accessed if we read the beginning. |
| + * Mark the folio accessed if we read the beginning. |
| */ |
| if (!offset) |
| folio_mark_accessed(folio); |
| @@ -3153,9 +3167,11 @@ static ssize_t shmem_file_read_iter(stru |
| * Ok, we have the page, and it's up-to-date, so |
| * now we can copy it to user space... |
| */ |
| - ret = copy_page_to_iter(page, offset, nr, to); |
| + if (likely(!fallback_page_copy)) |
| + ret = copy_folio_to_iter(folio, offset, nr, to); |
| + else |
| + ret = copy_page_to_iter(page, offset, nr, to); |
| folio_put(folio); |
| - |
| } else if (user_backed_iter(to)) { |
| /* |
| * Copy to user tends to be so well optimized, but |
| @@ -3173,8 +3189,6 @@ static ssize_t shmem_file_read_iter(stru |
| } |
| |
| retval += ret; |
| - offset += ret; |
| - offset &= ~PAGE_MASK; |
| iocb->ki_pos += ret; |
| |
| if (!iov_iter_count(to)) |
| _ |