| From 52f476a323f9efc959be1c890d0cdcf12e1582e0 Mon Sep 17 00:00:00 2001 |
| From: Dan Williams <dan.j.williams@intel.com> |
| Date: Thu, 16 May 2019 17:05:21 -0700 |
| Subject: libnvdimm/pmem: Bypass CONFIG_HARDENED_USERCOPY overhead |
| |
| From: Dan Williams <dan.j.williams@intel.com> |
| |
| commit 52f476a323f9efc959be1c890d0cdcf12e1582e0 upstream. |
| |
| Jeff discovered that performance improves from ~375K iops to ~519K iops |
| on a simple psync-write fio workload when moving the location of 'struct |
| page' from the default PMEM location to DRAM. This result is surprising |
| because the expectation is that 'struct page' for dax is only needed for |
| third party references to dax mappings. For example, a dax-mapped buffer |
| passed to another system call for direct-I/O requires 'struct page' for |
| sending the request down the driver stack and pinning the page. There is |
| no usage of 'struct page' for first party access to a file via |
| read(2)/write(2) and friends. |
| |
| However, this "no page needed" expectation is violated by |
| CONFIG_HARDENED_USERCOPY and the check_copy_size() performed in |
| copy_from_iter_full_nocache() and copy_to_iter_mcsafe(). The |
| check_heap_object() helper routine assumes the buffer is backed by a |
| slab allocator (DRAM) page and applies some checks. Those checks are |
| invalid, dax pages do not originate from the slab, and redundant, |
| dax_iomap_actor() has already validated that the I/O is within bounds. |
| Specifically that routine validates that the logical file offset is |
| within bounds of the file, then it does a sector-to-pfn translation |
| which validates that the physical mapping is within bounds of the block |
| device. |
| |
| Bypass additional hardened usercopy overhead and call the 'no check' |
| versions of the copy_{to,from}_iter operations directly. |
| |
| Fixes: 0aed55af8834 ("x86, uaccess: introduce copy_from_iter_flushcache...") |
| Cc: <stable@vger.kernel.org> |
| Cc: Jeff Moyer <jmoyer@redhat.com> |
| Cc: Ingo Molnar <mingo@redhat.com> |
| Cc: Christoph Hellwig <hch@lst.de> |
| Cc: Al Viro <viro@zeniv.linux.org.uk> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Reported-and-tested-by: Jeff Smits <jeff.smits@intel.com> |
| Acked-by: Kees Cook <keescook@chromium.org> |
| Acked-by: Jan Kara <jack@suse.cz> |
| Signed-off-by: Dan Williams <dan.j.williams@intel.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| drivers/nvdimm/pmem.c | 10 ++++++++-- |
| 1 file changed, 8 insertions(+), 2 deletions(-) |
| |
| --- a/drivers/nvdimm/pmem.c |
| +++ b/drivers/nvdimm/pmem.c |
| @@ -281,16 +281,22 @@ static long pmem_dax_direct_access(struc |
| return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); |
| } |
| |
| +/* |
| + * Use the 'no check' versions of copy_from_iter_flushcache() and |
| + * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds |
| + * checking, both file offset and device offset, is handled by |
| + * dax_iomap_actor() |
| + */ |
| static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, |
| void *addr, size_t bytes, struct iov_iter *i) |
| { |
| - return copy_from_iter_flushcache(addr, bytes, i); |
| + return _copy_from_iter_flushcache(addr, bytes, i); |
| } |
| |
| static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, |
| void *addr, size_t bytes, struct iov_iter *i) |
| { |
| - return copy_to_iter_mcsafe(addr, bytes, i); |
| + return _copy_to_iter_mcsafe(addr, bytes, i); |
| } |
| |
| static const struct dax_operations pmem_dax_ops = { |