| From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> |
| Subject: mm: update mem char driver to use mmap_prepare |
| Date: Mon, 20 Oct 2025 13:11:31 +0100 |
| |
| Update the mem char driver (backing /dev/mem and /dev/zero) to use the |
| f_op->mmap_prepare hook rather than the deprecated f_op->mmap hook. |
| |
| The /dev/zero implementation has a unique and rather concerning |
| characteristic in that it marks MAP_PRIVATE mmap() mappings as anonymous |
| when they are, in fact, not. |
| |
| The new f_op->mmap_prepare() hook can support this, but rather than |
| introducing a helper function to perform this hack (and risk it gaining |
| other users), we utilise the success hook to do so. |
| |
| We utilise the newly introduced shmem_zero_setup_desc() to handle the |
| shared mapping case from the f_op->mmap_prepare() hook. |
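| |
| To illustrate, a simplified sketch drawn from the diff below (the |
| !CONFIG_MMU early return is omitted), the /dev/zero prepare hook ends up |
| looking roughly like: |
| |
| 	static int mmap_zero_prepare(struct vm_area_desc *desc) |
| 	{ |
| 		if (desc->vm_flags & VM_SHARED) |
| 			return shmem_zero_setup_desc(desc); |
| |
| 		/* The success hook marks the VMA anonymous once it exists. */ |
| 		desc->action.success_hook = mmap_zero_private_success; |
| 		return 0; |
| 	} |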
| |
| We also use the desc->action.error_hook to filter remap errors to |
| -EAGAIN, keeping behaviour consistent with the previous implementation. |
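| |
| Again simplified from the diff below, the filtering amounts to setting up |
| the deferred remap and attaching an error hook in mmap_mem_prepare(): |
| |
| 	static int mmap_filter_error(int err) |
| 	{ |
| 		return -EAGAIN; |
| 	} |
| |
| 	/* Remap-pfn-range will mark the range VM_IO. */ |
| 	mmap_action_remap_full(desc, desc->pgoff); |
| 	/* We filter remap errors to -EAGAIN. */ |
| 	desc->action.error_hook = mmap_filter_error; |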
| |
| Link: https://lkml.kernel.org/r/48f60764d7a6901819d1af778fa33b775d2e8c77.1760959442.git.lorenzo.stoakes@oracle.com |
| Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> |
| Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> |
| Cc: Alexander Gordeev <agordeev@linux.ibm.com> |
| Cc: Al Viro <viro@zeniv.linux.org.uk> |
| Cc: Andreas Larsson <andreas@gaisler.com> |
| Cc: Andrey Konovalov <andreyknvl@gmail.com> |
| Cc: Arnd Bergmann <arnd@arndb.de> |
| Cc: Baolin Wang <baolin.wang@linux.alibaba.com> |
| Cc: Baoquan He <bhe@redhat.com> |
| Cc: "Chatre, Reinette" <reinette.chatre@intel.com> |
| Cc: Christian Borntraeger <borntraeger@linux.ibm.com> |
| Cc: Christian Brauner <brauner@kernel.org> |
| Cc: Dan Williams <dan.j.williams@intel.com> |
| Cc: Dave Jiang <dave.jiang@intel.com> |
| Cc: Dave Martin <dave.martin@arm.com> |
| Cc: Dave Young <dyoung@redhat.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: David S. Miller <davem@davemloft.net> |
| Cc: Dmitriy Vyukov <dvyukov@google.com> |
| Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| Cc: Guo Ren <guoren@kernel.org> |
| Cc: Heiko Carstens <hca@linux.ibm.com> |
| Cc: Hugh Dickins <hughd@google.com> |
| Cc: James Morse <james.morse@arm.com> |
| Cc: Jan Kara <jack@suse.cz> |
| Cc: Jann Horn <jannh@google.com> |
| Cc: Jonathan Corbet <corbet@lwn.net> |
| Cc: Kevin Tian <kevin.tian@intel.com> |
| Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com> |
| Cc: Liam Howlett <liam.howlett@oracle.com> |
| Cc: "Luck, Tony" <tony.luck@intel.com> |
| Cc: Matthew Wilcox (Oracle) <willy@infradead.org> |
| Cc: Michal Hocko <mhocko@suse.com> |
| Cc: Mike Rapoport <rppt@kernel.org> |
| Cc: Muchun Song <muchun.song@linux.dev> |
| Cc: Nicolas Pitre <nico@fluxnic.net> |
| Cc: Oscar Salvador <osalvador@suse.de> |
| Cc: Pedro Falcato <pfalcato@suse.de> |
| Cc: Robin Murphy <robin.murphy@arm.com> |
| Cc: Sumanth Korikkar <sumanthk@linux.ibm.com> |
| Cc: Suren Baghdasaryan <surenb@google.com> |
| Cc: Sven Schnelle <svens@linux.ibm.com> |
| Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> |
| Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com> |
| Cc: Vasily Gorbik <gor@linux.ibm.com> |
| Cc: Vishal Verma <vishal.l.verma@intel.com> |
| Cc: Vivek Goyal <vgoyal@redhat.com> |
| Cc: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Will Deacon <will@kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| drivers/char/mem.c | 88 +++++++++++++++++++++++++------------------ |
| 1 file changed, 52 insertions(+), 36 deletions(-) |
| |
| --- a/drivers/char/mem.c~mm-update-mem-char-driver-to-use-mmap_prepare |
| +++ a/drivers/char/mem.c |
| @@ -304,13 +304,13 @@ static unsigned zero_mmap_capabilities(s |
| } |
| |
| /* can't do an in-place private mapping if there's no MMU */ |
| -static inline int private_mapping_ok(struct vm_area_struct *vma) |
| +static inline int private_mapping_ok(struct vm_area_desc *desc) |
| { |
| - return is_nommu_shared_mapping(vma->vm_flags); |
| + return is_nommu_shared_mapping(desc->vm_flags); |
| } |
| #else |
| |
| -static inline int private_mapping_ok(struct vm_area_struct *vma) |
| +static inline int private_mapping_ok(struct vm_area_desc *desc) |
| { |
| return 1; |
| } |
| @@ -322,46 +322,49 @@ static const struct vm_operations_struct |
| #endif |
| }; |
| |
| -static int mmap_mem(struct file *file, struct vm_area_struct *vma) |
| +static int mmap_filter_error(int err) |
| { |
| - size_t size = vma->vm_end - vma->vm_start; |
| - phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; |
| + return -EAGAIN; |
| +} |
| + |
| +static int mmap_mem_prepare(struct vm_area_desc *desc) |
| +{ |
| + struct file *file = desc->file; |
| + const size_t size = vma_desc_size(desc); |
| + const phys_addr_t offset = (phys_addr_t)desc->pgoff << PAGE_SHIFT; |
| |
| /* Does it even fit in phys_addr_t? */ |
| - if (offset >> PAGE_SHIFT != vma->vm_pgoff) |
| + if (offset >> PAGE_SHIFT != desc->pgoff) |
| return -EINVAL; |
| |
| /* It's illegal to wrap around the end of the physical address space. */ |
| if (offset + (phys_addr_t)size - 1 < offset) |
| return -EINVAL; |
| |
| - if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) |
| + if (!valid_mmap_phys_addr_range(desc->pgoff, size)) |
| return -EINVAL; |
| |
| - if (!private_mapping_ok(vma)) |
| + if (!private_mapping_ok(desc)) |
| return -ENOSYS; |
| |
| - if (!range_is_allowed(vma->vm_pgoff, size)) |
| + if (!range_is_allowed(desc->pgoff, size)) |
| return -EPERM; |
| |
| - if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, |
| - &vma->vm_page_prot)) |
| + if (!phys_mem_access_prot_allowed(file, desc->pgoff, size, |
| + &desc->page_prot)) |
| return -EINVAL; |
| |
| - vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, |
| - size, |
| - vma->vm_page_prot); |
| - |
| - vma->vm_ops = &mmap_mem_ops; |
| - |
| - /* Remap-pfn-range will mark the range VM_IO */ |
| - if (remap_pfn_range(vma, |
| - vma->vm_start, |
| - vma->vm_pgoff, |
| - size, |
| - vma->vm_page_prot)) { |
| - return -EAGAIN; |
| - } |
| + desc->page_prot = phys_mem_access_prot(file, desc->pgoff, |
| + size, |
| + desc->page_prot); |
| + |
| + desc->vm_ops = &mmap_mem_ops; |
| + |
| + /* Remap-pfn-range will mark the range VM_IO. */ |
| + mmap_action_remap_full(desc, desc->pgoff); |
| + /* We filter remap errors to -EAGAIN. */ |
| + desc->action.error_hook = mmap_filter_error; |
| + |
| return 0; |
| } |
| |
| @@ -501,14 +504,26 @@ static ssize_t read_zero(struct file *fi |
| return cleared; |
| } |
| |
| -static int mmap_zero(struct file *file, struct vm_area_struct *vma) |
| +static int mmap_zero_private_success(const struct vm_area_struct *vma) |
| +{ |
| + /* |
| + * This is a unique situation in which we mark a MAP_PRIVATE mapping of |
| + * /dev/zero as anonymous, despite it not actually being so. |
| + */ |
| + vma_set_anonymous((struct vm_area_struct *)vma); |
| + |
| + return 0; |
| +} |
| + |
| +static int mmap_zero_prepare(struct vm_area_desc *desc) |
| { |
| #ifndef CONFIG_MMU |
| return -ENOSYS; |
| #endif |
| - if (vma->vm_flags & VM_SHARED) |
| - return shmem_zero_setup(vma); |
| - vma_set_anonymous(vma); |
| + if (desc->vm_flags & VM_SHARED) |
| + return shmem_zero_setup_desc(desc); |
| + |
| + desc->action.success_hook = mmap_zero_private_success; |
| return 0; |
| } |
| |
| @@ -526,10 +541,11 @@ static unsigned long get_unmapped_area_z |
| { |
| if (flags & MAP_SHARED) { |
| /* |
| - * mmap_zero() will call shmem_zero_setup() to create a file, |
| - * so use shmem's get_unmapped_area in case it can be huge; |
| - * and pass NULL for file as in mmap.c's get_unmapped_area(), |
| - * so as not to confuse shmem with our handle on "/dev/zero". |
| + * mmap_zero_prepare() will call shmem_zero_setup_desc() to create a |
| + * file, so use shmem's get_unmapped_area in case it can be |
| + * huge; and pass NULL for file as in mmap.c's |
| + * get_unmapped_area(), so as not to confuse shmem with our |
| + * handle on "/dev/zero". |
| */ |
| return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags); |
| } |
| @@ -632,7 +648,7 @@ static const struct file_operations __ma |
| .llseek = memory_lseek, |
| .read = read_mem, |
| .write = write_mem, |
| - .mmap = mmap_mem, |
| + .mmap_prepare = mmap_mem_prepare, |
| .open = open_mem, |
| #ifndef CONFIG_MMU |
| .get_unmapped_area = get_unmapped_area_mem, |
| @@ -668,7 +684,7 @@ static const struct file_operations zero |
| .write_iter = write_iter_zero, |
| .splice_read = copy_splice_read, |
| .splice_write = splice_write_zero, |
| - .mmap = mmap_zero, |
| + .mmap_prepare = mmap_zero_prepare, |
| .get_unmapped_area = get_unmapped_area_zero, |
| #ifndef CONFIG_MMU |
| .mmap_capabilities = zero_mmap_capabilities, |
| _ |