| From: Alistair Popple <apopple@nvidia.com> |
| Subject: mm/memremap.c: take a pgmap reference on page allocation |
| Date: Wed, 28 Sep 2022 22:01:17 +1000 |
| |
| ZONE_DEVICE pages have a struct dev_pagemap which is allocated by a |
| driver. When the struct page is first allocated by the kernel in |
| memremap_pages() a reference is taken on the associated pagemap to ensure |
| it is not freed prior to the pages being freed. |
| |
| Prior to 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page |
| refcount") pages were considered free and returned to the driver when the |
| reference count dropped to one. However the pagemap reference was not |
| dropped until the page reference count hit zero. This would occur as part |
| of the final put_page() in memunmap_pages() which would wait for all pages |
| to be freed prior to returning. |
| |
| When the extra refcount was removed the pagemap reference was no longer |
| being dropped in put_page(). Instead memunmap_pages() was changed to |
| explicitly drop the pagemap references. This means that memunmap_pages() |
| can complete even though pages are still mapped by the kernel which can |
| lead to kernel crashes, particularly if a driver frees the pagemap. |
| |
| To fix this drivers should take a pagemap reference when allocating the |
| page. This reference can then be returned when the page is freed. |
| |
| Link: https://lkml.kernel.org/r/12d155ec727935ebfbb4d639a03ab374917ea51b.1664366292.git-series.apopple@nvidia.com |
| Signed-off-by: Alistair Popple <apopple@nvidia.com> |
| Fixes: 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page refcount") |
| Cc: Jason Gunthorpe <jgg@nvidia.com> |
| Cc: Felix Kuehling <Felix.Kuehling@amd.com> |
| Cc: Alex Deucher <alexander.deucher@amd.com> |
| Cc: Christian Kรถnig <christian.koenig@amd.com> |
| Cc: Ben Skeggs <bskeggs@redhat.com> |
| Cc: Lyude Paul <lyude@redhat.com> |
| Cc: Ralph Campbell <rcampbell@nvidia.com> |
| Cc: Alex Sierra <alex.sierra@amd.com> |
| Cc: John Hubbard <jhubbard@nvidia.com> |
| Cc: Dan Williams <dan.j.williams@intel.com> |
| |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: "Huang, Ying" <ying.huang@intel.com> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Cc: Michael Ellerman <mpe@ellerman.id.au> |
| Cc: Yang Shi <shy828301@gmail.com> |
| Cc: Zi Yan <ziy@nvidia.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/memremap.c | 25 +++++++++++++++++++------ |
| 1 file changed, 19 insertions(+), 6 deletions(-) |
| |
| --- a/mm/memremap.c~mm-memremapc-take-a-pgmap-reference-on-page-allocation |
| +++ a/mm/memremap.c |
| @@ -138,8 +138,11 @@ void memunmap_pages(struct dev_pagemap * |
| int i; |
| |
| percpu_ref_kill(&pgmap->ref); |
| - for (i = 0; i < pgmap->nr_range; i++) |
| - percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i)); |
| + if (pgmap->type != MEMORY_DEVICE_PRIVATE && |
| + pgmap->type != MEMORY_DEVICE_COHERENT) |
| + for (i = 0; i < pgmap->nr_range; i++) |
| + percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i)); |
| + |
| wait_for_completion(&pgmap->done); |
| |
| for (i = 0; i < pgmap->nr_range; i++) |
| @@ -264,7 +267,9 @@ static int pagemap_range(struct dev_page |
| memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], |
| PHYS_PFN(range->start), |
| PHYS_PFN(range_len(range)), pgmap); |
| - percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id)); |
| + if (pgmap->type != MEMORY_DEVICE_PRIVATE && |
| + pgmap->type != MEMORY_DEVICE_COHERENT) |
| + percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id)); |
| return 0; |
| |
| err_add_memory: |
| @@ -502,16 +507,24 @@ void free_zone_device_page(struct page * |
| page->mapping = NULL; |
| page->pgmap->ops->page_free(page); |
| |
| - /* |
| - * Reset the page count to 1 to prepare for handing out the page again. |
| - */ |
| if (page->pgmap->type != MEMORY_DEVICE_PRIVATE && |
| page->pgmap->type != MEMORY_DEVICE_COHERENT) |
| + /* |
| + * Reset the page count to 1 to prepare for handing out the page |
| + * again. |
| + */ |
| set_page_count(page, 1); |
| + else |
| + put_dev_pagemap(page->pgmap); |
| } |
| |
| void zone_device_page_init(struct page *page) |
| { |
| + /* |
| + * Drivers shouldn't be allocating pages after calling |
| + * memunmap_pages(). |
| + */ |
| + WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref)); |
| set_page_count(page, 1); |
| lock_page(page); |
| } |
| _ |