| From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com> |
| Subject: s390: convert various pgalloc functions to use ptdescs |
| Date: Mon, 7 Aug 2023 16:04:56 -0700 |
| |
| As part of the conversion to replace the pgtable constructors/destructors |
| with their ptdesc equivalents, convert various s390 page table functions |
| to use ptdescs. |
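| |
| For example, the pmd constructor call in pmd_alloc_one() (shown in full |
| in the pgalloc.h hunk below) changes from |
| |
| 	if (!pgtable_pmd_page_ctor(virt_to_page(table))) { |
| |
| to |
| |
| 	if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { |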
| |
| Some of these functions use the *get*page*() helpers. Convert them to |
| use pagetable_alloc() and ptdesc_address() instead, to further |
| standardize the page table code. |
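| |
| For example, base_crst_alloc() (shown in full in the mm/pgalloc.c hunk |
| below) moves from |
| |
| 	table = (unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER); |
| |
| to |
| |
| 	ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER); |
| 	table = ptdesc_address(ptdesc); |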
| |
| Link: https://lkml.kernel.org/r/20230807230513.102486-15-vishal.moola@gmail.com |
| Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> |
| Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> |
| Cc: Arnd Bergmann <arnd@arndb.de> |
| Cc: Catalin Marinas <catalin.marinas@arm.com> |
| Cc: Christophe Leroy <christophe.leroy@csgroup.eu> |
| Cc: Claudio Imbrenda <imbrenda@linux.ibm.com> |
| Cc: Dave Hansen <dave.hansen@linux.intel.com> |
| Cc: David Hildenbrand <david@redhat.com> |
| Cc: "David S. Miller" <davem@davemloft.net> |
| Cc: Dinh Nguyen <dinguyen@kernel.org> |
| Cc: Geert Uytterhoeven <geert@linux-m68k.org> |
| Cc: Geert Uytterhoeven <geert+renesas@glider.be> |
| Cc: Guo Ren <guoren@kernel.org> |
| Cc: Huacai Chen <chenhuacai@kernel.org> |
| Cc: Hugh Dickins <hughd@google.com> |
| Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> |
| Cc: Jonas Bonn <jonas@southpole.se> |
| Cc: Matthew Wilcox <willy@infradead.org> |
| Cc: Palmer Dabbelt <palmer@rivosinc.com> |
| Cc: Paul Walmsley <paul.walmsley@sifive.com> |
| Cc: Richard Weinberger <richard@nod.at> |
| Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> |
| Cc: Yoshinori Sato <ysato@users.sourceforge.jp> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| arch/s390/include/asm/pgalloc.h | 4 |
| arch/s390/include/asm/tlb.h | 4 |
| arch/s390/mm/pgalloc.c | 128 +++++++++++++++--------------- |
| 3 files changed, 69 insertions(+), 67 deletions(-) |
| |
| --- a/arch/s390/include/asm/pgalloc.h~s390-convert-various-pgalloc-functions-to-use-ptdescs |
| +++ a/arch/s390/include/asm/pgalloc.h |
| @@ -86,7 +86,7 @@ static inline pmd_t *pmd_alloc_one(struc |
| if (!table) |
| return NULL; |
| crst_table_init(table, _SEGMENT_ENTRY_EMPTY); |
| - if (!pgtable_pmd_page_ctor(virt_to_page(table))) { |
| + if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { |
| crst_table_free(mm, table); |
| return NULL; |
| } |
| @@ -97,7 +97,7 @@ static inline void pmd_free(struct mm_st |
| { |
| if (mm_pmd_folded(mm)) |
| return; |
| - pgtable_pmd_page_dtor(virt_to_page(pmd)); |
| + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); |
| crst_table_free(mm, (unsigned long *) pmd); |
| } |
| |
| --- a/arch/s390/include/asm/tlb.h~s390-convert-various-pgalloc-functions-to-use-ptdescs |
| +++ a/arch/s390/include/asm/tlb.h |
| @@ -89,12 +89,12 @@ static inline void pmd_free_tlb(struct m |
| { |
| if (mm_pmd_folded(tlb->mm)) |
| return; |
| - pgtable_pmd_page_dtor(virt_to_page(pmd)); |
| + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); |
| __tlb_adjust_range(tlb, address, PAGE_SIZE); |
| tlb->mm->context.flush_mm = 1; |
| tlb->freed_tables = 1; |
| tlb->cleared_puds = 1; |
| - tlb_remove_table(tlb, pmd); |
| + tlb_remove_ptdesc(tlb, pmd); |
| } |
| |
| /* |
| --- a/arch/s390/mm/pgalloc.c~s390-convert-various-pgalloc-functions-to-use-ptdescs |
| +++ a/arch/s390/mm/pgalloc.c |
| @@ -43,17 +43,17 @@ __initcall(page_table_register_sysctl); |
| |
| unsigned long *crst_table_alloc(struct mm_struct *mm) |
| { |
| - struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); |
| + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); |
| |
| - if (!page) |
| + if (!ptdesc) |
| return NULL; |
| - arch_set_page_dat(page, CRST_ALLOC_ORDER); |
| - return (unsigned long *) page_to_virt(page); |
| + arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); |
| + return (unsigned long *) ptdesc_to_virt(ptdesc); |
| } |
| |
| void crst_table_free(struct mm_struct *mm, unsigned long *table) |
| { |
| - free_pages((unsigned long)table, CRST_ALLOC_ORDER); |
| + pagetable_free(virt_to_ptdesc(table)); |
| } |
| |
| static void __crst_table_upgrade(void *arg) |
| @@ -140,21 +140,21 @@ static inline unsigned int atomic_xor_bi |
| |
| struct page *page_table_alloc_pgste(struct mm_struct *mm) |
| { |
| - struct page *page; |
| + struct ptdesc *ptdesc; |
| u64 *table; |
| |
| - page = alloc_page(GFP_KERNEL); |
| - if (page) { |
| - table = (u64 *)page_to_virt(page); |
| + ptdesc = pagetable_alloc(GFP_KERNEL, 0); |
| + if (ptdesc) { |
| + table = (u64 *)ptdesc_to_virt(ptdesc); |
| memset64(table, _PAGE_INVALID, PTRS_PER_PTE); |
| memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); |
| } |
| - return page; |
| + return ptdesc_page(ptdesc); |
| } |
| |
| void page_table_free_pgste(struct page *page) |
| { |
| - __free_page(page); |
| + pagetable_free(page_ptdesc(page)); |
| } |
| |
| #endif /* CONFIG_PGSTE */ |
| @@ -242,7 +242,7 @@ void page_table_free_pgste(struct page * |
| unsigned long *page_table_alloc(struct mm_struct *mm) |
| { |
| unsigned long *table; |
| - struct page *page; |
| + struct ptdesc *ptdesc; |
| unsigned int mask, bit; |
| |
| /* Try to get a fragment of a 4K page as a 2K page table */ |
| @@ -250,9 +250,9 @@ unsigned long *page_table_alloc(struct m |
| table = NULL; |
| spin_lock_bh(&mm->context.lock); |
| if (!list_empty(&mm->context.pgtable_list)) { |
| - page = list_first_entry(&mm->context.pgtable_list, |
| - struct page, lru); |
| - mask = atomic_read(&page->_refcount) >> 24; |
| + ptdesc = list_first_entry(&mm->context.pgtable_list, |
| + struct ptdesc, pt_list); |
| + mask = atomic_read(&ptdesc->_refcount) >> 24; |
| /* |
| * The pending removal bits must also be checked. |
| * Failure to do so might lead to an impossible |
| @@ -264,13 +264,13 @@ unsigned long *page_table_alloc(struct m |
| */ |
| mask = (mask | (mask >> 4)) & 0x03U; |
| if (mask != 0x03U) { |
| - table = (unsigned long *) page_to_virt(page); |
| + table = (unsigned long *) ptdesc_to_virt(ptdesc); |
| bit = mask & 1; /* =1 -> second 2K */ |
| if (bit) |
| table += PTRS_PER_PTE; |
| - atomic_xor_bits(&page->_refcount, |
| + atomic_xor_bits(&ptdesc->_refcount, |
| 0x01U << (bit + 24)); |
| - list_del_init(&page->lru); |
| + list_del_init(&ptdesc->pt_list); |
| } |
| } |
| spin_unlock_bh(&mm->context.lock); |
| @@ -278,28 +278,28 @@ unsigned long *page_table_alloc(struct m |
| return table; |
| } |
| /* Allocate a fresh page */ |
| - page = alloc_page(GFP_KERNEL); |
| - if (!page) |
| + ptdesc = pagetable_alloc(GFP_KERNEL, 0); |
| + if (!ptdesc) |
| return NULL; |
| - if (!pgtable_pte_page_ctor(page)) { |
| - __free_page(page); |
| + if (!pagetable_pte_ctor(ptdesc)) { |
| + pagetable_free(ptdesc); |
| return NULL; |
| } |
| - arch_set_page_dat(page, 0); |
| + arch_set_page_dat(ptdesc_page(ptdesc), 0); |
| /* Initialize page table */ |
| - table = (unsigned long *) page_to_virt(page); |
| + table = (unsigned long *) ptdesc_to_virt(ptdesc); |
| if (mm_alloc_pgste(mm)) { |
| /* Return 4K page table with PGSTEs */ |
| - INIT_LIST_HEAD(&page->lru); |
| - atomic_xor_bits(&page->_refcount, 0x03U << 24); |
| + INIT_LIST_HEAD(&ptdesc->pt_list); |
| + atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); |
| memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); |
| memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); |
| } else { |
| /* Return the first 2K fragment of the page */ |
| - atomic_xor_bits(&page->_refcount, 0x01U << 24); |
| + atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24); |
| memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); |
| spin_lock_bh(&mm->context.lock); |
| - list_add(&page->lru, &mm->context.pgtable_list); |
| + list_add(&ptdesc->pt_list, &mm->context.pgtable_list); |
| spin_unlock_bh(&mm->context.lock); |
| } |
| return table; |
| @@ -322,19 +322,18 @@ static void page_table_release_check(str |
| |
| static void pte_free_now(struct rcu_head *head) |
| { |
| - struct page *page; |
| + struct ptdesc *ptdesc; |
| |
| - page = container_of(head, struct page, rcu_head); |
| - pgtable_pte_page_dtor(page); |
| - __free_page(page); |
| + ptdesc = container_of(head, struct ptdesc, pt_rcu_head); |
| + pagetable_pte_dtor(ptdesc); |
| + pagetable_free(ptdesc); |
| } |
| |
| void page_table_free(struct mm_struct *mm, unsigned long *table) |
| { |
| unsigned int mask, bit, half; |
| - struct page *page; |
| + struct ptdesc *ptdesc = virt_to_ptdesc(table); |
| |
| - page = virt_to_page(table); |
| if (!mm_alloc_pgste(mm)) { |
| /* Free 2K page table fragment of a 4K page */ |
| bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); |
| @@ -344,51 +343,50 @@ void page_table_free(struct mm_struct *m |
| * will happen outside of the critical section from this |
| * function or from __tlb_remove_table() |
| */ |
| - mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); |
| + mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); |
| mask >>= 24; |
| - if ((mask & 0x03U) && !PageActive(page)) { |
| + if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { |
| /* |
| * Other half is allocated, and neither half has had |
| * its free deferred: add page to head of list, to make |
| * this freed half available for immediate reuse. |
| */ |
| - list_add(&page->lru, &mm->context.pgtable_list); |
| + list_add(&ptdesc->pt_list, &mm->context.pgtable_list); |
| } else { |
| /* If page is on list, now remove it. */ |
| - list_del_init(&page->lru); |
| + list_del_init(&ptdesc->pt_list); |
| } |
| spin_unlock_bh(&mm->context.lock); |
| - mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24)); |
| + mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24)); |
| mask >>= 24; |
| if (mask != 0x00U) |
| return; |
| half = 0x01U << bit; |
| } else { |
| half = 0x03U; |
| - mask = atomic_xor_bits(&page->_refcount, 0x03U << 24); |
| + mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); |
| mask >>= 24; |
| } |
| |
| - page_table_release_check(page, table, half, mask); |
| - if (TestClearPageActive(page)) |
| - call_rcu(&page->rcu_head, pte_free_now); |
| + page_table_release_check(ptdesc_page(ptdesc), table, half, mask); |
| + if (folio_test_clear_active(ptdesc_folio(ptdesc))) |
| + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); |
| else |
| - pte_free_now(&page->rcu_head); |
| + pte_free_now(&ptdesc->pt_rcu_head); |
| } |
| |
| void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, |
| unsigned long vmaddr) |
| { |
| struct mm_struct *mm; |
| - struct page *page; |
| unsigned int bit, mask; |
| + struct ptdesc *ptdesc = virt_to_ptdesc(table); |
| |
| mm = tlb->mm; |
| - page = virt_to_page(table); |
| if (mm_alloc_pgste(mm)) { |
| gmap_unlink(mm, table, vmaddr); |
| table = (unsigned long *) ((unsigned long)table | 0x03U); |
| - tlb_remove_table(tlb, table); |
| + tlb_remove_ptdesc(tlb, table); |
| return; |
| } |
| bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); |
| @@ -398,19 +396,19 @@ void page_table_free_rcu(struct mmu_gath |
| * outside of the critical section from __tlb_remove_table() or from |
| * page_table_free() |
| */ |
| - mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); |
| + mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24)); |
| mask >>= 24; |
| - if ((mask & 0x03U) && !PageActive(page)) { |
| + if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) { |
| /* |
| * Other half is allocated, and neither half has had |
| * its free deferred: add page to end of list, to make |
| * this freed half available for reuse once its pending |
| * bit has been cleared by __tlb_remove_table(). |
| */ |
| - list_add_tail(&page->lru, &mm->context.pgtable_list); |
| + list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list); |
| } else { |
| /* If page is on list, now remove it. */ |
| - list_del_init(&page->lru); |
| + list_del_init(&ptdesc->pt_list); |
| } |
| spin_unlock_bh(&mm->context.lock); |
| table = (unsigned long *) ((unsigned long) table | (0x01U << bit)); |
| @@ -421,30 +419,30 @@ void __tlb_remove_table(void *_table) |
| { |
| unsigned int mask = (unsigned long) _table & 0x03U, half = mask; |
| void *table = (void *)((unsigned long) _table ^ mask); |
| - struct page *page = virt_to_page(table); |
| + struct ptdesc *ptdesc = virt_to_ptdesc(table); |
| |
| switch (half) { |
| case 0x00U: /* pmd, pud, or p4d */ |
| - free_pages((unsigned long)table, CRST_ALLOC_ORDER); |
| + pagetable_free(ptdesc); |
| return; |
| case 0x01U: /* lower 2K of a 4K page table */ |
| case 0x02U: /* higher 2K of a 4K page table */ |
| - mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24)); |
| + mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24)); |
| mask >>= 24; |
| if (mask != 0x00U) |
| return; |
| break; |
| case 0x03U: /* 4K page table with pgstes */ |
| - mask = atomic_xor_bits(&page->_refcount, 0x03U << 24); |
| + mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24); |
| mask >>= 24; |
| break; |
| } |
| |
| - page_table_release_check(page, table, half, mask); |
| - if (TestClearPageActive(page)) |
| - call_rcu(&page->rcu_head, pte_free_now); |
| + page_table_release_check(ptdesc_page(ptdesc), table, half, mask); |
| + if (folio_test_clear_active(ptdesc_folio(ptdesc))) |
| + call_rcu(&ptdesc->pt_rcu_head, pte_free_now); |
| else |
| - pte_free_now(&page->rcu_head); |
| + pte_free_now(&ptdesc->pt_rcu_head); |
| } |
| |
| #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| @@ -488,16 +486,20 @@ static void base_pgt_free(unsigned long |
| static unsigned long *base_crst_alloc(unsigned long val) |
| { |
| unsigned long *table; |
| + struct ptdesc *ptdesc; |
| |
| - table = (unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER); |
| - if (table) |
| - crst_table_init(table, val); |
| + ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER); |
| + if (!ptdesc) |
| + return NULL; |
| + table = ptdesc_address(ptdesc); |
| + |
| + crst_table_init(table, val); |
| return table; |
| } |
| |
| static void base_crst_free(unsigned long *table) |
| { |
| - free_pages((unsigned long)table, CRST_ALLOC_ORDER); |
| + pagetable_free(virt_to_ptdesc(table)); |
| } |
| |
| #define BASE_ADDR_END_FUNC(NAME, SIZE) \ |
| _ |