| From: Christophe Leroy <christophe.leroy@csgroup.eu> |
| Subject: powerpc/e500: use contiguous PMD instead of hugepd |
| Date: Tue, 2 Jul 2024 15:51:32 +0200 |
| |
| e500 supports many page sizes, among which the following sizes are |
| currently implemented in the kernel: 4M, 16M, 64M, 256M, 1G. |
| |
| On e500, TLB misses for hugepages are handled exclusively by SW, even on |
| e6500 which has HW assistance for 4k pages, so there are no constraints |
| like on the 8xx. |
| |
| On e500/32, all are at PGD/PMD level and can be handled as cont-PMD. |
| |
| On e500/64, smaller ones are on PMD while bigger ones are on PUD. Again, |
| they can easily be handled as cont-PMD and cont-PUD instead of hugepd. |
| |
| On e500/32, use the pagesize bits in PTE to know if it is a PMD or a leaf |
| entry. This works because the pagesize bits are in the last 12 bits and |
| page tables are 4k aligned. |
| |
| On e500/64, use the highest bit, which is always 1 on PxD (because a PxD |
| contains the virtual address of kernel memory) and always 0 on PTEs because |
| not all bits of RPN are used/possible. |
| |
| Link: https://lkml.kernel.org/r/dd085987816ed2a0c70adb7e34966cb833fc03e1.1719928057.git.christophe.leroy@csgroup.eu |
| Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> |
| Cc: Jason Gunthorpe <jgg@nvidia.com> |
| Cc: Michael Ellerman <mpe@ellerman.id.au> |
| Cc: Nicholas Piggin <npiggin@gmail.com> |
| Cc: Oscar Salvador <osalvador@suse.de> |
| Cc: Peter Xu <peterx@redhat.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| arch/powerpc/include/asm/nohash/hugetlb-e500.h | 32 +---------- |
| arch/powerpc/include/asm/nohash/pgalloc.h | 2 |
| arch/powerpc/include/asm/nohash/pgtable.h | 42 +++++++++++---- |
| arch/powerpc/include/asm/nohash/pte-e500.h | 33 +++++++++++ |
| arch/powerpc/include/asm/page.h | 15 ----- |
| arch/powerpc/kernel/head_85xx.S | 21 ++----- |
| arch/powerpc/mm/hugetlbpage.c | 2 |
| arch/powerpc/mm/nohash/tlb_low_64e.S | 7 -- |
| arch/powerpc/mm/pgtable.c | 31 +++++++++++ |
| arch/powerpc/platforms/Kconfig.cputype | 1 |
| 10 files changed, 107 insertions(+), 79 deletions(-) |
| |
| --- a/arch/powerpc/include/asm/nohash/hugetlb-e500.h~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/include/asm/nohash/hugetlb-e500.h |
| @@ -2,38 +2,12 @@ |
| #ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H |
| #define _ASM_POWERPC_NOHASH_HUGETLB_E500_H |
| |
| -static inline pte_t *hugepd_page(hugepd_t hpd) |
| -{ |
| - if (WARN_ON(!hugepd_ok(hpd))) |
| - return NULL; |
| - |
| - return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE); |
| -} |
| - |
| -static inline unsigned int hugepd_shift(hugepd_t hpd) |
| -{ |
| - return hpd_val(hpd) & HUGEPD_SHIFT_MASK; |
| -} |
| - |
| -static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, |
| - unsigned int pdshift) |
| -{ |
| - /* |
| - * On FSL BookE, we have multiple higher-level table entries that |
| - * point to the same hugepte. Just use the first one since they're all |
| - * identical. So for that case, idx=0. |
| - */ |
| - return hugepd_page(hpd); |
| -} |
| +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT |
| +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, |
| + pte_t pte, unsigned long sz); |
| |
| void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); |
| |
| -static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) |
| -{ |
| - /* We use the old format for PPC_E500 */ |
| - *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); |
| -} |
| - |
| static inline int check_and_get_huge_psize(int shift) |
| { |
| if (shift & 1) /* Not a power of 4 */ |
| --- a/arch/powerpc/include/asm/nohash/pgalloc.h~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/include/asm/nohash/pgalloc.h |
| @@ -44,8 +44,6 @@ static inline void pgtable_free(void *ta |
| } |
| } |
| |
| -#define get_hugepd_cache_index(x) (x) |
| - |
| static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) |
| { |
| unsigned long pgf = (unsigned long)table; |
| --- a/arch/powerpc/include/asm/nohash/pgtable.h~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/include/asm/nohash/pgtable.h |
| @@ -31,6 +31,13 @@ static inline pte_basic_t pte_update(str |
| |
| extern int icache_44x_need_flush; |
| |
| +#ifndef pte_huge_size |
| +static inline unsigned long pte_huge_size(pte_t pte) |
| +{ |
| + return PAGE_SIZE; |
| +} |
| +#endif |
| + |
| /* |
| * PTE updates. This function is called whenever an existing |
| * valid PTE is updated. This does -not- include set_pte_at() |
| @@ -52,11 +59,34 @@ static inline pte_basic_t pte_update(str |
| { |
| pte_basic_t old = pte_val(*p); |
| pte_basic_t new = (old & ~(pte_basic_t)clr) | set; |
| + unsigned long sz; |
| + unsigned long pdsize; |
| + int i; |
| |
| if (new == old) |
| return old; |
| |
| - *p = __pte(new); |
| + if (huge) |
| + sz = pte_huge_size(__pte(old)); |
| + else |
| + sz = PAGE_SIZE; |
| + |
| + if (sz < PMD_SIZE) |
| + pdsize = PAGE_SIZE; |
| + else if (sz < PUD_SIZE) |
| + pdsize = PMD_SIZE; |
| + else if (sz < P4D_SIZE) |
| + pdsize = PUD_SIZE; |
| + else if (sz < PGDIR_SIZE) |
| + pdsize = P4D_SIZE; |
| + else |
| + pdsize = PGDIR_SIZE; |
| + |
| + for (i = 0; i < sz / pdsize; i++, p++) { |
| + *p = __pte(new); |
| + if (new) |
| + new += (unsigned long long)(pdsize / PAGE_SIZE) << PTE_RPN_SHIFT; |
| + } |
| |
| if (IS_ENABLED(CONFIG_44x) && !is_kernel_addr(addr) && (old & _PAGE_EXEC)) |
| icache_44x_need_flush = 1; |
| @@ -340,16 +370,6 @@ static inline void __set_pte_at(struct m |
| |
| #define pgprot_writecombine pgprot_noncached_wc |
| |
| -#ifdef CONFIG_ARCH_HAS_HUGEPD |
| -static inline int hugepd_ok(hugepd_t hpd) |
| -{ |
| - /* We clear the top bit to indicate hugepd */ |
| - return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); |
| -} |
| - |
| -#define is_hugepd(hpd) (hugepd_ok(hpd)) |
| -#endif |
| - |
| int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); |
| void unmap_kernel_page(unsigned long va); |
| |
| --- a/arch/powerpc/include/asm/nohash/pte-e500.h~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/include/asm/nohash/pte-e500.h |
| @@ -101,6 +101,39 @@ static inline unsigned long pte_huge_siz |
| } |
| #define pte_huge_size pte_huge_size |
| |
| +static inline int pmd_leaf(pmd_t pmd) |
| +{ |
| + if (IS_ENABLED(CONFIG_PPC64)) |
| + return (long)pmd_val(pmd) > 0; |
| + else |
| + return pmd_val(pmd) & _PAGE_PSIZE_MSK; |
| +} |
| +#define pmd_leaf pmd_leaf |
| + |
| +static inline unsigned long pmd_leaf_size(pmd_t pmd) |
| +{ |
| + return pte_huge_size(__pte(pmd_val(pmd))); |
| +} |
| +#define pmd_leaf_size pmd_leaf_size |
| + |
| +#ifdef CONFIG_PPC64 |
| +static inline int pud_leaf(pud_t pud) |
| +{ |
| + if (IS_ENABLED(CONFIG_PPC64)) |
| + return (long)pud_val(pud) > 0; |
| + else |
| + return pud_val(pud) & _PAGE_PSIZE_MSK; |
| +} |
| +#define pud_leaf pud_leaf |
| + |
| +static inline unsigned long pud_leaf_size(pud_t pud) |
| +{ |
| + return pte_huge_size(__pte(pud_val(pud))); |
| +} |
| +#define pud_leaf_size pud_leaf_size |
| + |
| +#endif |
| + |
| #endif /* __ASSEMBLY__ */ |
| |
| #endif /* __KERNEL__ */ |
| --- a/arch/powerpc/include/asm/page.h~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/include/asm/page.h |
| @@ -269,20 +269,7 @@ static inline const void *pfn_to_kaddr(u |
| #define is_kernel_addr(x) ((x) >= TASK_SIZE) |
| #endif |
| |
| -#ifndef CONFIG_PPC_BOOK3S_64 |
| -/* |
| - * Use the top bit of the higher-level page table entries to indicate whether |
| - * the entries we point to contain hugepages. This works because we know that |
| - * the page tables live in kernel space. If we ever decide to support having |
| - * page tables at arbitrary addresses, this breaks and will have to change. |
| - */ |
| -#ifdef CONFIG_PPC64 |
| -#define PD_HUGE 0x8000000000000000UL |
| -#else |
| -#define PD_HUGE 0x80000000 |
| -#endif |
| - |
| -#else /* CONFIG_PPC_BOOK3S_64 */ |
| +#ifdef CONFIG_PPC_BOOK3S_64 |
| /* |
| * Book3S 64 stores real addresses in the hugepd entries to |
| * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE. |
| --- a/arch/powerpc/kernel/head_85xx.S~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/kernel/head_85xx.S |
| @@ -311,16 +311,14 @@ set_ivor: |
| rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ |
| add r12, r11, r12; \ |
| lwz r11, 4(r12); /* Get pgd/pmd entry */ \ |
| + rlwinm. r10, r11, 32 - _PAGE_PSIZE_SHIFT, 0x1e; /* get tsize*/ \ |
| + bne 1000f; /* Huge page (leaf entry) */ \ |
| rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ |
| - blt 1000f; /* Normal non-huge page */ \ |
| beq 2f; /* Bail if no table */ \ |
| - oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ |
| - andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ |
| - xor r12, r10, r11; /* drop size bits from pointer */ \ |
| - b 1001f; \ |
| -1000: rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ |
| + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ |
| li r10, 0; /* clear r10 */ \ |
| -1001: lwz r11, 4(r12); /* Get pte entry */ |
| + lwz r11, 4(r12); /* Get pte entry */ \ |
| +1000: |
| #else |
| #define FIND_PTE \ |
| rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ |
| @@ -735,17 +733,12 @@ finish_tlb_load: |
| lwz r15, 0(r14) |
| 100: stw r15, 0(r17) |
| |
| - /* |
| - * Calc MAS1_TSIZE from r10 (which has pshift encoded) |
| - * tlb_enc = (pshift - 10). |
| - */ |
| - subi r15, r10, 10 |
| mfspr r16, SPRN_MAS1 |
| - rlwimi r16, r15, 7, 20, 24 |
| + rlwimi r16, r10, MAS1_TSIZE_SHIFT, MAS1_TSIZE_MASK |
| mtspr SPRN_MAS1, r16 |
| |
| /* copy the pshift for use later */ |
| - mr r14, r10 |
| + addi r14, r10, _PAGE_PSIZE_SHIFT_OFFSET |
| |
| /* fall through */ |
| |
| --- a/arch/powerpc/mm/hugetlbpage.c~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/mm/hugetlbpage.c |
| @@ -625,8 +625,6 @@ static int __init hugetlbpage_init(void) |
| */ |
| if (pdshift > shift) { |
| pgtable_cache_add(pdshift - shift); |
| - } else if (IS_ENABLED(CONFIG_PPC_E500)) { |
| - pgtable_cache_add(PTE_T_ORDER); |
| } |
| |
| configured = true; |
| --- a/arch/powerpc/mm/nohash/tlb_low_64e.S~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/mm/nohash/tlb_low_64e.S |
| @@ -450,11 +450,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) |
| |
| tlb_miss_huge_e6500: |
| beq tlb_miss_fault_e6500 |
| - li r10,1 |
| - andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ |
| - rldimi r14,r10,63,0 /* Set PD_HUGE */ |
| - xor r14,r14,r15 /* Clear size bits */ |
| - ldx r14,0,r14 |
| + rlwinm r15,r14,32-_PAGE_PSIZE_SHIFT,0x1e |
| |
| /* |
| * Now we build the MAS for a huge page. |
| @@ -465,7 +461,6 @@ tlb_miss_huge_e6500: |
| * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler |
| */ |
| |
| - subi r15,r15,10 /* Convert psize to tsize */ |
| mfspr r10,SPRN_MAS1 |
| rlwinm r10,r10,0,~MAS1_IND |
| rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK |
| --- a/arch/powerpc/mm/pgtable.c~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/mm/pgtable.c |
| @@ -331,6 +331,37 @@ void set_huge_pte_at(struct mm_struct *m |
| __set_huge_pte_at(pmdp, ptep, pte_val(pte)); |
| } |
| } |
| +#elif defined(CONFIG_PPC_E500) |
| +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, |
| + pte_t pte, unsigned long sz) |
| +{ |
| + unsigned long pdsize; |
| + int i; |
| + |
| + pte = set_pte_filter(pte, addr); |
| + |
| + /* |
| + * Make sure hardware valid bit is not set. We don't do |
| + * tlb flush for this update. |
| + */ |
| + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); |
| + |
| + if (sz < PMD_SIZE) |
| + pdsize = PAGE_SIZE; |
| + else if (sz < PUD_SIZE) |
| + pdsize = PMD_SIZE; |
| + else if (sz < P4D_SIZE) |
| + pdsize = PUD_SIZE; |
| + else if (sz < PGDIR_SIZE) |
| + pdsize = P4D_SIZE; |
| + else |
| + pdsize = PGDIR_SIZE; |
| + |
| + for (i = 0; i < sz / pdsize; i++, ptep++, addr += pdsize) { |
| + __set_pte_at(mm, addr, ptep, pte, 0); |
| + pte = __pte(pte_val(pte) + ((unsigned long long)pdsize / PAGE_SIZE << PFN_PTE_SHIFT)); |
| + } |
| +} |
| #endif |
| #endif /* CONFIG_HUGETLB_PAGE */ |
| |
| --- a/arch/powerpc/platforms/Kconfig.cputype~powerpc-e500-use-contiguous-pmd-instead-of-hugepd |
| +++ a/arch/powerpc/platforms/Kconfig.cputype |
| @@ -291,7 +291,6 @@ config PPC_BOOK3S |
| config PPC_E500 |
| select FSL_EMB_PERFMON |
| bool |
| - select ARCH_HAS_HUGEPD if HUGETLB_PAGE |
| select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 |
| select PPC_SMP_MUXED_IPI |
| select PPC_DOORBELL |
| _ |