| From: "Matthew Wilcox (Oracle)" <willy@infradead.org> |
| Subject: mm: add default definition of set_ptes() |
| Date: Wed, 2 Aug 2023 16:13:34 +0100 |
| |
| Most architectures can just define set_pte() and PFN_PTE_SHIFT to use this |
| definition. It's also a handy spot to document the guarantees provided by |
| the MM. |
| |
| Link: https://lkml.kernel.org/r/20230802151406.3735276-7-willy@infradead.org |
| Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> |
| Suggested-by: Mike Rapoport (IBM) <rppt@kernel.org> |
| Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/pgtable.h | 81 ++++++++++++++++++++++++++++---------- |
| 1 file changed, 60 insertions(+), 21 deletions(-) |
| |
| --- a/include/linux/pgtable.h~mm-add-default-definition-of-set_ptes |
| +++ a/include/linux/pgtable.h |
| @@ -182,6 +182,66 @@ static inline int pmd_young(pmd_t pmd) |
| } |
| #endif |
| |
| +/* |
| + * A facility to provide lazy MMU batching. This allows PTE updates and |
| + * page invalidations to be delayed until a call to leave lazy MMU mode |
| + * is issued. Some architectures may benefit from doing this, and it is |
| + * beneficial for both shadow and direct mode hypervisors, which may batch |
| + * the PTE updates which happen during this window. Note that using this |
| + * interface requires that read hazards be removed from the code. A read |
| + * hazard could result in the direct mode hypervisor case, since the actual |
| + * write to the page tables may not yet have taken place, so reads through |
| + * a raw PTE pointer after it has been modified are not guaranteed to be |
| + * up to date. This mode can only be entered and left under the protection of |
| + * the page table locks for all page tables which may be modified. In the UP |
| + * case, this is required so that preemption is disabled, and in the SMP case, |
| + * it must synchronize the delayed page table writes properly on other CPUs. |
| + */ |
| +#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE |
| +#define arch_enter_lazy_mmu_mode() do {} while (0) |
| +#define arch_leave_lazy_mmu_mode() do {} while (0) |
| +#define arch_flush_lazy_mmu_mode() do {} while (0) |
| +#endif |
| + |
| +#ifndef set_ptes |
| +#ifdef PFN_PTE_SHIFT |
| +/** |
| + * set_ptes - Map consecutive pages to a contiguous range of addresses. |
| + * @mm: Address space to map the pages into. |
| + * @addr: Address to map the first page at. |
| + * @ptep: Page table pointer for the first entry. |
| + * @pte: Page table entry for the first page. |
| + * @nr: Number of pages to map. |
| + * |
| + * May be overridden by the architecture, or the architecture can define |
| + * set_pte() and PFN_PTE_SHIFT. |
| + * |
| + * Context: The caller holds the page table lock. The pages all belong |
| + * to the same folio. The PTEs are all in the same PMD. |
| + */ |
| +static inline void set_ptes(struct mm_struct *mm, unsigned long addr, |
| + pte_t *ptep, pte_t pte, unsigned int nr) |
| +{ |
| + page_table_check_ptes_set(mm, ptep, pte, nr); |
| + |
| + arch_enter_lazy_mmu_mode(); |
| + for (;;) { |
| + set_pte(ptep, pte); |
| + if (--nr == 0) |
| + break; |
| + ptep++; |
| + pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); |
| + } |
| + arch_leave_lazy_mmu_mode(); |
| +} |
| +#ifndef set_pte_at |
| +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) |
| +#endif |
| +#endif |
| +#else |
| +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) |
| +#endif |
| + |
| #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
| extern int ptep_set_access_flags(struct vm_area_struct *vma, |
| unsigned long address, pte_t *ptep, |
| @@ -1052,27 +1112,6 @@ static inline pgprot_t pgprot_modify(pgp |
| #endif |
| |
| /* |
| - * A facility to provide lazy MMU batching. This allows PTE updates and |
| - * page invalidations to be delayed until a call to leave lazy MMU mode |
| - * is issued. Some architectures may benefit from doing this, and it is |
| - * beneficial for both shadow and direct mode hypervisors, which may batch |
| - * the PTE updates which happen during this window. Note that using this |
| - * interface requires that read hazards be removed from the code. A read |
| - * hazard could result in the direct mode hypervisor case, since the actual |
| - * write to the page tables may not yet have taken place, so reads though |
| - * a raw PTE pointer after it has been modified are not guaranteed to be |
| - * up to date. This mode can only be entered and left under the protection of |
| - * the page table locks for all page tables which may be modified. In the UP |
| - * case, this is required so that preemption is disabled, and in the SMP case, |
| - * it must synchronize the delayed page table writes properly on other CPUs. |
| - */ |
| -#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE |
| -#define arch_enter_lazy_mmu_mode() do {} while (0) |
| -#define arch_leave_lazy_mmu_mode() do {} while (0) |
| -#define arch_flush_lazy_mmu_mode() do {} while (0) |
| -#endif |
| - |
| -/* |
| * A facility to provide batching of the reload of page tables and |
| * other process state with the actual context switch code for |
| * paravirtualized guests. By convention, only one of the batched |
| _ |