| From: Miaohe Lin <linmiaohe@huawei.com> |
| Subject: mm/hugetlb: make detecting shared pte more reliable |
| Date: Tue, 16 Aug 2022 21:05:53 +0800 |
| |
| If the pagetables are shared, we shouldn't copy or take references. Since |
| src could have 'unshared' its page table while dst shares with another vma, |
| huge_pte_none() on the dst entry is currently used to determine whether |
| dst_pte is shared. But this check isn't reliable: a shared page table can |
| in fact still hold a none pte. The page count of the ptep page should be |
| checked here instead in order to reliably determine whether the pte is shared. |
| |
| [lukas.bulwahn@gmail.com: remove unused local variable dst_entry in copy_hugetlb_page_range()] |
| Link: https://lkml.kernel.org/r/20220822082525.26071-1-lukas.bulwahn@gmail.com |
| Link: https://lkml.kernel.org/r/20220816130553.31406-7-linmiaohe@huawei.com |
| Signed-off-by: Miaohe Lin <linmiaohe@huawei.com> |
| Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com> |
| Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> |
| Cc: Muchun Song <songmuchun@bytedance.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| mm/hugetlb.c | 21 ++++++++------------- |
| 1 file changed, 8 insertions(+), 13 deletions(-) |
| |
| --- a/mm/hugetlb.c~mm-hugetlb-make-detecting-shared-pte-more-reliable |
| +++ a/mm/hugetlb.c |
| @@ -4750,7 +4750,7 @@ int copy_hugetlb_page_range(struct mm_st |
| struct vm_area_struct *dst_vma, |
| struct vm_area_struct *src_vma) |
| { |
| - pte_t *src_pte, *dst_pte, entry, dst_entry; |
| + pte_t *src_pte, *dst_pte, entry; |
| struct page *ptepage; |
| unsigned long addr; |
| bool cow = is_cow_mapping(src_vma->vm_flags); |
| @@ -4795,15 +4795,13 @@ int copy_hugetlb_page_range(struct mm_st |
| |
| /* |
| * If the pagetables are shared don't copy or take references. |
| - * dst_pte == src_pte is the common case of src/dest sharing. |
| * |
| + * dst_pte == src_pte is the common case of src/dest sharing. |
| * However, src could have 'unshared' and dst shares with |
| - * another vma. If dst_pte !none, this implies sharing. |
| - * Check here before taking page table lock, and once again |
| - * after taking the lock below. |
| + * another vma. So page_count of ptep page is checked instead |
| + * to reliably determine whether pte is shared. |
| */ |
| - dst_entry = huge_ptep_get(dst_pte); |
| - if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) { |
| + if (page_count(virt_to_page(dst_pte)) > 1) { |
| addr |= last_addr_mask; |
| continue; |
| } |
| @@ -4812,13 +4810,10 @@ int copy_hugetlb_page_range(struct mm_st |
| src_ptl = huge_pte_lockptr(h, src, src_pte); |
| spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); |
| entry = huge_ptep_get(src_pte); |
| - dst_entry = huge_ptep_get(dst_pte); |
| again: |
| - if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { |
| + if (huge_pte_none(entry)) { |
| /* |
| - * Skip if src entry none. Also, skip in the |
| - * unlikely case dst entry !none as this implies |
| - * sharing with another vma. |
| + * Skip if src entry none. |
| */ |
| ; |
| } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { |
| @@ -4897,7 +4892,7 @@ again: |
| restore_reserve_on_error(h, dst_vma, addr, |
| new); |
| put_page(new); |
| - /* dst_entry won't change as in child */ |
| + /* huge_ptep of dst_pte won't change as in child */ |
| goto again; |
| } |
| hugetlb_install_page(dst_vma, dst_pte, addr, new); |
| _ |