| From e4dc3489143f84f7ed30be58b886bb6772f229b9 Mon Sep 17 00:00:00 2001 |
| From: Qi Zheng <zhengqi.arch@bytedance.com> |
| Date: Fri, 23 Jul 2021 15:50:41 -0700 |
| Subject: mm: fix the deadlock in finish_fault() |
| |
| From: Qi Zheng <zhengqi.arch@bytedance.com> |
| |
| commit e4dc3489143f84f7ed30be58b886bb6772f229b9 upstream. |
| |
| Commit 63f3655f9501 ("mm, memcg: fix reclaim deadlock with writeback") |
| fixed the following ABBA deadlock by pre-allocating the pte page table |
| without holding the page lock. |
| |
|                                 lock_page(A) |
|                                 SetPageWriteback(A) |
|                                 unlock_page(A) |
|   lock_page(B) |
|                                 lock_page(B) |
|   pte_alloc_one |
|     shrink_page_list |
|       wait_on_page_writeback(A) |
|                                 SetPageWriteback(B) |
|                                 unlock_page(B) |
| |
|                                 # flush A, B to clear the writeback |
| |
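| Commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() |
| codepaths") reworked the relevant code but ignored this race: |
| finish_fault() calls pte_alloc() even when a pte page table has already |
| been pre-allocated. This allows the deadlock above to appear again, so |
| fix it by populating the pmd with vmf->prealloc_pte when it is available. |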
| |
| Link: https://lkml.kernel.org/r/20210721074849.57004-1-zhengqi.arch@bytedance.com |
| Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") |
| Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> |
| Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Cc: Johannes Weiner <hannes@cmpxchg.org> |
| Cc: Michal Hocko <mhocko@kernel.org> |
| Cc: Vladimir Davydov <vdavydov.dev@gmail.com> |
| Cc: Muchun Song <songmuchun@bytedance.com> |
| Cc: <stable@vger.kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| mm/memory.c | 11 ++++++++++- |
| 1 file changed, 10 insertions(+), 1 deletion(-) |
| |
| --- a/mm/memory.c |
| +++ b/mm/memory.c |
| @@ -3891,8 +3891,17 @@ vm_fault_t finish_fault(struct vm_fault |
| return ret; |
| } |
| |
| - if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) |
| + if (vmf->prealloc_pte) { |
| + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); |
| + if (likely(pmd_none(*vmf->pmd))) { |
| + mm_inc_nr_ptes(vma->vm_mm); |
| + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); |
| + vmf->prealloc_pte = NULL; |
| + } |
| + spin_unlock(vmf->ptl); |
| + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { |
| return VM_FAULT_OOM; |
| + } |
| } |
| |
| /* See comment in handle_pte_fault() */ |