| From: "Liam R. Howlett" <Liam.Howlett@oracle.com> |
| Subject: mm: enable maple tree RCU mode by default |
| Date: Mon, 27 Feb 2023 09:36:07 -0800 |
| |
| Use the maple tree in RCU mode for VMA tracking. |
| |
| The maple tree tracks the stack and is able to update the pivot |
| (lower/upper boundary) in-place to allow the page fault handler to write |
| to the tree while holding just the mmap read lock. This is safe because the |
| stack has a guard VMA, which ensures there will always be a NULL entry in |
| the direction of growth, so such a write will only update a pivot. |
| |
| It is possible, but not recommended, to have VMAs that grow up/down |
| without guard VMAs. syzbot has constructed a testcase which sets up a VMA |
| to grow and consume the empty space. Overwriting the entire NULL entry |
| causes the tree to be altered in a way that is not safe for concurrent |
| readers; the readers may see a node being rewritten or one that does not |
| match the maple state they are using. |
| |
| Enabling RCU mode allows concurrent readers to see a stable node and |
| return the expected result. |
| |
| [Liam.Howlett@Oracle.com: we don't need to free the nodes with RCU] |
| Link: https://lore.kernel.org/linux-mm/000000000000b0a65805f663ace6@google.com/ |
| Link: https://lkml.kernel.org/r/20230227173632.3292573-9-surenb@google.com |
| Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") |
| Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com> |
| Signed-off-by: Suren Baghdasaryan <surenb@google.com> |
| Reported-by: syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com |
| Cc: <stable@vger.kernel.org> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| --- |
| |
| include/linux/mm_types.h | 3 ++- |
| kernel/fork.c | 3 +++ |
| mm/mmap.c | 3 ++- |
| 3 files changed, 7 insertions(+), 2 deletions(-) |
| |
| --- a/include/linux/mm_types.h~mm-enable-maple-tree-rcu-mode-by-default |
| +++ a/include/linux/mm_types.h |
| @@ -774,7 +774,8 @@ struct mm_struct { |
| unsigned long cpu_bitmap[]; |
| }; |
| |
| -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) |
| +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ |
| + MT_FLAGS_USE_RCU) |
| extern struct mm_struct init_mm; |
| |
| /* Pointer magic because the dynamic array size confuses some compilers. */ |
| --- a/kernel/fork.c~mm-enable-maple-tree-rcu-mode-by-default |
| +++ a/kernel/fork.c |
| @@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(str |
| if (retval) |
| goto out; |
| |
| + mt_clear_in_rcu(vmi.mas.tree); |
| for_each_vma(old_vmi, mpnt) { |
| struct file *file; |
| |
| @@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(str |
| retval = arch_dup_mmap(oldmm, mm); |
| loop_out: |
| vma_iter_free(&vmi); |
| + if (!retval) |
| + mt_set_in_rcu(vmi.mas.tree); |
| out: |
| mmap_write_unlock(mm); |
| flush_tlb_mm(oldmm); |
| --- a/mm/mmap.c~mm-enable-maple-tree-rcu-mode-by-default |
| +++ a/mm/mmap.c |
| @@ -2277,7 +2277,7 @@ do_vmi_align_munmap(struct vma_iterator |
| int count = 0; |
| int error = -ENOMEM; |
| MA_STATE(mas_detach, &mt_detach, 0, 0); |
| - mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); |
| + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); |
| mt_set_external_lock(&mt_detach, &mm->mmap_lock); |
| |
| /* |
| @@ -3037,6 +3037,7 @@ void exit_mmap(struct mm_struct *mm) |
| */ |
| set_bit(MMF_OOM_SKIP, &mm->flags); |
| mmap_write_lock(mm); |
| + mt_clear_in_rcu(&mm->mm_mt); |
| free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, |
| USER_PGTABLES_CEILING); |
| tlb_finish_mmu(&tlb); |
| _ |