| From bf10239c8e5a92be04e259ce37833cb9e826c36d Mon Sep 17 00:00:00 2001 |
| From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> |
| Date: Tue, 31 Jul 2012 16:45:52 -0700 |
| Subject: [PATCH] mm: mmu_notifier: fix freed page still mapped in secondary |
| MMU |
| |
| commit 3ad3d901bbcfb15a5e4690e55350db0899095a68 upstream. |
| |
| mmu_notifier_release() is called when the process is exiting. It will |
| delete all the mmu notifiers. But at this time the pages belonging to the |
| process are still present in the page tables and on the LRU list, so |
| this race can happen: |
| |
| CPU 0                                   CPU 1 |
| mmu_notifier_release:                   try_to_unmap: |
|   hlist_del_init_rcu(&mn->hlist); |
|                                         ptep_clear_flush_notify: |
|                                           mmu notifier not found |
|                                         free page !!!!!! |
|                                         /* |
|                                          * At this point, the page has been |
|                                          * freed, but it is still mapped in |
|                                          * the secondary MMU. |
|                                          */ |
| |
|   mn->ops->release(mn, mm); |
| |
| Then the box is not stable and sometimes we can get this bug: |
| |
| [ 738.075923] BUG: Bad page state in process migrate-perf pfn:03bec |
| [ 738.075931] page:ffffea00000efb00 count:0 mapcount:0 mapping: (null) index:0x8076 |
| [ 738.075936] page flags: 0x20000000000014(referenced|dirty) |
| |
| The same issue is present in mmu_notifier_unregister(). |
| |
| We can call ->release before deleting the notifier to ensure the page has |
| been unmapped from the secondary MMU before it is freed. |
| |
| Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> |
| Cc: Avi Kivity <avi@redhat.com> |
| Cc: Marcelo Tosatti <mtosatti@redhat.com> |
| Cc: Paul Gortmaker <paul.gortmaker@windriver.com> |
| Cc: Andrea Arcangeli <aarcange@redhat.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| --- |
| mm/mmu_notifier.c | 45 +++++++++++++++++++++++---------------------- |
| 1 file changed, 23 insertions(+), 22 deletions(-) |
| |
| diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c |
| index 438951d366f2..0b54146f986e 100644 |
| --- a/mm/mmu_notifier.c |
| +++ b/mm/mmu_notifier.c |
| @@ -33,6 +33,24 @@ |
| void __mmu_notifier_release(struct mm_struct *mm) |
| { |
| struct mmu_notifier *mn; |
| + struct hlist_node *n; |
| + |
| + /* |
| + * RCU here will block mmu_notifier_unregister until |
| + * ->release returns. |
| + */ |
| + rcu_read_lock(); |
| + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) |
| + /* |
| + * if ->release runs before mmu_notifier_unregister it |
| + * must be handled as it's the only way for the driver |
| + * to flush all existing sptes and stop the driver |
| + * from establishing any more sptes before all the |
| + * pages in the mm are freed. |
| + */ |
| + if (mn->ops->release) |
| + mn->ops->release(mn, mm); |
| + rcu_read_unlock(); |
| |
| spin_lock(&mm->mmu_notifier_mm->lock); |
| while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { |
| @@ -46,23 +64,6 @@ void __mmu_notifier_release(struct mm_struct *mm) |
| * mmu_notifier_unregister to return. |
| */ |
| hlist_del_init_rcu(&mn->hlist); |
| - /* |
| - * RCU here will block mmu_notifier_unregister until |
| - * ->release returns. |
| - */ |
| - rcu_read_lock(); |
| - spin_unlock(&mm->mmu_notifier_mm->lock); |
| - /* |
| - * if ->release runs before mmu_notifier_unregister it |
| - * must be handled as it's the only way for the driver |
| - * to flush all existing sptes and stop the driver |
| - * from establishing any more sptes before all the |
| - * pages in the mm are freed. |
| - */ |
| - if (mn->ops->release) |
| - mn->ops->release(mn, mm); |
| - rcu_read_unlock(); |
| - spin_lock(&mm->mmu_notifier_mm->lock); |
| } |
| spin_unlock(&mm->mmu_notifier_mm->lock); |
| |
| @@ -264,16 +265,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) |
| { |
| BUG_ON(atomic_read(&mm->mm_count) <= 0); |
| |
| - spin_lock(&mm->mmu_notifier_mm->lock); |
| if (!hlist_unhashed(&mn->hlist)) { |
| - hlist_del_rcu(&mn->hlist); |
| - |
| /* |
| * RCU here will force exit_mmap to wait ->release to finish |
| * before freeing the pages. |
| */ |
| rcu_read_lock(); |
| - spin_unlock(&mm->mmu_notifier_mm->lock); |
| + |
| /* |
| * exit_mmap will block in mmu_notifier_release to |
| * guarantee ->release is called before freeing the |
| @@ -282,8 +280,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) |
| if (mn->ops->release) |
| mn->ops->release(mn, mm); |
| rcu_read_unlock(); |
| - } else |
| + |
| + spin_lock(&mm->mmu_notifier_mm->lock); |
| + hlist_del_rcu(&mn->hlist); |
| spin_unlock(&mm->mmu_notifier_mm->lock); |
| + } |
| |
| /* |
| * Wait any running method to finish, of course including |
| -- |
| 1.8.5.2 |
| |