vma_iterator and /proc/$pid/maps

Add a vma_iterator, which is a very thin wrapper over the maple tree
interfaces, and use it in the implementation of /proc/$pid/maps.  The
maps walk no longer takes the mmap_sem; it is protected by
rcu_read_lock() instead.

I believe hold_task_mempolicy() is safe to call without the protection
of the mmap_sem.  We have a reference on struct task_struct, and
changing ->mempolicy is protected by task_lock(), which is acquired
in hold_task_mempolicy().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379..5eca9cb 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -288,7 +288,7 @@ struct proc_maps_private {
 	struct task_struct *task;
 	struct mm_struct *mm;
 #ifdef CONFIG_MMU
-	struct vm_area_struct *tail_vma;
+	struct vma_iterator iter;
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *task_mempolicy;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 501d27a..a34c304 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,6 +122,22 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 }
 #endif
 
+/* Return the VMA vma_find() yields, else the gate VMA with *ppos = -2UL. */
+static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
+		loff_t *ppos)
+{
+	struct vm_area_struct *found = vma_find(&priv->iter);
+
+	if (found) {
+		*ppos = found->vm_start;
+		return found;
+	}
+
+	/* Nothing found: record -2UL as the position, hand back the gate VMA. */
+	*ppos = -2UL;
+	return get_gate_vma(priv->mm);
+}
+
 static void *m_start(struct seq_file *m, loff_t *ppos)
 {
 	struct proc_maps_private *priv = m->private;
@@ -144,37 +160,24 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
 		return NULL;
 	}
 
-	if (mmap_read_lock_killable(mm)) {
-		mmput(mm);
-		put_task_struct(priv->task);
-		priv->task = NULL;
-		return ERR_PTR(-EINTR);
-	}
+	vma_iter_init(&priv->iter, mm, last_addr);
 
 	hold_task_mempolicy(priv);
-	priv->tail_vma = get_gate_vma(mm);
 
-	vma = find_vma(mm, last_addr);
-	if (vma)
-		return vma;
+	rcu_read_lock();
+	if (*ppos == -2UL)
+		return get_gate_vma(priv->mm);
 
-	return priv->tail_vma;
+	return proc_get_vma(priv, ppos);
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
 {
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *next = NULL, *vma = v;
-
-	if (vma != priv->tail_vma) {
-		next = vma_next(vma->vm_mm, vma);
-		if (!next)
-			next = priv->tail_vma;
+	if (*ppos == -2UL) {	/* position proc_get_vma() stores with the gate VMA */
+		*ppos = -1UL;	/* end marker, as in the removed code */
+		return NULL;
 	}
-
-	*ppos = next ? next->vm_start : -1UL;
-
-	return next;
+	return proc_get_vma(m->private, ppos);
 }
 
 static void m_stop(struct seq_file *m, void *v)
@@ -185,8 +188,8 @@ static void m_stop(struct seq_file *m, void *v)
 	if (!priv->task)
 		return;
 
+	rcu_read_unlock();
 	release_task_mempolicy(priv);
-	mmap_read_unlock(mm);
 	mmput(mm);
 	put_task_struct(priv->task);
 	priv->task = NULL;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db8a413..d8012a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -682,6 +682,16 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_ACCESS_FLAGS;
 }
 
+static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi)
+{
+	return mas_find(&vmi->state, ULONG_MAX); /* presumably no upper bound on the search - confirm */
+}
+
+static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
+{
+	return vmi->state.index;	/* starts out as the addr given to vma_iter_init() */
+}
+
 #ifdef CONFIG_SHMEM
 /*
  * The vma_is_shmem is not inline because it is used only by slow
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fa7cc91..ab8fe3b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -575,6 +575,28 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return (struct cpumask *)&mm->cpu_bitmap;
 }
 
+struct vma_iterator {
+	struct ma_state state;	/* tree, index and node are set by vma_iter_init() */
+};
+
+/* Declare a struct vma_iterator called @name over @mm's tree, starting at @addr. */
+#define VMA_ITERATOR(name, mm, addr)					\
+	struct vma_iterator name = {					\
+		.state = {						\
+			.tree = &(mm)->mm_mt,				\
+			.index = (addr),				\
+			.node = MAS_START,				\
+		},							\
+	}
+
+static inline void vma_iter_init(struct vma_iterator *vmi,
+		struct mm_struct *mm, unsigned long addr)
+{
+	vmi->state.tree = &mm->mm_mt;	/* iterate over this mm's mm_mt tree */
+	vmi->state.index = addr;	/* address the lookup is keyed on */
+	vmi->state.node = MAS_START;	/* NOTE(review): presumably "walk not started" - confirm */
+}
+
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 				unsigned long start, unsigned long end);