vma_iterator and /proc/$pid/maps
Add a vma_iterator, which is a very thin wrapper over the maple tree
interfaces, and use it in the implementation of /proc/$pid/maps. The
/proc/$pid/maps walk no longer takes the mmap_sem; it now runs under
rcu_read_lock() protection instead.
I believe hold_task_mempolicy() is safe to call without the protection
of the mmap_sem. We have a reference on struct task_struct, and
changing ->mempolicy is protected by task_lock(), which is acquired
in hold_task_mempolicy().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379..5eca9cb 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -288,7 +288,7 @@ struct proc_maps_private {
struct task_struct *task;
struct mm_struct *mm;
#ifdef CONFIG_MMU
- struct vm_area_struct *tail_vma;
+ struct vma_iterator iter;
#endif
#ifdef CONFIG_NUMA
struct mempolicy *task_mempolicy;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 501d27a..a34c304 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,6 +122,22 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
+/* Return the next VMA from priv->iter, or the gate VMA once none is found. */
+static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
+					    loff_t *ppos)
+{
+	struct vm_area_struct *found = vma_find(&priv->iter);
+
+	if (!found) {
+		/* -2UL in *ppos is what m_start()/m_next() test for the gate VMA. */
+		*ppos = -2UL;
+		return get_gate_vma(priv->mm);
+	}
+
+	*ppos = found->vm_start;
+	return found;
+}
+
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
@@ -144,37 +160,24 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return NULL;
}
- if (mmap_read_lock_killable(mm)) {
- mmput(mm);
- put_task_struct(priv->task);
- priv->task = NULL;
- return ERR_PTR(-EINTR);
- }
+ vma_iter_init(&priv->iter, mm, last_addr);
hold_task_mempolicy(priv);
- priv->tail_vma = get_gate_vma(mm);
- vma = find_vma(mm, last_addr);
- if (vma)
- return vma;
+ rcu_read_lock();
+ if (*ppos == -2UL)
+ return get_gate_vma(priv->mm);
- return priv->tail_vma;
+ return proc_get_vma(priv, ppos);
}
static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *next = NULL, *vma = v;
-
- if (vma != priv->tail_vma) {
- next = vma_next(vma->vm_mm, vma);
- if (!next)
- next = priv->tail_vma;
+ /* -2UL is set when the gate VMA was handed out: set -1UL and stop. */
+ if (*ppos == -2UL) {
+ *ppos = -1UL;
+ return NULL;
}
-
- *ppos = next ? next->vm_start : -1UL;
-
- return next;
+ return proc_get_vma(m->private, ppos);
}
static void m_stop(struct seq_file *m, void *v)
@@ -185,8 +188,8 @@ static void m_stop(struct seq_file *m, void *v)
if (!priv->task)
return;
+ rcu_read_unlock();
release_task_mempolicy(priv);
- mmap_read_unlock(mm);
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db8a413..d8012a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -682,6 +682,16 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
+/* Return whatever mas_find() yields for the iterator's state (limit ULONG_MAX). */
+static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi)
+{
+ return mas_find(&vmi->state, ULONG_MAX);
+}
+
+/* Return the index currently stored in the iterator's maple tree state. */
+static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
+{
+ return vmi->state.index;
+}
+
#ifdef CONFIG_SHMEM
/*
* The vma_is_shmem is not inline because it is used only by slow
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fa7cc91..ab8fe3b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -575,6 +575,28 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return (struct cpumask *)&mm->cpu_bitmap;
}
+/* VMA iterator: currently nothing more than a wrapper around a maple tree state. */
+struct vma_iterator {
+ struct ma_state state;
+};
+
+/* Declare a struct vma_iterator called @name over @mm's tree, starting at @addr. */
+#define VMA_ITERATOR(name, mm, addr)					\
+	struct vma_iterator name = {					\
+		.state = {						\
+			.tree = &(mm)->mm_mt,				\
+			.index = (addr),				\
+			.node = MAS_START,				\
+		},							\
+	}
+
+/*
+ * Point @vmi at @mm's maple tree and restart the walk from @addr.
+ * NOTE(review): only tree, index and node are written here; any other
+ * ma_state fields keep their previous values - confirm that is harmless
+ * when an iterator is re-initialised.
+ */
+static inline void vma_iter_init(struct vma_iterator *vmi,
+		struct mm_struct *mm, unsigned long addr)
+{
+	struct ma_state *mas = &vmi->state;
+
+	mas->node = MAS_START;
+	mas->index = addr;
+	mas->tree = &mm->mm_mt;
+}
+
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
unsigned long start, unsigned long end);