From 17ef899c75aa15aeb7818b3b1fec7a84518d10e5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Jun 2011 12:20:33 +0200
Subject: [PATCH] sched: Move mmdrop to RCU on RT
mmdrop() takes sleeping locks and calls into the memory allocator, so it
is nothing we want to do in task switch and other atomic contexts.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
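---
The fix uses the standard call_rcu() deferred-free pattern: embed a
struct rcu_head in the object, and when the last reference is dropped,
hand the teardown to an RCU callback, which on PREEMPT_RT runs in
thread context where sleeping locks are fine. A minimal kernel-style
sketch of that pattern (hypothetical my_obj names, not the mm_struct
code below):

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	atomic_t refcount;
	struct rcu_head rcu;	/* storage for the deferred callback */
};

/*
 * Runs from RCU callback context; on RT this is thread context, so
 * teardown work that takes sleeping locks is safe here.
 */
static void my_obj_free(struct rcu_head *rhp)
{
	struct my_obj *obj = container_of(rhp, struct my_obj, rcu);

	kfree(obj);
}

static void my_obj_put(struct my_obj *obj)
{
	/* Last reference: defer the free instead of doing it inline. */
	if (atomic_dec_and_test(&obj->refcount))
		call_rcu(&obj->rcu, my_obj_free);
}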
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ca3e517980a0..986c97b1a5bf 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -11,6 +11,7 @@
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
+#include <linux/rcupdate.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <asm/page.h>
@@ -507,6 +508,9 @@ struct mm_struct {
bool tlb_flush_pending;
#endif
struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ struct rcu_head delayed_drop;
+#endif
#ifdef CONFIG_X86_INTEL_MPX
/* address of the bounds directory */
void __user *bd_addr;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8f50f34904f4..89319443d322 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2757,6 +2757,7 @@ extern struct mm_struct * mm_alloc(void);
/* mmdrop drops the mm and the page tables */
extern void __mmdrop(struct mm_struct *);
+
static inline void mmdrop(struct mm_struct *mm)
{
if (unlikely(atomic_dec_and_test(&mm->mm_count)))
@@ -2768,6 +2769,17 @@ static inline bool mmget_not_zero(struct mm_struct *mm)
return atomic_inc_not_zero(&mm->mm_users);
}
+#ifdef CONFIG_PREEMPT_RT_BASE
+extern void __mmdrop_delayed(struct rcu_head *rhp);
+static inline void mmdrop_delayed(struct mm_struct *mm)
+{
+ if (atomic_dec_and_test(&mm->mm_count))
+ call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+# define mmdrop_delayed(mm) mmdrop(mm)
+#endif
+
/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
#ifdef CONFIG_MMU
diff --git a/kernel/fork.c b/kernel/fork.c
index bec1b14130e7..113771be0045 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -701,6 +701,19 @@ struct mm_struct *mm_alloc(void)
return mm_init(mm, current);
}
+#ifdef CONFIG_PREEMPT_RT_BASE
+/*
+ * RCU callback for delayed mm drop. Not strictly RCU, but we don't
+ * want to add another facility just to make this work.
+ */
+void __mmdrop_delayed(struct rcu_head *rhp)
+{
+ struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+ __mmdrop(mm);
+}
+#endif
+
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 37b89c4a46db..56f08d6edcd0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2731,8 +2731,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
finish_arch_post_lock_switch();
fire_sched_in_preempt_notifiers(current);
+ /*
+ * We use mmdrop_delayed() here so we don't have to do the
+ * full __mmdrop() from this atomic context when we are the last user.
+ */
if (mm)
- mmdrop(mm);
+ mmdrop_delayed(mm);
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
@@ -5443,6 +5447,8 @@ void sched_setnuma(struct task_struct *p, int nid)
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
+
/*
* Ensures that the idle task is using init_mm right before its cpu goes
* offline.
@@ -5457,7 +5463,11 @@ void idle_task_exit(void)
switch_mm_irqs_off(mm, &init_mm, current);
finish_arch_post_lock_switch();
}
- mmdrop(mm);
+ /*
+ * Defer the cleanup to a live CPU. On RT we can neither
+ * call mmdrop() nor mmdrop_delayed() from here.
+ */
+ per_cpu(idle_last_mm, smp_processor_id()) = mm;
}
/*
@@ -7328,6 +7338,10 @@ int sched_cpu_dying(unsigned int cpu)
BUG_ON(rq->nr_running != 1);
raw_spin_unlock_irqrestore(&rq->lock, flags);
calc_load_migrate(rq);
+ if (per_cpu(idle_last_mm, cpu)) {
+ mmdrop(per_cpu(idle_last_mm, cpu));
+ per_cpu(idle_last_mm, cpu) = NULL;
+ }
update_max_interval();
nohz_balance_exit_idle(cpu);
hrtick_clear(rq);
--
2.5.0
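
A note on the hotplug half of the change: idle_task_exit() runs on the
outgoing CPU, where on RT neither mmdrop() nor mmdrop_delayed() is
safe, so the patch parks the mm in a per-CPU slot and lets
sched_cpu_dying() do the mmdrop() once it is safe. Reduced to a sketch
with hypothetical names (parked_mm, park_mm, reap_parked_mm):

#include <linux/percpu.h>
#include <linux/sched.h>

static DEFINE_PER_CPU(struct mm_struct *, parked_mm);

/* On the outgoing CPU: just record the mm, do no teardown work. */
static void park_mm(struct mm_struct *mm)
{
	this_cpu_write(parked_mm, mm);
}

/* Later, from the hotplug teardown path, where mmdrop() is safe. */
static void reap_parked_mm(unsigned int cpu)
{
	struct mm_struct *mm = per_cpu(parked_mm, cpu);

	if (mm) {
		per_cpu(parked_mm, cpu) = NULL;
		mmdrop(mm);
	}
}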