From 4c09f65999fcb3deea0106441860880c44f95112 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 3 Jul 2009 08:44:21 -0500
Subject: [PATCH] rtmutex: Adaptive locking. Spin when owner runs
commit 1efd0d597ba2f13fa061db2380f3994e3dd5d2bb in tip.
After talking with Gregory Haskins about how they implemented their
version of adaptive spinlocks, and before I actually looked at their
code, I was thinking about it while lying in bed.
I always thought that adaptive spinlocks were supposed to spin for a
short period of time based on some heuristic and then sleep. That idea
is totally bogus: no heuristic can account for all the different
workloads out there. But Gregory mentioned something to me that made a
hell of a lot of sense, and that is to spin only while the owner is
running.
If the owner is running, then it would seem quicker to spin than to
take the scheduling hit. While lying awake in bed, it dawned on me that
we could simply spin in the fast path and never touch the "has waiters"
flag, which would keep the owner from going into the slow path. Also,
the task itself is preemptible while spinning, so this would not affect
latencies.
The only trick was to make sure the owner does not get freed between
the time you see who the owner is and the time you check its run queue.
This was easily solved by simply grabbing the RCU read lock, because
freeing of a task must happen after a grace period.
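Condensed to its core (and with the waiter bookkeeping left out), the
wait loop added below amounts to roughly the following. This is just a
simplified restatement of the adaptive_wait() hunk in this patch, not
extra code; task_is_current() is the small helper the patch adds to
kernel/sched.c:

	/* Return 1 if we should give up spinning and go to sleep. */
	for (;;) {
		if (!waiter->task)	/* we were handed the lock, stop waiting */
			return 0;

		rcu_read_lock();	/* owner's task_struct can't be freed under us */
		if (orig_owner != rt_mutex_owner(waiter->lock)) {
			rcu_read_unlock();	/* owner changed, let the slow path re-check */
			return 0;
		}
		if (!task_is_current(orig_owner)) {
			rcu_read_unlock();	/* owner is off the CPU, spinning is pointless */
			return 1;
		}
		rcu_read_unlock();

		cpu_relax();		/* keep spinning; we stay preemptible the whole time */
	}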
I first tried to stay only in the fast path. This works fine until you want
to guarantee that the highest prio task gets the lock next. I tried all
sorts of hackery and found that there were too many cases where we
could miss. I finally concurred with Gregory and decided that going
into the
slow path was the way to go.
I then started looking into what the guys over at Novell did. They had the
basic idea correct, but went way overboard in the implementation, making
it far more complex than it needed to be. I rewrote their work using the
ideas from my original patch, and simplified it quite a bit.
This is the patch that they wanted to do ;-)
Special thanks goes out to Gregory Haskins, Sven Dietrich and
Peter Morreale, for proving that adaptive spin locks certainly *can*
make a difference.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b0e18b..3bb5def 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2684,6 +2684,8 @@ static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
+extern int task_is_current(struct task_struct *task);
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 1f12a75..be9864f 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -8,6 +8,12 @@
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
* Copyright (C) 2006 Esben Nielsen
*
+ * Adaptive Spinlocks:
+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ * and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
+ *
* See Documentation/rt-mutex-design.txt for details.
*/
#include <linux/spinlock.h>
@@ -664,6 +670,54 @@ update_current(unsigned long new_state, unsigned long *saved_state)
 	*saved_state = TASK_RUNNING;
 }
+#ifdef CONFIG_SMP
+static int adaptive_wait(struct rt_mutex_waiter *waiter,
+			 struct task_struct *orig_owner)
+{
+	int sleep = 0;
+
+	for (;;) {
+
+		/* are we the owner? */
+		if (!waiter->task)
+			break;
+
+		/*
+		 * We need to read the owner of the lock and then check
+		 * its state. But we can't let the owner task be freed
+		 * while we read the state. We grab the rcu_lock and
+		 * this makes sure that the owner task won't disappear
+		 * between testing that it still has the lock, and checking
+		 * its state.
+		 */
+		rcu_read_lock();
+		/* Owner changed? Then let's update the original */
+		if (orig_owner != rt_mutex_owner(waiter->lock)) {
+			rcu_read_unlock();
+			break;
+		}
+
+		/* Owner went to bed, so should we */
+		if (!task_is_current(orig_owner)) {
+			sleep = 1;
+			rcu_read_unlock();
+			break;
+		}
+		rcu_read_unlock();
+
+		cpu_relax();
+	}
+
+	return sleep;
+}
+#else
+static int adaptive_wait(struct rt_mutex_waiter *waiter,
+			 struct task_struct *orig_owner)
+{
+	return 1;
+}
+#endif
+
/*
* Slow path lock function spin_lock style: this variant is very
* careful not to miss any non-lock wakeups.
@@ -679,6 +733,7 @@ rt_spin_lock_slowlock(struct rt_mutex *lock)
 {
 	struct rt_mutex_waiter waiter;
 	unsigned long saved_state, state, flags;
+	struct task_struct *orig_owner;
 	debug_rt_mutex_init_waiter(&waiter);
 	waiter.task = NULL;
@@ -729,13 +784,16 @@ rt_spin_lock_slowlock(struct rt_mutex *lock)
 		 * the lock ! We restore lock_depth when we come back.
 		 */
 		current->lock_depth = -1;
+		orig_owner = rt_mutex_owner(lock);
 		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 		debug_rt_mutex_print_deadlock(&waiter);
-		update_current(TASK_UNINTERRUPTIBLE, &saved_state);
-		if (waiter.task)
-			schedule_rt_mutex(lock);
+		if (adaptive_wait(&waiter, orig_owner)) {
+			update_current(TASK_UNINTERRUPTIBLE, &saved_state);
+			if (waiter.task)
+				schedule_rt_mutex(lock);
+		}
 		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 		current->lock_depth = saved_lock_depth;
diff --git a/kernel/sched.c b/kernel/sched.c
index a5ffe46..ebbd311 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -666,6 +666,11 @@ inline void update_rq_clock(struct rq *rq)
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
+int task_is_current(struct task_struct *task)
+{
+	return task_rq(task)->curr == task;
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
--
1.7.1.1