| From 4c09f65999fcb3deea0106441860880c44f95112 Mon Sep 17 00:00:00 2001 |
| From: Steven Rostedt <srostedt@redhat.com> |
| Date: Fri, 3 Jul 2009 08:44:21 -0500 |
| Subject: [PATCH] rtmutex: Adaptive locking. Spin when owner runs |
| |
| commit 1efd0d597ba2f13fa061db2380f3994e3dd5d2bb in tip. |
| |
| After talking with Gregory Haskins about how he and his team
| implemented their version of adaptive spinlocks, and before I had
| actually looked at their code, I was thinking about it while lying
| in bed.
| |
| I always thought that adaptive spinlocks were meant to spin for a
| short period of time based on some heuristic and then sleep. That
| idea is totally bogus: no heuristic can account for the wide range
| of activities a system performs. But Gregory mentioned something to
| me that made a hell of a lot of sense, and that is to only spin
| while the owner is running.
| |
| If the owner is running, then it would seem quicker to spin than to
| take the scheduling hit. While lying awake in bed, it dawned on me
| that we could simply spin in the fast path and never touch the
| "has waiters" flag, which would keep the owner from entering the
| slow path. Also, the task itself remains preemptible while spinning,
| so this would not affect latencies.
| |
| The only trick was to make sure the owner does not get freed between
| the time you read the owner and the time you check its run queue.
| This was easily solved by grabbing the RCU read lock, because the
| freeing of a task must happen after a grace period.
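| 
| A condensed sketch of that check (this mirrors the adaptive_wait()
| loop added below; the "did we get the lock" test and the spin/sleep
| return value are elided here for brevity):
| 
|         for (;;) {
|                 rcu_read_lock();
|                 /* Stop spinning if the owner changed or was scheduled out */
|                 if (owner != rt_mutex_owner(lock) || !task_is_current(owner)) {
|                         rcu_read_unlock();
|                         break;
|                 }
|                 rcu_read_unlock();
|                 cpu_relax();
|         }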
| |
| I first tried to stay only in the fast path. This works fine until you
| want to guarantee that the highest priority task gets the lock next. I
| tried all sorts of hackery and found that there were too many cases
| where we could miss. I finally concurred with Gregory and decided that
| going into the slow path was the way to go.
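| 
| In the slow path, the spin simply gates the schedule() call; this is
| exactly what the rt_spin_lock_slowlock() hunk below does:
| 
|         if (adaptive_wait(&waiter, orig_owner)) {
|                 /* Owner is no longer running: really go to sleep */
|                 update_current(TASK_UNINTERRUPTIBLE, &saved_state);
|                 if (waiter.task)
|                         schedule_rt_mutex(lock);
|         }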
| |
| I then started looking into what the guys over at Novell did. They had
| the basic idea correct, but went way overboard in the implementation,
| making it far more complex than it needed to be. I rewrote their work
| using the ideas from my original patch and simplified it quite a bit.
| |
| This is the patch that they wanted to do ;-) |
| |
| Special thanks go out to Gregory Haskins, Sven Dietrich and
| Peter Morreale for proving that adaptive spinlocks certainly *can*
| make a difference.
| |
| Signed-off-by: Steven Rostedt <srostedt@redhat.com> |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index 4b0e18b..3bb5def 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -2684,6 +2684,8 @@ static inline unsigned long rlimit_max(unsigned int limit) |
| return task_rlimit_max(current, limit); |
| } |
| |
| +extern int task_is_current(struct task_struct *task); |
| + |
| #endif /* __KERNEL__ */ |
| |
| #endif |
| diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c |
| index 1f12a75..be9864f 100644 |
| --- a/kernel/rtmutex.c |
| +++ b/kernel/rtmutex.c |
| @@ -8,6 +8,12 @@ |
| * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt |
| * Copyright (C) 2006 Esben Nielsen |
| * |
| + * Adaptive Spinlocks: |
| + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, |
| + * and Peter Morreale, |
| + * Adaptive Spinlocks simplification: |
| + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> |
| + * |
| * See Documentation/rt-mutex-design.txt for details. |
| */ |
| #include <linux/spinlock.h> |
| @@ -664,6 +670,54 @@ update_current(unsigned long new_state, unsigned long *saved_state) |
| *saved_state = TASK_RUNNING; |
| } |
| |
| +#ifdef CONFIG_SMP |
| +static int adaptive_wait(struct rt_mutex_waiter *waiter, |
| + struct task_struct *orig_owner) |
| +{ |
| + int sleep = 0; |
| + |
| + for (;;) { |
| + |
| + /* Did we get the lock? */
| + if (!waiter->task) |
| + break; |
| + |
| + /*
| + * We need to read the owner of the lock and then check
| + * its state. But we can't let the owner task be freed
| + * while we read the state. We grab the RCU read lock,
| + * which makes sure that the owner task won't disappear
| + * between testing that it still has the lock and
| + * checking its state.
| + */
| + rcu_read_lock(); |
| + /* Owner changed? Break out so the caller can re-read it */
| + if (orig_owner != rt_mutex_owner(waiter->lock)) { |
| + rcu_read_unlock(); |
| + break; |
| + } |
| + |
| + /* Owner went to bed, so should we */ |
| + if (!task_is_current(orig_owner)) { |
| + sleep = 1; |
| + rcu_read_unlock(); |
| + break; |
| + } |
| + rcu_read_unlock(); |
| + |
| + cpu_relax(); |
| + } |
| + |
| + return sleep; |
| +} |
| +#else |
| +static int adaptive_wait(struct rt_mutex_waiter *waiter, |
| + struct task_struct *orig_owner) |
| +{ |
| + return 1; |
| +} |
| +#endif |
| + |
| /* |
| * Slow path lock function spin_lock style: this variant is very |
| * careful not to miss any non-lock wakeups. |
| @@ -679,6 +733,7 @@ rt_spin_lock_slowlock(struct rt_mutex *lock) |
| { |
| struct rt_mutex_waiter waiter; |
| unsigned long saved_state, state, flags; |
| + struct task_struct *orig_owner; |
| |
| debug_rt_mutex_init_waiter(&waiter); |
| waiter.task = NULL; |
| @@ -729,13 +784,16 @@ rt_spin_lock_slowlock(struct rt_mutex *lock) |
| * the lock ! We restore lock_depth when we come back. |
| */ |
| current->lock_depth = -1; |
| + orig_owner = rt_mutex_owner(lock); |
| raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
| |
| debug_rt_mutex_print_deadlock(&waiter); |
| |
| - update_current(TASK_UNINTERRUPTIBLE, &saved_state); |
| - if (waiter.task) |
| - schedule_rt_mutex(lock); |
| + if (adaptive_wait(&waiter, orig_owner)) { |
| + update_current(TASK_UNINTERRUPTIBLE, &saved_state); |
| + if (waiter.task) |
| + schedule_rt_mutex(lock); |
| + } |
| |
| raw_spin_lock_irqsave(&lock->wait_lock, flags); |
| current->lock_depth = saved_lock_depth; |
| diff --git a/kernel/sched.c b/kernel/sched.c |
| index a5ffe46..ebbd311 100644 |
| --- a/kernel/sched.c |
| +++ b/kernel/sched.c |
| @@ -666,6 +666,11 @@ inline void update_rq_clock(struct rq *rq) |
| rq->clock = sched_clock_cpu(cpu_of(rq)); |
| } |
| |
| +int task_is_current(struct task_struct *task) |
| +{ |
| + return task_rq(task)->curr == task; |
| +} |
| + |
| /* |
| * Tunables that become constants when CONFIG_SCHED_DEBUG is off: |
| */ |
| -- |
| 1.7.1.1 |
| |