| From 4c09f65999fcb3deea0106441860880c44f95112 Mon Sep 17 00:00:00 2001 |
| From: Steven Rostedt <srostedt@redhat.com> |
| Date: Fri, 3 Jul 2009 08:44:21 -0500 |
| Subject: [PATCH] rtmutex: Adaptive locking. Spin when owner runs |
| |
| commit 1efd0d597ba2f13fa061db2380f3994e3dd5d2bb in tip. |
| |
| After talking with Gregory Haskins about how he and his team
| implemented their version of adaptive spinlocks, and before I had
| actually looked at their code, I was thinking about it while lying
| in bed.
| |
| I always thought that adaptive spinlocks were meant to spin for a
| short period of time based on some heuristic and then sleep. That
| idea is totally bogus: no heuristic can account for the wide range
| of activities a system performs. But Gregory mentioned something to
| me that made a hell of a lot of sense, and that is to only spin
| while the owner is running.
| |
| If the owner is running, then it would seem quicker to spin than to
| take the scheduling hit. While lying awake in bed, it dawned on me
| that we could simply spin in the fast path and never touch the
| "has waiters" flag, which would keep the owner from entering the
| slow path. Also, the task itself remains preemptible while spinning,
| so this would not affect latencies.
| |
| The only trick was to make sure the owner does not get freed between
| the time you read the owner and the time you check its run queue.
| This was easily solved by grabbing the RCU read lock, because the
| freeing of a task must happen after a grace period.
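| 
| A condensed sketch of that check (this mirrors the adaptive_wait()
| loop added below; the "did we get the lock" test and the spin/sleep
| return value are elided here for brevity):
| 
|         for (;;) {
|                 rcu_read_lock();
|                 /* Stop spinning if the owner changed or was scheduled out */
|                 if (owner != rt_mutex_owner(lock) || !task_is_current(owner)) {
|                         rcu_read_unlock();
|                         break;
|                 }
|                 rcu_read_unlock();
|                 cpu_relax();
|         }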
| |
| I first tried to stay only in the fast path. This works fine until you
| want to guarantee that the highest priority task gets the lock next. I
| tried all sorts of hackery and found that there were too many cases
| where we could miss. I finally concurred with Gregory and decided that
| going into the slow path was the way to go.
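| 
| In the slow path, the spin simply gates the schedule() call; this is
| exactly what the rt_spin_lock_slowlock() hunk below does:
| 
|         if (adaptive_wait(&waiter, orig_owner)) {
|                 /* Owner is no longer running: really go to sleep */
|                 update_current(TASK_UNINTERRUPTIBLE, &saved_state);
|                 if (waiter.task)
|                         schedule_rt_mutex(lock);
|         }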
| |
| I then started looking into what the guys over at Novell did. They had
| the basic idea correct, but went way overboard in the implementation,
| making it far more complex than it needed to be. I rewrote their work
| using the ideas from my original patch and simplified it quite a bit.
| |
| This is the patch that they wanted to do ;-) |
| |
| Special thanks go out to Gregory Haskins, Sven Dietrich and
| Peter Morreale for proving that adaptive spinlocks certainly *can*
| make a difference.
| |
| Signed-off-by: Steven Rostedt <srostedt@redhat.com> |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index 4b0e18b..3bb5def 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -2684,6 +2684,8 @@ static inline unsigned long rlimit_max(unsigned int limit) |
| return task_rlimit_max(current, limit); |
| } |
| |
| +extern int task_is_current(struct task_struct *task); |
| + |
| #endif /* __KERNEL__ */ |
| |
| #endif |
| diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c |
| index 1f12a75..be9864f 100644 |
| --- a/kernel/rtmutex.c |
| +++ b/kernel/rtmutex.c |
| @@ -8,6 +8,12 @@ |
| * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt |
| * Copyright (C) 2006 Esben Nielsen |
| * |
| + * Adaptive Spinlocks: |
| + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, |
| + * and Peter Morreale, |
| + * Adaptive Spinlocks simplification: |
| + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> |
| + * |
| * See Documentation/rt-mutex-design.txt for details. |
| */ |
| #include <linux/spinlock.h> |
| @@ -664,6 +670,54 @@ update_current(unsigned long new_state, unsigned long *saved_state) |
| *saved_state = TASK_RUNNING; |
| } |
| |
| +#ifdef CONFIG_SMP |
| +static int adaptive_wait(struct rt_mutex_waiter *waiter, |
| + struct task_struct *orig_owner) |
| +{ |
| + int sleep = 0; |
| + |
| + for (;;) { |
| + |
| + /* Did we get the lock? */
| + if (!waiter->task) |
| + break; |
| + |
| + /*
| + * We need to read the owner of the lock and then check
| + * its state. But we can't let the owner task be freed
| + * while we read the state. We grab the RCU read lock,
| + * which makes sure that the owner task won't disappear
| + * between testing that it still has the lock and
| + * checking its state.
| + */
| + rcu_read_lock(); |
| + /* Owner changed? Break out so the caller can re-read it */
| + if (orig_owner != rt_mutex_owner(waiter->lock)) { |
| + rcu_read_unlock(); |
| + break; |
| + } |
| + |
| + /* Owner went to bed, so should we */ |
| + if (!task_is_current(orig_owner)) { |
| + sleep = 1; |
| + rcu_read_unlock(); |
| + break; |
| + } |
| + rcu_read_unlock(); |
| + |
| + cpu_relax(); |
| + } |
| + |
| + return sleep; |
| +} |
| +#else |
| +static int adaptive_wait(struct rt_mutex_waiter *waiter, |
| + struct task_struct *orig_owner) |
| +{ |
| + return 1; |
| +} |
| +#endif |
| + |
| /* |
| * Slow path lock function spin_lock style: this variant is very |
| * careful not to miss any non-lock wakeups. |
| @@ -679,6 +733,7 @@ rt_spin_lock_slowlock(struct rt_mutex *lock) |
| { |
| struct rt_mutex_waiter waiter; |
| unsigned long saved_state, state, flags; |
| + struct task_struct *orig_owner; |
| |
| debug_rt_mutex_init_waiter(&waiter); |
| waiter.task = NULL; |
| @@ -729,13 +784,16 @@ rt_spin_lock_slowlock(struct rt_mutex *lock) |
| * the lock ! We restore lock_depth when we come back. |
| */ |
| current->lock_depth = -1; |
| + orig_owner = rt_mutex_owner(lock); |
| raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
| |
| debug_rt_mutex_print_deadlock(&waiter); |
| |
| - update_current(TASK_UNINTERRUPTIBLE, &saved_state); |
| - if (waiter.task) |
| - schedule_rt_mutex(lock); |
| + if (adaptive_wait(&waiter, orig_owner)) { |
| + update_current(TASK_UNINTERRUPTIBLE, &saved_state); |
| + if (waiter.task) |
| + schedule_rt_mutex(lock); |
| + } |
| |
| raw_spin_lock_irqsave(&lock->wait_lock, flags); |
| current->lock_depth = saved_lock_depth; |
| diff --git a/kernel/sched.c b/kernel/sched.c |
| index a5ffe46..ebbd311 100644 |
| --- a/kernel/sched.c |
| +++ b/kernel/sched.c |
| @@ -666,6 +666,11 @@ inline void update_rq_clock(struct rq *rq) |
| rq->clock = sched_clock_cpu(cpu_of(rq)); |
| } |
| |
| +int task_is_current(struct task_struct *task) |
| +{ |
| + return task_rq(task)->curr == task; |
| +} |
| + |
| /* |
| * Tunables that become constants when CONFIG_SCHED_DEBUG is off: |
| */ |
| -- |
| 1.7.1.1 |
| |