patches/rt-mutex-add-sleeping-spinlocks-support.patch - pub/scm/linux/kernel/git/paulg/3.8-rt-patches - Git at Google

 Subject: rt-mutex-add-sleeping-spinlocks-support.patch
 From: Thomas Gleixner <tglx@linutronix.de>
 Date: Fri, 10 Jun 2011 11:21:25 +0200

 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
  include/linux/rtmutex.h |   35 +++-
  kernel/futex.c          |    3
  kernel/rtmutex.c        |  384 +++++++++++++++++++++++++++++++++++++++++++++---
  kernel/rtmutex_common.h |    9 +
  4 files changed, 404 insertions(+), 27 deletions(-)

 --- a/include/linux/rtmutex.h
 +++ b/include/linux/rtmutex.h
 @@ -29,9 +29,10 @@ struct rt_mutex {
  	raw_spinlock_t		wait_lock;
  	struct plist_head	wait_list;
  	struct task_struct	*owner;
 -#ifdef CONFIG_DEBUG_RT_MUTEXES
  	int			save_state;
 -	const char 		*name, *file;
 +#ifdef CONFIG_DEBUG_RT_MUTEXES
 +	const char		*file;
 +	const char		*name;
  	int			line;
  	void			*magic;
  #endif
 @@ -56,19 +57,39 @@ struct hrtimer_sleeper;
  #ifdef CONFIG_DEBUG_RT_MUTEXES
  # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  	, .name = #mutexname, .file = __FILE__, .line = __LINE__
 -# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __func__)
 +
 +# define rt_mutex_init(mutex)					\
 +	do {							\
 +		raw_spin_lock_init(&(mutex)->wait_lock);	\
 +		__rt_mutex_init(mutex, #mutex);			\
 +	} while (0)
 +
   extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  #else
  # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
 -# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL)
 +
 +# define rt_mutex_init(mutex)					\
 +	do {							\
 +		raw_spin_lock_init(&(mutex)->wait_lock);	\
 +		__rt_mutex_init(mutex, #mutex);			\
 +	} while (0)
 +
  # define rt_mutex_debug_task_free(t)			do { } while (0)
  #endif

 -#define __RT_MUTEX_INITIALIZER(mutexname) \
 -	{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
 +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
 +	.wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  	, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \
  	, .owner = NULL \
 -	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
 +	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)
 +
 +
 +#define __RT_MUTEX_INITIALIZER(mutexname) \
 +	{ __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
 +
 +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
 +	{ __RT_MUTEX_INITIALIZER_PLAIN(mutexname)    \
 +	  , .save_state = 1 }

  #define DEFINE_RT_MUTEX(mutexname) \
  	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
 --- a/kernel/futex.c
 +++ b/kernel/futex.c
 @@ -2323,8 +2323,7 @@ static int futex_wait_requeue_pi(u32 __u
  	 * The waiter is allocated on our stack, manipulated by the requeue
  	 * code while we sleep on uaddr.
  	 */
 -	debug_rt_mutex_init_waiter(&rt_waiter);
 -	rt_waiter.task = NULL;
 +	rt_mutex_init_waiter(&rt_waiter, false);

  	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  	if (unlikely(ret != 0))
 --- a/kernel/rtmutex.c
 +++ b/kernel/rtmutex.c
 @@ -8,6 +8,12 @@
   *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
   *  Copyright (C) 2006 Esben Nielsen
   *
 + * Adaptive Spinlocks:
 + *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 + *                                   and Peter Morreale,
 + * Adaptive Spinlocks simplification:
 + *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 + *
   *  See Documentation/rt-mutex-design.txt for details.
   */
  #include <linux/spinlock.h>
 @@ -96,6 +102,12 @@ static inline void mark_rt_mutex_waiters
  }
  #endif

 +static inline void init_lists(struct rt_mutex *lock)
 +{
 +	if (unlikely(!lock->wait_list.node_list.prev))
 +		plist_head_init(&lock->wait_list);
 +}
 +
  /*
   * Calculate task priority from the waiter list priority
   *
 @@ -142,6 +154,14 @@ static void rt_mutex_adjust_prio(struct
  	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  }

 +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
 +{
 +	if (waiter->savestate)
 +		wake_up_lock_sleeper(waiter->task);
 +	else
 +		wake_up_process(waiter->task);
 +}
 +
  /*
   * Max number of times we'll walk the boosting chain:
   */
 @@ -253,13 +273,15 @@ static int rt_mutex_adjust_prio_chain(st
  	/* Release the task */
  	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  	if (!rt_mutex_owner(lock)) {
 +		struct rt_mutex_waiter *lock_top_waiter;
 +
  		/*
  		 * If the requeue above changed the top waiter, then we need
  		 * to wake the new top waiter up to try to get the lock.
  		 */
 -
 -		if (top_waiter != rt_mutex_top_waiter(lock))
 -			wake_up_process(rt_mutex_top_waiter(lock)->task);
 +		lock_top_waiter = rt_mutex_top_waiter(lock);
 +		if (top_waiter != lock_top_waiter)
 +			rt_mutex_wake_waiter(lock_top_waiter);
  		raw_spin_unlock(&lock->wait_lock);
  		goto out_put_task;
  	}
 @@ -304,6 +326,25 @@ static int rt_mutex_adjust_prio_chain(st
  	return ret;
  }

 +
 +#define STEAL_NORMAL  0
 +#define STEAL_LATERAL 1
 +
 +/*
 + * Note that RT tasks are excluded from lateral-steals to prevent the
 + * introduction of an unbounded latency
 + */
 +static inline int lock_is_stealable(struct task_struct *task,
 +				    struct task_struct *pendowner, int mode)
 +{
 +    if (mode == STEAL_NORMAL || rt_task(task)) {
 +	    if (task->prio >= pendowner->prio)
 +		    return 0;
 +    } else if (task->prio > pendowner->prio)
 +	    return 0;
 +    return 1;
 +}
 +
  /*
   * Try to take an rt-mutex
   *
 @@ -313,8 +354,9 @@ static int rt_mutex_adjust_prio_chain(st
   * @task:   the task which wants to acquire the lock
   * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
   */
 -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 -		struct rt_mutex_waiter *waiter)
 +static int
 +__try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 +		       struct rt_mutex_waiter *waiter, int mode)
  {
  	/*
  	 * We have to be careful here if the atomic speedups are
 @@ -347,12 +389,14 @@ static int try_to_take_rt_mutex(struct r
  	 * 3) it is top waiter
  	 */
  	if (rt_mutex_has_waiters(lock)) {
 -		if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
 -			if (!waiter || waiter != rt_mutex_top_waiter(lock))
 -				return 0;
 -		}
 +		struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
 +
 +		if (task != pown && !lock_is_stealable(task, pown, mode))
 +			return 0;
  	}

 +	/* We got the lock. */
 +
  	if (waiter || rt_mutex_has_waiters(lock)) {
  		unsigned long flags;
  		struct rt_mutex_waiter *top;
 @@ -377,7 +421,6 @@ static int try_to_take_rt_mutex(struct r
  		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  	}

 -	/* We got the lock. */
  	debug_rt_mutex_lock(lock);

  	rt_mutex_set_owner(lock, task);
 @@ -387,6 +430,13 @@ static int try_to_take_rt_mutex(struct r
  	return 1;
  }

 +static inline int
 +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 +		     struct rt_mutex_waiter *waiter)
 +{
 +	return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
 +}
 +
  /*
   * Task blocks on lock.
   *
 @@ -501,7 +551,7 @@ static void wakeup_next_waiter(struct rt

  	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

 -	wake_up_process(waiter->task);
 +	rt_mutex_wake_waiter(waiter);
  }

  /*
 @@ -580,18 +630,315 @@ void rt_mutex_adjust_pi(struct task_stru
  		return;
  	}

 -	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 -
  	/* gets dropped in rt_mutex_adjust_prio_chain()! */
  	get_task_struct(task);
 +	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
  }

 +#ifdef CONFIG_PREEMPT_RT_FULL
 +/*
 + * preemptible spin_lock functions:
 + */
 +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
 +					 void  (*slowfn)(struct rt_mutex *lock))
 +{
 +	might_sleep();
 +
 +	if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
 +		rt_mutex_deadlock_account_lock(lock, current);
 +	else
 +		slowfn(lock);
 +}
 +
 +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
 +					   void  (*slowfn)(struct rt_mutex *lock))
 +{
 +	if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
 +		rt_mutex_deadlock_account_unlock(current);
 +	else
 +		slowfn(lock);
 +}
 +
 +#ifdef CONFIG_SMP
 +/*
 + * Note that owner is a speculative pointer and dereferencing relies
 + * on rcu_read_lock() and the check against the lock owner.
 + */
 +static int adaptive_wait(struct rt_mutex *lock,
 +			 struct task_struct *owner)
 +{
 +	int res = 0;
 +
 +	rcu_read_lock();
 +	for (;;) {
 +		if (owner != rt_mutex_owner(lock))
 +			break;
 +		/*
 +		 * Ensure that owner->on_cpu is dereferenced _after_
 +		 * checking the above to be valid.
 +		 */
 +		barrier();
 +		if (!owner->on_cpu) {
 +			res = 1;
 +			break;
 +		}
 +		cpu_relax();
 +	}
 +	rcu_read_unlock();
 +	return res;
 +}
 +#else
 +static int adaptive_wait(struct rt_mutex *lock,
 +			 struct task_struct *orig_owner)
 +{
 +	return 1;
 +}
 +#endif
 +
 +# define pi_lock(lock)			raw_spin_lock_irq(lock)
 +# define pi_unlock(lock)		raw_spin_unlock_irq(lock)
 +
 +/*
 + * Slow path lock function spin_lock style: this variant is very
 + * careful not to miss any non-lock wakeups.
 + *
 + * We store the current state under p->pi_lock in p->saved_state and
 + * the try_to_wake_up() code handles this accordingly.
 + */
 +static void  noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
 +{
 +	struct task_struct *lock_owner, *self = current;
 +	struct rt_mutex_waiter waiter, *top_waiter;
 +	int ret;
 +
 +	rt_mutex_init_waiter(&waiter, true);
 +
 +	raw_spin_lock(&lock->wait_lock);
 +	init_lists(lock);
 +
 +	if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
 +		raw_spin_unlock(&lock->wait_lock);
 +		return;
 +	}
 +
 +	BUG_ON(rt_mutex_owner(lock) == self);
 +
 +	/*
 +	 * We save whatever state the task is in and we'll restore it
 +	 * after acquiring the lock taking real wakeups into account
 +	 * as well. We are serialized via pi_lock against wakeups. See
 +	 * try_to_wake_up().
 +	 */
 +	pi_lock(&self->pi_lock);
 +	self->saved_state = self->state;
 +	__set_current_state(TASK_UNINTERRUPTIBLE);
 +	pi_unlock(&self->pi_lock);
 +
 +	ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
 +	BUG_ON(ret);
 +
 +	for (;;) {
 +		/* Try to acquire the lock again. */
 +		if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
 +			break;
 +
 +		top_waiter = rt_mutex_top_waiter(lock);
 +		lock_owner = rt_mutex_owner(lock);
 +
 +		raw_spin_unlock(&lock->wait_lock);
 +
 +		debug_rt_mutex_print_deadlock(&waiter);
 +
 +		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
 +			schedule_rt_mutex(lock);
 +
 +		raw_spin_lock(&lock->wait_lock);
 +
 +		pi_lock(&self->pi_lock);
 +		__set_current_state(TASK_UNINTERRUPTIBLE);
 +		pi_unlock(&self->pi_lock);
 +	}
 +
 +	/*
 +	 * Restore the task state to current->saved_state. We set it
 +	 * to the original state above and the try_to_wake_up() code
 +	 * has possibly updated it when a real (non-rtmutex) wakeup
 +	 * happened while we were blocked. Clear saved_state so
 +	 * try_to_wake_up() does not get confused.
 +	 */
 +	pi_lock(&self->pi_lock);
 +	__set_current_state(self->saved_state);
 +	self->saved_state = TASK_RUNNING;
 +	pi_unlock(&self->pi_lock);
 +
 +	/*
 +	 * try_to_take_rt_mutex() sets the waiter bit
 +	 * unconditionally. We might have to fix that up:
 +	 */
 +	fixup_rt_mutex_waiters(lock);
 +
 +	BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
 +	BUG_ON(!plist_node_empty(&waiter.list_entry));
 +
 +	raw_spin_unlock(&lock->wait_lock);
 +
 +	debug_rt_mutex_free_waiter(&waiter);
 +}
 +
 +/*
 + * Slow path to release a rt_mutex spin_lock style
 + */
 +static void  noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
 +{
 +	raw_spin_lock(&lock->wait_lock);
 +
 +	debug_rt_mutex_unlock(lock);
 +
 +	rt_mutex_deadlock_account_unlock(current);
 +
 +	if (!rt_mutex_has_waiters(lock)) {
 +		lock->owner = NULL;
 +		raw_spin_unlock(&lock->wait_lock);
 +		return;
 +	}
 +
 +	wakeup_next_waiter(lock);
 +
 +	raw_spin_unlock(&lock->wait_lock);
 +
 +	/* Undo pi boosting when necessary */
 +	rt_mutex_adjust_prio(current);
 +}
 +
 +void __lockfunc rt_spin_lock(spinlock_t *lock)
 +{
 +	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
 +	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 +}
 +EXPORT_SYMBOL(rt_spin_lock);
 +
 +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
 +{
 +	rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
 +}
 +EXPORT_SYMBOL(__rt_spin_lock);
 +
 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
 +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
 +{
 +	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
 +	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
 +}
 +EXPORT_SYMBOL(rt_spin_lock_nested);
 +#endif
 +
 +void __lockfunc rt_spin_unlock(spinlock_t *lock)
 +{
 +	/* NOTE: we always pass in '1' for nested, for simplicity */
 +	spin_release(&lock->dep_map, 1, _RET_IP_);
 +	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
 +}
 +EXPORT_SYMBOL(rt_spin_unlock);
 +
 +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
 +{
 +	rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
 +}
 +EXPORT_SYMBOL(__rt_spin_unlock);
 +
 +/*
 + * Wait for the lock to get unlocked: instead of polling for an unlock
 + * (like raw spinlocks do), we lock and unlock, to force the kernel to
 + * schedule if there's contention:
 + */
 +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
 +{
 +	spin_lock(lock);
 +	spin_unlock(lock);
 +}
 +EXPORT_SYMBOL(rt_spin_unlock_wait);
 +
 +int __lockfunc rt_spin_trylock(spinlock_t *lock)
 +{
 +	int ret;
 +
 +	migrate_disable();
 +	ret = rt_mutex_trylock(&lock->lock);
 +	if (ret)
 +		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 +	else
 +		migrate_enable();
 +
 +	return ret;
 +}
 +EXPORT_SYMBOL(rt_spin_trylock);
 +
 +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
 +{
 +	int ret;
 +
 +	local_bh_disable();
 +	ret = rt_mutex_trylock(&lock->lock);
 +	if (ret) {
 +		migrate_disable();
 +		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 +	} else
 +		local_bh_enable();
 +	return ret;
 +}
 +EXPORT_SYMBOL(rt_spin_trylock_bh);
 +
 +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
 +{
 +	int ret;
 +
 +	*flags = 0;
 +	migrate_disable();
 +	ret = rt_mutex_trylock(&lock->lock);
 +	if (ret)
 +		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 +	else
 +		migrate_enable();
 +	return ret;
 +}
 +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
 +
 +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
 +{
 +	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
 +	if (atomic_add_unless(atomic, -1, 1))
 +		return 0;
 +	migrate_disable();
 +	rt_spin_lock(lock);
 +	if (atomic_dec_and_test(atomic))
 +		return 1;
 +	rt_spin_unlock(lock);
 +	migrate_enable();
 +	return 0;
 +}
 +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
 +
 +void
 +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
 +{
 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
 +	/*
 +	 * Make sure we are not reinitializing a held lock:
 +	 */
 +	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 +	lockdep_init_map(&lock->dep_map, name, key, 0);
 +#endif
 +}
 +EXPORT_SYMBOL(__rt_spin_lock_init);
 +
 +#endif /* PREEMPT_RT_FULL */
 +
  /**
   * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
   * @lock:		 the rt_mutex to take
   * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 - * 			 or TASK_UNINTERRUPTIBLE)
 + *			 or TASK_UNINTERRUPTIBLE)
   * @timeout:		 the pre-initialized and started timer, or NULL for none
   * @waiter:		 the pre-initialized rt_mutex_waiter
   *
 @@ -647,9 +994,10 @@ rt_mutex_slowlock(struct rt_mutex *lock,
  	struct rt_mutex_waiter waiter;
  	int ret = 0;

 -	debug_rt_mutex_init_waiter(&waiter);
 +	rt_mutex_init_waiter(&waiter, false);

  	raw_spin_lock(&lock->wait_lock);
 +	init_lists(lock);

  	/* Try to acquire the lock again: */
  	if (try_to_take_rt_mutex(lock, current, NULL)) {
 @@ -702,6 +1050,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lo
  	int ret = 0;

  	raw_spin_lock(&lock->wait_lock);
 +	init_lists(lock);

  	if (likely(rt_mutex_owner(lock) != current)) {

 @@ -934,12 +1283,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  {
  	lock->owner = NULL;
 -	raw_spin_lock_init(&lock->wait_lock);
  	plist_head_init(&lock->wait_list);

  	debug_rt_mutex_init(lock, name);
  }
 -EXPORT_SYMBOL_GPL(__rt_mutex_init);
 +EXPORT_SYMBOL(__rt_mutex_init);

  /**
   * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 @@ -954,7 +1302,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
  void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  				struct task_struct *proxy_owner)
  {
 -	__rt_mutex_init(lock, NULL);
 +	rt_mutex_init(lock);
  	debug_rt_mutex_proxy_lock(lock, proxy_owner);
  	rt_mutex_set_owner(lock, proxy_owner);
  	rt_mutex_deadlock_account_lock(lock, proxy_owner);
 --- a/kernel/rtmutex_common.h
 +++ b/kernel/rtmutex_common.h
 @@ -49,6 +49,7 @@ struct rt_mutex_waiter {
  	struct plist_node	pi_list_entry;
  	struct task_struct	*task;
  	struct rt_mutex		*lock;
 +	bool			savestate;
  #ifdef CONFIG_DEBUG_RT_MUTEXES
  	unsigned long		ip;
  	struct pid		*deadlock_task_pid;
 @@ -126,4 +127,12 @@ extern int rt_mutex_finish_proxy_lock(st
  # include "rtmutex.h"
  #endif

 +static inline void
 +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
 +{
 +	debug_rt_mutex_init_waiter(waiter);
 +	waiter->task = NULL;
 +	waiter->savestate = savestate;
 +}
 +
  #endif
	Subject: rt-mutex-add-sleeping-spinlocks-support.patch
	From: Thomas Gleixner <tglx@linutronix.de>
	Date: Fri, 10 Jun 2011 11:21:25 +0200

	Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
	---
	include/linux/rtmutex.h | 35 +++-
	kernel/futex.c | 3
	kernel/rtmutex.c | 384 +++++++++++++++++++++++++++++++++++++++++++++---
	kernel/rtmutex_common.h | 9 +
	4 files changed, 404 insertions(+), 27 deletions(-)

	--- a/include/linux/rtmutex.h
	+++ b/include/linux/rtmutex.h
	@@ -29,9 +29,10 @@ struct rt_mutex {
	raw_spinlock_t wait_lock;
	struct plist_head wait_list;
	struct task_struct *owner;
	-#ifdef CONFIG_DEBUG_RT_MUTEXES
	int save_state;
	- const char *name, *file;
	+#ifdef CONFIG_DEBUG_RT_MUTEXES
	+ const char *file;
	+ const char *name;
	int line;
	void *magic;
	#endif
	@@ -56,19 +57,39 @@ struct hrtimer_sleeper;
	#ifdef CONFIG_DEBUG_RT_MUTEXES
	# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
	, .name = #mutexname, .file = __FILE__, .line = __LINE__
	-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
	+
	+# define rt_mutex_init(mutex) \
	+ do { \
	+ raw_spin_lock_init(&(mutex)->wait_lock); \
	+ __rt_mutex_init(mutex, #mutex); \
	+ } while (0)
	+
	extern void rt_mutex_debug_task_free(struct task_struct *tsk);
	#else
	# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
	-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
	+
	+# define rt_mutex_init(mutex) \
	+ do { \
	+ raw_spin_lock_init(&(mutex)->wait_lock); \
	+ __rt_mutex_init(mutex, #mutex); \
	+ } while (0)
	+
	# define rt_mutex_debug_task_free(t) do { } while (0)
	#endif

	-#define __RT_MUTEX_INITIALIZER(mutexname) \
	- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
	+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
	+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
	, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \
	, .owner = NULL \
	- __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
	+ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
	+
	+
	+#define __RT_MUTEX_INITIALIZER(mutexname) \
	+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
	+
	+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
	+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
	+ , .save_state = 1 }

	#define DEFINE_RT_MUTEX(mutexname) \
	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
	--- a/kernel/futex.c
	+++ b/kernel/futex.c
	@@ -2323,8 +2323,7 @@ static int futex_wait_requeue_pi(u32 __u
	* The waiter is allocated on our stack, manipulated by the requeue
	* code while we sleep on uaddr.
	*/
	- debug_rt_mutex_init_waiter(&rt_waiter);
	- rt_waiter.task = NULL;
	+ rt_mutex_init_waiter(&rt_waiter, false);

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
	if (unlikely(ret != 0))
	--- a/kernel/rtmutex.c
	+++ b/kernel/rtmutex.c
	@@ -8,6 +8,12 @@
	* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
	* Copyright (C) 2006 Esben Nielsen
	*
	+ * Adaptive Spinlocks:
	+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
	+ * and Peter Morreale,
	+ * Adaptive Spinlocks simplification:
	+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
	+ *
	* See Documentation/rt-mutex-design.txt for details.
	*/
	#include <linux/spinlock.h>
	@@ -96,6 +102,12 @@ static inline void mark_rt_mutex_waiters
	}
	#endif

	+static inline void init_lists(struct rt_mutex *lock)
	+{
	+ if (unlikely(!lock->wait_list.node_list.prev))
	+ plist_head_init(&lock->wait_list);
	+}
	+
	/*
	* Calculate task priority from the waiter list priority
	*
	@@ -142,6 +154,14 @@ static void rt_mutex_adjust_prio(struct
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	}

	+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
	+{
	+ if (waiter->savestate)
	+ wake_up_lock_sleeper(waiter->task);
	+ else
	+ wake_up_process(waiter->task);
	+}
	+
	/*
	* Max number of times we'll walk the boosting chain:
	*/
	@@ -253,13 +273,15 @@ static int rt_mutex_adjust_prio_chain(st
	/* Release the task */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	if (!rt_mutex_owner(lock)) {
	+ struct rt_mutex_waiter *lock_top_waiter;
	+
	/*
	* If the requeue above changed the top waiter, then we need
	* to wake the new top waiter up to try to get the lock.
	*/
	-
	- if (top_waiter != rt_mutex_top_waiter(lock))
	- wake_up_process(rt_mutex_top_waiter(lock)->task);
	+ lock_top_waiter = rt_mutex_top_waiter(lock);
	+ if (top_waiter != lock_top_waiter)
	+ rt_mutex_wake_waiter(lock_top_waiter);
	raw_spin_unlock(&lock->wait_lock);
	goto out_put_task;
	}
	@@ -304,6 +326,25 @@ static int rt_mutex_adjust_prio_chain(st
	return ret;
	}

	+
	+#define STEAL_NORMAL 0
	+#define STEAL_LATERAL 1
	+
	+/*
	+ * Note that RT tasks are excluded from lateral-steals to prevent the
	+ * introduction of an unbounded latency
	+ */
	+static inline int lock_is_stealable(struct task_struct *task,
	+ struct task_struct *pendowner, int mode)
	+{
	+ if (mode == STEAL_NORMAL || rt_task(task)) {
	+ if (task->prio >= pendowner->prio)
	+ return 0;
	+ } else if (task->prio > pendowner->prio)
	+ return 0;
	+ return 1;
	+}
	+
	/*
	* Try to take an rt-mutex
	*
	@@ -313,8 +354,9 @@ static int rt_mutex_adjust_prio_chain(st
	* @task: the task which wants to acquire the lock
	* @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
	*/
	-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
	- struct rt_mutex_waiter *waiter)
	+static int
	+__try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
	+ struct rt_mutex_waiter *waiter, int mode)
	{
	/*
	* We have to be careful here if the atomic speedups are
	@@ -347,12 +389,14 @@ static int try_to_take_rt_mutex(struct r
	* 3) it is top waiter
	*/
	if (rt_mutex_has_waiters(lock)) {
	- if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
	- if (!waiter || waiter != rt_mutex_top_waiter(lock))
	- return 0;
	- }
	+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
	+
	+ if (task != pown && !lock_is_stealable(task, pown, mode))
	+ return 0;
	}

	+ /* We got the lock. */
	+
	if (waiter || rt_mutex_has_waiters(lock)) {
	unsigned long flags;
	struct rt_mutex_waiter *top;
	@@ -377,7 +421,6 @@ static int try_to_take_rt_mutex(struct r
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	}

	- /* We got the lock. */
	debug_rt_mutex_lock(lock);

	rt_mutex_set_owner(lock, task);
	@@ -387,6 +430,13 @@ static int try_to_take_rt_mutex(struct r
	return 1;
	}

	+static inline int
	+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
	+ struct rt_mutex_waiter *waiter)
	+{
	+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
	+}
	+
	/*
	* Task blocks on lock.
	*
	@@ -501,7 +551,7 @@ static void wakeup_next_waiter(struct rt

	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	- wake_up_process(waiter->task);
	+ rt_mutex_wake_waiter(waiter);
	}

	/*
	@@ -580,18 +630,315 @@ void rt_mutex_adjust_pi(struct task_stru
	return;
	}

	- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	-
	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);
	+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
	}

	+#ifdef CONFIG_PREEMPT_RT_FULL
	+/*
	+ * preemptible spin_lock functions:
	+ */
	+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
	+ void (*slowfn)(struct rt_mutex *lock))
	+{
	+ might_sleep();
	+
	+ if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
	+ rt_mutex_deadlock_account_lock(lock, current);
	+ else
	+ slowfn(lock);
	+}
	+
	+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
	+ void (*slowfn)(struct rt_mutex *lock))
	+{
	+ if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
	+ rt_mutex_deadlock_account_unlock(current);
	+ else
	+ slowfn(lock);
	+}
	+
	+#ifdef CONFIG_SMP
	+/*
	+ * Note that owner is a speculative pointer and dereferencing relies
	+ * on rcu_read_lock() and the check against the lock owner.
	+ */
	+static int adaptive_wait(struct rt_mutex *lock,
	+ struct task_struct *owner)
	+{
	+ int res = 0;
	+
	+ rcu_read_lock();
	+ for (;;) {
	+ if (owner != rt_mutex_owner(lock))
	+ break;
	+ /*
	+ * Ensure that owner->on_cpu is dereferenced _after_
	+ * checking the above to be valid.
	+ */
	+ barrier();
	+ if (!owner->on_cpu) {
	+ res = 1;
	+ break;
	+ }
	+ cpu_relax();
	+ }
	+ rcu_read_unlock();
	+ return res;
	+}
	+#else
	+static int adaptive_wait(struct rt_mutex *lock,
	+ struct task_struct *orig_owner)
	+{
	+ return 1;
	+}
	+#endif
	+
	+# define pi_lock(lock) raw_spin_lock_irq(lock)
	+# define pi_unlock(lock) raw_spin_unlock_irq(lock)
	+
	+/*
	+ * Slow path lock function spin_lock style: this variant is very
	+ * careful not to miss any non-lock wakeups.
	+ *
	+ * We store the current state under p->pi_lock in p->saved_state and
	+ * the try_to_wake_up() code handles this accordingly.
	+ */
	+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
	+{
	+ struct task_struct *lock_owner, *self = current;
	+ struct rt_mutex_waiter waiter, *top_waiter;
	+ int ret;
	+
	+ rt_mutex_init_waiter(&waiter, true);
	+
	+ raw_spin_lock(&lock->wait_lock);
	+ init_lists(lock);
	+
	+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
	+ raw_spin_unlock(&lock->wait_lock);
	+ return;
	+ }
	+
	+ BUG_ON(rt_mutex_owner(lock) == self);
	+
	+ /*
	+ * We save whatever state the task is in and we'll restore it
	+ * after acquiring the lock taking real wakeups into account
	+ * as well. We are serialized via pi_lock against wakeups. See
	+ * try_to_wake_up().
	+ */
	+ pi_lock(&self->pi_lock);
	+ self->saved_state = self->state;
	+ __set_current_state(TASK_UNINTERRUPTIBLE);
	+ pi_unlock(&self->pi_lock);
	+
	+ ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
	+ BUG_ON(ret);
	+
	+ for (;;) {
	+ /* Try to acquire the lock again. */
	+ if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
	+ break;
	+
	+ top_waiter = rt_mutex_top_waiter(lock);
	+ lock_owner = rt_mutex_owner(lock);
	+
	+ raw_spin_unlock(&lock->wait_lock);
	+
	+ debug_rt_mutex_print_deadlock(&waiter);
	+
	+ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
	+ schedule_rt_mutex(lock);
	+
	+ raw_spin_lock(&lock->wait_lock);
	+
	+ pi_lock(&self->pi_lock);
	+ __set_current_state(TASK_UNINTERRUPTIBLE);
	+ pi_unlock(&self->pi_lock);
	+ }
	+
	+ /*
	+ * Restore the task state to current->saved_state. We set it
	+ * to the original state above and the try_to_wake_up() code
	+ * has possibly updated it when a real (non-rtmutex) wakeup
	+ * happened while we were blocked. Clear saved_state so
	+ * try_to_wake_up() does not get confused.
	+ */
	+ pi_lock(&self->pi_lock);
	+ __set_current_state(self->saved_state);
	+ self->saved_state = TASK_RUNNING;
	+ pi_unlock(&self->pi_lock);
	+
	+ /*
	+ * try_to_take_rt_mutex() sets the waiter bit
	+ * unconditionally. We might have to fix that up:
	+ */
	+ fixup_rt_mutex_waiters(lock);
	+
	+ BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
	+ BUG_ON(!plist_node_empty(&waiter.list_entry));
	+
	+ raw_spin_unlock(&lock->wait_lock);
	+
	+ debug_rt_mutex_free_waiter(&waiter);
	+}
	+
	+/*
	+ * Slow path to release a rt_mutex spin_lock style
	+ */
	+static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
	+{
	+ raw_spin_lock(&lock->wait_lock);
	+
	+ debug_rt_mutex_unlock(lock);
	+
	+ rt_mutex_deadlock_account_unlock(current);
	+
	+ if (!rt_mutex_has_waiters(lock)) {
	+ lock->owner = NULL;
	+ raw_spin_unlock(&lock->wait_lock);
	+ return;
	+ }
	+
	+ wakeup_next_waiter(lock);
	+
	+ raw_spin_unlock(&lock->wait_lock);
	+
	+ /* Undo pi boosting when necessary */
	+ rt_mutex_adjust_prio(current);
	+}
	+
	+void __lockfunc rt_spin_lock(spinlock_t *lock)
	+{
	+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
	+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	+}
	+EXPORT_SYMBOL(rt_spin_lock);
	+
	+void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
	+{
	+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
	+}
	+EXPORT_SYMBOL(__rt_spin_lock);
	+
	+#ifdef CONFIG_DEBUG_LOCK_ALLOC
	+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
	+{
	+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
	+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
	+}
	+EXPORT_SYMBOL(rt_spin_lock_nested);
	+#endif
	+
	+void __lockfunc rt_spin_unlock(spinlock_t *lock)
	+{
	+ /* NOTE: we always pass in '1' for nested, for simplicity */
	+ spin_release(&lock->dep_map, 1, _RET_IP_);
	+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
	+}
	+EXPORT_SYMBOL(rt_spin_unlock);
	+
	+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
	+{
	+ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
	+}
	+EXPORT_SYMBOL(__rt_spin_unlock);
	+
	+/*
	+ * Wait for the lock to get unlocked: instead of polling for an unlock
	+ * (like raw spinlocks do), we lock and unlock, to force the kernel to
	+ * schedule if there's contention:
	+ */
	+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
	+{
	+ spin_lock(lock);
	+ spin_unlock(lock);
	+}
	+EXPORT_SYMBOL(rt_spin_unlock_wait);
	+
	+int __lockfunc rt_spin_trylock(spinlock_t *lock)
	+{
	+ int ret;
	+
	+ migrate_disable();
	+ ret = rt_mutex_trylock(&lock->lock);
	+ if (ret)
	+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	+ else
	+ migrate_enable();
	+
	+ return ret;
	+}
	+EXPORT_SYMBOL(rt_spin_trylock);
	+
	+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
	+{
	+ int ret;
	+
	+ local_bh_disable();
	+ ret = rt_mutex_trylock(&lock->lock);
	+ if (ret) {
	+ migrate_disable();
	+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	+ } else
	+ local_bh_enable();
	+ return ret;
	+}
	+EXPORT_SYMBOL(rt_spin_trylock_bh);
	+
	+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
	+{
	+ int ret;
	+
	+ *flags = 0;
	+ migrate_disable();
	+ ret = rt_mutex_trylock(&lock->lock);
	+ if (ret)
	+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	+ else
	+ migrate_enable();
	+ return ret;
	+}
	+EXPORT_SYMBOL(rt_spin_trylock_irqsave);
	+
	+int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
	+{
	+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	+ if (atomic_add_unless(atomic, -1, 1))
	+ return 0;
	+ migrate_disable();
	+ rt_spin_lock(lock);
	+ if (atomic_dec_and_test(atomic))
	+ return 1;
	+ rt_spin_unlock(lock);
	+ migrate_enable();
	+ return 0;
	+}
	+EXPORT_SYMBOL(atomic_dec_and_spin_lock);
	+
	+void
	+__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
	+{
	+#ifdef CONFIG_DEBUG_LOCK_ALLOC
	+ /*
	+ * Make sure we are not reinitializing a held lock:
	+ */
	+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	+ lockdep_init_map(&lock->dep_map, name, key, 0);
	+#endif
	+}
	+EXPORT_SYMBOL(__rt_spin_lock_init);
	+
	+#endif /* PREEMPT_RT_FULL */
	+
	/**
	* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
	* @lock: the rt_mutex to take
	* @state: the state the task should block in (TASK_INTERRUPTIBLE
	- * or TASK_UNINTERRUPTIBLE)
	+ * or TASK_UNINTERRUPTIBLE)
	* @timeout: the pre-initialized and started timer, or NULL for none
	* @waiter: the pre-initialized rt_mutex_waiter
	*
	@@ -647,9 +994,10 @@ rt_mutex_slowlock(struct rt_mutex *lock,
	struct rt_mutex_waiter waiter;
	int ret = 0;

	- debug_rt_mutex_init_waiter(&waiter);
	+ rt_mutex_init_waiter(&waiter, false);

	raw_spin_lock(&lock->wait_lock);
	+ init_lists(lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
	@@ -702,6 +1050,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lo
	int ret = 0;

	raw_spin_lock(&lock->wait_lock);
	+ init_lists(lock);

	if (likely(rt_mutex_owner(lock) != current)) {

	@@ -934,12 +1283,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
	void __rt_mutex_init(struct rt_mutex *lock, const char *name)
	{
	lock->owner = NULL;
	- raw_spin_lock_init(&lock->wait_lock);
	plist_head_init(&lock->wait_list);

	debug_rt_mutex_init(lock, name);
	}
	-EXPORT_SYMBOL_GPL(__rt_mutex_init);
	+EXPORT_SYMBOL(__rt_mutex_init);

	/**
	* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
	@@ -954,7 +1302,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
	void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
	struct task_struct *proxy_owner)
	{
	- __rt_mutex_init(lock, NULL);
	+ rt_mutex_init(lock);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
	--- a/kernel/rtmutex_common.h
	+++ b/kernel/rtmutex_common.h
	@@ -49,6 +49,7 @@ struct rt_mutex_waiter {
	struct plist_node pi_list_entry;
	struct task_struct *task;
	struct rt_mutex *lock;
	+ bool savestate;
	#ifdef CONFIG_DEBUG_RT_MUTEXES
	unsigned long ip;
	struct pid *deadlock_task_pid;
	@@ -126,4 +127,12 @@ extern int rt_mutex_finish_proxy_lock(st
	# include "rtmutex.h"
	#endif

	+static inline void
	+rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
	+{
	+ debug_rt_mutex_init_waiter(waiter);
	+ waiter->task = NULL;
	+ waiter->savestate = savestate;
	+}
	+
	#endif