| From c651e0c06f3ea64d302837cc3b576d6993694fd2 Mon Sep 17 00:00:00 2001 |
| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:30:07 -0500 |
| Subject: [PATCH] rt: core implementation |
| |
| commit e9888fb95225bb3b786d79fd983eb67e1acad338 in tip. |
| |
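| At its core, PREEMPT_RT converts almost every spinlock_t/rwlock_t |
| section into a sleeping, priority-inheriting lock and pushes soft and |
| hard interrupt handling into threads; the hunks below adapt core code |
| to that model. As a rough sketch of the consequence for ordinary lock |
| users (illustrative only; the lock name is hypothetical): |
| |
| 	static DEFINE_SPINLOCK(example_lock); |
| |
| 	void example(void) |
| 	{ |
| 		spin_lock(&example_lock);	/* may now sleep on PREEMPT_RT */ |
| 		/* the critical section itself stays preemptible */ |
| 		spin_unlock(&example_lock); |
| 	} |
| |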
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h |
| index cee2da4..3223348 100644 |
| --- a/include/linux/hardirq.h |
| +++ b/include/linux/hardirq.h |
| @@ -83,9 +83,9 @@ |
| * Are we doing bottom half or hardware interrupt processing? |
| * Are we in a softirq context? Interrupt context? |
| */ |
| -#define in_irq() (hardirq_count()) |
| -#define in_softirq() (softirq_count()) |
| -#define in_interrupt() (irq_count()) |
| +#define in_irq() (hardirq_count() || (current->flags & PF_HARDIRQ)) |
| +#define in_softirq() (softirq_count() || (current->flags & PF_SOFTIRQ)) |
| +#define in_interrupt() (irq_count()) |
| |
| /* |
| * Are we in NMI context? |
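| |
| The PF_HARDIRQ/PF_SOFTIRQ checks above are needed because, once |
| interrupt handlers run in threads, the hardirq/softirq preempt counts |
| are no longer elevated while a handler executes. A minimal sketch of |
| how an irq thread would mark itself (the actual flag management lives |
| in the irq-threading patches; this is only illustrative): |
| |
| 	static void run_threaded_handler(struct irqaction *action, int irq) |
| 	{ |
| 		current->flags |= PF_HARDIRQ;	/* in_irq() is now true */ |
| 		action->handler(irq, action->dev_id); |
| 		current->flags &= ~PF_HARDIRQ; |
| 	} |
| |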
| diff --git a/include/linux/kernel.h b/include/linux/kernel.h |
| index 1221d23..3489c31 100644 |
| --- a/include/linux/kernel.h |
| +++ b/include/linux/kernel.h |
| @@ -123,7 +123,7 @@ extern int _cond_resched(void); |
| # define might_resched() do { } while (0) |
| #endif |
| |
| -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
| +#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) |
| void __might_sleep(const char *file, int line, int preempt_offset); |
| /** |
| * might_sleep - annotation for functions that can sleep |
| @@ -287,6 +287,12 @@ extern void printk_tick(void); |
| extern void asmlinkage __attribute__((format(printf, 1, 2))) |
| early_printk(const char *fmt, ...); |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +extern void zap_rt_locks(void); |
| +#else |
| +# define zap_rt_locks() do { } while (0) |
| +#endif |
| + |
| unsigned long int_sqrt(unsigned long); |
| |
| static inline void console_silent(void) |
| @@ -316,6 +322,7 @@ extern int root_mountflags; |
| /* Values used for system_state */ |
| extern enum system_states { |
| SYSTEM_BOOTING, |
| + SYSTEM_BOOTING_SCHEDULER_OK, |
| SYSTEM_RUNNING, |
| SYSTEM_HALT, |
| SYSTEM_POWER_OFF, |
| diff --git a/include/linux/profile.h b/include/linux/profile.h |
| index a0fc322..5b72082 100644 |
| --- a/include/linux/profile.h |
| +++ b/include/linux/profile.h |
| @@ -8,10 +8,11 @@ |
| |
| #include <asm/errno.h> |
| |
| -#define CPU_PROFILING 1 |
| -#define SCHED_PROFILING 2 |
| -#define SLEEP_PROFILING 3 |
| -#define KVM_PROFILING 4 |
| +#define CPU_PROFILING 1 |
| +#define SCHED_PROFILING 2 |
| +#define SLEEP_PROFILING 3 |
| +#define KVM_PROFILING 4 |
| +#define PREEMPT_PROFILING 5 |
| |
| struct proc_dir_entry; |
| struct pt_regs; |
| @@ -36,6 +37,8 @@ enum profile_type { |
| PROFILE_MUNMAP |
| }; |
| |
| +extern int prof_pid; |
| + |
| #ifdef CONFIG_PROFILING |
| |
| extern int prof_on __read_mostly; |
| diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h |
| index c5da749..9eb17f9 100644 |
| --- a/include/linux/radix-tree.h |
| +++ b/include/linux/radix-tree.h |
| @@ -169,7 +169,18 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, |
| unsigned long index, unsigned long max_scan); |
| unsigned long radix_tree_prev_hole(struct radix_tree_root *root, |
| unsigned long index, unsigned long max_scan); |
| +/* |
| + * On a mutex-based kernel we can freely schedule within the radix code: |
| + */ |
| +#ifdef CONFIG_PREEMPT_RT |
| +static inline int radix_tree_preload(gfp_t gfp_mask) |
| +{ |
| + return 0; |
| +} |
| +#else |
| int radix_tree_preload(gfp_t gfp_mask); |
| +#endif |
| + |
| void radix_tree_init(void); |
| void *radix_tree_tag_set(struct radix_tree_root *root, |
| unsigned long index, unsigned int tag); |
| @@ -189,7 +200,9 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); |
| |
| static inline void radix_tree_preload_end(void) |
| { |
| +#ifndef CONFIG_PREEMPT_RT |
| preempt_enable(); |
| +#endif |
| } |
| |
| #endif /* _LINUX_RADIX_TREE_H */ |
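| |
| Note on the radix_tree_preload() stub above: on mainline the preload |
| pins the CPU via preempt_disable() so that a later insertion can |
| always take a node from the per-CPU pool. On PREEMPT_RT the insertion |
| path runs under sleeping locks and may allocate (and schedule) |
| directly, so the preload/preload_end pair degenerates into a no-op. |
| The usual caller pattern is unchanged (sketch; the lock and tree |
| names are hypothetical): |
| |
| 	if (radix_tree_preload(GFP_KERNEL) == 0) { |
| 		spin_lock(&tree_lock);	/* a sleeping lock on RT */ |
| 		error = radix_tree_insert(&tree, index, item); |
| 		spin_unlock(&tree_lock); |
| 		radix_tree_preload_end(); |
| 	} |
| |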
| diff --git a/include/linux/smp.h b/include/linux/smp.h |
| index 7a0570e..c55f2ca 100644 |
| --- a/include/linux/smp.h |
| +++ b/include/linux/smp.h |
| @@ -50,6 +50,11 @@ extern void smp_send_stop(void); |
| */ |
| extern void smp_send_reschedule(int cpu); |
| |
| +/* |
| + * trigger a reschedule on all other CPUs: |
| + */ |
| +extern void smp_send_reschedule_allbutself(void); |
| + |
| |
| /* |
| * Prepare machine for booting other CPUs. |
| @@ -136,6 +146,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) |
| 0; \ |
| }) |
| static inline void smp_send_reschedule(int cpu) { } |
| +static inline void smp_send_reschedule_allbutself(void) { } |
| #define num_booting_cpus() 1 |
| #define smp_prepare_boot_cpu() do {} while (0) |
| #define smp_call_function_many(mask, func, info, wait) \ |
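| |
| smp_send_reschedule_allbutself() extends the existing per-CPU |
| smp_send_reschedule() facility to all remote CPUs at once; a |
| hypothetical call site (the real users arrive with the RT scheduler |
| changes): |
| |
| 	/* |
| 	 * A batch of tasks changed priority; make every other CPU |
| 	 * re-evaluate its runqueue: |
| 	 */ |
| 	smp_send_reschedule_allbutself(); |
| |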
| diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h |
| index 9466e86..ec218ce 100644 |
| --- a/include/linux/workqueue.h |
| +++ b/include/linux/workqueue.h |
| @@ -211,6 +211,9 @@ __create_workqueue_key(const char *name, int singlethread, |
| #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) |
| #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) |
| |
| +extern void set_workqueue_prio(struct workqueue_struct *wq, int policy, |
| + int rt_priority, int nice); |
| + |
| extern void destroy_workqueue(struct workqueue_struct *wq); |
| |
| extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); |
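| |
| set_workqueue_prio() lets a subsystem give its worker threads an |
| explicit scheduling class, which matters once most work runs in |
| threads competing with RT tasks. A hypothetical user ("mydrv" and the |
| SCHED_FIFO priority 50 are made up for illustration): |
| |
| 	struct workqueue_struct *wq = create_workqueue("mydrv"); |
| |
| 	if (wq) |
| 		set_workqueue_prio(wq, SCHED_FIFO, 50, 0); |
| |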
| diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt |
| index 7c20f45..f4602f8 100644 |
| --- a/kernel/Kconfig.preempt |
| +++ b/kernel/Kconfig.preempt |
| @@ -1,14 +1,13 @@ |
| - |
| choice |
| - prompt "Preemption Model" |
| - default PREEMPT_NONE |
| + prompt "Preemption Mode" |
| + default PREEMPT_RT |
| |
| config PREEMPT_NONE |
| bool "No Forced Preemption (Server)" |
| help |
| - This is the traditional Linux preemption model, geared towards |
| + This is the traditional Linux preemption model geared towards |
| throughput. It will still provide good latencies most of the |
| - time, but there are no guarantees and occasional longer delays |
| + time but there are no guarantees and occasional long delays |
| are possible. |
| |
| Select this option if you are building a kernel for a server or |
| @@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY |
| help |
| This option reduces the latency of the kernel by adding more |
| "explicit preemption points" to the kernel code. These new |
| - preemption points have been selected to reduce the maximum |
| + preemption points have been selected to minimize the maximum |
| latency of rescheduling, providing faster application reactions, |
| at the cost of slightly lower throughput. |
| |
| @@ -33,38 +32,73 @@ config PREEMPT_VOLUNTARY |
| |
| Select this if you are building a kernel for a desktop system. |
| |
| -config PREEMPT |
| +config PREEMPT_DESKTOP |
| bool "Preemptible Kernel (Low-Latency Desktop)" |
| help |
| This option reduces the latency of the kernel by making |
| - all kernel code (that is not executing in a critical section) |
| + all kernel code that is not executing in a critical section |
| preemptible. This allows reaction to interactive events by |
| permitting a low priority process to be preempted involuntarily |
| even if it is in kernel mode executing a system call and would |
| - otherwise not be about to reach a natural preemption point. |
| - This allows applications to run more 'smoothly' even when the |
| - system is under load, at the cost of slightly lower throughput |
| - and a slight runtime overhead to kernel code. |
| + otherwise not be about to reach a preemption point. This allows |
| + applications to run more 'smoothly' even when the system is |
| + under load, at the cost of slightly lower throughput and a |
| + slight runtime overhead to kernel code. |
| + |
| + (According to profiles, when this mode is selected, even |
| + during kernel-intensive workloads the system is in an immediately |
| + preemptible state more than 50% of the time.) |
| |
| Select this if you are building a kernel for a desktop or |
| embedded system with latency requirements in the milliseconds |
| range. |
| |
| +config PREEMPT_RT |
| + bool "Complete Preemption (Real-Time)" |
| + select PREEMPT_SOFTIRQS |
| + select PREEMPT_HARDIRQS |
| + select PREEMPT_RCU |
| + select RT_MUTEXES |
| + help |
| + This option further reduces the scheduling latency of the |
| + kernel by replacing almost every spinlock used by the kernel |
| + with preemptible mutexes and thus making all but the most |
| + critical kernel code involuntarily preemptible. The remaining |
| + handful of low-level non-preemptible codepaths are short and |
| + have a deterministic latency of a few tens of |
| + microseconds (depending on the hardware). This also allows |
| + applications to run more 'smoothly' even when the system is |
| + under load, at the cost of lower throughput and runtime |
| + overhead to kernel code. |
| + |
| + (According to profiles, when this mode is selected, even |
| + during kernel-intensive workloads the system is in an immediately |
| + preemptible state more than 95% of the time.) |
| + |
| + Select this if you are building a kernel for a desktop, |
| + embedded or real-time system with guaranteed latency |
| + requirements of 100 usecs or lower. |
| + |
| endchoice |
| |
| +config PREEMPT |
| + bool |
| + default y |
| + depends on PREEMPT_DESKTOP || PREEMPT_RT |
| + |
| config PREEMPT_SOFTIRQS |
| bool "Thread Softirqs" |
| default n |
| # depends on PREEMPT |
| help |
| This option reduces the latency of the kernel by 'threading' |
| - soft interrupts. This means that all softirqs will execute |
| - in softirqd's context. While this helps latency, it can also |
| - reduce performance. |
| + soft interrupts. This means that all softirqs will execute |
| + in softirqd's context. While this helps latency, it can also |
| + reduce performance. |
| |
| - The threading of softirqs can also be controlled via |
| - /proc/sys/kernel/softirq_preemption runtime flag and the |
| - sofirq-preempt=0/1 boot-time option. |
| + The threading of softirqs can also be controlled via |
| + /proc/sys/kernel/softirq_preemption runtime flag and the |
| + softirq-preempt=0/1 boot-time option. |
| |
| Say N if you are unsure. |
| |
| @@ -75,14 +109,14 @@ config PREEMPT_HARDIRQS |
| select PREEMPT_SOFTIRQS |
| help |
| This option reduces the latency of the kernel by 'threading' |
| - hardirqs. This means that all (or selected) hardirqs will run |
| - in their own kernel thread context. While this helps latency, |
| - this feature can also reduce performance. |
| - |
| - The threading of hardirqs can also be controlled via the |
| - /proc/sys/kernel/hardirq_preemption runtime flag and the |
| - hardirq-preempt=0/1 boot-time option. Per-irq threading can |
| - be enabled/disable via the /proc/irq/<IRQ>/<handler>/threaded |
| - runtime flags. |
| + hardirqs. This means that all (or selected) hardirqs will run |
| + in their own kernel thread context. While this helps latency, |
| + this feature can also reduce performance. |
| + |
| + The threading of hardirqs can also be controlled via the |
| + /proc/sys/kernel/hardirq_preemption runtime flag and the |
| + hardirq-preempt=0/1 boot-time option. Per-irq threading can |
| + be enabled/disabled via the /proc/irq/<IRQ>/<handler>/threaded |
| + runtime flags. |
| |
| Say N if you are unsure. |
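| |
| Note that CONFIG_PREEMPT itself becomes a non-interactive option that |
| is switched on by both PREEMPT_DESKTOP and PREEMPT_RT, so existing |
| conditionally compiled code keeps working unchanged: |
| |
| 	#ifdef CONFIG_PREEMPT |
| 	/* built for PREEMPT_DESKTOP as well as PREEMPT_RT */ |
| 	#endif |
| |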
| diff --git a/kernel/exit.c b/kernel/exit.c |
| index 309df57..adeffd2 100644 |
| --- a/kernel/exit.c |
| +++ b/kernel/exit.c |
| @@ -69,7 +69,9 @@ static void __unhash_process(struct task_struct *p) |
| |
| list_del_rcu(&p->tasks); |
| list_del_init(&p->sibling); |
| + preempt_disable(); |
| __get_cpu_var(process_counts)--; |
| + preempt_enable(); |
| } |
| list_del_rcu(&p->thread_group); |
| } |
| @@ -694,9 +696,11 @@ static void exit_mm(struct task_struct * tsk) |
| task_lock(tsk); |
| tsk->mm = NULL; |
| up_read(&mm->mmap_sem); |
| + preempt_disable(); /* FIXME */ |
| enter_lazy_tlb(mm, current); |
| /* We don't want this task to be frozen prematurely */ |
| clear_freeze_flag(tsk); |
| + preempt_enable(); |
| task_unlock(tsk); |
| mm_update_next_owner(mm); |
| mmput(mm); |
| @@ -1501,6 +1505,9 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, |
| struct task_struct *p) |
| { |
| int ret = eligible_child(wo, p); |
| + |
| + BUG_ON(!atomic_read(&p->usage)); |
| + |
| if (!ret) |
| return ret; |
| |
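| The preempt_disable()/preempt_enable() pairs added above follow the |
| standard migration-safe per-CPU access pattern, which becomes |
| load-bearing once the surrounding locks no longer disable preemption: |
| |
| 	preempt_disable(); |
| 	__get_cpu_var(process_counts)--;	/* CPU cannot change here */ |
| 	preempt_enable(); |
| |
| 	/* equivalently, using the get/put helpers: */ |
| 	get_cpu_var(process_counts)--; |
| 	put_cpu_var(process_counts); |
| |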
| diff --git a/kernel/fork.c b/kernel/fork.c |
| index c49f839..30086f9 100644 |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -186,6 +186,15 @@ void __put_task_struct(struct task_struct *tsk) |
| free_task(tsk); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +void __put_task_struct_cb(struct rcu_head *rhp) |
| +{ |
| + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
| + |
| + __put_task_struct(tsk); |
| +} |
| +#endif |
| + |
| /* |
| * macro override instead of weak attribute alias, to workaround |
| * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. |
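| |
| __put_task_struct_cb() gives PREEMPT_RT an RCU-deferred path for the |
| final task_struct put, so it can be driven from contexts that must |
| not block. A sketch of the intended use (the callers come with the |
| RT scheduler patches; task_struct already carries an rcu_head named |
| 'rcu'): |
| |
| 	call_rcu(&tsk->rcu, __put_task_struct_cb); |
| |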
| diff --git a/kernel/notifier.c b/kernel/notifier.c |
| index 2488ba7..88d65e6 100644 |
| --- a/kernel/notifier.c |
| +++ b/kernel/notifier.c |
| @@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl, |
| * @returns: notifier_call_chain returns the value returned by the |
| * last notifier function called. |
| */ |
| -static int __kprobes notifier_call_chain(struct notifier_block **nl, |
| +static int __kprobes notrace notifier_call_chain(struct notifier_block **nl, |
| unsigned long val, void *v, |
| int nr_to_call, int *nr_calls) |
| { |
| @@ -217,7 +217,7 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, |
| * not yet working and interrupts must remain disabled. At |
| * such times we must not call down_write(). |
| */ |
| - if (unlikely(system_state == SYSTEM_BOOTING)) |
| + if (unlikely(system_state < SYSTEM_RUNNING)) |
| return notifier_chain_register(&nh->head, n); |
| |
| down_write(&nh->rwsem); |
| diff --git a/kernel/signal.c b/kernel/signal.c |
| index 5c2181b..9dda83b 100644 |
| --- a/kernel/signal.c |
| +++ b/kernel/signal.c |
| @@ -949,7 +949,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, |
| |
| trace_signal_generate(sig, info, t); |
| |
| +#ifdef CONFIG_SMP |
| assert_spin_locked(&t->sighand->siglock); |
| +#endif |
| |
| if (!prepare_signal(sig, t, from_ancestor_ns)) |
| return 0; |
| @@ -1710,15 +1712,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) |
| read_lock(&tasklist_lock); |
| if (may_ptrace_stop()) { |
| do_notify_parent_cldstop(current, CLD_TRAPPED); |
| - /* |
| - * Don't want to allow preemption here, because |
| - * sys_ptrace() needs this task to be inactive. |
| - * |
| - * XXX: implement read_unlock_no_resched(). |
| - */ |
| - preempt_disable(); |
| read_unlock(&tasklist_lock); |
| - preempt_enable_and_schedule(); |
| + schedule(); |
| } else { |
| /* |
| * By the time we got the lock, our tracer went away. |
| diff --git a/kernel/softirq.c b/kernel/softirq.c |
| index 31db011..b021c2d 100644 |
| --- a/kernel/softirq.c |
| +++ b/kernel/softirq.c |
| @@ -20,6 +20,7 @@ |
| #include <linux/kernel_stat.h> |
| #include <linux/interrupt.h> |
| #include <linux/init.h> |
| +#include <linux/delay.h> |
| #include <linux/mm.h> |
| #include <linux/notifier.h> |
| #include <linux/percpu.h> |
| @@ -106,6 +107,8 @@ static void trigger_softirqs(void) |
| } |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| /* |
| * This one is for softirq.c-internal use, |
| * where hardirqs are disabled legitimately: |
| @@ -207,6 +210,8 @@ void local_bh_enable_ip(unsigned long ip) |
| } |
| EXPORT_SYMBOL(local_bh_enable_ip); |
| |
| +#endif |
| + |
| /* |
| * We restart softirq processing MAX_SOFTIRQ_RESTART times, |
| * and we fall back to softirqd after that. |
| @@ -606,7 +611,7 @@ void tasklet_kill(struct tasklet_struct *t) |
| |
| while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { |
| do { |
| - yield(); |
| + msleep(1); |
| } while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
| } |
| tasklet_unlock_wait(t); |
| @@ -1057,6 +1062,11 @@ int softirq_preemption = 1; |
| |
| EXPORT_SYMBOL(softirq_preemption); |
| |
| +/* |
| + * Real-Time Preemption depends on softirq threading: |
| + */ |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| static int __init softirq_preempt_setup (char *str) |
| { |
| if (!strncmp(str, "off", 3)) |
| @@ -1070,7 +1080,7 @@ static int __init softirq_preempt_setup (char *str) |
| } |
| |
| __setup("softirq-preempt=", softirq_preempt_setup); |
| - |
| +#endif |
| #endif |
| |
| #ifdef CONFIG_SMP |
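| |
| The yield() -> msleep(1) change in tasklet_kill() above matters once |
| callers can be SCHED_FIFO tasks: yield() only rotates among runnable |
| tasks of the same priority, so a lower-priority ksoftirqd would never |
| get to clear TASKLET_STATE_SCHED and the loop could livelock. |
| Sleeping unconditionally lets any thread run: |
| |
| 	do { |
| 		msleep(1);	/* block; lets ksoftirqd make progress */ |
| 	} while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
| |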
| diff --git a/kernel/workqueue.c b/kernel/workqueue.c |
| index dee4865..fb74031 100644 |
| --- a/kernel/workqueue.c |
| +++ b/kernel/workqueue.c |
| @@ -26,6 +26,7 @@ |
| #include <linux/slab.h> |
| #include <linux/cpu.h> |
| #include <linux/notifier.h> |
| +#include <linux/syscalls.h> |
| #include <linux/kthread.h> |
| #include <linux/hardirq.h> |
| #include <linux/mempolicy.h> |
| @@ -36,6 +37,8 @@ |
| #define CREATE_TRACE_POINTS |
| #include <trace/events/workqueue.h> |
| |
| +#include <asm/uaccess.h> |
| + |
| /* |
| * The per-CPU workqueue (if single thread, we always use the first |
| * possible cpu). |
| @@ -270,13 +273,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, |
| * |
| * We queue the work to the CPU on which it was submitted, but if the CPU dies |
| * it can be processed by another CPU. |
| + * |
| + * In particular, there is no such guarantee on PREEMPT_RT. |
| */ |
| int queue_work(struct workqueue_struct *wq, struct work_struct *work) |
| { |
| - int ret; |
| + int ret = 0, cpu = raw_smp_processor_id(); |
| |
| - ret = queue_work_on(get_cpu(), wq, work); |
| - put_cpu(); |
| + ret = queue_work_on(cpu, wq, work); |
| |
| return ret; |
| } |
| @@ -774,9 +778,9 @@ void flush_delayed_work(struct delayed_work *dwork) |
| { |
| if (del_timer_sync(&dwork->timer)) { |
| struct cpu_workqueue_struct *cwq; |
| - cwq = wq_per_cpu(keventd_wq, get_cpu()); |
| + int cpu = raw_smp_processor_id(); |
| + cwq = wq_per_cpu(keventd_wq, cpu); |
| __queue_work(cwq, &dwork->work); |
| - put_cpu(); |
| } |
| flush_work(&dwork->work); |
| } |
| @@ -1044,6 +1048,49 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) |
| cwq->thread = NULL; |
| } |
| |
| +void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu, |
| + int policy, int rt_priority, int nice) |
| +{ |
| + struct sched_param param = { .sched_priority = rt_priority }; |
| + struct cpu_workqueue_struct *cwq; |
| + mm_segment_t oldfs = get_fs(); |
| + struct task_struct *p; |
| + unsigned long flags; |
| + int ret; |
| + |
| + cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
| + spin_lock_irqsave(&cwq->lock, flags); |
| + p = cwq->thread; |
| + spin_unlock_irqrestore(&cwq->lock, flags); |
| + |
| + set_user_nice(p, nice); |
| + |
| + set_fs(KERNEL_DS); |
| + ret = sys_sched_setscheduler(p->pid, policy, ¶m); |
| + set_fs(oldfs); |
| + |
| + WARN_ON(ret); |
| +} |
| + |
| +void set_workqueue_prio(struct workqueue_struct *wq, int policy, |
| + int rt_priority, int nice) |
| +{ |
| + int cpu; |
| + |
| + /* We don't need the distraction of CPUs appearing and vanishing. */ |
| + get_online_cpus(); |
| + spin_lock(&workqueue_lock); |
| + if (is_wq_single_threaded(wq)) |
| + set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice); |
| + else { |
| + for_each_online_cpu(cpu) |
| + set_workqueue_thread_prio(wq, cpu, policy, |
| + rt_priority, nice); |
| + } |
| + spin_unlock(&workqueue_lock); |
| + put_online_cpus(); |
| +} |
| + |
| /** |
| * destroy_workqueue - safely terminate a workqueue |
| * @wq: target workqueue |
| @@ -1176,4 +1223,5 @@ void __init init_workqueues(void) |
| hotcpu_notifier(workqueue_cpu_callback, 0); |
| keventd_wq = create_workqueue("events"); |
| BUG_ON(!keventd_wq); |
| + set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20); |
| } |
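| |
| queue_work() above drops the get_cpu()/put_cpu() pair because |
| disabling preemption around the queueing is exactly what RT wants to |
| avoid; raw_smp_processor_id() is then only a placement hint, which is |
| fine since work items have no hard CPU-affinity guarantee on RT. The |
| keventd_wq setup above keeps the default queue ahead of SCHED_OTHER |
| load but below all higher-priority RT tasks; spelled out with its |
| parameters labelled (illustrative restatement): |
| |
| 	set_workqueue_prio(keventd_wq, SCHED_FIFO /* policy */, |
| 			   1 /* rt_priority */, -20 /* nice */); |
| |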
| diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug |
| index 5e3407d..aa0142b 100644 |
| --- a/lib/Kconfig.debug |
| +++ b/lib/Kconfig.debug |
| @@ -415,6 +415,8 @@ config DEBUG_RT_MUTEXES |
| help |
| This allows rt mutex semantics violations and rt mutex related |
| deadlocks (lockups) to be detected and reported automatically. |
| + When realtime preemption is enabled, this includes spinlocks, |
| + rwlocks, mutexes and (rw)semaphores. |
| |
| config DEBUG_PI_LIST |
| bool |
| @@ -438,7 +440,7 @@ config DEBUG_SPINLOCK |
| |
| config DEBUG_MUTEXES |
| bool "Mutex debugging: basic checks" |
| - depends on DEBUG_KERNEL |
| + depends on DEBUG_KERNEL && !PREEMPT_RT |
| help |
| This feature allows mutex semantics violations to be detected and |
| reported. |
| diff --git a/lib/Makefile b/lib/Makefile |
| index 3b0b4a6..2d21722 100644 |
| --- a/lib/Makefile |
| +++ b/lib/Makefile |
| @@ -34,7 +34,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o |
| obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o |
| obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o |
| obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o |
| -lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| +obj-$(CONFIG_PREEMPT_RT) += plist.o |
| +obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
| lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o |
| lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o |
| diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c |
| index 79f9fac..a77306a 100644 |
| --- a/lib/kernel_lock.c |
| +++ b/lib/kernel_lock.c |
| @@ -38,6 +38,8 @@ struct semaphore kernel_sem; |
| * about recursion, both due to the down() and due to the enabling of |
| * preemption. schedule() will re-check the preemption flag after |
| * reacquiring the semaphore. |
| + * |
| + * Called with interrupts disabled. |
| */ |
| int __lockfunc __reacquire_kernel_lock(void) |
| { |
| @@ -76,7 +78,11 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line) |
| * No recursion worries - we set up lock_depth _after_ |
| */ |
| down(&kernel_sem); |
| +#ifdef CONFIG_DEBUG_RT_MUTEXES |
| + current->last_kernel_lock = __builtin_return_address(0); |
| +#endif |
| } |
| + |
| current->lock_depth = depth; |
| } |
| |
| @@ -84,9 +90,12 @@ void __lockfunc _unlock_kernel(const char *func, const char *file, int line) |
| { |
| BUG_ON(current->lock_depth < 0); |
| |
| - if (likely(--current->lock_depth < 0)) |
| + if (likely(--current->lock_depth < 0)) { |
| +#ifdef CONFIG_DEBUG_RT_MUTEXES |
| + current->last_kernel_lock = NULL; |
| +#endif |
| up(&kernel_sem); |
| - |
| + } |
| trace_unlock_kernel(func, file, line); |
| } |
| |
| diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c |
| index 619313e..65e7eab 100644 |
| --- a/lib/locking-selftest.c |
| +++ b/lib/locking-selftest.c |
| @@ -158,7 +158,7 @@ static void init_shared_classes(void) |
| local_bh_disable(); \ |
| local_irq_disable(); \ |
| lockdep_softirq_enter(); \ |
| - WARN_ON(!in_softirq()); |
| + /* FIXME: preemptible softirqs. WARN_ON(!in_softirq()); */ |
| |
| #define SOFTIRQ_EXIT() \ |
| lockdep_softirq_exit(); \ |
| @@ -550,6 +550,11 @@ GENERATE_TESTCASE(init_held_rsem) |
| #undef E |
| |
| /* |
| + * FIXME: turn these into raw-spinlock tests on -rt |
| + */ |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| +/* |
| * locking an irq-safe lock with irqs enabled: |
| */ |
| #define E1() \ |
| @@ -890,6 +895,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) |
| #include "locking-selftest-softirq.h" |
| // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) |
| |
| +#endif /* !CONFIG_PREEMPT_RT */ |
| + |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) |
| # define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) |
| @@ -998,7 +1005,7 @@ static inline void print_testname(const char *testname) |
| |
| #define DO_TESTCASE_1(desc, name, nr) \ |
| print_testname(desc"/"#nr); \ |
| - dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| + dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| #define DO_TESTCASE_1B(desc, name, nr) \ |
| @@ -1006,17 +1013,17 @@ static inline void print_testname(const char *testname) |
| dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| -#define DO_TESTCASE_3(desc, name, nr) \ |
| - print_testname(desc"/"#nr); \ |
| - dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ |
| - dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| +#define DO_TESTCASE_3(desc, name, nr) \ |
| + print_testname(desc"/"#nr); \ |
| + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ |
| + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| -#define DO_TESTCASE_3RW(desc, name, nr) \ |
| - print_testname(desc"/"#nr); \ |
| +#define DO_TESTCASE_3RW(desc, name, nr) \ |
| + print_testname(desc"/"#nr); \ |
| dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\ |
| - dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ |
| dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ |
| printk("\n"); |
| |
| @@ -1047,7 +1054,7 @@ static inline void print_testname(const char *testname) |
| print_testname(desc); \ |
| dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ |
| dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ |
| - dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ |
| + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ |
| dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ |
| dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ |
| dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ |
| @@ -1179,6 +1186,7 @@ void locking_selftest(void) |
| /* |
| * irq-context testcases: |
| */ |
| +#ifndef CONFIG_PREEMPT_RT |
| DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); |
| DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); |
| DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); |
| @@ -1188,6 +1196,7 @@ void locking_selftest(void) |
| |
| DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); |
| // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); |
| +#endif |
| |
| if (unexpected_testcase_failures) { |
| printk("-----------------------------------------------------------------\n"); |
| diff --git a/lib/radix-tree.c b/lib/radix-tree.c |
| index 6b9670d..149f285 100644 |
| --- a/lib/radix-tree.c |
| +++ b/lib/radix-tree.c |
| @@ -157,12 +157,13 @@ radix_tree_node_alloc(struct radix_tree_root *root) |
| * succeed in getting a node here (and never reach |
| * kmem_cache_alloc) |
| */ |
| + rtp = &get_cpu_var(radix_tree_preloads); |
| - rtp = &__get_cpu_var(radix_tree_preloads); |
| if (rtp->nr) { |
| ret = rtp->nodes[rtp->nr - 1]; |
| rtp->nodes[rtp->nr - 1] = NULL; |
| rtp->nr--; |
| } |
| + put_cpu_var(radix_tree_preloads); |
| } |
| if (ret == NULL) |
| ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); |
| @@ -195,6 +197,8 @@ radix_tree_node_free(struct radix_tree_node *node) |
| call_rcu(&node->rcu_head, radix_tree_node_rcu_free); |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| + |
| /* |
| * Load up this CPU's radix_tree_node buffer with sufficient objects to |
| * ensure that the addition of a single element in the tree cannot fail. On |
| @@ -230,6 +234,8 @@ out: |
| } |
| EXPORT_SYMBOL(radix_tree_preload); |
| |
| +#endif |
| + |
| /* |
| * Return the maximum key which can be store into a |
| * radix tree with height HEIGHT. |
| -- |
| 1.7.1.1 |
| |