patches-3.8.4-rt1.tar.xz

md5sum:
fb2132c1466f1e2c3fb35a57d512a305  patches-3.8.4-rt1.tar.xz

Announce:
 -----------------------------
 Dear RT Folks,

 I'm pleased to announce the 3.8.4-rt1 release.

 Again, the credit for the heavy lifting goes to Sebastian Siewior, AKA
 bigeasy, who took up most of the work to get this out. He's on my
 company's engineering team and I hope you trust him as much as I do.

 Known issues:

       - SLUB behaves worse than SLAB on ARM
       - SLAB is broken on PowerPC

 Still, we think it's time to get this out for broader testing. It's
 -rt1 and we need your help to get this stabilized.

 The RT patch against 3.8.4 can be found here:

   http://www.kernel.org/pub/linux/kernel/projects/rt/3.8/patch-3.8.4-rt1.patch.xz

 The split quilt queue is available at:

   http://www.kernel.org/pub/linux/kernel/projects/rt/3.8/patches-3.8.4-rt1.tar.xz

 Enjoy,

         tglx
 -----------------------------

http://marc.info/?l=linux-kernel&m=136399022624459&w=2

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch b/patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch
new file mode 100644
index 0000000..cb44c1a
--- /dev/null
+++ b/patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch
@@ -0,0 +1,36 @@
+From db28051c97688cfceaa9a2cea0202af74bb64fdc Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 19 Mar 2013 14:41:04 +0100
+Subject: [PATCH 1/2] kernel/srcu: merge common code into a macro
+
+DEFINE_SRCU() and DEFINE_STATIC_SRCU() do the same thing except for
+the "static" attribute. This patch moves the common pieces into
+_DEFINE_SRCU(), which is used by both of the former macros, either
+adding the "static" attribute or not.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/srcu.h |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/include/linux/srcu.h
++++ b/include/linux/srcu.h
+@@ -102,13 +102,13 @@ void process_srcu(struct work_struct *wo
+  * define and init a srcu struct at build time.
+  * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+  */
+-#define DEFINE_SRCU(name)						\
++#define _DEFINE_SRCU(name, mod)						\
+ 	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+-	struct srcu_struct name = __SRCU_STRUCT_INIT(name);
++	mod struct srcu_struct name =					\
++				__SRCU_STRUCT_INIT(name);
+ 
+-#define DEFINE_STATIC_SRCU(name)					\
+-	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+-	static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
++#define DEFINE_SRCU(name)		_DEFINE_SRCU(name, )
++#define DEFINE_STATIC_SRCU(name)	_DEFINE_SRCU(name, static)
+ 
+ /**
+  * call_srcu() - Queue a callback for invocation after an SRCU grace period
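The macro factoring above is easiest to see in isolation. Below is a minimal,
self-contained sketch of the same idea in plain C (hypothetical names, not the
SRCU internals): a shared helper macro takes an optional storage-class
modifier, and the public macros pass either nothing or "static".

  #include <stdio.h>

  struct counter { int val; };

  /* Common definition; "mod" is either empty or "static". */
  #define _DEFINE_COUNTER(name, mod) \
      mod struct counter name = { .val = 0 }

  #define DEFINE_COUNTER(name)        _DEFINE_COUNTER(name, )
  #define DEFINE_STATIC_COUNTER(name) _DEFINE_COUNTER(name, static)

  DEFINE_COUNTER(global_ctr);         /* external linkage */
  DEFINE_STATIC_COUNTER(file_ctr);    /* file-local */

  int main(void)
  {
      printf("%d %d\n", global_ctr.val, file_ctr.val);
      return 0;
  }
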
diff --git a/patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch b/patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch
new file mode 100644
index 0000000..4a1597d
--- /dev/null
+++ b/patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch
@@ -0,0 +1,83 @@
+From c31a0c052205e3ec24efc3fe18ef70c3e913f2d4 Mon Sep 17 00:00:00 2001
+From: Stephen Warren <swarren@nvidia.com>
+Date: Mon, 11 Feb 2013 14:15:32 -0700
+Subject: [PATCH] of: fix recursive locking in of_get_next_available_child()
+
+of_get_next_available_child() acquires devtree_lock, then calls
+of_device_is_available() which calls of_get_property() which calls
+of_find_property() which tries to re-acquire devtree_lock, thus causing
+deadlock.
+
+To avoid this, create a new __of_device_is_available() which calls
+__of_get_property() instead, which calls __of_find_property(), which
+does not take the lock. Update of_get_next_available_child() to call
+the new __of_device_is_available() since it already owns the lock.
+
+Signed-off-by: Stephen Warren <swarren@nvidia.com>
+Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
+---
+ drivers/of/base.c |   30 +++++++++++++++++++++++++-----
+ 1 file changed, 25 insertions(+), 5 deletions(-)
+
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -307,19 +307,19 @@ int of_machine_is_compatible(const char
+ EXPORT_SYMBOL(of_machine_is_compatible);
+ 
+ /**
+- *  of_device_is_available - check if a device is available for use
++ *  __of_device_is_available - check if a device is available for use
+  *
+- *  @device: Node to check for availability
++ *  @device: Node to check for availability, with locks already held
+  *
+  *  Returns 1 if the status property is absent or set to "okay" or "ok",
+  *  0 otherwise
+  */
+-int of_device_is_available(const struct device_node *device)
++static int __of_device_is_available(const struct device_node *device)
+ {
+ 	const char *status;
+ 	int statlen;
+ 
+-	status = of_get_property(device, "status", &statlen);
++	status = __of_get_property(device, "status", &statlen);
+ 	if (status == NULL)
+ 		return 1;
+ 
+@@ -330,6 +330,26 @@ int of_device_is_available(const struct
+ 
+ 	return 0;
+ }
++
++/**
++ *  of_device_is_available - check if a device is available for use
++ *
++ *  @device: Node to check for availability
++ *
++ *  Returns 1 if the status property is absent or set to "okay" or "ok",
++ *  0 otherwise
++ */
++int of_device_is_available(const struct device_node *device)
++{
++	unsigned long flags;
++	int res;
++
++	raw_spin_lock_irqsave(&devtree_lock, flags);
++	res = __of_device_is_available(device);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
++	return res;
++
++}
+ EXPORT_SYMBOL(of_device_is_available);
+ 
+ /**
+@@ -421,7 +441,7 @@ struct device_node *of_get_next_availabl
+ 	raw_spin_lock(&devtree_lock);
+ 	next = prev ? prev->sibling : node->child;
+ 	for (; next; next = next->sibling) {
+-		if (!of_device_is_available(next))
++		if (!__of_device_is_available(next))
+ 			continue;
+ 		if (of_node_get(next))
+ 			break;
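The fix above uses the usual locked/unlocked helper split. A minimal userspace
sketch of that pattern (a pthread mutex standing in for devtree_lock;
hypothetical names, not the real OF API):

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t devtree_lock = PTHREAD_MUTEX_INITIALIZER;
  static int device_status = 1;

  /* __ variant: caller must already hold devtree_lock. */
  static int __device_is_available(void)
  {
      return device_status;
  }

  /* Public variant: takes and drops the lock itself. */
  static int device_is_available(void)
  {
      int res;

      pthread_mutex_lock(&devtree_lock);
      res = __device_is_available();
      pthread_mutex_unlock(&devtree_lock);
      return res;
  }

  /* An iterator that already holds the lock must use the __ variant;
   * calling the public one here would re-acquire the non-recursive
   * lock and deadlock, which is exactly the bug fixed above. */
  static void walk_children(void)
  {
      pthread_mutex_lock(&devtree_lock);
      if (__device_is_available())
          printf("child is available\n");
      pthread_mutex_unlock(&devtree_lock);
  }

  int main(void)
  {
      walk_children();
      printf("available: %d\n", device_is_available());
      return 0;
  }
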
diff --git a/patches/0002-kernel-SRCU-provide-a-static-initializer.patch b/patches/0002-kernel-SRCU-provide-a-static-initializer.patch
new file mode 100644
index 0000000..3914991
--- /dev/null
+++ b/patches/0002-kernel-SRCU-provide-a-static-initializer.patch
@@ -0,0 +1,100 @@
+From 3f09905a6a65ed4fcf8e664abf044c91b2ce7b27 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 19 Mar 2013 14:44:30 +0100
+Subject: [PATCH 2/2] kernel/SRCU: provide a static initializer
+
+There are static initializer macros for three of the four possible
+notifier types:
+	ATOMIC_NOTIFIER_HEAD()
+	BLOCKING_NOTIFIER_HEAD()
+	RAW_NOTIFIER_HEAD()
+
+This patch provides a static initializer for the fourth type (the SRCU
+notifier head) to make the set complete.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/notifier.h |   26 +++++++++++++++++++++-----
+ include/linux/srcu.h     |    6 +++---
+ 2 files changed, 24 insertions(+), 8 deletions(-)
+
+--- a/include/linux/notifier.h
++++ b/include/linux/notifier.h
+@@ -42,9 +42,7 @@
+  * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
+  * As compensation, srcu_notifier_chain_unregister() is rather expensive.
+  * SRCU notifier chains should be used when the chain will be called very
+- * often but notifier_blocks will seldom be removed.  Also, SRCU notifier
+- * chains are slightly more difficult to use because they require special
+- * runtime initialization.
++ * often but notifier_blocks will seldom be removed.
+  */
+ 
+ struct notifier_block {
+@@ -85,7 +83,7 @@ struct srcu_notifier_head {
+ 		(name)->head = NULL;		\
+ 	} while (0)
+ 
+-/* srcu_notifier_heads must be initialized and cleaned up dynamically */
++/* srcu_notifier_heads must be cleaned up dynamically */
+ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
+ #define srcu_cleanup_notifier_head(name)	\
+ 		cleanup_srcu_struct(&(name)->srcu);
+@@ -98,7 +96,13 @@ extern void srcu_init_notifier_head(stru
+ 		.head = NULL }
+ #define RAW_NOTIFIER_INIT(name)	{				\
+ 		.head = NULL }
+-/* srcu_notifier_heads cannot be initialized statically */
++
++#define SRCU_NOTIFIER_INIT(name, pcpu)				\
++	{							\
++		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
++		.head = NULL,					\
++		.srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu),	\
++	}
+ 
+ #define ATOMIC_NOTIFIER_HEAD(name)				\
+ 	struct atomic_notifier_head name =			\
+@@ -110,6 +114,18 @@ extern void srcu_init_notifier_head(stru
+ 	struct raw_notifier_head name =				\
+ 		RAW_NOTIFIER_INIT(name)
+ 
++#define _SRCU_NOTIFIER_HEAD(name, mod)				\
++	static DEFINE_PER_CPU(struct srcu_struct_array,		\
++			name##_head_srcu_array);		\
++	mod struct srcu_notifier_head name =			\
++			SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
++
++#define SRCU_NOTIFIER_HEAD(name)				\
++	_SRCU_NOTIFIER_HEAD(name, )
++
++#define SRCU_NOTIFIER_HEAD_STATIC(name)				\
++	_SRCU_NOTIFIER_HEAD(name, static)
++
+ #ifdef __KERNEL__
+ 
+ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+--- a/include/linux/srcu.h
++++ b/include/linux/srcu.h
+@@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct
+ 
+ void process_srcu(struct work_struct *work);
+ 
+-#define __SRCU_STRUCT_INIT(name)					\
++#define __SRCU_STRUCT_INIT(name, pcpu_name)				\
+ 	{								\
+ 		.completed = -300,					\
+-		.per_cpu_ref = &name##_srcu_array,			\
++		.per_cpu_ref = &pcpu_name,				\
+ 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
+ 		.running = false,					\
+ 		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
+@@ -105,7 +105,7 @@ void process_srcu(struct work_struct *wo
+ #define _DEFINE_SRCU(name, mod)						\
+ 	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+ 	mod struct srcu_struct name =					\
+-				__SRCU_STRUCT_INIT(name);
++				__SRCU_STRUCT_INIT(name, name##_srcu_array);
+ 
+ #define DEFINE_SRCU(name)		_DEFINE_SRCU(name, )
+ #define DEFINE_STATIC_SRCU(name)	_DEFINE_SRCU(name, static)
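The gain from the static initializer is that an SRCU notifier head no longer
needs a runtime srcu_init_notifier_head() call before it can be used. A rough
userspace analogue of "ready at build time" (hypothetical types, not the
kernel API):

  #include <pthread.h>
  #include <stdio.h>

  struct notifier_head {
      pthread_mutex_t mutex;
      int nr_callbacks;
  };

  /* Build-time initializer: the object is usable immediately,
   * no separate init call required (cf. SRCU_NOTIFIER_HEAD()). */
  #define NOTIFIER_HEAD_INIT \
      { .mutex = PTHREAD_MUTEX_INITIALIZER, .nr_callbacks = 0 }
  #define NOTIFIER_HEAD(name) \
      struct notifier_head name = NOTIFIER_HEAD_INIT

  NOTIFIER_HEAD(my_chain);

  int main(void)
  {
      pthread_mutex_lock(&my_chain.mutex);
      my_chain.nr_callbacks++;
      pthread_mutex_unlock(&my_chain.mutex);
      printf("callbacks: %d\n", my_chain.nr_callbacks);
      return 0;
  }
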
diff --git a/patches/0002-x86-highmem-add-a-already-used-pte-check.patch b/patches/0002-x86-highmem-add-a-already-used-pte-check.patch
new file mode 100644
index 0000000..c6d8735
--- /dev/null
+++ b/patches/0002-x86-highmem-add-a-already-used-pte-check.patch
@@ -0,0 +1,23 @@
+From 65513f34449eedb6b84c24a3583266534c1627e4 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 11 Mar 2013 17:09:55 +0100
+Subject: [PATCH 2/6] x86/highmem: add a "already used pte" check
+
+This is a copy from kmap_atomic_prot().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/x86/mm/iomap_32.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/mm/iomap_32.c
++++ b/arch/x86/mm/iomap_32.c
+@@ -65,6 +65,8 @@ void *kmap_atomic_prot_pfn(unsigned long
+ 	type = kmap_atomic_idx_push();
+ 	idx = type + KM_TYPE_NR * smp_processor_id();
+ 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++	WARN_ON(!pte_none(*(kmap_pte - idx)));
++
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ 	current->kmap_pte[type] = pte;
+ #endif
diff --git a/patches/0003-arm-highmem-flush-tlb-on-unmap.patch b/patches/0003-arm-highmem-flush-tlb-on-unmap.patch
new file mode 100644
index 0000000..71a8b20
--- /dev/null
+++ b/patches/0003-arm-highmem-flush-tlb-on-unmap.patch
@@ -0,0 +1,28 @@
+From e2ca4d092d9c6e6b07b465b4d81da207bbcc7437 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 11 Mar 2013 21:37:27 +0100
+Subject: [PATCH 3/6] arm/highmem: flush tlb on unmap
+
+The TLB should be flushed on unmap so that the mapping entry becomes
+invalid. Currently this is only done in the debug case, which does not
+look right.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm/mm/highmem.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/mm/highmem.c
++++ b/arch/arm/mm/highmem.c
+@@ -95,10 +95,10 @@ void __kunmap_atomic(void *kvaddr)
+ 			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+ #ifdef CONFIG_DEBUG_HIGHMEM
+ 		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+-		set_top_pte(vaddr, __pte(0));
+ #else
+ 		(void) idx;  /* to kill a warning */
+ #endif
++		set_top_pte(vaddr, __pte(0));
+ 		kmap_atomic_idx_pop();
+ 	} else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
+ 		/* this address was obtained through kmap_high_get() */
diff --git a/patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch b/patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch
new file mode 100644
index 0000000..8f6323f
--- /dev/null
+++ b/patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch
@@ -0,0 +1,44 @@
+From eef09918aff670a6162d2ae5fe87b393698ef57d Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 1 Mar 2013 11:17:42 +0100
+Subject: [PATCH 5/6] futex: Ensure lock/unlock symetry versus pi_lock and
+ hash bucket lock
+
+In exit_pi_state_list() we have the following locking construct:
+
+   spin_lock(&hb->lock);
+   raw_spin_lock_irq(&curr->pi_lock);
+
+   ...
+   spin_unlock(&hb->lock);
+
+In !RT this works, but on RT the migrate_enable() function, which is
+called from spin_unlock(), sees atomic context due to the held pi_lock
+and just decrements the migrate_disable_atomic counter of the
+task. The next call to migrate_disable() then sees the counter being
+negative and issues a warning. That check should be in
+migrate_enable() already.
+
+Fix this by dropping pi_lock before unlocking hb->lock and reacquiring
+pi_lock afterwards. This is safe as the loop code reevaluates
+head again under the pi_lock.
+
+Reported-by: Yong Zhang <yong.zhang@windriver.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/futex.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -568,7 +568,9 @@ void exit_pi_state_list(struct task_stru
+ 		 * task still owns the PI-state:
+ 		 */
+ 		if (head->next != next) {
++			raw_spin_unlock_irq(&curr->pi_lock);
+ 			spin_unlock(&hb->lock);
++			raw_spin_lock_irq(&curr->pi_lock);
+ 			continue;
+ 		}
+ 
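The lock/unlock symmetry restored by the hunk above can be sketched in
userspace as follows (pthread mutexes standing in for hb->lock and pi_lock; a
simplified, hypothetical model of exit_pi_state_list(), not the real futex
code):

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t hb_lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t pi_lock = PTHREAD_MUTEX_INITIALIZER;

  static void exit_state_list(void)
  {
      int retries = 2;    /* pretend the list changes under us twice */

      pthread_mutex_lock(&pi_lock);
      while (1) {
          /* drop the inner lock before taking the outer one */
          pthread_mutex_unlock(&pi_lock);
          pthread_mutex_lock(&hb_lock);
          pthread_mutex_lock(&pi_lock);

          if (retries-- > 0) {
              /* list changed under us: retry.  Release the inner lock
               * before the outer one, so hb_lock is never unlocked
               * while pi_lock is still held, then re-take pi_lock for
               * the next iteration -- the ordering the fix restores. */
              pthread_mutex_unlock(&pi_lock);
              pthread_mutex_unlock(&hb_lock);
              pthread_mutex_lock(&pi_lock);
              continue;
          }
          break;
      }
      /* both locks held here; drop them in reverse order */
      pthread_mutex_unlock(&pi_lock);
      pthread_mutex_unlock(&hb_lock);
      printf("done\n");
  }

  int main(void)
  {
      exit_state_list();
      return 0;
  }
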
diff --git a/patches/HACK-printk-drop-the-logbuf_lock-more-often.patch b/patches/HACK-printk-drop-the-logbuf_lock-more-often.patch
new file mode 100644
index 0000000..38da041
--- /dev/null
+++ b/patches/HACK-printk-drop-the-logbuf_lock-more-often.patch
@@ -0,0 +1,77 @@
+From b72b514282ffad0d665ea94932b968f388304079 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 21 Mar 2013 19:01:05 +0100
+Subject: [PATCH] HACK: printk: drop the logbuf_lock more often
+
+The lock is held with irqs off. The latency drops by 500us+ on my ARM
+box with a "full" buffer after executing "dmesg" on the shell.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/printk.c |   27 ++++++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -1072,6 +1072,7 @@ static int syslog_print_all(char __user
+ {
+ 	char *text;
+ 	int len = 0;
++	int attempts = 0;
+ 
+ 	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ 	if (!text)
+@@ -1083,7 +1084,14 @@ static int syslog_print_all(char __user
+ 		u64 seq;
+ 		u32 idx;
+ 		enum log_flags prev;
+-
++		int num_msg;
++try_again:
++		attempts++;
++		if (attempts > 10) {
++			len = -EBUSY;
++			goto out;
++		}
++		num_msg = 0;
+ 		if (clear_seq < log_first_seq) {
+ 			/* messages are gone, move to first available one */
+ 			clear_seq = log_first_seq;
+@@ -1104,6 +1112,14 @@ static int syslog_print_all(char __user
+ 			prev = msg->flags;
+ 			idx = log_next(idx);
+ 			seq++;
++			num_msg++;
++			if (num_msg > 5) {
++				num_msg = 0;
++				raw_spin_unlock_irq(&logbuf_lock);
++				raw_spin_lock_irq(&logbuf_lock);
++				if (clear_seq < log_first_seq)
++					goto try_again;
++			}
+ 		}
+ 
+ 		/* move first record forward until length fits into the buffer */
+@@ -1117,6 +1133,14 @@ static int syslog_print_all(char __user
+ 			prev = msg->flags;
+ 			idx = log_next(idx);
+ 			seq++;
++			num_msg++;
++			if (num_msg > 5) {
++				num_msg = 0;
++				raw_spin_unlock_irq(&logbuf_lock);
++				raw_spin_lock_irq(&logbuf_lock);
++				if (clear_seq < log_first_seq)
++					goto try_again;
++			}
+ 		}
+ 
+ 		/* last message fitting into this dump */
+@@ -1158,6 +1182,7 @@ static int syslog_print_all(char __user
+ 		clear_seq = log_next_seq;
+ 		clear_idx = log_next_idx;
+ 	}
++out:
+ 	raw_spin_unlock_irq(&logbuf_lock);
+ 
+ 	kfree(text);
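The shape of the hack is: copy a few records, drop the lock, re-take it, and
restart from scratch if the buffer moved underneath us. A minimal userspace
sketch of that loop (hypothetical names, a pthread mutex instead of the
irq-off raw spinlock):

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t logbuf_lock = PTHREAD_MUTEX_INITIALIZER;
  static int log_first_seq;    /* oldest record still in the buffer */

  static int copy_all_records(void)
  {
      int seq, num_msg, attempts = 0;

  try_again:
      if (++attempts > 10)
          return -1;    /* give up, like the -EBUSY in the patch */

      pthread_mutex_lock(&logbuf_lock);
      num_msg = 0;
      for (seq = log_first_seq; seq < log_first_seq + 1000; seq++) {
          /* ... copy one record while holding the lock ... */
          if (++num_msg > 5) {
              /* drop the lock briefly so writers (and, on RT, a
               * preempted printk path) can make progress */
              num_msg = 0;
              pthread_mutex_unlock(&logbuf_lock);
              pthread_mutex_lock(&logbuf_lock);
              if (seq < log_first_seq) {
                  /* records were overwritten meanwhile: restart */
                  pthread_mutex_unlock(&logbuf_lock);
                  goto try_again;
              }
          }
      }
      pthread_mutex_unlock(&logbuf_lock);
      return 0;
  }

  int main(void)
  {
      printf("copy_all_records() = %d\n", copy_all_records());
      return 0;
  }
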
diff --git a/patches/acpi-use-local-irq-nort.patch b/patches/acpi-use-local-irq-nort.patch
new file mode 100644
index 0000000..f1dcb8c
--- /dev/null
+++ b/patches/acpi-use-local-irq-nort.patch
@@ -0,0 +1,25 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 21 Jul 2009 22:54:51 +0200
+Subject: acpi: Do not disable interrupts on PREEMPT_RT
+
+Use the local_irq_*_nort() variants.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/include/asm/acpi.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/acpi.h
++++ b/arch/x86/include/asm/acpi.h
+@@ -51,8 +51,8 @@
+ 
+ #define ACPI_ASM_MACROS
+ #define BREAKPOINT3
+-#define ACPI_DISABLE_IRQS() local_irq_disable()
+-#define ACPI_ENABLE_IRQS()  local_irq_enable()
++#define ACPI_DISABLE_IRQS() local_irq_disable_nort()
++#define ACPI_ENABLE_IRQS()  local_irq_enable_nort()
+ #define ACPI_FLUSH_CPU_CACHE()	wbinvd()
+ 
+ int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/patches/arch-use-pagefault-disabled.patch b/patches/arch-use-pagefault-disabled.patch
new file mode 100644
index 0000000..961cc0c
--- /dev/null
+++ b/patches/arch-use-pagefault-disabled.patch
@@ -0,0 +1,286 @@
+Subject: mm: Fixup all fault handlers to check current->pagefault_disable
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 17 Mar 2011 11:32:28 +0100
+
+Necessary for decoupling pagefault disable from preempt count.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/alpha/mm/fault.c      |    2 +-
+ arch/arm/mm/fault.c        |    2 +-
+ arch/avr32/mm/fault.c      |    3 ++-
+ arch/cris/mm/fault.c       |    2 +-
+ arch/frv/mm/fault.c        |    2 +-
+ arch/ia64/mm/fault.c       |    2 +-
+ arch/m32r/mm/fault.c       |    2 +-
+ arch/m68k/mm/fault.c       |    2 +-
+ arch/microblaze/mm/fault.c |    2 +-
+ arch/mips/mm/fault.c       |    2 +-
+ arch/mn10300/mm/fault.c    |    2 +-
+ arch/parisc/mm/fault.c     |    2 +-
+ arch/powerpc/mm/fault.c    |    2 +-
+ arch/s390/mm/fault.c       |    6 ++++--
+ arch/score/mm/fault.c      |    2 +-
+ arch/sh/mm/fault.c         |    2 +-
+ arch/sparc/mm/fault_32.c   |    2 +-
+ arch/sparc/mm/fault_64.c   |    2 +-
+ arch/tile/mm/fault.c       |    2 +-
+ arch/um/kernel/trap.c      |    2 +-
+ arch/x86/mm/fault.c        |    2 +-
+ arch/xtensa/mm/fault.c     |    2 +-
+ 22 files changed, 26 insertions(+), 23 deletions(-)
+
+--- a/arch/alpha/mm/fault.c
++++ b/arch/alpha/mm/fault.c
+@@ -108,7 +108,7 @@ do_page_fault(unsigned long address, uns
+ 
+ 	/* If we're in an interrupt context, or have no user context,
+ 	   we must not take the fault.  */
+-	if (!mm || in_atomic())
++	if (!mm || in_atomic() || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ #ifdef CONFIG_ALPHA_LARGE_VMALLOC
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -279,7 +279,7 @@ do_page_fault(unsigned long addr, unsign
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ 	/*
+--- a/arch/avr32/mm/fault.c
++++ b/arch/avr32/mm/fault.c
+@@ -81,7 +81,8 @@ asmlinkage void do_page_fault(unsigned l
+ 	 * If we're in an interrupt or have no user context, we must
+ 	 * not take the fault...
+ 	 */
+-	if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
++	if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM) ||
++	    current->pagefault_disabled)
+ 		goto no_context;
+ 
+ 	local_irq_enable();
+--- a/arch/cris/mm/fault.c
++++ b/arch/cris/mm/fault.c
+@@ -114,7 +114,7 @@ do_page_fault(unsigned long address, str
+ 	 * user context, we must not take the fault.
+ 	 */
+ 
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ retry:
+--- a/arch/frv/mm/fault.c
++++ b/arch/frv/mm/fault.c
+@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datamm
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ 	down_read(&mm->mmap_sem);
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -98,7 +98,7 @@ ia64_do_page_fault (unsigned long addres
+ 	/*
+ 	 * If we're in an interrupt or have no user context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ #ifdef CONFIG_VIRTUAL_MEM_MAP
+--- a/arch/m32r/mm/fault.c
++++ b/arch/m32r/mm/fault.c
+@@ -114,7 +114,7 @@ asmlinkage void do_page_fault(struct pt_
+ 	 * If we're in an interrupt or have no user context or are running in an
+ 	 * atomic region then we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto bad_area_nosemaphore;
+ 
+ 	/* When running in the kernel we expect faults to occur only to
+--- a/arch/m68k/mm/fault.c
++++ b/arch/m68k/mm/fault.c
+@@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs,
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ retry:
+--- a/arch/microblaze/mm/fault.c
++++ b/arch/microblaze/mm/fault.c
+@@ -108,7 +108,7 @@ void do_page_fault(struct pt_regs *regs,
+ 	if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
+ 		is_write = 0;
+ 
+-	if (unlikely(in_atomic() || !mm)) {
++	if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
+ 		if (kernel_mode(regs))
+ 			goto bad_area_nosemaphore;
+ 
+--- a/arch/mips/mm/fault.c
++++ b/arch/mips/mm/fault.c
+@@ -89,7 +89,7 @@ asmlinkage void __kprobes do_page_fault(
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto bad_area_nosemaphore;
+ 
+ retry:
+--- a/arch/mn10300/mm/fault.c
++++ b/arch/mn10300/mm/fault.c
+@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ retry:
+--- a/arch/parisc/mm/fault.c
++++ b/arch/parisc/mm/fault.c
+@@ -176,7 +176,7 @@ void do_page_fault(struct pt_regs *regs,
+ 	unsigned long acc_type;
+ 	int fault;
+ 
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ 	down_read(&mm->mmap_sem);
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -259,7 +259,7 @@ int __kprobes do_page_fault(struct pt_re
+ 	if (!arch_irq_disabled_regs(regs))
+ 		local_irq_enable();
+ 
+-	if (in_atomic() || mm == NULL) {
++	if (in_atomic() || mm == NULL || current->pagefault_disabled) {
+ 		if (!user_mode(regs))
+ 			return SIGSEGV;
+ 		/* in_atomic() in user mode is really bad,
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -296,7 +296,8 @@ static inline int do_exception(struct pt
+ 	 * user context.
+ 	 */
+ 	fault = VM_FAULT_BADCONTEXT;
+-	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
++	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
++		    tsk->pagefault_disabled))
+ 		goto out;
+ 
+ 	address = trans_exc_code & __FAIL_ADDR_MASK;
+@@ -435,7 +436,8 @@ void __kprobes do_asce_exception(struct
+ 	clear_tsk_thread_flag(current, TIF_PER_TRAP);
+ 
+ 	trans_exc_code = regs->int_parm_long;
+-	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
++	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
++		     current->pagefault_disabled))
+ 		goto no_context;
+ 
+ 	down_read(&mm->mmap_sem);
+--- a/arch/score/mm/fault.c
++++ b/arch/score/mm/fault.c
+@@ -72,7 +72,7 @@ asmlinkage void do_page_fault(struct pt_
+ 	* If we're in an interrupt or have no user
+ 	* context, we must not take the fault..
+ 	*/
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto bad_area_nosemaphore;
+ 
+ 	down_read(&mm->mmap_sem);
+--- a/arch/sh/mm/fault.c
++++ b/arch/sh/mm/fault.c
+@@ -440,7 +440,7 @@ asmlinkage void __kprobes do_page_fault(
+ 	 * If we're in an interrupt, have no user context or are running
+ 	 * in an atomic region then we must not take the fault:
+ 	 */
+-	if (unlikely(in_atomic() || !mm)) {
++	if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
+ 		bad_area_nosemaphore(regs, error_code, address);
+ 		return;
+ 	}
+--- a/arch/sparc/mm/fault_32.c
++++ b/arch/sparc/mm/fault_32.c
+@@ -200,7 +200,7 @@ asmlinkage void do_sparc_fault(struct pt
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto no_context;
+ 
+ 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+--- a/arch/sparc/mm/fault_64.c
++++ b/arch/sparc/mm/fault_64.c
+@@ -321,7 +321,7 @@ asmlinkage void __kprobes do_sparc64_fau
+ 	 * If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm)
++	if (in_atomic() || !mm || current->pagefault_disabled)
+ 		goto intr_or_no_mm;
+ 
+ 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+--- a/arch/tile/mm/fault.c
++++ b/arch/tile/mm/fault.c
+@@ -360,7 +360,7 @@ static int handle_page_fault(struct pt_r
+ 	 * If we're in an interrupt, have no user context or are running in an
+ 	 * atomic region then we must not take the fault.
+ 	 */
+-	if (in_atomic() || !mm) {
++	if (in_atomic() || !mm || current->pagefault_disabled) {
+ 		vma = NULL;  /* happy compiler */
+ 		goto bad_area_nosemaphore;
+ 	}
+--- a/arch/um/kernel/trap.c
++++ b/arch/um/kernel/trap.c
+@@ -39,7 +39,7 @@ int handle_page_fault(unsigned long addr
+ 	 * If the fault was during atomic operation, don't take the fault, just
+ 	 * fail.
+ 	 */
+-	if (in_atomic())
++	if (in_atomic() || current->pagefault_disabled)
+ 		goto out_nosemaphore;
+ 
+ retry:
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -1108,7 +1108,7 @@ __do_page_fault(struct pt_regs *regs, un
+ 	 * If we're in an interrupt, have no user context or are running
+ 	 * in an atomic region then we must not take the fault:
+ 	 */
+-	if (unlikely(in_atomic() || !mm)) {
++	if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
+ 		bad_area_nosemaphore(regs, error_code, address);
+ 		return;
+ 	}
+--- a/arch/xtensa/mm/fault.c
++++ b/arch/xtensa/mm/fault.c
+@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
+ 	/* If we're in an interrupt or have no user
+ 	 * context, we must not take the fault..
+ 	 */
+-	if (in_atomic() || !mm) {
++	if (in_atomic() || !mm || current->pagefault_disabled) {
+ 		bad_page_fault(regs, address, SIGSEGV);
+ 		return;
+ 	}
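Every hunk above adds the same guard. A trivial sketch of the resulting
decision (plain C with a hypothetical task structure; the real check lives in
each architecture's fault handler as shown):

  #include <stdio.h>
  #include <stdbool.h>

  struct task {
      int pagefault_disabled;    /* set by pagefault_disable() */
  };

  static struct task current_task;
  #define current (&current_task)

  static bool in_atomic(void) { return false; }    /* stub */

  /* Fault handlers bail out to the no-context/fixup path when either
   * the preempt count says "atomic" or the task itself disabled page
   * faults.  Once pagefault_disable() no longer touches the preempt
   * count, the second test is what keeps those regions safe. */
  static bool must_not_take_fault(void)
  {
      return in_atomic() || current->pagefault_disabled;
  }

  int main(void)
  {
      current->pagefault_disabled = 1;
      printf("handle fault? %s\n", must_not_take_fault() ? "no" : "yes");
      return 0;
  }
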
diff --git a/patches/arm-allow-irq-threading.patch b/patches/arm-allow-irq-threading.patch
new file mode 100644
index 0000000..ad9fa31
--- /dev/null
+++ b/patches/arm-allow-irq-threading.patch
@@ -0,0 +1,22 @@
+Subject: arm: Allow forced irq threading
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Jul 2011 13:15:20 +0200
+
+All timer interrupts and the perf interrupt are marked NO_THREAD, so
+it's safe to allow forced interrupt threading.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -17,6 +17,7 @@ config ARM
+ 	select GENERIC_STRNCPY_FROM_USER
+ 	select GENERIC_STRNLEN_USER
+ 	select HARDIRQS_SW_RESEND
++	select IRQ_FORCED_THREADING
+ 	select HAVE_AOUT
+ 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+ 	select HAVE_ARCH_KGDB
diff --git a/patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch b/patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch
new file mode 100644
index 0000000..38f059e
--- /dev/null
+++ b/patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch
@@ -0,0 +1,54 @@
+From: Benedikt Spranger <b.spranger@linutronix.de>
+Date: Sat, 6 Mar 2010 17:47:10 +0100
+Subject: ARM: AT91: PIT: Remove irq handler when clock event is unused
+
+Set up and remove the interrupt handler in clock event mode selection.
+This avoids calling the (shared) interrupt handler when the device is
+not used.
+
+Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/arm/mach-at91/at91rm9200_time.c  |    1 +
+ arch/arm/mach-at91/at91sam926x_time.c |    5 ++++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/mach-at91/at91rm9200_time.c
++++ b/arch/arm/mach-at91/at91rm9200_time.c
+@@ -134,6 +134,7 @@ clkevt32k_mode(enum clock_event_mode mod
+ 		break;
+ 	case CLOCK_EVT_MODE_SHUTDOWN:
+ 	case CLOCK_EVT_MODE_UNUSED:
++		remove_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
+ 	case CLOCK_EVT_MODE_RESUME:
+ 		irqmask = 0;
+ 		break;
+--- a/arch/arm/mach-at91/at91sam926x_time.c
++++ b/arch/arm/mach-at91/at91sam926x_time.c
+@@ -77,7 +77,7 @@ static struct clocksource pit_clk = {
+ 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+ };
+ 
+-
++static struct irqaction at91sam926x_pit_irq;
+ /*
+  * Clockevent device:  interrupts every 1/HZ (== pit_cycles * MCK/16)
+  */
+@@ -86,6 +86,8 @@ pit_clkevt_mode(enum clock_event_mode mo
+ {
+ 	switch (mode) {
+ 	case CLOCK_EVT_MODE_PERIODIC:
++		/* Set up irq handler */
++		setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
+ 		/* update clocksource counter */
+ 		pit_cnt += pit_cycle * PIT_PICNT(pit_read(AT91_PIT_PIVR));
+ 		pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN
+@@ -98,6 +100,7 @@ pit_clkevt_mode(enum clock_event_mode mo
+ 	case CLOCK_EVT_MODE_UNUSED:
+ 		/* disable irq, leaving the clocksource active */
+ 		pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN);
++		remove_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
+ 		break;
+ 	case CLOCK_EVT_MODE_RESUME:
+ 		break;
diff --git a/patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch b/patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch
new file mode 100644
index 0000000..b649cd4
--- /dev/null
+++ b/patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch
@@ -0,0 +1,32 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 1 May 2010 18:29:35 +0200
+Subject: ARM: at91: tclib: Default to tclib timer for RT
+
+RT is not too happy about the shared timer interrupt in AT91
+devices. Default to tclib timer for RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/misc/Kconfig |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -63,6 +63,7 @@ config ATMEL_PWM
+ config ATMEL_TCLIB
+ 	bool "Atmel AT32/AT91 Timer/Counter Library"
+ 	depends on (AVR32 || ARCH_AT91)
++	default y if PREEMPT_RT_FULL
+ 	help
+ 	  Select this if you want a library to allocate the Timer/Counter
+ 	  blocks found on many Atmel processors.  This facilitates using
+@@ -95,7 +96,7 @@ config ATMEL_TCB_CLKSRC_BLOCK
+ config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ 	bool "TC Block use 32 KiHz clock"
+ 	depends on ATMEL_TCB_CLKSRC
+-	default y
++	default y if !PREEMPT_RT_FULL
+ 	help
+ 	  Select this to use 32 KiHz base clock rate as TC block clock
+ 	  source for clock events.
diff --git a/patches/arm-convert-boot-lock-to-raw.patch b/patches/arm-convert-boot-lock-to-raw.patch
new file mode 100644
index 0000000..91be8d9
--- /dev/null
+++ b/patches/arm-convert-boot-lock-to-raw.patch
@@ -0,0 +1,279 @@
+Subject: preempt-rt: Convert arm boot_lock to raw
+From: Frank Rowand <frank.rowand@am.sony.com>
+Date: Mon, 19 Sep 2011 14:51:14 -0700
+
+
+The arm boot_lock is used by the secondary processor startup code.  The locking
+task is the idle thread, which has idle->sched_class == &idle_sched_class.
+idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
+lock, the attempt to wake it when the lock becomes available will fail:
+
+try_to_wake_up()
+   ...
+      activate_task()
+         enqueue_task()
+            p->sched_class->enqueue_task(rq, p, flags)
+
+Fix by converting boot_lock to a raw spin lock.
+
+Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
+Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/mach-exynos/platsmp.c    |   12 ++++++------
+ arch/arm/mach-msm/platsmp.c       |   10 +++++-----
+ arch/arm/mach-omap2/omap-smp.c    |   10 +++++-----
+ arch/arm/mach-spear13xx/platsmp.c |   10 +++++-----
+ arch/arm/mach-ux500/platsmp.c     |   10 +++++-----
+ arch/arm/plat-versatile/platsmp.c |   10 +++++-----
+ 6 files changed, 31 insertions(+), 31 deletions(-)
+
+--- a/arch/arm/mach-exynos/platsmp.c
++++ b/arch/arm/mach-exynos/platsmp.c
+@@ -71,7 +71,7 @@ static void __iomem *scu_base_addr(void)
+ 	return (void __iomem *)(S5P_VA_SCU);
+ }
+ 
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+ 
+ static void __cpuinit exynos_secondary_init(unsigned int cpu)
+ {
+@@ -91,8 +91,8 @@ static void __cpuinit exynos_secondary_i
+ 	/*
+ 	 * Synchronise with the boot thread.
+ 	 */
+-	spin_lock(&boot_lock);
+-	spin_unlock(&boot_lock);
++	raw_spin_lock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ }
+ 
+ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -104,7 +104,7 @@ static int __cpuinit exynos_boot_seconda
+ 	 * Set synchronisation state between this boot processor
+ 	 * and the secondary one
+ 	 */
+-	spin_lock(&boot_lock);
++	raw_spin_lock(&boot_lock);
+ 
+ 	/*
+ 	 * The secondary processor is waiting to be released from
+@@ -133,7 +133,7 @@ static int __cpuinit exynos_boot_seconda
+ 
+ 		if (timeout == 0) {
+ 			printk(KERN_ERR "cpu1 power enable failed");
+-			spin_unlock(&boot_lock);
++			raw_spin_unlock(&boot_lock);
+ 			return -ETIMEDOUT;
+ 		}
+ 	}
+@@ -161,7 +161,7 @@ static int __cpuinit exynos_boot_seconda
+ 	 * now the secondary core is starting up let it run its
+ 	 * calibrations, then wait for it to finish
+ 	 */
+-	spin_unlock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ 
+ 	return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/mach-msm/platsmp.c
++++ b/arch/arm/mach-msm/platsmp.c
+@@ -31,7 +31,7 @@
+ 
+ extern void msm_secondary_startup(void);
+ 
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+ 
+ static inline int get_core_count(void)
+ {
+@@ -58,8 +58,8 @@ static void __cpuinit msm_secondary_init
+ 	/*
+ 	 * Synchronise with the boot thread.
+ 	 */
+-	spin_lock(&boot_lock);
+-	spin_unlock(&boot_lock);
++	raw_spin_lock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ }
+ 
+ static __cpuinit void prepare_cold_cpu(unsigned int cpu)
+@@ -96,7 +96,7 @@ static int __cpuinit msm_boot_secondary(
+ 	 * set synchronisation state between this boot processor
+ 	 * and the secondary one
+ 	 */
+-	spin_lock(&boot_lock);
++	raw_spin_lock(&boot_lock);
+ 
+ 	/*
+ 	 * The secondary processor is waiting to be released from
+@@ -130,7 +130,7 @@ static int __cpuinit msm_boot_secondary(
+ 	 * now the secondary core is starting up let it run its
+ 	 * calibrations, then wait for it to finish
+ 	 */
+-	spin_unlock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ 
+ 	return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/mach-omap2/omap-smp.c
++++ b/arch/arm/mach-omap2/omap-smp.c
+@@ -45,7 +45,7 @@ u16 pm44xx_errata;
+ /* SCU base address */
+ static void __iomem *scu_base;
+ 
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+ 
+ void __iomem *omap4_get_scu_base(void)
+ {
+@@ -76,8 +76,8 @@ static void __cpuinit omap4_secondary_in
+ 	/*
+ 	 * Synchronise with the boot thread.
+ 	 */
+-	spin_lock(&boot_lock);
+-	spin_unlock(&boot_lock);
++	raw_spin_lock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ }
+ 
+ static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -90,7 +90,7 @@ static int __cpuinit omap4_boot_secondar
+ 	 * Set synchronisation state between this boot processor
+ 	 * and the secondary one
+ 	 */
+-	spin_lock(&boot_lock);
++	raw_spin_lock(&boot_lock);
+ 
+ 	/*
+ 	 * Update the AuxCoreBoot0 with boot state for secondary core.
+@@ -163,7 +163,7 @@ static int __cpuinit omap4_boot_secondar
+ 	 * Now the secondary core is starting up let it run its
+ 	 * calibrations, then wait for it to finish
+ 	 */
+-	spin_unlock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ 
+ 	return 0;
+ }
+--- a/arch/arm/mach-spear13xx/platsmp.c
++++ b/arch/arm/mach-spear13xx/platsmp.c
+@@ -21,7 +21,7 @@
+ #include <mach/spear.h>
+ #include <mach/generic.h>
+ 
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+ 
+ static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
+ 
+@@ -44,8 +44,8 @@ static void __cpuinit spear13xx_secondar
+ 	/*
+ 	 * Synchronise with the boot thread.
+ 	 */
+-	spin_lock(&boot_lock);
+-	spin_unlock(&boot_lock);
++	raw_spin_lock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ }
+ 
+ static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -56,7 +56,7 @@ static int __cpuinit spear13xx_boot_seco
+ 	 * set synchronisation state between this boot processor
+ 	 * and the secondary one
+ 	 */
+-	spin_lock(&boot_lock);
++	raw_spin_lock(&boot_lock);
+ 
+ 	/*
+ 	 * The secondary processor is waiting to be released from
+@@ -83,7 +83,7 @@ static int __cpuinit spear13xx_boot_seco
+ 	 * now the secondary core is starting up let it run its
+ 	 * calibrations, then wait for it to finish
+ 	 */
+-	spin_unlock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ 
+ 	return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/mach-ux500/platsmp.c
++++ b/arch/arm/mach-ux500/platsmp.c
+@@ -50,7 +50,7 @@ static void __iomem *scu_base_addr(void)
+ 	return NULL;
+ }
+ 
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+ 
+ static void __cpuinit ux500_secondary_init(unsigned int cpu)
+ {
+@@ -70,8 +70,8 @@ static void __cpuinit ux500_secondary_in
+ 	/*
+ 	 * Synchronise with the boot thread.
+ 	 */
+-	spin_lock(&boot_lock);
+-	spin_unlock(&boot_lock);
++	raw_spin_lock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ }
+ 
+ static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -82,7 +82,7 @@ static int __cpuinit ux500_boot_secondar
+ 	 * set synchronisation state between this boot processor
+ 	 * and the secondary one
+ 	 */
+-	spin_lock(&boot_lock);
++	raw_spin_lock(&boot_lock);
+ 
+ 	/*
+ 	 * The secondary processor is waiting to be released from
+@@ -103,7 +103,7 @@ static int __cpuinit ux500_boot_secondar
+ 	 * now the secondary core is starting up let it run its
+ 	 * calibrations, then wait for it to finish
+ 	 */
+-	spin_unlock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ 
+ 	return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/plat-versatile/platsmp.c
++++ b/arch/arm/plat-versatile/platsmp.c
+@@ -32,7 +32,7 @@ static void __cpuinit write_pen_release(
+ 	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+ }
+ 
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+ 
+ void __cpuinit versatile_secondary_init(unsigned int cpu)
+ {
+@@ -52,8 +52,8 @@ void __cpuinit versatile_secondary_init(
+ 	/*
+ 	 * Synchronise with the boot thread.
+ 	 */
+-	spin_lock(&boot_lock);
+-	spin_unlock(&boot_lock);
++	raw_spin_lock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ }
+ 
+ int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -64,7 +64,7 @@ int __cpuinit versatile_boot_secondary(u
+ 	 * Set synchronisation state between this boot processor
+ 	 * and the secondary one
+ 	 */
+-	spin_lock(&boot_lock);
++	raw_spin_lock(&boot_lock);
+ 
+ 	/*
+ 	 * This is really belt and braces; we hold unintended secondary
+@@ -94,7 +94,7 @@ int __cpuinit versatile_boot_secondary(u
+ 	 * now the secondary core is starting up let it run its
+ 	 * calibrations, then wait for it to finish
+ 	 */
+-	spin_unlock(&boot_lock);
++	raw_spin_unlock(&boot_lock);
+ 
+ 	return pen_release != -1 ? -ENOSYS : 0;
+ }
diff --git a/patches/arm-disable-highmem-on-rt.patch b/patches/arm-disable-highmem-on-rt.patch
new file mode 100644
index 0000000..344b381
--- /dev/null
+++ b/patches/arm-disable-highmem-on-rt.patch
@@ -0,0 +1,20 @@
+Subject: arm-disable-highmem-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 17:09:28 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1752,7 +1752,7 @@ config HAVE_ARCH_PFN_VALID
+ 
+ config HIGHMEM
+ 	bool "High Memory Support"
+-	depends on MMU
++	depends on MMU && !PREEMPT_RT_FULL
+ 	help
+ 	  The address space of ARM processors is only 4 Gigabytes large
+ 	  and it has to accommodate user address space, kernel address
diff --git a/patches/arm-enable-highmem-for-rt.patch b/patches/arm-enable-highmem-for-rt.patch
new file mode 100644
index 0000000..e803c17
--- /dev/null
+++ b/patches/arm-enable-highmem-for-rt.patch
@@ -0,0 +1,140 @@
+Subject: arm-enable-highmem-for-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 13 Feb 2013 11:03:11 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig                 |    2 -
+ arch/arm/include/asm/switch_to.h |    9 ++++++++
+ arch/arm/mm/highmem.c            |   41 +++++++++++++++++++++++++++++++++++++--
+ include/linux/highmem.h          |    1 
+ 4 files changed, 50 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1752,7 +1752,7 @@ config HAVE_ARCH_PFN_VALID
+ 
+ config HIGHMEM
+ 	bool "High Memory Support"
+-	depends on MMU && !PREEMPT_RT_FULL
++	depends on MMU
+ 	help
+ 	  The address space of ARM processors is only 4 Gigabytes large
+ 	  and it has to accommodate user address space, kernel address
+--- a/arch/arm/include/asm/switch_to.h
++++ b/arch/arm/include/asm/switch_to.h
+@@ -3,6 +3,14 @@
+ 
+ #include <linux/thread_info.h>
+ 
++#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
++void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
++#else
++static inline void
++switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
++#endif
++
++
+ /*
+  * switch_to(prev, next) should switch from task `prev' to `next'
+  * `prev' will never be the same as `next'.  schedule() itself
+@@ -12,6 +20,7 @@ extern struct task_struct *__switch_to(s
+ 
+ #define switch_to(prev,next,last)					\
+ do {									\
++	switch_kmaps(prev, next);					\
+ 	last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));	\
+ } while (0)
+ 
+--- a/arch/arm/mm/highmem.c
++++ b/arch/arm/mm/highmem.c
+@@ -38,6 +38,7 @@ EXPORT_SYMBOL(kunmap);
+ 
+ void *kmap_atomic(struct page *page)
+ {
++	pte_t pte = mk_pte(page, kmap_prot);
+ 	unsigned int idx;
+ 	unsigned long vaddr;
+ 	void *kmap;
+@@ -76,7 +77,10 @@ void *kmap_atomic(struct page *page)
+ 	 * in place, so the contained TLB flush ensures the TLB is updated
+ 	 * with the new mapping.
+ 	 */
+-	set_top_pte(vaddr, mk_pte(page, kmap_prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++	current->kmap_pte[type] = pte;
++#endif
++	set_top_pte(vaddr, pte);
+ 
+ 	return (void *)vaddr;
+ }
+@@ -93,6 +97,9 @@ void __kunmap_atomic(void *kvaddr)
+ 
+ 		if (cache_is_vivt())
+ 			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
++#ifdef CONFIG_PREEMPT_RT_FULL
++		current->kmap_pte[type] = __pte(0);
++#endif
+ #ifdef CONFIG_DEBUG_HIGHMEM
+ 		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ #else
+@@ -110,6 +117,7 @@ EXPORT_SYMBOL(__kunmap_atomic);
+ 
+ void *kmap_atomic_pfn(unsigned long pfn)
+ {
++	pte_t pte = pfn_pte(pfn, kmap_prot);
+ 	unsigned long vaddr;
+ 	int idx, type;
+ 
+@@ -121,7 +129,10 @@ void *kmap_atomic_pfn(unsigned long pfn)
+ #ifdef CONFIG_DEBUG_HIGHMEM
+ 	BUG_ON(!pte_none(get_top_pte(vaddr)));
+ #endif
+-	set_top_pte(vaddr, pfn_pte(pfn, kmap_prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++	current->kmap_pte[type] = pte;
++#endif
++	set_top_pte(vaddr, pte);
+ 
+ 	return (void *)vaddr;
+ }
+@@ -135,3 +146,29 @@ struct page *kmap_atomic_to_page(const v
+ 
+ 	return pte_page(get_top_pte(vaddr));
+ }
++
++#if defined CONFIG_PREEMPT_RT_FULL
++void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
++{
++	int i;
++
++	/*
++	 * Clear @prev's kmap_atomic mappings
++	 */
++	for (i = 0; i < prev_p->kmap_idx; i++) {
++		int idx = i + KM_TYPE_NR * smp_processor_id();
++
++		set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), __pte(0));
++	}
++	/*
++	 * Restore @next_p's kmap_atomic mappings
++	 */
++	for (i = 0; i < next_p->kmap_idx; i++) {
++		int idx = i + KM_TYPE_NR * smp_processor_id();
++
++		if (!pte_none(next_p->kmap_pte[i]))
++			set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx),
++					next_p->kmap_pte[i]);
++	}
++}
++#endif
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -7,6 +7,7 @@
+ #include <linux/mm.h>
+ #include <linux/uaccess.h>
+ #include <linux/hardirq.h>
++#include <linux/sched.h>
+ 
+ #include <asm/cacheflush.h>
+ 
diff --git a/patches/arm-mark-pmu-interupt-no-thread.patch b/patches/arm-mark-pmu-interupt-no-thread.patch
new file mode 100644
index 0000000..6f97ee0
--- /dev/null
+++ b/patches/arm-mark-pmu-interupt-no-thread.patch
@@ -0,0 +1,23 @@
+Subject: arm: Mark pmu interupt IRQF_NO_THREAD
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 16 Mar 2011 14:45:31 +0100
+
+PMU interrupts must not be threaded.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/kernel/perf_event_cpu.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/kernel/perf_event_cpu.c
++++ b/arch/arm/kernel/perf_event_cpu.c
+@@ -118,7 +118,8 @@ static int cpu_pmu_request_irq(struct ar
+ 			continue;
+ 		}
+ 
+-		err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu",
++		err = request_irq(irq, handler,
++				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+ 				  cpu_pmu);
+ 		if (err) {
+ 			pr_err("unable to request IRQ%d for ARM PMU counters\n",
diff --git a/patches/arm-omap-make-wakeupgen_lock-raw.patch b/patches/arm-omap-make-wakeupgen_lock-raw.patch
new file mode 100644
index 0000000..74a258a
--- /dev/null
+++ b/patches/arm-omap-make-wakeupgen_lock-raw.patch
@@ -0,0 +1,62 @@
+Subject: arm-omap-make-wakeupgen_lock-raw.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 11 Apr 2012 11:26:38 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/mach-omap2/omap-wakeupgen.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/arch/arm/mach-omap2/omap-wakeupgen.c
++++ b/arch/arm/mach-omap2/omap-wakeupgen.c
+@@ -46,7 +46,7 @@
+ 
+ static void __iomem *wakeupgen_base;
+ static void __iomem *sar_base;
+-static DEFINE_SPINLOCK(wakeupgen_lock);
++static DEFINE_RAW_SPINLOCK(wakeupgen_lock);
+ static unsigned int irq_target_cpu[MAX_IRQS];
+ static unsigned int irq_banks = MAX_NR_REG_BANKS;
+ static unsigned int max_irqs = MAX_IRQS;
+@@ -134,9 +134,9 @@ static void wakeupgen_mask(struct irq_da
+ {
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&wakeupgen_lock, flags);
++	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
+ 	_wakeupgen_clear(d->irq, irq_target_cpu[d->irq]);
+-	spin_unlock_irqrestore(&wakeupgen_lock, flags);
++	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+ }
+ 
+ /*
+@@ -146,9 +146,9 @@ static void wakeupgen_unmask(struct irq_
+ {
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&wakeupgen_lock, flags);
++	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
+ 	_wakeupgen_set(d->irq, irq_target_cpu[d->irq]);
+-	spin_unlock_irqrestore(&wakeupgen_lock, flags);
++	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+ }
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+@@ -189,7 +189,7 @@ static void wakeupgen_irqmask_all(unsign
+ {
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&wakeupgen_lock, flags);
++	raw_spin_lock_irqsave(&wakeupgen_lock, flags);
+ 	if (set) {
+ 		_wakeupgen_save_masks(cpu);
+ 		_wakeupgen_set_all(cpu, WKG_MASK_ALL);
+@@ -197,7 +197,7 @@ static void wakeupgen_irqmask_all(unsign
+ 		_wakeupgen_set_all(cpu, WKG_UNMASK_ALL);
+ 		_wakeupgen_restore_masks(cpu);
+ 	}
+-	spin_unlock_irqrestore(&wakeupgen_lock, flags);
++	raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+ }
+ #endif
+ 
diff --git a/patches/arm-preempt-lazy-support.patch b/patches/arm-preempt-lazy-support.patch
new file mode 100644
index 0000000..d1e990b
--- /dev/null
+++ b/patches/arm-preempt-lazy-support.patch
@@ -0,0 +1,103 @@
+Subject: arm-preempt-lazy-support.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 31 Oct 2012 12:04:11 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig                   |    1 +
+ arch/arm/include/asm/thread_info.h |    3 +++
+ arch/arm/kernel/asm-offsets.c      |    1 +
+ arch/arm/kernel/entry-armv.S       |   13 +++++++++++--
+ arch/arm/kernel/signal.c           |    3 ++-
+ 5 files changed, 18 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -47,6 +47,7 @@ config ARM
+ 	select HAVE_MEMBLOCK
+ 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
+ 	select HAVE_PERF_EVENTS
++	select HAVE_PREEMPT_LAZY
+ 	select HAVE_REGS_AND_STACK_ACCESS_API
+ 	select HAVE_SYSCALL_TRACEPOINTS
+ 	select HAVE_UID16
+--- a/arch/arm/include/asm/thread_info.h
++++ b/arch/arm/include/asm/thread_info.h
+@@ -50,6 +50,7 @@ struct cpu_context_save {
+ struct thread_info {
+ 	unsigned long		flags;		/* low level flags */
+ 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
++	int			preempt_lazy_count;	/* 0 => preemptable, <0 => bug */
+ 	mm_segment_t		addr_limit;	/* address limit */
+ 	struct task_struct	*task;		/* main task structure */
+ 	struct exec_domain	*exec_domain;	/* execution domain */
+@@ -148,6 +149,7 @@ extern int vfp_restore_user_hwstate(stru
+ #define TIF_SIGPENDING		0
+ #define TIF_NEED_RESCHED	1
+ #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
++#define TIF_NEED_RESCHED_LAZY	3
+ #define TIF_SYSCALL_TRACE	8
+ #define TIF_SYSCALL_AUDIT	9
+ #define TIF_SYSCALL_TRACEPOINT	10
+@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru
+ #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+ #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
++#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
+ #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -50,6 +50,7 @@ int main(void)
+   BLANK();
+   DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
+   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
++  DEFINE(TI_PREEMPT_LAZY,	offsetof(struct thread_info, preempt_lazy_count));
+   DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
+   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
+   DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain));
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -216,11 +216,18 @@ __irq_svc:
+ #ifdef CONFIG_PREEMPT
+ 	get_thread_info tsk
+ 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
+-	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
+ 	teq	r8, #0				@ if preempt count != 0
++	bne	1f				@ return from exeption
++	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
++	tst	r0, #_TIF_NEED_RESCHED		@ if NEED_RESCHED is set
++	blne	svc_preempt			@ preempt!
++
++	ldr	r8, [tsk, #TI_PREEMPT_LAZY]	@ get preempt lazy count
++	teq	r8, #0				@ if preempt lazy count != 0
+ 	movne	r0, #0				@ force flags to 0
+-	tst	r0, #_TIF_NEED_RESCHED
++	tst	r0, #_TIF_NEED_RESCHED_LAZY
+ 	blne	svc_preempt
++1:
+ #endif
+ 
+ #ifdef CONFIG_TRACE_IRQFLAGS
+@@ -240,6 +247,8 @@ svc_preempt:
+ 1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
+ 	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
+ 	tst	r0, #_TIF_NEED_RESCHED
++	bne	1b
++	tst	r0, #_TIF_NEED_RESCHED_LAZY
+ 	moveq	pc, r8				@ go again
+ 	b	1b
+ #endif
+--- a/arch/arm/kernel/signal.c
++++ b/arch/arm/kernel/signal.c
+@@ -638,7 +638,8 @@ asmlinkage int
+ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+ {
+ 	do {
+-		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
++		if (likely(thread_flags & (_TIF_NEED_RESCHED |
++					   _TIF_NEED_RESCHED_LAZY))) {
+ 			schedule();
+ 		} else {
+ 			if (unlikely(!user_mode(regs)))
diff --git a/patches/ata-disable-interrupts-if-non-rt.patch b/patches/ata-disable-interrupts-if-non-rt.patch
new file mode 100644
index 0000000..6088cd8
--- /dev/null
+++ b/patches/ata-disable-interrupts-if-non-rt.patch
@@ -0,0 +1,64 @@
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Fri, 3 Jul 2009 08:44:29 -0500
+Subject: ata: Do not disable interrupts in ide code for preempt-rt
+
+Use the local_irq_*_nort variants.
+
+Signed-off-by: Steven Rostedt <srostedt@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/ata/libata-sff.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/ata/libata-sff.c
++++ b/drivers/ata/libata-sff.c
+@@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(str
+ 	unsigned long flags;
+ 	unsigned int consumed;
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 
+ 	return consumed;
+ }
+@@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_qu
+ 		unsigned long flags;
+ 
+ 		/* FIXME: use a bounce buffer */
+-		local_irq_save(flags);
++		local_irq_save_nort(flags);
+ 		buf = kmap_atomic(page);
+ 
+ 		/* do the actual data transfer */
+@@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_qu
+ 				       do_write);
+ 
+ 		kunmap_atomic(buf);
+-		local_irq_restore(flags);
++		local_irq_restore_nort(flags);
+ 	} else {
+ 		buf = page_address(page);
+ 		ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
+@@ -864,7 +864,7 @@ next_sg:
+ 		unsigned long flags;
+ 
+ 		/* FIXME: use bounce buffer */
+-		local_irq_save(flags);
++		local_irq_save_nort(flags);
+ 		buf = kmap_atomic(page);
+ 
+ 		/* do the actual data transfer */
+@@ -872,7 +872,7 @@ next_sg:
+ 								count, rw);
+ 
+ 		kunmap_atomic(buf);
+-		local_irq_restore(flags);
++		local_irq_restore_nort(flags);
+ 	} else {
+ 		buf = page_address(page);
+ 		consumed = ap->ops->sff_data_xfer(dev,  buf + offset,
diff --git a/patches/block-shorten-interrupt-disabled-regions.patch b/patches/block-shorten-interrupt-disabled-regions.patch
new file mode 100644
index 0000000..910d5ac
--- /dev/null
+++ b/patches/block-shorten-interrupt-disabled-regions.patch
@@ -0,0 +1,97 @@
+Subject: block: Shorten interrupt disabled regions
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 22 Jun 2011 19:47:02 +0200
+
+Moving the blk_sched_flush_plug() call out of the interrupt/preempt
+disabled region in the scheduler allows us to replace
+local_irq_save/restore(flags) by local_irq_disable/enable() in
+blk_flush_plug().
+
+Now instead of doing this we disable interrupts explicitly when we
+lock the request_queue and reenable them when we drop the lock. That
+allows interrupts to be handled when the plug list contains requests
+for more than one queue.
+
+Aside from that, this change makes the scope of the irq disabled region
+more obvious. The current code confused the hell out of me when
+looking at:
+
+ local_irq_save(flags);
+   spin_lock(q->queue_lock);
+   ...
+   queue_unplugged(q...);
+     scsi_request_fn();
+       spin_unlock(q->queue_lock);
+       spin_lock(shost->host_lock);
+       spin_unlock_irq(shost->host_lock);
+
+-------------------^^^ ????
+
+       spin_lock_irq(q->queue_lock);
+       spin_unlock(q->lock);
+ local_irq_restore(flags);
+
+Also add a comment to __blk_run_queue() documenting that
+q->request_fn() can drop q->queue_lock and reenable interrupts, but
+must return with q->queue_lock held and interrupts disabled.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ block/blk-core.c |   12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -2929,7 +2929,7 @@ static void queue_unplugged(struct reque
+ 		blk_run_queue_async(q);
+ 	else
+ 		__blk_run_queue(q);
+-	spin_unlock(q->queue_lock);
++	spin_unlock_irq(q->queue_lock);
+ }
+ 
+ static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
+@@ -2977,7 +2977,6 @@ EXPORT_SYMBOL(blk_check_plugged);
+ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+ {
+ 	struct request_queue *q;
+-	unsigned long flags;
+ 	struct request *rq;
+ 	LIST_HEAD(list);
+ 	unsigned int depth;
+@@ -2998,11 +2997,6 @@ void blk_flush_plug_list(struct blk_plug
+ 	q = NULL;
+ 	depth = 0;
+ 
+-	/*
+-	 * Save and disable interrupts here, to avoid doing it for every
+-	 * queue lock we have to take.
+-	 */
+-	local_irq_save(flags);
+ 	while (!list_empty(&list)) {
+ 		rq = list_entry_rq(list.next);
+ 		list_del_init(&rq->queuelist);
+@@ -3015,7 +3009,7 @@ void blk_flush_plug_list(struct blk_plug
+ 				queue_unplugged(q, depth, from_schedule);
+ 			q = rq->q;
+ 			depth = 0;
+-			spin_lock(q->queue_lock);
++			spin_lock_irq(q->queue_lock);
+ 		}
+ 
+ 		/*
+@@ -3042,8 +3036,6 @@ void blk_flush_plug_list(struct blk_plug
+ 	 */
+ 	if (q)
+ 		queue_unplugged(q, depth, from_schedule);
+-
+-	local_irq_restore(flags);
+ }
+ 
+ void blk_finish_plug(struct blk_plug *plug)
diff --git a/patches/block-use-cpu-chill.patch b/patches/block-use-cpu-chill.patch
new file mode 100644
index 0000000..9237f69
--- /dev/null
+++ b/patches/block-use-cpu-chill.patch
@@ -0,0 +1,45 @@
+Subject: block: Use cpu_chill() for retry loops
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 20 Dec 2012 18:28:26 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Steven also observed a livelock when concurrent priority
+boosting was going on.
+
+Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ block/blk-ioc.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -8,6 +8,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+ #include <linux/slab.h>
++#include <linux/delay.h>
+ 
+ #include "blk.h"
+ 
+@@ -110,7 +111,7 @@ static void ioc_release_fn(struct work_s
+ 			spin_unlock(q->queue_lock);
+ 		} else {
+ 			spin_unlock_irqrestore(&ioc->lock, flags);
+-			cpu_relax();
++			cpu_chill();
+ 			spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+ 		}
+ 	}
+@@ -188,7 +189,7 @@ retry:
+ 			spin_unlock(icq->q->queue_lock);
+ 		} else {
+ 			spin_unlock_irqrestore(&ioc->lock, flags);
+-			cpu_relax();
++			cpu_chill();
+ 			goto retry;
+ 		}
+ 	}
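cpu_chill() itself comes from a separate patch in this queue; the point is that on RT a tight cpu_relax() loop can spin forever against a preempted (or priority-boosted) lock holder, so the chilling variant sleeps briefly instead. Roughly (a sketch, not the exact hunk):

	#ifdef CONFIG_PREEMPT_RT_FULL
	/* give the preempted/boosted owner a chance to run instead of spinning */
	# define cpu_chill()	msleep(1)
	#else
	# define cpu_chill()	cpu_relax()
	#endif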
diff --git a/patches/bug-rt-dependend-variants.patch b/patches/bug-rt-dependend-variants.patch
new file mode 100644
index 0000000..5f7a1e3
--- /dev/null
+++ b/patches/bug-rt-dependend-variants.patch
@@ -0,0 +1,34 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:58 -0500
+Subject: bug: BUG_ON/WARN_ON variants dependent on RT/!RT
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/asm-generic/bug.h |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/include/asm-generic/bug.h
++++ b/include/asm-generic/bug.h
+@@ -202,6 +202,20 @@ extern void warn_slowpath_null(const cha
+ # define WARN_ON_SMP(x)			({0;})
+ #endif
+ 
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define BUG_ON_RT(c)			BUG_ON(c)
++# define BUG_ON_NONRT(c)		do { } while (0)
++# define WARN_ON_RT(condition)		WARN_ON(condition)
++# define WARN_ON_NONRT(condition)	do { } while (0)
++# define WARN_ON_ONCE_NONRT(condition)	do { } while (0)
++#else
++# define BUG_ON_RT(c)			do { } while (0)
++# define BUG_ON_NONRT(c)		BUG_ON(c)
++# define WARN_ON_RT(condition)		do { } while (0)
++# define WARN_ON_NONRT(condition)	WARN_ON(condition)
++# define WARN_ON_ONCE_NONRT(condition)	WARN_ON_ONCE(condition)
++#endif
++
+ #endif /* __ASSEMBLY__ */
+ 
+ #endif
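These variants keep assertions that are only meaningful on one of the two configurations, instead of deleting them outright. A hypothetical usage sketch (real conversions appear later in this queue, e.g. in the dm patch):

	/* only valid on !RT, where this path really runs with interrupts off */
	BUG_ON_NONRT(!irqs_disabled());

	/* only checked on RT, compiles away on mainline preemption models */
	WARN_ON_RT(in_irq());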
diff --git a/patches/clocksource-tclib-allow-higher-clockrates.patch b/patches/clocksource-tclib-allow-higher-clockrates.patch
new file mode 100644
index 0000000..a4502c7
--- /dev/null
+++ b/patches/clocksource-tclib-allow-higher-clockrates.patch
@@ -0,0 +1,159 @@
+From: Benedikt Spranger <b.spranger@linutronix.de>
+Date: Mon, 8 Mar 2010 18:57:04 +0100
+Subject: clocksource: TCLIB: Allow higher clock rates for clock events
+
+By default the TCLIB uses the 32 KiHz base clock rate for clock events.
+Add a compile-time selection to allow a higher clock resolution.
+
+Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/clocksource/tcb_clksrc.c |   44 +++++++++++++++++++++++----------------
+ drivers/misc/Kconfig             |   11 +++++++--
+ 2 files changed, 35 insertions(+), 20 deletions(-)
+
+--- a/drivers/clocksource/tcb_clksrc.c
++++ b/drivers/clocksource/tcb_clksrc.c
+@@ -23,8 +23,7 @@
+  *     this 32 bit free-running counter. the second channel is not used.
+  *
+  *   - The third channel may be used to provide a 16-bit clockevent
+- *     source, used in either periodic or oneshot mode.  This runs
+- *     at 32 KiHZ, and can handle delays of up to two seconds.
++ *     source, used in either periodic or oneshot mode.
+  *
+  * A boot clocksource and clockevent source are also currently needed,
+  * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
+@@ -74,6 +73,7 @@ static struct clocksource clksrc = {
+ struct tc_clkevt_device {
+ 	struct clock_event_device	clkevt;
+ 	struct clk			*clk;
++	u32				freq;
+ 	void __iomem			*regs;
+ };
+ 
+@@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_cl
+ 	return container_of(clkevt, struct tc_clkevt_device, clkevt);
+ }
+ 
+-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
+- * because using one of the divided clocks would usually mean the
+- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
+- *
+- * A divided clock could be good for high resolution timers, since
+- * 30.5 usec resolution can seem "low".
+- */
+ static u32 timer_clock;
+ 
+ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
+@@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mod
+ 	case CLOCK_EVT_MODE_PERIODIC:
+ 		clk_enable(tcd->clk);
+ 
+-		/* slow clock, count up to RC, then irq and restart */
++		/* count up to RC, then irq and restart */
+ 		__raw_writel(timer_clock
+ 				| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
+ 				regs + ATMEL_TC_REG(2, CMR));
+-		__raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
++		__raw_writel((tcd->freq + HZ/2)/HZ,
++			     tcaddr + ATMEL_TC_REG(2, RC));
+ 
+ 		/* Enable clock and interrupts on RC compare */
+ 		__raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
+@@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mod
+ 	case CLOCK_EVT_MODE_ONESHOT:
+ 		clk_enable(tcd->clk);
+ 
+-		/* slow clock, count up to RC, then irq and stop */
++		/* count up to RC, then irq and stop */
+ 		__raw_writel(timer_clock | ATMEL_TC_CPCSTOP
+ 				| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
+ 				regs + ATMEL_TC_REG(2, CMR));
+@@ -158,8 +152,12 @@ static struct tc_clkevt_device clkevt =
+ 		.features	= CLOCK_EVT_FEAT_PERIODIC
+ 					| CLOCK_EVT_FEAT_ONESHOT,
+ 		.shift		= 32,
++#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ 		/* Should be lower than at91rm9200's system timer */
+ 		.rating		= 125,
++#else
++		.rating		= 200,
++#endif
+ 		.set_next_event	= tc_next_event,
+ 		.set_mode	= tc_mode,
+ 	},
+@@ -185,8 +183,9 @@ static struct irqaction tc_irqaction = {
+ 	.handler	= ch2_irq,
+ };
+ 
+-static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
++static void __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
+ {
++	unsigned divisor = atmel_tc_divisors[divisor_idx];
+ 	struct clk *t2_clk = tc->clk[2];
+ 	int irq = tc->irq[2];
+ 
+@@ -194,11 +193,17 @@ static void __init setup_clkevents(struc
+ 	clkevt.clk = t2_clk;
+ 	tc_irqaction.dev_id = &clkevt;
+ 
+-	timer_clock = clk32k_divisor_idx;
++	timer_clock = divisor_idx;
+ 
+-	clkevt.clkevt.mult = div_sc(32768, NSEC_PER_SEC, clkevt.clkevt.shift);
+-	clkevt.clkevt.max_delta_ns
+-		= clockevent_delta2ns(0xffff, &clkevt.clkevt);
++	if (!divisor)
++		clkevt.freq = 32768;
++	else
++		clkevt.freq = clk_get_rate(t2_clk)/divisor;
++
++	clkevt.clkevt.mult = div_sc(clkevt.freq, NSEC_PER_SEC,
++				    clkevt.clkevt.shift);
++	clkevt.clkevt.max_delta_ns =
++		clockevent_delta2ns(0xffff, &clkevt.clkevt);
+ 	clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
+ 	clkevt.clkevt.cpumask = cpumask_of(0);
+ 
+@@ -327,8 +332,11 @@ static int __init tcb_clksrc_init(void)
+ 	clocksource_register_hz(&clksrc, divided_rate);
+ 
+ 	/* channel 2:  periodic and oneshot timer support */
++#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ 	setup_clkevents(tc, clk32k_divisor_idx);
+-
++#else
++	setup_clkevents(tc, best_divisor_idx);
++#endif
+ 	return 0;
+ }
+ arch_initcall(tcb_clksrc_init);
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -78,8 +78,7 @@ config ATMEL_TCB_CLKSRC
+ 	  are combined to make a single 32-bit timer.
+ 
+ 	  When GENERIC_CLOCKEVENTS is defined, the third timer channel
+-	  may be used as a clock event device supporting oneshot mode
+-	  (delays of up to two seconds) based on the 32 KiHz clock.
++	  may be used as a clock event device supporting oneshot mode.
+ 
+ config ATMEL_TCB_CLKSRC_BLOCK
+ 	int
+@@ -93,6 +92,14 @@ config ATMEL_TCB_CLKSRC_BLOCK
+ 	  TC can be used for other purposes, such as PWM generation and
+ 	  interval timing.
+ 
++config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
++	bool "TC Block use 32 KiHz clock"
++	depends on ATMEL_TCB_CLKSRC
++	default y
++	help
++	  Select this to use 32 KiHz base clock rate as TC block clock
++	  source for clock events.
++
+ config IBM_ASM
+ 	tristate "Device driver for IBM RSA service processor"
+ 	depends on X86 && PCI && INPUT
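For the numbers behind the removed "two seconds" comment: a 16-bit channel clocked at 32768 Hz wraps after 0xffff / 32768 s, i.e. roughly 2 s, and with HZ=100 the periodic reload computed above is (32768 + 50) / 100 = 328 counts, about a 10 ms tick. Selecting a faster divided clock raises clkevt.freq and hence the resolution, at the price of a shorter maximum oneshot delay. A small illustrative helper (assumed values, not part of the patch):

	/* Illustrative only: reload count for a channel running at @freq Hz.
	 * 32768 Hz with HZ=100 gives 328 counts (~10 ms per periodic tick). */
	static inline unsigned int tc_example_reload(unsigned int freq)
	{
		return (freq + HZ / 2) / HZ;
	}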
diff --git a/patches/completion-use-simple-wait-queues.patch b/patches/completion-use-simple-wait-queues.patch
new file mode 100644
index 0000000..f7d9d01
--- /dev/null
+++ b/patches/completion-use-simple-wait-queues.patch
@@ -0,0 +1,155 @@
+Subject: completion: Use simple wait queues
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 11 Jan 2013 11:23:51 +0100
+
+Completions have no long-lasting callbacks and therefore do not need
+the complex waitqueue variant. Use simple waitqueues, which reduces the
+contention on the waitqueue lock.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/completion.h |    8 ++++----
+ include/linux/uprobes.h    |    1 +
+ kernel/sched/core.c        |   34 +++++++++++++++++-----------------
+ 3 files changed, 22 insertions(+), 21 deletions(-)
+
+--- a/include/linux/completion.h
++++ b/include/linux/completion.h
+@@ -8,7 +8,7 @@
+  * See kernel/sched.c for details.
+  */
+ 
+-#include <linux/wait.h>
++#include <linux/wait-simple.h>
+ 
+ /*
+  * struct completion - structure used to maintain state for a "completion"
+@@ -24,11 +24,11 @@
+  */
+ struct completion {
+ 	unsigned int done;
+-	wait_queue_head_t wait;
++	struct swait_head wait;
+ };
+ 
+ #define COMPLETION_INITIALIZER(work) \
+-	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
++	{ 0, SWAIT_HEAD_INITIALIZER((work).wait) }
+ 
+ #define COMPLETION_INITIALIZER_ONSTACK(work) \
+ 	({ init_completion(&work); work; })
+@@ -73,7 +73,7 @@ struct completion {
+ static inline void init_completion(struct completion *x)
+ {
+ 	x->done = 0;
+-	init_waitqueue_head(&x->wait);
++	init_swait_head(&x->wait);
+ }
+ 
+ extern void wait_for_completion(struct completion *);
+--- a/include/linux/uprobes.h
++++ b/include/linux/uprobes.h
+@@ -26,6 +26,7 @@
+ 
+ #include <linux/errno.h>
+ #include <linux/rbtree.h>
++#include <linux/wait.h>
+ 
+ struct vm_area_struct;
+ struct mm_struct;
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3487,10 +3487,10 @@ void complete(struct completion *x)
+ {
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&x->wait.lock, flags);
++	raw_spin_lock_irqsave(&x->wait.lock, flags);
+ 	x->done++;
+-	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
+-	spin_unlock_irqrestore(&x->wait.lock, flags);
++	__swait_wake_locked(&x->wait, TASK_NORMAL, 1);
++	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ }
+ EXPORT_SYMBOL(complete);
+ 
+@@ -3507,10 +3507,10 @@ void complete_all(struct completion *x)
+ {
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&x->wait.lock, flags);
++	raw_spin_lock_irqsave(&x->wait.lock, flags);
+ 	x->done += UINT_MAX/2;
+-	__wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
+-	spin_unlock_irqrestore(&x->wait.lock, flags);
++	__swait_wake_locked(&x->wait, TASK_NORMAL, 0);
++	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ }
+ EXPORT_SYMBOL(complete_all);
+ 
+@@ -3518,20 +3518,20 @@ static inline long __sched
+ do_wait_for_common(struct completion *x, long timeout, int state)
+ {
+ 	if (!x->done) {
+-		DECLARE_WAITQUEUE(wait, current);
++		DEFINE_SWAITER(wait);
+ 
+-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
++		swait_prepare_locked(&x->wait, &wait);
+ 		do {
+ 			if (signal_pending_state(state, current)) {
+ 				timeout = -ERESTARTSYS;
+ 				break;
+ 			}
+ 			__set_current_state(state);
+-			spin_unlock_irq(&x->wait.lock);
++			raw_spin_unlock_irq(&x->wait.lock);
+ 			timeout = schedule_timeout(timeout);
+-			spin_lock_irq(&x->wait.lock);
++			raw_spin_lock_irq(&x->wait.lock);
+ 		} while (!x->done && timeout);
+-		__remove_wait_queue(&x->wait, &wait);
++		swait_finish_locked(&x->wait, &wait);
+ 		if (!x->done)
+ 			return timeout;
+ 	}
+@@ -3544,9 +3544,9 @@ wait_for_common(struct completion *x, lo
+ {
+ 	might_sleep();
+ 
+-	spin_lock_irq(&x->wait.lock);
++	raw_spin_lock_irq(&x->wait.lock);
+ 	timeout = do_wait_for_common(x, timeout, state);
+-	spin_unlock_irq(&x->wait.lock);
++	raw_spin_unlock_irq(&x->wait.lock);
+ 	return timeout;
+ }
+ 
+@@ -3677,12 +3677,12 @@ bool try_wait_for_completion(struct comp
+ 	unsigned long flags;
+ 	int ret = 1;
+ 
+-	spin_lock_irqsave(&x->wait.lock, flags);
++	raw_spin_lock_irqsave(&x->wait.lock, flags);
+ 	if (!x->done)
+ 		ret = 0;
+ 	else
+ 		x->done--;
+-	spin_unlock_irqrestore(&x->wait.lock, flags);
++	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ 	return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -3700,10 +3700,10 @@ bool completion_done(struct completion *
+ 	unsigned long flags;
+ 	int ret = 1;
+ 
+-	spin_lock_irqsave(&x->wait.lock, flags);
++	raw_spin_lock_irqsave(&x->wait.lock, flags);
+ 	if (!x->done)
+ 		ret = 0;
+-	spin_unlock_irqrestore(&x->wait.lock, flags);
++	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ 	return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
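Note that the completion API seen by callers does not change with this conversion; only the embedded waitqueue becomes a simple, raw-spinlock based swait queue. A generic usage sketch (not taken from the patch) still looks the same as before:

	static DECLARE_COMPLETION(setup_done);

	/* waiter side: sleeps on the swait head embedded in the completion */
	wait_for_completion(&setup_done);

	/* completer side: wakes one exclusive waiter per complete() call */
	complete(&setup_done);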
diff --git a/patches/cond-resched-lock-rt-tweak.patch b/patches/cond-resched-lock-rt-tweak.patch
new file mode 100644
index 0000000..2dff484
--- /dev/null
+++ b/patches/cond-resched-lock-rt-tweak.patch
@@ -0,0 +1,20 @@
+Subject: cond-resched-lock-rt-tweak.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 22:51:33 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2719,7 +2719,7 @@ extern int _cond_resched(void);
+ 
+ extern int __cond_resched_lock(spinlock_t *lock);
+ 
+-#ifdef CONFIG_PREEMPT_COUNT
++#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
+ #define PREEMPT_LOCK_OFFSET	PREEMPT_OFFSET
+ #else
+ #define PREEMPT_LOCK_OFFSET	0
diff --git a/patches/cond-resched-softirq-rt.patch b/patches/cond-resched-softirq-rt.patch
new file mode 100644
index 0000000..dd146ee
--- /dev/null
+++ b/patches/cond-resched-softirq-rt.patch
@@ -0,0 +1,47 @@
+Subject: cond-resched-softirq-fix.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Jul 2011 09:56:44 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h |    4 ++++
+ kernel/sched/core.c   |    2 ++
+ 2 files changed, 6 insertions(+)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2730,12 +2730,16 @@ extern int __cond_resched_lock(spinlock_
+ 	__cond_resched_lock(lock);				\
+ })
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ extern int __cond_resched_softirq(void);
+ 
+ #define cond_resched_softirq() ({					\
+ 	__might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
+ 	__cond_resched_softirq();					\
+ })
++#else
++# define cond_resched_softirq()		cond_resched()
++#endif
+ 
+ /*
+  * Does a critical section need to be broken due to another
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4366,6 +4366,7 @@ int __cond_resched_lock(spinlock_t *lock
+ }
+ EXPORT_SYMBOL(__cond_resched_lock);
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ int __sched __cond_resched_softirq(void)
+ {
+ 	BUG_ON(!in_softirq());
+@@ -4379,6 +4380,7 @@ int __sched __cond_resched_softirq(void)
+ 	return 0;
+ }
+ EXPORT_SYMBOL(__cond_resched_softirq);
++#endif
+ 
+ /**
+  * yield - yield the current processor to other threads.
diff --git a/patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch b/patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
new file mode 100644
index 0000000..80ed692
--- /dev/null
+++ b/patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
@@ -0,0 +1,125 @@
+Subject: cpu: Make hotplug.lock a "sleeping" spinlock on RT
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Fri, 02 Mar 2012 10:36:57 -0500
+
+Tasks can block on hotplug.lock in pin_current_cpu(), but their state
+might be != RUNNING. So the mutex wakeup will set the state
+unconditionally to RUNNING. That might cause spurious unexpected
+wakeups. We could provide a state-preserving mutex_lock() function,
+but this is semantically backwards. So instead we convert
+hotplug.lock to a spinlock for RT, which already has the
+state-preserving semantics.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: Carsten Emde <C.Emde@osadl.org>
+Cc: John Kacur <jkacur@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Clark Williams <clark.williams@gmail.com>
+Cc: stable-rt@vger.kernel.org
+Link: http://lkml.kernel.org/r/1330702617.25686.265.camel@gandalf.stny.rr.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c |   35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -51,7 +51,12 @@ static int cpu_hotplug_disabled;
+ 
+ static struct {
+ 	struct task_struct *active_writer;
++#ifdef CONFIG_PREEMPT_RT_FULL
++	/* Makes the lock keep the task's state */
++	spinlock_t lock;
++#else
+ 	struct mutex lock; /* Synchronizes accesses to refcount, */
++#endif
+ 	/*
+ 	 * Also blocks the new readers during
+ 	 * an ongoing cpu hotplug operation.
+@@ -59,10 +64,22 @@ static struct {
+ 	int refcount;
+ } cpu_hotplug = {
+ 	.active_writer = NULL,
++#ifdef CONFIG_PREEMPT_RT_FULL
++	.lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
++#else
+ 	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
++#endif
+ 	.refcount = 0,
+ };
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
++# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
++#else
++# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
++# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
++#endif
++
+ struct hotplug_pcp {
+ 	struct task_struct *unplug;
+ 	int refcount;
+@@ -92,8 +109,8 @@ retry:
+ 		return;
+ 	}
+ 	preempt_enable();
+-	mutex_lock(&cpu_hotplug.lock);
+-	mutex_unlock(&cpu_hotplug.lock);
++	hotplug_lock();
++	hotplug_unlock();
+ 	preempt_disable();
+ 	goto retry;
+ }
+@@ -166,9 +183,9 @@ void get_online_cpus(void)
+ 	might_sleep();
+ 	if (cpu_hotplug.active_writer == current)
+ 		return;
+-	mutex_lock(&cpu_hotplug.lock);
++	hotplug_lock();
+ 	cpu_hotplug.refcount++;
+-	mutex_unlock(&cpu_hotplug.lock);
++	hotplug_unlock();
+ 
+ }
+ EXPORT_SYMBOL_GPL(get_online_cpus);
+@@ -177,14 +194,14 @@ void put_online_cpus(void)
+ {
+ 	if (cpu_hotplug.active_writer == current)
+ 		return;
+-	mutex_lock(&cpu_hotplug.lock);
+ 
++	hotplug_lock();
+ 	if (WARN_ON(!cpu_hotplug.refcount))
+ 		cpu_hotplug.refcount++; /* try to fix things up */
+ 
+ 	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
+ 		wake_up_process(cpu_hotplug.active_writer);
+-	mutex_unlock(&cpu_hotplug.lock);
++	hotplug_unlock();
+ 
+ }
+ EXPORT_SYMBOL_GPL(put_online_cpus);
+@@ -216,11 +233,11 @@ static void cpu_hotplug_begin(void)
+ 	cpu_hotplug.active_writer = current;
+ 
+ 	for (;;) {
+-		mutex_lock(&cpu_hotplug.lock);
++		hotplug_lock();
+ 		if (likely(!cpu_hotplug.refcount))
+ 			break;
+ 		__set_current_state(TASK_UNINTERRUPTIBLE);
+-		mutex_unlock(&cpu_hotplug.lock);
++		hotplug_unlock();
+ 		schedule();
+ 	}
+ }
+@@ -228,7 +245,7 @@ static void cpu_hotplug_begin(void)
+ static void cpu_hotplug_done(void)
+ {
+ 	cpu_hotplug.active_writer = NULL;
+-	mutex_unlock(&cpu_hotplug.lock);
++	hotplug_unlock();
+ }
+ 
+ #else /* #if CONFIG_HOTPLUG_CPU */
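The problem the commit message describes can be sketched as follows: a task that has already set a special sleep state and then hits the hotplug lock must not be flipped back to TASK_RUNNING by the lock's wakeup path. A hypothetical call sequence, for illustration only:

	set_current_state(TASK_UNINTERRUPTIBLE);
	pin_current_cpu();		/* may block on hotplug.lock; a mutex wakeup
					 * would set TASK_RUNNING, the rtmutex-based
					 * spinlock preserves the state instead     */
	schedule_timeout(HZ);		/* still sleeps, as the caller intended     */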
diff --git a/patches/cpu-rt-rework-cpu-down.patch b/patches/cpu-rt-rework-cpu-down.patch
new file mode 100644
index 0000000..7a6496a
--- /dev/null
+++ b/patches/cpu-rt-rework-cpu-down.patch
@@ -0,0 +1,548 @@
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Mon, 16 Jul 2012 08:07:43 +0000
+Subject: cpu/rt: Rework cpu down for PREEMPT_RT
+
+Bringing a CPU down is a pain with the PREEMPT_RT kernel because
+tasks can be preempted in many more places than in non-RT. In
+order to handle per_cpu variables, tasks may be pinned to a CPU
+for a while, and even sleep. But these tasks need to be off the CPU
+if that CPU is going down.
+
+Several synchronization methods have been tried, but when stressed
+they failed. This is a new approach.
+
+A sync_tsk thread is still created and tasks may still block on a
+lock when the CPU is going down, but how that works is a bit different.
+When cpu_down() starts, it creates the sync_tsk and waits for it to
+report that the tasks currently pinned on the CPU are no longer
+pinned. New tasks that are about to pin themselves are still allowed
+to do so at this time.
+
+Then the notifiers are called. Several notifiers will bring down tasks
+that will enter these pinned sections. Some of these tasks will take
+locks of other tasks that are on the CPU. If we don't let those other
+tasks continue, but make them block until CPU down is done, the tasks
+that the notifiers are waiting on will never complete, as they are
+waiting for the locks held by the tasks that are blocked.
+
+Thus we still let the task pin the CPU until the notifiers are done.
+After the notifiers run, we then make new tasks entering the pinned
+CPU sections grab a mutex and wait. This mutex is now a per CPU mutex
+in the hotplug_pcp descriptor.
+
+To help things along, a new function in the scheduler code is created
+called migrate_me(). This function will try to migrate the current task
+off the CPU that is going down, if possible. When the sync_tsk is created,
+all tasks will then try to migrate off the CPU going down. There are
+several cases where this won't work, but it helps in most cases.
+
+After the notifiers are called, if a task can't migrate off but enters
+a pinned-CPU section, it will be forced to wait on the hotplug_pcp mutex
+until the CPU down is complete. Then the scheduler will force the migration
+anyway.
+
+Also, I found that THREAD_BOUND tasks also need to be accounted for in the
+pinned-CPU count, and migrate_disable() no longer treats them specially.
+This helps fix issues with ksoftirqd and workqueues that unbind on CPU down.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/sched.h |    7 +
+ kernel/cpu.c          |  241 +++++++++++++++++++++++++++++++++++++++++---------
+ kernel/sched/core.c   |   82 ++++++++++++++++-
+ 3 files changed, 285 insertions(+), 45 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1977,6 +1977,10 @@ extern void do_set_cpus_allowed(struct t
+ 
+ extern int set_cpus_allowed_ptr(struct task_struct *p,
+ 				const struct cpumask *new_mask);
++int migrate_me(void);
++void tell_sched_cpu_down_begin(int cpu);
++void tell_sched_cpu_down_done(int cpu);
++
+ #else
+ static inline void do_set_cpus_allowed(struct task_struct *p,
+ 				      const struct cpumask *new_mask)
+@@ -1989,6 +1993,9 @@ static inline int set_cpus_allowed_ptr(s
+ 		return -EINVAL;
+ 	return 0;
+ }
++static inline int migrate_me(void) { return 0; }
++static inline void tell_sched_cpu_down_begin(int cpu) { }
++static inline void tell_sched_cpu_down_done(int cpu) { }
+ #endif
+ 
+ #ifdef CONFIG_NO_HZ
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -51,12 +51,7 @@ static int cpu_hotplug_disabled;
+ 
+ static struct {
+ 	struct task_struct *active_writer;
+-#ifdef CONFIG_PREEMPT_RT_FULL
+-	/* Makes the lock keep the task's state */
+-	spinlock_t lock;
+-#else
+ 	struct mutex lock; /* Synchronizes accesses to refcount, */
+-#endif
+ 	/*
+ 	 * Also blocks the new readers during
+ 	 * an ongoing cpu hotplug operation.
+@@ -64,28 +59,46 @@ static struct {
+ 	int refcount;
+ } cpu_hotplug = {
+ 	.active_writer = NULL,
+-#ifdef CONFIG_PREEMPT_RT_FULL
+-	.lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
+-#else
+ 	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+-#endif
+ 	.refcount = 0,
+ };
+ 
+-#ifdef CONFIG_PREEMPT_RT_FULL
+-# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
+-# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
+-#else
+-# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
+-# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
+-#endif
+-
++/**
++ * hotplug_pcp - per cpu hotplug descriptor
++ * @unplug:	set when pin_current_cpu() needs to sync tasks
++ * @sync_tsk:	the task that waits for tasks to finish pinned sections
++ * @refcount:	counter of tasks in pinned sections
++ * @grab_lock:	set when the tasks entering pinned sections should wait
++ * @synced:	notifier for @sync_tsk to tell cpu_down it's finished
++ * @mutex:	the mutex to make tasks wait (used when @grab_lock is true)
++ * @mutex_init:	zero if the mutex hasn't been initialized yet.
++ *
++ * Although @unplug and @sync_tsk may point to the same task, the @unplug
++ * is used as a flag and still exists after @sync_tsk has exited and
++ * @sync_tsk set to NULL.
++ */
+ struct hotplug_pcp {
+ 	struct task_struct *unplug;
++	struct task_struct *sync_tsk;
+ 	int refcount;
++	int grab_lock;
+ 	struct completion synced;
++#ifdef CONFIG_PREEMPT_RT_FULL
++	spinlock_t lock;
++#else
++	struct mutex mutex;
++#endif
++	int mutex_init;
+ };
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
++# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
++#else
++# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
++# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
++#endif
++
+ static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
+ 
+ /**
+@@ -99,18 +112,40 @@ static DEFINE_PER_CPU(struct hotplug_pcp
+ void pin_current_cpu(void)
+ {
+ 	struct hotplug_pcp *hp;
++	int force = 0;
+ 
+ retry:
+ 	hp = &__get_cpu_var(hotplug_pcp);
+ 
+-	if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
++	if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
+ 	    hp->unplug == current || (current->flags & PF_STOMPER)) {
+ 		hp->refcount++;
+ 		return;
+ 	}
+-	preempt_enable();
+-	hotplug_lock();
+-	hotplug_unlock();
++
++	if (hp->grab_lock) {
++		preempt_enable();
++		hotplug_lock(hp);
++		hotplug_unlock(hp);
++	} else {
++		preempt_enable();
++		/*
++		 * Try to push this task off of this CPU.
++		 */
++		if (!migrate_me()) {
++			preempt_disable();
++			hp = &__get_cpu_var(hotplug_pcp);
++			if (!hp->grab_lock) {
++				/*
++				 * Just let it continue it's already pinned
++				 * or about to sleep.
++				 */
++				force = 1;
++				goto retry;
++			}
++			preempt_enable();
++		}
++	}
+ 	preempt_disable();
+ 	goto retry;
+ }
+@@ -132,26 +167,84 @@ void unpin_current_cpu(void)
+ 		wake_up_process(hp->unplug);
+ }
+ 
+-/*
+- * FIXME: Is this really correct under all circumstances ?
+- */
++static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
++{
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	while (hp->refcount) {
++		schedule_preempt_disabled();
++		set_current_state(TASK_UNINTERRUPTIBLE);
++	}
++}
++
+ static int sync_unplug_thread(void *data)
+ {
+ 	struct hotplug_pcp *hp = data;
+ 
+ 	preempt_disable();
+ 	hp->unplug = current;
++	wait_for_pinned_cpus(hp);
++
++	/*
++	 * This thread will synchronize the cpu_down() with threads
++	 * that have pinned the CPU. When the pinned CPU count reaches
++	 * zero, we inform the cpu_down code to continue to the next step.
++	 */
+ 	set_current_state(TASK_UNINTERRUPTIBLE);
+-	while (hp->refcount) {
+-		schedule_preempt_disabled();
++	preempt_enable();
++	complete(&hp->synced);
++
++	/*
++	 * If all succeeds, the next step will need tasks to wait till
++	 * the CPU is offline before continuing. To do this, the grab_lock
++	 * is set and tasks going into pin_current_cpu() will block on the
++	 * mutex. But we still need to wait for those that are already in
++	 * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
++	 * will kick this thread out.
++	 */
++	while (!hp->grab_lock && !kthread_should_stop()) {
++		schedule();
++		set_current_state(TASK_UNINTERRUPTIBLE);
++	}
++
++	/* Make sure grab_lock is seen before we see a stale completion */
++	smp_mb();
++
++	/*
++	 * Now just before cpu_down() enters stop machine, we need to make
++	 * sure all tasks that are in pinned CPU sections are out, and new
++	 * tasks will now grab the lock, keeping them from entering pinned
++	 * CPU sections.
++	 */
++	if (!kthread_should_stop()) {
++		preempt_disable();
++		wait_for_pinned_cpus(hp);
++		preempt_enable();
++		complete(&hp->synced);
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	while (!kthread_should_stop()) {
++		schedule();
+ 		set_current_state(TASK_UNINTERRUPTIBLE);
+ 	}
+ 	set_current_state(TASK_RUNNING);
+-	preempt_enable();
+-	complete(&hp->synced);
++
++	/*
++	 * Force this thread off this CPU as it's going down and
++	 * we don't want any more work on this CPU.
++	 */
++	current->flags &= ~PF_THREAD_BOUND;
++	do_set_cpus_allowed(current, cpu_present_mask);
++	migrate_me();
+ 	return 0;
+ }
+ 
++static void __cpu_unplug_sync(struct hotplug_pcp *hp)
++{
++	wake_up_process(hp->sync_tsk);
++	wait_for_completion(&hp->synced);
++}
++
+ /*
+  * Start the sync_unplug_thread on the target cpu and wait for it to
+  * complete.
+@@ -159,23 +252,83 @@ static int sync_unplug_thread(void *data
+ static int cpu_unplug_begin(unsigned int cpu)
+ {
+ 	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+-	struct task_struct *tsk;
++	int err;
++
++	/* Protected by cpu_hotplug.lock */
++	if (!hp->mutex_init) {
++#ifdef CONFIG_PREEMPT_RT_FULL
++		spin_lock_init(&hp->lock);
++#else
++		mutex_init(&hp->mutex);
++#endif
++		hp->mutex_init = 1;
++	}
++
++	/* Inform the scheduler to migrate tasks off this CPU */
++	tell_sched_cpu_down_begin(cpu);
+ 
+ 	init_completion(&hp->synced);
+-	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+-	if (IS_ERR(tsk))
+-		return (PTR_ERR(tsk));
+-	kthread_bind(tsk, cpu);
+-	wake_up_process(tsk);
+-	wait_for_completion(&hp->synced);
++
++	hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
++	if (IS_ERR(hp->sync_tsk)) {
++		err = PTR_ERR(hp->sync_tsk);
++		hp->sync_tsk = NULL;
++		return err;
++	}
++	kthread_bind(hp->sync_tsk, cpu);
++
++	/*
++	 * Wait for tasks to get out of the pinned sections,
++	 * it's still OK if new tasks enter. Some CPU notifiers will
++	 * wait for tasks that are going to enter these sections and
++	 * we must not have them block.
++	 */
++	__cpu_unplug_sync(hp);
++
+ 	return 0;
+ }
+ 
++static void cpu_unplug_sync(unsigned int cpu)
++{
++	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
++
++	init_completion(&hp->synced);
++	/* The completion needs to be initialized before setting grab_lock */
++	smp_wmb();
++
++	/* Grab the mutex before setting grab_lock */
++	hotplug_lock(hp);
++	hp->grab_lock = 1;
++
++	/*
++	 * The CPU notifiers have been completed.
++	 * Wait for tasks to get out of pinned CPU sections and have new
++	 * tasks block until the CPU is completely down.
++	 */
++	__cpu_unplug_sync(hp);
++
++	/* All done with the sync thread */
++	kthread_stop(hp->sync_tsk);
++	hp->sync_tsk = NULL;
++}
++
+ static void cpu_unplug_done(unsigned int cpu)
+ {
+ 	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+ 
+ 	hp->unplug = NULL;
++	/* Let all tasks know cpu unplug is finished before cleaning up */
++	smp_wmb();
++
++	if (hp->sync_tsk)
++		kthread_stop(hp->sync_tsk);
++
++	if (hp->grab_lock) {
++		hotplug_unlock(hp);
++		/* protected by cpu_hotplug.lock */
++		hp->grab_lock = 0;
++	}
++	tell_sched_cpu_down_done(cpu);
+ }
+ 
+ void get_online_cpus(void)
+@@ -183,9 +336,9 @@ void get_online_cpus(void)
+ 	might_sleep();
+ 	if (cpu_hotplug.active_writer == current)
+ 		return;
+-	hotplug_lock();
++	mutex_lock(&cpu_hotplug.lock);
+ 	cpu_hotplug.refcount++;
+-	hotplug_unlock();
++	mutex_unlock(&cpu_hotplug.lock);
+ 
+ }
+ EXPORT_SYMBOL_GPL(get_online_cpus);
+@@ -195,14 +348,13 @@ void put_online_cpus(void)
+ 	if (cpu_hotplug.active_writer == current)
+ 		return;
+ 
+-	hotplug_lock();
++	mutex_lock(&cpu_hotplug.lock);
+ 	if (WARN_ON(!cpu_hotplug.refcount))
+ 		cpu_hotplug.refcount++; /* try to fix things up */
+ 
+ 	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
+ 		wake_up_process(cpu_hotplug.active_writer);
+-	hotplug_unlock();
+-
++	mutex_unlock(&cpu_hotplug.lock);
+ }
+ EXPORT_SYMBOL_GPL(put_online_cpus);
+ 
+@@ -233,11 +385,11 @@ static void cpu_hotplug_begin(void)
+ 	cpu_hotplug.active_writer = current;
+ 
+ 	for (;;) {
+-		hotplug_lock();
++		mutex_lock(&cpu_hotplug.lock);
+ 		if (likely(!cpu_hotplug.refcount))
+ 			break;
+ 		__set_current_state(TASK_UNINTERRUPTIBLE);
+-		hotplug_unlock();
++		mutex_unlock(&cpu_hotplug.lock);
+ 		schedule();
+ 	}
+ }
+@@ -245,7 +397,7 @@ static void cpu_hotplug_begin(void)
+ static void cpu_hotplug_done(void)
+ {
+ 	cpu_hotplug.active_writer = NULL;
+-	hotplug_unlock();
++	mutex_unlock(&cpu_hotplug.lock);
+ }
+ 
+ #else /* #if CONFIG_HOTPLUG_CPU */
+@@ -421,6 +573,9 @@ static int __ref _cpu_down(unsigned int
+ 	}
+ 	smpboot_park_threads(cpu);
+ 
++	/* Notifiers are done. Don't let any more tasks pin this CPU. */
++	cpu_unplug_sync(cpu);
++
+ 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ 	if (err) {
+ 		/* CPU didn't die: tell everyone.  Can't complain. */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2896,7 +2896,7 @@ void migrate_disable(void)
+ {
+ 	struct task_struct *p = current;
+ 
+-	if (in_atomic() || p->flags & PF_THREAD_BOUND) {
++	if (in_atomic()) {
+ #ifdef CONFIG_SCHED_DEBUG
+ 		p->migrate_disable_atomic++;
+ #endif
+@@ -2927,7 +2927,7 @@ void migrate_enable(void)
+ 	unsigned long flags;
+ 	struct rq *rq;
+ 
+-	if (in_atomic() || p->flags & PF_THREAD_BOUND) {
++	if (in_atomic()) {
+ #ifdef CONFIG_SCHED_DEBUG
+ 		p->migrate_disable_atomic--;
+ #endif
+@@ -4872,6 +4872,84 @@ void do_set_cpus_allowed(struct task_str
+ 	cpumask_copy(&p->cpus_allowed, new_mask);
+ }
+ 
++static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
++static DEFINE_MUTEX(sched_down_mutex);
++static cpumask_t sched_down_cpumask;
++
++void tell_sched_cpu_down_begin(int cpu)
++{
++	mutex_lock(&sched_down_mutex);
++	cpumask_set_cpu(cpu, &sched_down_cpumask);
++	mutex_unlock(&sched_down_mutex);
++}
++
++void tell_sched_cpu_down_done(int cpu)
++{
++	mutex_lock(&sched_down_mutex);
++	cpumask_clear_cpu(cpu, &sched_down_cpumask);
++	mutex_unlock(&sched_down_mutex);
++}
++
++/**
++ * migrate_me - try to move the current task off this cpu
++ *
++ * Used by the pin_current_cpu() code to try to get tasks
++ * to move off the current CPU as it is going down.
++ * It will only move the task if the task isn't pinned to
++ * the CPU (with migrate_disable, affinity or THREAD_BOUND)
++ * and the task has to be in a RUNNING state. Otherwise the
++ * movement of the task will wake it up (change its state
++ * to running) when the task did not expect it.
++ *
++ * Returns 1 if it succeeded in moving the current task
++ *         0 otherwise.
++ */
++int migrate_me(void)
++{
++	struct task_struct *p = current;
++	struct migration_arg arg;
++	struct cpumask *cpumask;
++	struct cpumask *mask;
++	unsigned long flags;
++	unsigned int dest_cpu;
++	struct rq *rq;
++
++	/*
++	 * We can not migrate tasks bounded to a CPU or tasks not
++	 * running. The movement of the task will wake it up.
++	 */
++	if (p->flags & PF_THREAD_BOUND || p->state)
++		return 0;
++
++	mutex_lock(&sched_down_mutex);
++	rq = task_rq_lock(p, &flags);
++
++	cpumask = &__get_cpu_var(sched_cpumasks);
++	mask = &p->cpus_allowed;
++
++	cpumask_andnot(cpumask, mask, &sched_down_cpumask);
++
++	if (!cpumask_weight(cpumask)) {
++		/* It's only on this CPU? */
++		task_rq_unlock(rq, p, &flags);
++		mutex_unlock(&sched_down_mutex);
++		return 0;
++	}
++
++	dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
++
++	arg.task = p;
++	arg.dest_cpu = dest_cpu;
++
++	task_rq_unlock(rq, p, &flags);
++
++	stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
++	tlb_migrate_finish(p->mm);
++	mutex_unlock(&sched_down_mutex);
++
++	return 1;
++}
++
+ /*
+  * This is how migration works:
+  *
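Condensed, the CPU-down flow on RT established by this patch is roughly the following (a sketch of the ordering, not literal kernel code):

	cpu_unplug_begin(cpu);	/* start sync_unplug thread, wait for currently
				 * pinned tasks, new pinners still allowed     */
	/* ... CPU_DOWN_PREPARE notifiers run here ... */
	smpboot_park_threads(cpu);
	cpu_unplug_sync(cpu);	/* set grab_lock: new pinners now block, then
				 * wait until the pinned sections are empty    */
	__stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
	/* ... */
	cpu_unplug_done(cpu);	/* release blocked tasks, stop the sync thread */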
diff --git a/patches/cpu-rt-variants.patch b/patches/cpu-rt-variants.patch
new file mode 100644
index 0000000..a344eda
--- /dev/null
+++ b/patches/cpu-rt-variants.patch
@@ -0,0 +1,26 @@
+Subject: cpu-rt-variants.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 Jun 2011 15:42:38 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/smp.h |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -218,6 +218,14 @@ static inline void kick_all_cpus_sync(vo
+ #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
+ #define put_cpu()		preempt_enable()
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define get_cpu_light()	get_cpu()
++# define put_cpu_light()	put_cpu()
++#else
++# define get_cpu_light()	({ migrate_disable(); smp_processor_id(); })
++# define put_cpu_light()	migrate_enable()
++#endif
++
+ /*
+  * Callback to arch code if there's nosmp or maxcpus=0 on the
+  * boot command line:
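A typical conversion enabled by these helpers looks like the following generic sketch (do_something_with() and some_var are placeholders, not symbols from this queue):

	int cpu;

	cpu = get_cpu_light();	/* RT: only disables migration, the section
				 * stays preemptible; !RT: plain get_cpu() */
	do_something_with(per_cpu(some_var, cpu));
	put_cpu_light();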
diff --git a/patches/cpumask-disable-offstack-on-rt.patch b/patches/cpumask-disable-offstack-on-rt.patch
new file mode 100644
index 0000000..c8728a6
--- /dev/null
+++ b/patches/cpumask-disable-offstack-on-rt.patch
@@ -0,0 +1,34 @@
+Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 14 Dec 2011 01:03:49 +0100
+
+We can't deal with the cpumask allocations which happen in atomic
+context (see arch/x86/kernel/apic/io_apic.c) on RT right now.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/Kconfig |    2 +-
+ lib/Kconfig      |    1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -775,7 +775,7 @@ config IOMMU_HELPER
+ config MAXSMP
+ 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
+ 	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+-	select CPUMASK_OFFSTACK
++	select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
+ 	---help---
+ 	  Enable maximum number of CPUS and NUMA Nodes for this architecture.
+ 	  If unsure, say N.
+--- a/lib/Kconfig
++++ b/lib/Kconfig
+@@ -315,6 +315,7 @@ config CHECK_SIGNATURE
+ 
+ config CPUMASK_OFFSTACK
+ 	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
++	depends on !PREEMPT_RT_FULL
+ 	help
+ 	  Use dynamic allocation for cpumask_var_t, instead of putting
+ 	  them on the stack.  This is a bit more expensive, but avoids
diff --git a/patches/debugobjects-rt.patch b/patches/debugobjects-rt.patch
new file mode 100644
index 0000000..3e58f6f
--- /dev/null
+++ b/patches/debugobjects-rt.patch
@@ -0,0 +1,23 @@
+Subject: debugobjects-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:41:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/debugobjects.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -309,7 +309,10 @@ __debug_object_init(void *addr, struct d
+ 	struct debug_obj *obj;
+ 	unsigned long flags;
+ 
+-	fill_pool();
++#ifdef CONFIG_PREEMPT_RT_FULL
++	if (preempt_count() == 0 && !irqs_disabled())
++#endif
++		fill_pool();
+ 
+ 	db = get_bucket((unsigned long) addr);
+ 
diff --git a/patches/dm-make-rt-aware.patch b/patches/dm-make-rt-aware.patch
new file mode 100644
index 0000000..e71d485
--- /dev/null
+++ b/patches/dm-make-rt-aware.patch
@@ -0,0 +1,34 @@
+Subject: dm: Make rt aware
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 14 Nov 2011 23:06:09 +0100
+
+Use the BUG_ON_NONRT variant for the irqs_disabled() checks. RT has
+interrupts legitimately enabled here as we can't deadlock against the
+irq thread due to the "sleeping spinlocks" conversion.
+
+Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/md/dm.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1677,14 +1677,14 @@ static void dm_request_fn(struct request
+ 		if (map_request(ti, clone, md))
+ 			goto requeued;
+ 
+-		BUG_ON(!irqs_disabled());
++		BUG_ON_NONRT(!irqs_disabled());
+ 		spin_lock(q->queue_lock);
+ 	}
+ 
+ 	goto out;
+ 
+ requeued:
+-	BUG_ON(!irqs_disabled());
++	BUG_ON_NONRT(!irqs_disabled());
+ 	spin_lock(q->queue_lock);
+ 
+ delay_and_out:
diff --git a/patches/drivers-net-8139-disable-irq-nosync.patch b/patches/drivers-net-8139-disable-irq-nosync.patch
new file mode 100644
index 0000000..32dc2b1
--- /dev/null
+++ b/patches/drivers-net-8139-disable-irq-nosync.patch
@@ -0,0 +1,25 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:24 -0500
+Subject: drivers/net: Use disable_irq_nosync() in 8139too
+
+Use disable_irq_nosync() instead of disable_irq() as this might be
+called in atomic context with netpoll.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/net/ethernet/realtek/8139too.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/realtek/8139too.c
++++ b/drivers/net/ethernet/realtek/8139too.c
+@@ -2216,7 +2216,7 @@ static void rtl8139_poll_controller(stru
+ 	struct rtl8139_private *tp = netdev_priv(dev);
+ 	const int irq = tp->pci_dev->irq;
+ 
+-	disable_irq(irq);
++	disable_irq_nosync(irq);
+ 	rtl8139_interrupt(irq, dev);
+ 	enable_irq(irq);
+ }
diff --git a/patches/drivers-net-fix-livelock-issues.patch b/patches/drivers-net-fix-livelock-issues.patch
new file mode 100644
index 0000000..0d9fc5c
--- /dev/null
+++ b/patches/drivers-net-fix-livelock-issues.patch
@@ -0,0 +1,126 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 20 Jun 2009 11:36:54 +0200
+Subject: drivers/net: fix livelock issues
+
+Preempt-RT runs into a livelock issue with the NETDEV_TX_LOCKED
+micro-optimization. The reason is that the softirq thread is rescheduling
+itself on that return value. Depending on priorities, it starts to
+monopolize the CPU and livelock on UP systems.
+
+Remove it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/net/ethernet/atheros/atl1c/atl1c_main.c      |    6 +-----
+ drivers/net/ethernet/atheros/atl1e/atl1e_main.c      |    3 +--
+ drivers/net/ethernet/chelsio/cxgb/sge.c              |    3 +--
+ drivers/net/ethernet/neterion/s2io.c                 |    7 +------
+ drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c |    6 ++----
+ drivers/net/ethernet/tehuti/tehuti.c                 |    9 ++-------
+ drivers/net/rionet.c                                 |    6 +-----
+ 7 files changed, 9 insertions(+), 31 deletions(-)
+
+--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
++++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+@@ -2171,11 +2171,7 @@ static netdev_tx_t atl1c_xmit_frame(stru
+ 	}
+ 
+ 	tpd_req = atl1c_cal_tpd_req(skb);
+-	if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
+-		if (netif_msg_pktdata(adapter))
+-			dev_info(&adapter->pdev->dev, "tx locked\n");
+-		return NETDEV_TX_LOCKED;
+-	}
++	spin_lock_irqsave(&adapter->tx_lock, flags);
+ 
+ 	if (atl1c_tpd_avail(adapter, type) < tpd_req) {
+ 		/* no enough descriptor, just stop queue */
+--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
++++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+@@ -1803,8 +1803,7 @@ static netdev_tx_t atl1e_xmit_frame(stru
+ 		return NETDEV_TX_OK;
+ 	}
+ 	tpd_req = atl1e_cal_tdp_req(skb);
+-	if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
+-		return NETDEV_TX_LOCKED;
++	spin_lock_irqsave(&adapter->tx_lock, flags);
+ 
+ 	if (atl1e_tpd_avail(adapter) < tpd_req) {
+ 		/* no enough descriptor, just stop queue */
+--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
++++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
+@@ -1666,8 +1666,7 @@ static int t1_sge_tx(struct sk_buff *skb
+ 	struct cmdQ *q = &sge->cmdQ[qid];
+ 	unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
+ 
+-	if (!spin_trylock(&q->lock))
+-		return NETDEV_TX_LOCKED;
++	spin_lock(&q->lock);
+ 
+ 	reclaim_completed_tx(sge, q);
+ 
+--- a/drivers/net/ethernet/neterion/s2io.c
++++ b/drivers/net/ethernet/neterion/s2io.c
+@@ -4088,12 +4088,7 @@ static netdev_tx_t s2io_xmit(struct sk_b
+ 			[skb->priority & (MAX_TX_FIFOS - 1)];
+ 	fifo = &mac_control->fifos[queue];
+ 
+-	if (do_spin_lock)
+-		spin_lock_irqsave(&fifo->tx_lock, flags);
+-	else {
+-		if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
+-			return NETDEV_TX_LOCKED;
+-	}
++	spin_lock_irqsave(&fifo->tx_lock, flags);
+ 
+ 	if (sp->config.multiq) {
+ 		if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
+--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
++++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+@@ -2114,10 +2114,8 @@ static int pch_gbe_xmit_frame(struct sk_
+ 	struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
+ 	unsigned long flags;
+ 
+-	if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
+-		/* Collision - tell upper layer to requeue */
+-		return NETDEV_TX_LOCKED;
+-	}
++	spin_lock_irqsave(&tx_ring->tx_lock, flags);
++
+ 	if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
+ 		netif_stop_queue(netdev);
+ 		spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+--- a/drivers/net/ethernet/tehuti/tehuti.c
++++ b/drivers/net/ethernet/tehuti/tehuti.c
+@@ -1630,13 +1630,8 @@ static netdev_tx_t bdx_tx_transmit(struc
+ 	unsigned long flags;
+ 
+ 	ENTER;
+-	local_irq_save(flags);
+-	if (!spin_trylock(&priv->tx_lock)) {
+-		local_irq_restore(flags);
+-		DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
+-		    BDX_DRV_NAME, ndev->name);
+-		return NETDEV_TX_LOCKED;
+-	}
++
++	spin_lock_irqsave(&priv->tx_lock, flags);
+ 
+ 	/* build tx descriptor */
+ 	BDX_ASSERT(f->m.wptr >= f->m.memsz);	/* started with valid wptr */
+--- a/drivers/net/rionet.c
++++ b/drivers/net/rionet.c
+@@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_b
+ 	unsigned long flags;
+ 	int add_num = 1;
+ 
+-	local_irq_save(flags);
+-	if (!spin_trylock(&rnet->tx_lock)) {
+-		local_irq_restore(flags);
+-		return NETDEV_TX_LOCKED;
+-	}
++	spin_lock_irqsave(&rnet->tx_lock, flags);
+ 
+ 	if (is_multicast_ether_addr(eth->h_dest))
+ 		add_num = nets[rnet->mport->id].nact;
diff --git a/patches/drivers-net-gianfar-make-rt-aware.patch b/patches/drivers-net-gianfar-make-rt-aware.patch
new file mode 100644
index 0000000..5303089
--- /dev/null
+++ b/patches/drivers-net-gianfar-make-rt-aware.patch
@@ -0,0 +1,55 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 1 Apr 2010 20:20:57 +0200
+Subject: drivers: net: gianfar: Make RT aware
+
+adjust_link() disables interrupts before taking the queue
+locks. On RT those locks are converted to "sleeping" locks and
+therefore the local_irq_save/restore must be converted to
+local_irq_save/restore_nort.
+
+Reported-by: Xianghua Xiao <xiaoxianghua@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Xianghua Xiao <xiaoxianghua@gmail.com>
+
+---
+ drivers/net/ethernet/freescale/gianfar.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/gianfar.c
++++ b/drivers/net/ethernet/freescale/gianfar.c
+@@ -1663,7 +1663,7 @@ void stop_gfar(struct net_device *dev)
+ 
+ 
+ 	/* Lock it down */
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	lock_tx_qs(priv);
+ 	lock_rx_qs(priv);
+ 
+@@ -1671,7 +1671,7 @@ void stop_gfar(struct net_device *dev)
+ 
+ 	unlock_rx_qs(priv);
+ 	unlock_tx_qs(priv);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 
+ 	/* Free the IRQs */
+ 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+@@ -2951,7 +2951,7 @@ static void adjust_link(struct net_devic
+ 	struct phy_device *phydev = priv->phydev;
+ 	int new_state = 0;
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	lock_tx_qs(priv);
+ 
+ 	if (phydev->link) {
+@@ -3020,7 +3020,7 @@ static void adjust_link(struct net_devic
+ 	if (new_state && netif_msg_link(priv))
+ 		phy_print_status(phydev);
+ 	unlock_tx_qs(priv);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ }
+ 
+ /* Update the hash table based on the current list of multicast
diff --git a/patches/drivers-net-tulip-add-missing-pci-disable.patch b/patches/drivers-net-tulip-add-missing-pci-disable.patch
new file mode 100644
index 0000000..78beb2b
--- /dev/null
+++ b/patches/drivers-net-tulip-add-missing-pci-disable.patch
@@ -0,0 +1,23 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:18 -0500
+Subject: drivers/net: tulip_remove_one needs to call pci_disable_device()
+
+Otherwise the device is not completely shut down.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/net/ethernet/dec/tulip/tulip_core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
++++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
+@@ -1943,6 +1943,7 @@ static void tulip_remove_one(struct pci_
+ 	pci_iounmap(pdev, tp->base_addr);
+ 	free_netdev (dev);
+ 	pci_release_regions (pdev);
++	pci_disable_device (pdev);
+ 	pci_set_drvdata (pdev, NULL);
+ 
+ 	/* pci_power_off (pdev, -1); */
diff --git a/patches/drivers-net-vortex-fix-locking-issues.patch b/patches/drivers-net-vortex-fix-locking-issues.patch
new file mode 100644
index 0000000..4dd3898
--- /dev/null
+++ b/patches/drivers-net-vortex-fix-locking-issues.patch
@@ -0,0 +1,48 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Fri, 3 Jul 2009 08:30:00 -0500
+Subject: drivers/net: vortex fix locking issues
+
+Argh, cut and paste wasn't enough...
+
+Use this patch instead.  It needs an irq disable.  But, believe it or not,
+on SMP this is actually better.  If the irq is shared (as it is in Mark's
+case), we don't stop the irq of other devices from being handled on
+another CPU (unfortunately for Mark, he pinned all interrupts to one CPU).
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+ drivers/net/ethernet/3com/3c59x.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+--- a/drivers/net/ethernet/3com/3c59x.c
++++ b/drivers/net/ethernet/3com/3c59x.c
+@@ -843,9 +843,9 @@ static void poll_vortex(struct net_devic
+ {
+ 	struct vortex_private *vp = netdev_priv(dev);
+ 	unsigned long flags;
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	(vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ }
+ #endif
+ 
+@@ -1919,12 +1919,12 @@ static void vortex_tx_timeout(struct net
+ 			 * Block interrupts because vortex_interrupt does a bare spin_lock()
+ 			 */
+ 			unsigned long flags;
+-			local_irq_save(flags);
++			local_irq_save_nort(flags);
+ 			if (vp->full_bus_master_tx)
+ 				boomerang_interrupt(dev->irq, dev);
+ 			else
+ 				vortex_interrupt(dev->irq, dev);
+-			local_irq_restore(flags);
++			local_irq_restore_nort(flags);
+ 		}
+ 	}
+ 
diff --git a/patches/drivers-random-reduce-preempt-disabled-region.patch b/patches/drivers-random-reduce-preempt-disabled-region.patch
new file mode 100644
index 0000000..7b85cec
--- /dev/null
+++ b/patches/drivers-random-reduce-preempt-disabled-region.patch
@@ -0,0 +1,39 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:30 -0500
+Subject: drivers: random: Reduce preempt disabled region
+
+No need to keep preemption disabled across the whole function.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/char/random.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -676,9 +676,12 @@ static void add_timer_randomness(struct
+ 	preempt_disable();
+ 	/* if over the trickle threshold, use only 1 in 4096 samples */
+ 	if (input_pool.entropy_count > trickle_thresh &&
+-	    ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff))
+-		goto out;
++	    ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff)) {
++		preempt_enable();
++		return;
++	}
+ 
++	preempt_enable();
+ 	sample.jiffies = jiffies;
+ 	sample.cycles = get_cycles();
+ 	sample.num = num;
+@@ -719,8 +722,6 @@ static void add_timer_randomness(struct
+ 		credit_entropy_bits(&input_pool,
+ 				    min_t(int, fls(delta>>1), 11));
+ 	}
+-out:
+-	preempt_enable();
+ }
+ 
+ void add_input_randomness(unsigned int type, unsigned int code,
diff --git a/patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch b/patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch
new file mode 100644
index 0000000..b8c297e
--- /dev/null
+++ b/patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch
@@ -0,0 +1,28 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:01 -0500
+Subject: serial: 8250: Call flush_to_ldisc when the irq is threaded
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+---
+ drivers/tty/tty_buffer.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/tty/tty_buffer.c
++++ b/drivers/tty/tty_buffer.c
+@@ -566,10 +566,15 @@ void tty_flip_buffer_push(struct tty_str
+ 		buf->tail->commit = buf->tail->used;
+ 	spin_unlock_irqrestore(&buf->lock, flags);
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ 	if (tty->low_latency)
+ 		flush_to_ldisc(&buf->work);
+ 	else
+ 		schedule_work(&buf->work);
++#else
++	flush_to_ldisc(&buf->work);
++#endif
++
+ }
+ EXPORT_SYMBOL(tty_flip_buffer_push);
+ 
diff --git a/patches/drivers-serial-cleanup-locking-for-rt.patch b/patches/drivers-serial-cleanup-locking-for-rt.patch
new file mode 100644
index 0000000..0ad518e
--- /dev/null
+++ b/patches/drivers-serial-cleanup-locking-for-rt.patch
@@ -0,0 +1,42 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:01 -0500
+Subject: serial: 8250: Clean up the locking for -rt
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/tty/serial/8250/8250.c |   15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+--- a/drivers/tty/serial/8250/8250.c
++++ b/drivers/tty/serial/8250/8250.c
+@@ -2900,14 +2900,10 @@ serial8250_console_write(struct console
+ 
+ 	touch_nmi_watchdog();
+ 
+-	local_irq_save(flags);
+-	if (port->sysrq) {
+-		/* serial8250_handle_irq() already took the lock */
+-		locked = 0;
+-	} else if (oops_in_progress) {
+-		locked = spin_trylock(&port->lock);
+-	} else
+-		spin_lock(&port->lock);
++	if (port->sysrq || oops_in_progress)
++		locked = spin_trylock_irqsave(&port->lock, flags);
++	else
++		spin_lock_irqsave(&port->lock, flags);
+ 
+ 	/*
+ 	 *	First save the IER then disable the interrupts
+@@ -2939,8 +2935,7 @@ serial8250_console_write(struct console
+ 		serial8250_modem_status(up);
+ 
+ 	if (locked)
+-		spin_unlock(&port->lock);
+-	local_irq_restore(flags);
++		spin_unlock_irqrestore(&port->lock, flags);
+ }
+ 
+ static int __init serial8250_console_setup(struct console *co, char *options)
diff --git a/patches/drivers-tty-fix-omap-lock-crap.patch b/patches/drivers-tty-fix-omap-lock-crap.patch
new file mode 100644
index 0000000..03b070d
--- /dev/null
+++ b/patches/drivers-tty-fix-omap-lock-crap.patch
@@ -0,0 +1,38 @@
+Subject: drivers-tty-fix-omap-lock-crap.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 28 Jul 2011 13:32:57 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/tty/serial/omap-serial.c |   12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/drivers/tty/serial/omap-serial.c
++++ b/drivers/tty/serial/omap-serial.c
+@@ -1166,13 +1166,10 @@ serial_omap_console_write(struct console
+ 
+ 	pm_runtime_get_sync(up->dev);
+ 
+-	local_irq_save(flags);
+-	if (up->port.sysrq)
+-		locked = 0;
+-	else if (oops_in_progress)
+-		locked = spin_trylock(&up->port.lock);
++	if (up->port.sysrq || oops_in_progress)
++		locked = spin_trylock_irqsave(&up->port.lock, flags);
+ 	else
+-		spin_lock(&up->port.lock);
++		spin_lock_irqsave(&up->port.lock, flags);
+ 
+ 	/*
+ 	 * First save the IER then disable the interrupts
+@@ -1201,8 +1198,7 @@ serial_omap_console_write(struct console
+ 	pm_runtime_mark_last_busy(up->dev);
+ 	pm_runtime_put_autosuspend(up->dev);
+ 	if (locked)
+-		spin_unlock(&up->port.lock);
+-	local_irq_restore(flags);
++		spin_unlock_irqrestore(&up->port.lock, flags);
+ }
+ 
+ static int __init
diff --git a/patches/drivers-tty-pl011-irq-disable-madness.patch b/patches/drivers-tty-pl011-irq-disable-madness.patch
new file mode 100644
index 0000000..0e648b0
--- /dev/null
+++ b/patches/drivers-tty-pl011-irq-disable-madness.patch
@@ -0,0 +1,44 @@
+Subject: drivers-tty-pl011-irq-disable-madness.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 08 Jan 2013 21:36:51 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/tty/serial/amba-pl011.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/tty/serial/amba-pl011.c
++++ b/drivers/tty/serial/amba-pl011.c
+@@ -1779,13 +1779,19 @@ pl011_console_write(struct console *co,
+ 
+ 	clk_enable(uap->clk);
+ 
+-	local_irq_save(flags);
++	/*
++	 * local_irq_save(flags);
++	 *
++	 * This local_irq_save() is nonsense. If we come in via sysrq
++	 * handling then interrupts are already disabled. Aside of
++	 * that the port.sysrq check is racy on SMP regardless.
++	*/
+ 	if (uap->port.sysrq)
+ 		locked = 0;
+ 	else if (oops_in_progress)
+-		locked = spin_trylock(&uap->port.lock);
++		locked = spin_trylock_irqsave(&uap->port.lock, flags);
+ 	else
+-		spin_lock(&uap->port.lock);
++		spin_lock_irqsave(&uap->port.lock, flags);
+ 
+ 	/*
+ 	 *	First save the CR then disable the interrupts
+@@ -1807,8 +1813,7 @@ pl011_console_write(struct console *co,
+ 	writew(old_cr, uap->port.membase + UART011_CR);
+ 
+ 	if (locked)
+-		spin_unlock(&uap->port.lock);
+-	local_irq_restore(flags);
++		spin_unlock_irqrestore(&uap->port.lock, flags);
+ 
+ 	clk_disable(uap->clk);
+ }
diff --git a/patches/early-printk-consolidate.patch b/patches/early-printk-consolidate.patch
new file mode 100644
index 0000000..29d5d9e
--- /dev/null
+++ b/patches/early-printk-consolidate.patch
@@ -0,0 +1,485 @@
+Subject: early-printk-consolidate.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 23 Jul 2011 11:04:08 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/kernel/early_printk.c        |   17 +++--------------
+ arch/blackfin/kernel/early_printk.c   |    2 --
+ arch/microblaze/kernel/early_printk.c |   26 ++++----------------------
+ arch/mips/kernel/early_printk.c       |   11 +++++------
+ arch/powerpc/kernel/udbg.c            |    6 ++----
+ arch/sh/kernel/sh_bios.c              |    2 --
+ arch/sparc/kernel/setup_32.c          |    1 +
+ arch/sparc/kernel/setup_64.c          |    8 +++++++-
+ arch/tile/kernel/early_printk.c       |   27 +++++----------------------
+ arch/um/kernel/early_printk.c         |    8 +++++---
+ arch/unicore32/kernel/early_printk.c  |   12 ++++--------
+ arch/x86/kernel/early_printk.c        |   21 ++-------------------
+ include/linux/console.h               |    1 +
+ include/linux/printk.h                |    6 ++++++
+ kernel/printk.c                       |   30 +++++++++++++++++++++++-------
+ 15 files changed, 68 insertions(+), 110 deletions(-)
+
+--- a/arch/arm/kernel/early_printk.c
++++ b/arch/arm/kernel/early_printk.c
+@@ -29,28 +29,17 @@ static void early_console_write(struct c
+ 	early_write(s, n);
+ }
+ 
+-static struct console early_console = {
++static struct console early_console_dev = {
+ 	.name =		"earlycon",
+ 	.write =	early_console_write,
+ 	.flags =	CON_PRINTBUFFER | CON_BOOT,
+ 	.index =	-1,
+ };
+ 
+-asmlinkage void early_printk(const char *fmt, ...)
+-{
+-	char buf[512];
+-	int n;
+-	va_list ap;
+-
+-	va_start(ap, fmt);
+-	n = vscnprintf(buf, sizeof(buf), fmt, ap);
+-	early_write(buf, n);
+-	va_end(ap);
+-}
+-
+ static int __init setup_early_printk(char *buf)
+ {
+-	register_console(&early_console);
++	early_console = &early_console_dev;
++	register_console(&early_console_dev);
+ 	return 0;
+ }
+ 
+--- a/arch/blackfin/kernel/early_printk.c
++++ b/arch/blackfin/kernel/early_printk.c
+@@ -25,8 +25,6 @@ extern struct console *bfin_earlyserial_
+ extern struct console *bfin_jc_early_init(void);
+ #endif
+ 
+-static struct console *early_console;
+-
+ /* Default console */
+ #define DEFAULT_PORT 0
+ #define DEFAULT_CFLAG CS8|B57600
+--- a/arch/microblaze/kernel/early_printk.c
++++ b/arch/microblaze/kernel/early_printk.c
+@@ -21,7 +21,6 @@
+ #include <asm/setup.h>
+ #include <asm/prom.h>
+ 
+-static u32 early_console_initialized;
+ static u32 base_addr;
+ 
+ #ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
+@@ -109,27 +108,11 @@ static struct console early_serial_uart1
+ };
+ #endif /* CONFIG_SERIAL_8250_CONSOLE */
+ 
+-static struct console *early_console;
+-
+-void early_printk(const char *fmt, ...)
+-{
+-	char buf[512];
+-	int n;
+-	va_list ap;
+-
+-	if (early_console_initialized) {
+-		va_start(ap, fmt);
+-		n = vscnprintf(buf, 512, fmt, ap);
+-		early_console->write(early_console, buf, n);
+-		va_end(ap);
+-	}
+-}
+-
+ int __init setup_early_printk(char *opt)
+ {
+ 	int version = 0;
+ 
+-	if (early_console_initialized)
++	if (early_console)
+ 		return 1;
+ 
+ 	base_addr = of_early_console(&version);
+@@ -159,7 +142,6 @@ int __init setup_early_printk(char *opt)
+ 		}
+ 
+ 		register_console(early_console);
+-		early_console_initialized = 1;
+ 		return 0;
+ 	}
+ 	return 1;
+@@ -169,7 +151,7 @@ int __init setup_early_printk(char *opt)
+  * only for early console because of performance degression */
+ void __init remap_early_printk(void)
+ {
+-	if (!early_console_initialized || !early_console)
++	if (!early_console)
+ 		return;
+ 	printk(KERN_INFO "early_printk_console remapping from 0x%x to ",
+ 								base_addr);
+@@ -195,9 +177,9 @@ void __init remap_early_printk(void)
+ 
+ void __init disable_early_printk(void)
+ {
+-	if (!early_console_initialized || !early_console)
++	if (!early_console)
+ 		return;
+ 	printk(KERN_WARNING "disabling early console\n");
+ 	unregister_console(early_console);
+-	early_console_initialized = 0;
++	early_console = NULL;
+ }
+--- a/arch/mips/kernel/early_printk.c
++++ b/arch/mips/kernel/early_printk.c
+@@ -8,6 +8,7 @@
+  *   written by Ralf Baechle (ralf@linux-mips.org)
+  */
+ #include <linux/console.h>
++#include <linux/printk.h>
+ #include <linux/init.h>
+ 
+ #include <asm/setup.h>
+@@ -25,20 +26,18 @@ early_console_write(struct console *con,
+ 	}
+ }
+ 
+-static struct console early_console __initdata = {
++static struct console early_console_prom = {
+ 	.name	= "early",
+ 	.write	= early_console_write,
+ 	.flags	= CON_PRINTBUFFER | CON_BOOT,
+ 	.index	= -1
+ };
+ 
+-static int early_console_initialized __initdata;
+-
+ void __init setup_early_printk(void)
+ {
+-	if (early_console_initialized)
++	if (early_console)
+ 		return;
+-	early_console_initialized = 1;
++	early_console = &early_console_prom;
+ 
+-	register_console(&early_console);
++	register_console(&early_console_prom);
+ }
+--- a/arch/powerpc/kernel/udbg.c
++++ b/arch/powerpc/kernel/udbg.c
+@@ -156,15 +156,13 @@ static struct console udbg_console = {
+ 	.index	= 0,
+ };
+ 
+-static int early_console_initialized;
+-
+ /*
+  * Called by setup_system after ppc_md->probe and ppc_md->early_init.
+  * Call it again after setting udbg_putc in ppc_md->setup_arch.
+  */
+ void __init register_early_udbg_console(void)
+ {
+-	if (early_console_initialized)
++	if (early_console)
+ 		return;
+ 
+ 	if (!udbg_putc)
+@@ -174,7 +172,7 @@ void __init register_early_udbg_console(
+ 		printk(KERN_INFO "early console immortal !\n");
+ 		udbg_console.flags &= ~CON_BOOT;
+ 	}
+-	early_console_initialized = 1;
++	early_console = &udbg_console;
+ 	register_console(&udbg_console);
+ }
+ 
+--- a/arch/sh/kernel/sh_bios.c
++++ b/arch/sh/kernel/sh_bios.c
+@@ -144,8 +144,6 @@ static struct console bios_console = {
+ 	.index		= -1,
+ };
+ 
+-static struct console *early_console;
+-
+ static int __init setup_early_printk(char *buf)
+ {
+ 	int keep_early = 0;
+--- a/arch/sparc/kernel/setup_32.c
++++ b/arch/sparc/kernel/setup_32.c
+@@ -309,6 +309,7 @@ void __init setup_arch(char **cmdline_p)
+ 
+ 	boot_flags_init(*cmdline_p);
+ 
++	early_console = &prom_early_console;
+ 	register_console(&prom_early_console);
+ 
+ 	printk("ARCH: ");
+--- a/arch/sparc/kernel/setup_64.c
++++ b/arch/sparc/kernel/setup_64.c
+@@ -551,6 +551,12 @@ static void __init init_sparc64_elf_hwca
+ 		pause_patch();
+ }
+ 
++static inline void register_prom_console(void)
++{
++	early_console = &prom_early_console;
++	register_console(&prom_early_console);
++}
++
+ void __init setup_arch(char **cmdline_p)
+ {
+ 	/* Initialize PROM console and command line. */
+@@ -562,7 +568,7 @@ void __init setup_arch(char **cmdline_p)
+ #ifdef CONFIG_EARLYFB
+ 	if (btext_find_display())
+ #endif
+-		register_console(&prom_early_console);
++		register_prom_console();
+ 
+ 	if (tlb_type == hypervisor)
+ 		printk("ARCH: SUN4V\n");
+--- a/arch/tile/kernel/early_printk.c
++++ b/arch/tile/kernel/early_printk.c
+@@ -17,6 +17,7 @@
+ #include <linux/init.h>
+ #include <linux/string.h>
+ #include <linux/irqflags.h>
++#include <linux/printk.h>
+ #include <asm/setup.h>
+ #include <hv/hypervisor.h>
+ 
+@@ -33,25 +34,8 @@ static struct console early_hv_console =
+ };
+ 
+ /* Direct interface for emergencies */
+-static struct console *early_console = &early_hv_console;
+-static int early_console_initialized;
+ static int early_console_complete;
+ 
+-static void early_vprintk(const char *fmt, va_list ap)
+-{
+-	char buf[512];
+-	int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+-	early_console->write(early_console, buf, n);
+-}
+-
+-void early_printk(const char *fmt, ...)
+-{
+-	va_list ap;
+-	va_start(ap, fmt);
+-	early_vprintk(fmt, ap);
+-	va_end(ap);
+-}
+-
+ void early_panic(const char *fmt, ...)
+ {
+ 	va_list ap;
+@@ -69,14 +53,13 @@ static int __initdata keep_early;
+ 
+ static int __init setup_early_printk(char *str)
+ {
+-	if (early_console_initialized)
++	if (early_console)
+ 		return 1;
+ 
+ 	if (str != NULL && strncmp(str, "keep", 4) == 0)
+ 		keep_early = 1;
+ 
+ 	early_console = &early_hv_console;
+-	early_console_initialized = 1;
+ 	register_console(early_console);
+ 
+ 	return 0;
+@@ -85,12 +68,12 @@ static int __init setup_early_printk(cha
+ void __init disable_early_printk(void)
+ {
+ 	early_console_complete = 1;
+-	if (!early_console_initialized || !early_console)
++	if (!early_console)
+ 		return;
+ 	if (!keep_early) {
+ 		early_printk("disabling early console\n");
+ 		unregister_console(early_console);
+-		early_console_initialized = 0;
++		early_console = NULL;
+ 	} else {
+ 		early_printk("keeping early console\n");
+ 	}
+@@ -98,7 +81,7 @@ void __init disable_early_printk(void)
+ 
+ void warn_early_printk(void)
+ {
+-	if (early_console_complete || early_console_initialized)
++	if (early_console_complete || early_console)
+ 		return;
+ 	early_printk("\
+ Machine shutting down before console output is fully initialized.\n\
+--- a/arch/um/kernel/early_printk.c
++++ b/arch/um/kernel/early_printk.c
+@@ -16,7 +16,7 @@ static void early_console_write(struct c
+ 	um_early_printk(s, n);
+ }
+ 
+-static struct console early_console = {
++static struct console early_console_dev = {
+ 	.name = "earlycon",
+ 	.write = early_console_write,
+ 	.flags = CON_BOOT,
+@@ -25,8 +25,10 @@ static struct console early_console = {
+ 
+ static int __init setup_early_printk(char *buf)
+ {
+-	register_console(&early_console);
+-
++	if (!early_console) {
++		early_console = &early_console_dev;
++		register_console(&early_console_dev);
++	}
+ 	return 0;
+ }
+ 
+--- a/arch/unicore32/kernel/early_printk.c
++++ b/arch/unicore32/kernel/early_printk.c
+@@ -33,21 +33,17 @@ static struct console early_ocd_console
+ 	.index =	-1,
+ };
+ 
+-/* Direct interface for emergencies */
+-static struct console *early_console = &early_ocd_console;
+-
+-static int __initdata keep_early;
+-
+ static int __init setup_early_printk(char *buf)
+ {
+-	if (!buf)
++	int keep_early;
++
++	if (!buf || early_console)
+ 		return 0;
+ 
+ 	if (strstr(buf, "keep"))
+ 		keep_early = 1;
+ 
+-	if (!strncmp(buf, "ocd", 3))
+-		early_console = &early_ocd_console;
++	early_console = &early_ocd_console;
+ 
+ 	if (keep_early)
+ 		early_console->flags &= ~CON_BOOT;
+--- a/arch/x86/kernel/early_printk.c
++++ b/arch/x86/kernel/early_printk.c
+@@ -169,25 +169,9 @@ static struct console early_serial_conso
+ 	.index =	-1,
+ };
+ 
+-/* Direct interface for emergencies */
+-static struct console *early_console = &early_vga_console;
+-static int __initdata early_console_initialized;
+-
+-asmlinkage void early_printk(const char *fmt, ...)
+-{
+-	char buf[512];
+-	int n;
+-	va_list ap;
+-
+-	va_start(ap, fmt);
+-	n = vscnprintf(buf, sizeof(buf), fmt, ap);
+-	early_console->write(early_console, buf, n);
+-	va_end(ap);
+-}
+-
+ static inline void early_console_register(struct console *con, int keep_early)
+ {
+-	if (early_console->index != -1) {
++	if (con->index != -1) {
+ 		printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+ 		       con->name);
+ 		return;
+@@ -207,9 +191,8 @@ static int __init setup_early_printk(cha
+ 	if (!buf)
+ 		return 0;
+ 
+-	if (early_console_initialized)
++	if (early_console)
+ 		return 0;
+-	early_console_initialized = 1;
+ 
+ 	keep = (strstr(buf, "keep") != NULL);
+ 
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -141,6 +141,7 @@ struct console {
+ 	for (con = console_drivers; con != NULL; con = con->next)
+ 
+ extern int console_set_on_cmdline;
++extern struct console *early_console;
+ 
+ extern int add_preferred_console(char *name, int idx, char *options);
+ extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
+--- a/include/linux/printk.h
++++ b/include/linux/printk.h
+@@ -95,8 +95,14 @@ int no_printk(const char *fmt, ...)
+ 	return 0;
+ }
+ 
++#ifdef CONFIG_EARLY_PRINTK
+ extern asmlinkage __printf(1, 2)
+ void early_printk(const char *fmt, ...);
++void early_vprintk(const char *fmt, va_list ap);
++#else
++static inline __printf(1, 2) __cold
++void early_printk(const char *s, ...) { }
++#endif
+ 
+ extern int printk_needs_cpu(int cpu);
+ extern void printk_tick(void);
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -48,13 +48,6 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/printk.h>
+ 
+-/*
+- * Architectures can override it:
+- */
+-void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
+-{
+-}
+-
+ /* printk's without a loglevel use this.. */
+ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+ 
+@@ -756,6 +749,29 @@ module_param(ignore_loglevel, bool, S_IR
+ MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
+ 	"print all kernel messages to the console.");
+ 
++#ifdef CONFIG_EARLY_PRINTK
++struct console *early_console;
++
++void early_vprintk(const char *fmt, va_list ap)
++{
++	if (early_console) {
++		char buf[512];
++		int n = vscnprintf(buf, sizeof(buf), fmt, ap);
++
++		early_console->write(early_console, buf, n);
++	}
++}
++
++asmlinkage void early_printk(const char *fmt, ...)
++{
++	va_list ap;
++
++	va_start(ap, fmt);
++	early_vprintk(fmt, ap);
++	va_end(ap);
++}
++#endif
++
+ #ifdef CONFIG_BOOT_PRINTK_DELAY
+ 
+ static int boot_delay; /* msecs delay after each printk during bootup */
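With the consolidation above, the per-arch early_printk() copies go away; kernel/printk.c owns the buffering in early_vprintk(), and an architecture only publishes its boot console through the shared early_console pointer. A hedged sketch of what that now looks like for a hypothetical architecture (my_early_write() is an assumed arch output routine):

    static struct console my_early_console = {
            .name   = "earlycon",
            .write  = my_early_write,
            .flags  = CON_PRINTBUFFER | CON_BOOT,
            .index  = -1,
    };

    static int __init setup_early_printk(char *buf)
    {
            if (early_console)      /* someone already registered one */
                    return 0;

            early_console = &my_early_console;
            register_console(&my_early_console);
            return 0;
    }
    early_param("earlyprintk", setup_early_printk);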
diff --git a/patches/epoll-use-get-cpu-light.patch b/patches/epoll-use-get-cpu-light.patch
new file mode 100644
index 0000000..e419acc
--- /dev/null
+++ b/patches/epoll-use-get-cpu-light.patch
@@ -0,0 +1,26 @@
+Subject: epoll.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 08 Jul 2011 16:35:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/eventpoll.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -497,12 +497,12 @@ static int ep_poll_wakeup_proc(void *pri
+  */
+ static void ep_poll_safewake(wait_queue_head_t *wq)
+ {
+-	int this_cpu = get_cpu();
++	int this_cpu = get_cpu_light();
+ 
+ 	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ 		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
+ 
+-	put_cpu();
++	put_cpu_light();
+ }
+ 
+ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
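get_cpu_light()/put_cpu_light() come from elsewhere in this queue; the idea is that the caller only needs a stable CPU number, not a preempt-disabled section, so on RT the task is merely pinned to its current CPU while staying preemptible. A sketch of the intent (see the patch that introduces these helpers for the real definitions):

    /* !RT: behaves like get_cpu()/put_cpu()
     * RT:  keeps the task on this CPU (migrate-disable style) but preemptible
     */
    int this_cpu = get_cpu_light();

    /* ... use this_cpu ... */

    put_cpu_light();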
diff --git a/patches/filemap-fix-up.patch b/patches/filemap-fix-up.patch
new file mode 100644
index 0000000..26f78d7
--- /dev/null
+++ b/patches/filemap-fix-up.patch
@@ -0,0 +1,22 @@
+Subject: filemap-fix-up.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 Jun 2011 18:56:24 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Wrecked-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-m6yuzd6ul717hlnl2gj6p3ou@git.kernel.org
+---
+ mm/filemap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1955,7 +1955,7 @@ size_t iov_iter_copy_from_user_atomic(st
+ 	char *kaddr;
+ 	size_t copied;
+ 
+-	BUG_ON(!in_atomic());
++	BUG_ON(!pagefault_disabled());
+ 	kaddr = kmap_atomic(page);
+ 	if (likely(i->nr_segs == 1)) {
+ 		int left;
diff --git a/patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch b/patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch
new file mode 100644
index 0000000..f4c3003
--- /dev/null
+++ b/patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch
@@ -0,0 +1,34 @@
+Subject: FIX [1/2] slub: Do not dereference NULL pointer in node_match
+From: Christoph Lameter <cl@linux.com>
+Date: Wed, 23 Jan 2013 21:45:47 +0000
+
+The variables accessed in slab_alloc are volatile and therefore
+the page pointer passed to node_match can be NULL. The processing
+of data in slab_alloc is tentative until either the cmpxchg
+succeeds or the __slab_alloc slowpath is invoked. Both are
+able to perform the same allocation from the freelist.
+
+Check for the NULL pointer in node_match.
+
+A false positive will lead to a retry of the loop in __slab_alloc.
+
+Signed-off-by: Christoph Lameter <cl@linux.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/slub.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2041,7 +2041,7 @@ static void flush_all(struct kmem_cache
+ static inline int node_match(struct page *page, int node)
+ {
+ #ifdef CONFIG_NUMA
+-	if (node != NUMA_NO_NODE && page_to_nid(page) != node)
++	if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
+ 		return 0;
+ #endif
+ 	return 1;
diff --git a/patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch b/patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch
new file mode 100644
index 0000000..0915b2a
--- /dev/null
+++ b/patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch
@@ -0,0 +1,65 @@
+Subject: FIX [2/2] slub: Tid must be retrieved from the percpu area of the current processor
+From: Christoph Lameter <cl@linux.com>
+Date: Wed, 23 Jan 2013 21:45:48 +0000
+
+As Steven Rostedt has pointed out: Rescheduling could occur on a different processor
+after the determination of the per cpu pointer and before the tid is retrieved.
+This could result in allocation from the wrong node in slab_alloc.
+
+The effect is much more severe in slab_free() where we could free to the freelist
+of the wrong page.
+
+The window for something like that occurring is pretty small but it is possible.
+
+Signed-off-by: Christoph Lameter <cl@linux.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/slub.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2331,13 +2331,13 @@ static __always_inline void *slab_alloc_
+ 
+ 	s = memcg_kmem_get_cache(s, gfpflags);
+ redo:
+-
+ 	/*
+-	 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+-	 * enabled. We may switch back and forth between cpus while
+-	 * reading from one cpu area. That does not matter as long
+-	 * as we end up on the original cpu again when doing the cmpxchg.
++	 * Preemption is disabled for the retrieval of the tid because that
++	 * must occur from the current processor. We cannot allow rescheduling
++	 * on a different processor between the determination of the pointer
++	 * and the retrieval of the tid.
+ 	 */
++	preempt_disable();
+ 	c = __this_cpu_ptr(s->cpu_slab);
+ 
+ 	/*
+@@ -2347,7 +2347,7 @@ redo:
+ 	 * linked list in between.
+ 	 */
+ 	tid = c->tid;
+-	barrier();
++	preempt_enable();
+ 
+ 	object = c->freelist;
+ 	page = c->page;
+@@ -2594,10 +2594,11 @@ redo:
+ 	 * data is retrieved via this pointer. If we are on the same cpu
+ 	 * during the cmpxchg then the free will succedd.
+ 	 */
++	preempt_disable();
+ 	c = __this_cpu_ptr(s->cpu_slab);
+ 
+ 	tid = c->tid;
+-	barrier();
++	preempt_enable();
+ 
+ 	if (likely(page == c->page)) {
+ 		set_freepointer(s, object, c->freelist);
diff --git a/patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch b/patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch
new file mode 100644
index 0000000..1ed928e
--- /dev/null
+++ b/patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch
@@ -0,0 +1,34 @@
+Subject: printk: Fix rq->lock vs logbuf_lock unlock lock inversion
+From: "Bu, Yitian" <ybu@qti.qualcomm.com>
+Date: Mon, 18 Feb 2013 12:53:37 +0000
+
+commit 07354eb1a74d1 ("locking printk: Annotate logbuf_lock as raw")
+reintroduced a lock inversion problem which was fixed in commit
+0b5e1c5255 ("printk: Release console_sem after logbuf_lock"). This
+probably happened when fixing up patch rejects.
+
+Restore the ordering and unlock logbuf_lock before releasing
+console_sem.
+
+Signed-off-by: ybu <ybu@qti.qualcomm.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/E807E903FE6CBE4D95E420FBFCC273B827413C@nasanexd01h.na.qualcomm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/printk.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -1358,9 +1358,9 @@ static int console_trylock_for_printk(un
+ 		}
+ 	}
+ 	logbuf_cpu = UINT_MAX;
++	raw_spin_unlock(&logbuf_lock);
+ 	if (wake)
+ 		up(&console_sem);
+-	raw_spin_unlock(&logbuf_lock);
+ 	return retval;
+ }
+ 
diff --git a/patches/fix-rt-int3-x86_32-3.2-rt.patch b/patches/fix-rt-int3-x86_32-3.2-rt.patch
new file mode 100644
index 0000000..8f450e9
--- /dev/null
+++ b/patches/fix-rt-int3-x86_32-3.2-rt.patch
@@ -0,0 +1,112 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Subject: x86: Do not disable preemption in int3 on 32bit
+
+Preemption must be disabled before enabling interrupts in do_trap
+on x86_64 because the stack in use for int3 and debug is a per CPU
+stack set by the IST. But 32bit does not have an IST and the stack
+still belongs to the current task and there is no problem in scheduling
+out the task.
+
+Keep preemption enabled on X86_32 when enabling interrupts for
+do_trap().
+
+The name of the function is changed from preempt_conditional_sti/cli()
+to conditional_sti/cli_ist(), to annotate that this function is used
+when the stack is on the IST.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/kernel/traps.c |   32 +++++++++++++++++++++++---------
+ 1 file changed, 23 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -85,9 +85,21 @@ static inline void conditional_sti(struc
+ 		local_irq_enable();
+ }
+ 
+-static inline void preempt_conditional_sti(struct pt_regs *regs)
++static inline void conditional_sti_ist(struct pt_regs *regs)
+ {
++#ifdef CONFIG_X86_64
++	/*
++	 * X86_64 uses a per CPU stack on the IST for certain traps
++	 * like int3. The task can not be preempted when using one
++	 * of these stacks, thus preemption must be disabled, otherwise
++	 * the stack can be corrupted if the task is scheduled out,
++	 * and another task comes in and uses this stack.
++	 *
++	 * On x86_32 the task keeps its own stack and it is OK if the
++	 * task schedules out.
++	 */
+ 	inc_preempt_count();
++#endif
+ 	if (regs->flags & X86_EFLAGS_IF)
+ 		local_irq_enable();
+ }
+@@ -98,11 +110,13 @@ static inline void conditional_cli(struc
+ 		local_irq_disable();
+ }
+ 
+-static inline void preempt_conditional_cli(struct pt_regs *regs)
++static inline void conditional_cli_ist(struct pt_regs *regs)
+ {
+ 	if (regs->flags & X86_EFLAGS_IF)
+ 		local_irq_disable();
++#ifdef CONFIG_X86_64
+ 	dec_preempt_count();
++#endif
+ }
+ 
+ static int __kprobes
+@@ -229,9 +243,9 @@ dotraplinkage void do_stack_segment(stru
+ 	exception_enter(regs);
+ 	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
+ 		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+-		preempt_conditional_sti(regs);
++		conditional_sti_ist(regs);
+ 		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+-		preempt_conditional_cli(regs);
++		conditional_cli_ist(regs);
+ 	}
+ 	exception_exit(regs);
+ }
+@@ -331,9 +345,9 @@ dotraplinkage void __kprobes notrace do_
+ 	 * as we may switch to the interrupt stack.
+ 	 */
+ 	debug_stack_usage_inc();
+-	preempt_conditional_sti(regs);
++	conditional_sti_ist(regs);
+ 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
+-	preempt_conditional_cli(regs);
++	conditional_cli_ist(regs);
+ 	debug_stack_usage_dec();
+ exit:
+ 	exception_exit(regs);
+@@ -438,12 +452,12 @@ dotraplinkage void __kprobes do_debug(st
+ 	debug_stack_usage_inc();
+ 
+ 	/* It's safe to allow irq's after DR6 has been saved */
+-	preempt_conditional_sti(regs);
++	conditional_sti_ist(regs);
+ 
+ 	if (regs->flags & X86_VM_MASK) {
+ 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
+ 					X86_TRAP_DB);
+-		preempt_conditional_cli(regs);
++		conditional_cli_ist(regs);
+ 		debug_stack_usage_dec();
+ 		goto exit;
+ 	}
+@@ -463,7 +477,7 @@ dotraplinkage void __kprobes do_debug(st
+ 	si_code = get_si_code(tsk->thread.debugreg6);
+ 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
+ 		send_sigtrap(tsk, regs, error_code, si_code);
+-	preempt_conditional_cli(regs);
++	conditional_cli_ist(regs);
+ 	debug_stack_usage_dec();
+ 
+ exit:
diff --git a/patches/fs-block-rt-support.patch b/patches/fs-block-rt-support.patch
new file mode 100644
index 0000000..9246db6
--- /dev/null
+++ b/patches/fs-block-rt-support.patch
@@ -0,0 +1,40 @@
+Subject: fs-block-rt-support.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 14 Jun 2011 17:05:09 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ block/blk-core.c |    2 +-
+ fs/file.c        |    4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -241,7 +241,7 @@ EXPORT_SYMBOL(blk_delay_queue);
+  **/
+ void blk_start_queue(struct request_queue *q)
+ {
+-	WARN_ON(!irqs_disabled());
++	WARN_ON_NONRT(!irqs_disabled());
+ 
+ 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+ 	__blk_run_queue(q);
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -98,14 +98,14 @@ static void free_fdtable_rcu(struct rcu_
+ 		kfree(fdt->open_fds);
+ 		kfree(fdt);
+ 	} else {
+-		fddef = &get_cpu_var(fdtable_defer_list);
++		fddef = &per_cpu(fdtable_defer_list, get_cpu_light());
+ 		spin_lock(&fddef->lock);
+ 		fdt->next = fddef->next;
+ 		fddef->next = fdt;
+ 		/* vmallocs are handled from the workqueue context */
+ 		schedule_work(&fddef->wq);
+ 		spin_unlock(&fddef->lock);
+-		put_cpu_var(fdtable_defer_list);
++		put_cpu_light();
+ 	}
+ }
+ 
diff --git a/patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch b/patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch
new file mode 100644
index 0000000..d6228dc
--- /dev/null
+++ b/patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch
@@ -0,0 +1,94 @@
+Subject: fs: dcache: Use cpu_chill() in trylock loops
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 07 Mar 2012 21:00:34 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ fs/autofs4/autofs_i.h |    1 +
+ fs/autofs4/expire.c   |    2 +-
+ fs/dcache.c           |    7 ++++---
+ fs/namespace.c        |    3 ++-
+ 4 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/autofs4/autofs_i.h
++++ b/fs/autofs4/autofs_i.h
+@@ -34,6 +34,7 @@
+ #include <linux/sched.h>
+ #include <linux/mount.h>
+ #include <linux/namei.h>
++#include <linux/delay.h>
+ #include <asm/current.h>
+ #include <asm/uaccess.h>
+ 
+--- a/fs/autofs4/expire.c
++++ b/fs/autofs4/expire.c
+@@ -166,7 +166,7 @@ again:
+ 			parent = p->d_parent;
+ 			if (!spin_trylock(&parent->d_lock)) {
+ 				spin_unlock(&p->d_lock);
+-				cpu_relax();
++				cpu_chill();
+ 				goto relock;
+ 			}
+ 			spin_unlock(&p->d_lock);
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -37,6 +37,7 @@
+ #include <linux/rculist_bl.h>
+ #include <linux/prefetch.h>
+ #include <linux/ratelimit.h>
++#include <linux/delay.h>
+ #include "internal.h"
+ #include "mount.h"
+ 
+@@ -470,7 +471,7 @@ static inline struct dentry *dentry_kill
+ 	if (inode && !spin_trylock(&inode->i_lock)) {
+ relock:
+ 		spin_unlock(&dentry->d_lock);
+-		cpu_relax();
++		cpu_chill();
+ 		return dentry; /* try again with same dentry */
+ 	}
+ 	if (IS_ROOT(dentry))
+@@ -852,7 +853,7 @@ relock:
+ 
+ 		if (!spin_trylock(&dentry->d_lock)) {
+ 			spin_unlock(&dcache_lru_lock);
+-			cpu_relax();
++			cpu_chill();
+ 			goto relock;
+ 		}
+ 
+@@ -2084,7 +2085,7 @@ again:
+ 	if (dentry->d_count == 1) {
+ 		if (!spin_trylock(&inode->i_lock)) {
+ 			spin_unlock(&dentry->d_lock);
+-			cpu_relax();
++			cpu_chill();
+ 			goto again;
+ 		}
+ 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -22,6 +22,7 @@
+ #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
+ #include <linux/uaccess.h>
+ #include <linux/proc_fs.h>
++#include <linux/delay.h>
+ #include "pnode.h"
+ #include "internal.h"
+ 
+@@ -315,7 +316,7 @@ int __mnt_want_write(struct vfsmount *m)
+ 	smp_mb();
+ 	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ 		preempt_enable();
+-		cpu_relax();
++		cpu_chill();
+ 		preempt_disable();
+ 	}
+ 	/*
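All four hunks follow the same pattern: a trylock-with-retry loop that busy-waits with cpu_relax() can spin forever on RT when the lock holder has been preempted, so the retry path now sleeps briefly via cpu_chill(). Schematically, as a sketch of the pattern (assuming the cpu_chill() helper added elsewhere in this series):

    again:
            spin_lock(&dentry->d_lock);
            if (!spin_trylock(&inode->i_lock)) {
                    spin_unlock(&dentry->d_lock);
                    cpu_chill();    /* let the preempted lock holder run */
                    goto again;
            }
            /* both locks held, proceed ... */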
diff --git a/patches/fs-jbd-pull-plug-when-waiting-for-space.patch b/patches/fs-jbd-pull-plug-when-waiting-for-space.patch
new file mode 100644
index 0000000..0dd4c95
--- /dev/null
+++ b/patches/fs-jbd-pull-plug-when-waiting-for-space.patch
@@ -0,0 +1,29 @@
+From: Mike Galbraith <mgalbraith@suse.de>
+Date: Wed, 11 Jul 2012 22:05:20 +0000
+Subject: fs, jbd: pull your plug when waiting for space
+
+With an -rt kernel, and a heavy sync IO load, tasks can jam
+up on journal locks without unplugging, which can lead to
+terminal IO starvation.  Unplug and schedule when waiting for space.
+
+Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Theodore Tso <tytso@mit.edu>
+Link: http://lkml.kernel.org/r/1341812414.7370.73.camel@marge.simpson.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ fs/jbd/checkpoint.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/jbd/checkpoint.c
++++ b/fs/jbd/checkpoint.c
+@@ -129,6 +129,8 @@ void __log_wait_for_space(journal_t *jou
+ 		if (journal->j_flags & JFS_ABORT)
+ 			return;
+ 		spin_unlock(&journal->j_state_lock);
++		if (current->plug)
++			io_schedule();
+ 		mutex_lock(&journal->j_checkpoint_mutex);
+ 
+ 		/*
diff --git a/patches/fs-jbd-replace-bh_state-lock.patch b/patches/fs-jbd-replace-bh_state-lock.patch
new file mode 100644
index 0000000..b7b48ce
--- /dev/null
+++ b/patches/fs-jbd-replace-bh_state-lock.patch
@@ -0,0 +1,100 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Mar 2011 10:11:25 +0100
+Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
+
+bit_spin_locks break under RT.
+
+Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+--
+
+ include/linux/buffer_head.h |   10 ++++++++++
+ include/linux/jbd_common.h  |   24 ++++++++++++++++++++++++
+ 2 files changed, 34 insertions(+)
+
+--- a/include/linux/buffer_head.h
++++ b/include/linux/buffer_head.h
+@@ -74,6 +74,11 @@ struct buffer_head {
+ 	atomic_t b_count;		/* users using this buffer_head */
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ 	spinlock_t b_uptodate_lock;
++#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
++    defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
++	spinlock_t b_state_lock;
++	spinlock_t b_journal_head_lock;
++#endif
+ #endif
+ };
+ 
+@@ -105,6 +110,11 @@ static inline void buffer_head_init_lock
+ {
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ 	spin_lock_init(&bh->b_uptodate_lock);
++#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
++    defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
++	spin_lock_init(&bh->b_state_lock);
++	spin_lock_init(&bh->b_journal_head_lock);
++#endif
+ #endif
+ }
+ 
+--- a/include/linux/jbd_common.h
++++ b/include/linux/jbd_common.h
+@@ -39,32 +39,56 @@ static inline struct journal_head *bh2jh
+ 
+ static inline void jbd_lock_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	bit_spin_lock(BH_State, &bh->b_state);
++#else
++	spin_lock(&bh->b_state_lock);
++#endif
+ }
+ 
+ static inline int jbd_trylock_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	return bit_spin_trylock(BH_State, &bh->b_state);
++#else
++	return spin_trylock(&bh->b_state_lock);
++#endif
+ }
+ 
+ static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	return bit_spin_is_locked(BH_State, &bh->b_state);
++#else
++	return spin_is_locked(&bh->b_state_lock);
++#endif
+ }
+ 
+ static inline void jbd_unlock_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	bit_spin_unlock(BH_State, &bh->b_state);
++#else
++	spin_unlock(&bh->b_state_lock);
++#endif
+ }
+ 
+ static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	bit_spin_lock(BH_JournalHead, &bh->b_state);
++#else
++	spin_lock(&bh->b_journal_head_lock);
++#endif
+ }
+ 
+ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	bit_spin_unlock(BH_JournalHead, &bh->b_state);
++#else
++	spin_unlock(&bh->b_journal_head_lock);
++#endif
+ }
+ 
+ #endif
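Because jbd/jbd2 already funnel these lock operations through the inlines above, no call site changes: callers keep using the wrappers and only the backing lock differs between !RT (bit spinlock in b_state) and RT (real spinlocks in the buffer_head). A caller still reads like this sketch:

    struct journal_head *jh = NULL;

    jbd_lock_bh_state(bh);
    if (buffer_jbd(bh))
            jh = bh2jh(bh);
    jbd_unlock_bh_state(bh);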
diff --git a/patches/fs-namespace-preemption-fix.patch b/patches/fs-namespace-preemption-fix.patch
new file mode 100644
index 0000000..04805b8
--- /dev/null
+++ b/patches/fs-namespace-preemption-fix.patch
@@ -0,0 +1,30 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 19 Jul 2009 08:44:27 -0500
+Subject: fs: namespace preemption fix
+
+On RT we cannot loop with preemption disabled here as
+mnt_make_readonly() might have been preempted. We can safely enable
+preemption while waiting for MNT_WRITE_HOLD to be cleared. Safe on !RT
+as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ fs/namespace.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -313,8 +313,11 @@ int __mnt_want_write(struct vfsmount *m)
+ 	 * incremented count after it has set MNT_WRITE_HOLD.
+ 	 */
+ 	smp_mb();
+-	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
++	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
++		preempt_enable();
+ 		cpu_relax();
++		preempt_disable();
++	}
+ 	/*
+ 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+ 	 * be set to match its requirements. So we must not load that until
diff --git a/patches/fs-ntfs-disable-interrupt-non-rt.patch b/patches/fs-ntfs-disable-interrupt-non-rt.patch
new file mode 100644
index 0000000..f06ad29
--- /dev/null
+++ b/patches/fs-ntfs-disable-interrupt-non-rt.patch
@@ -0,0 +1,59 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Fri, 3 Jul 2009 08:44:12 -0500
+Subject: fs: ntfs: disable interrupt only on !RT
+
+On Sat, 2007-10-27 at 11:44 +0200, Ingo Molnar wrote:
+> * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
+>
+> > > [10138.175796]  [<c0105de3>] show_trace+0x12/0x14
+> > > [10138.180291]  [<c0105dfb>] dump_stack+0x16/0x18
+> > > [10138.184769]  [<c011609f>] native_smp_call_function_mask+0x138/0x13d
+> > > [10138.191117]  [<c0117606>] smp_call_function+0x1e/0x24
+> > > [10138.196210]  [<c012f85c>] on_each_cpu+0x25/0x50
+> > > [10138.200807]  [<c0115c74>] flush_tlb_all+0x1e/0x20
+> > > [10138.205553]  [<c016caaf>] kmap_high+0x1b6/0x417
+> > > [10138.210118]  [<c011ec88>] kmap+0x4d/0x4f
+> > > [10138.214102]  [<c026a9d8>] ntfs_end_buffer_async_read+0x228/0x2f9
+> > > [10138.220163]  [<c01a0e9e>] end_bio_bh_io_sync+0x26/0x3f
+> > > [10138.225352]  [<c01a2b09>] bio_endio+0x42/0x6d
+> > > [10138.229769]  [<c02c2a08>] __end_that_request_first+0x115/0x4ac
+> > > [10138.235682]  [<c02c2da7>] end_that_request_chunk+0x8/0xa
+> > > [10138.241052]  [<c0365943>] ide_end_request+0x55/0x10a
+> > > [10138.246058]  [<c036dae3>] ide_dma_intr+0x6f/0xac
+> > > [10138.250727]  [<c0366d83>] ide_intr+0x93/0x1e0
+> > > [10138.255125]  [<c015afb4>] handle_IRQ_event+0x5c/0xc9
+> >
+> > Looks like ntfs is kmap()ing from interrupt context. Should be using
+> > kmap_atomic instead, I think.
+>
+> it's not atomic interrupt context but irq thread context - and -rt
+> remaps kmap_atomic() to kmap() internally.
+
+Hm.  Looking at the change to mm/bounce.c, perhaps I should do this
+instead?
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ fs/ntfs/aops.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ntfs/aops.c
++++ b/fs/ntfs/aops.c
+@@ -144,13 +144,13 @@ static void ntfs_end_buffer_async_read(s
+ 		recs = PAGE_CACHE_SIZE / rec_size;
+ 		/* Should have been verified before we got here... */
+ 		BUG_ON(!recs);
+-		local_irq_save(flags);
++		local_irq_save_nort(flags);
+ 		kaddr = kmap_atomic(page);
+ 		for (i = 0; i < recs; i++)
+ 			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
+ 					i * rec_size), rec_size);
+ 		kunmap_atomic(kaddr);
+-		local_irq_restore(flags);
++		local_irq_restore_nort(flags);
+ 		flush_dcache_page(page);
+ 		if (likely(page_uptodate && !PageError(page)))
+ 			SetPageUptodate(page);
diff --git a/patches/fs-replace-bh_uptodate_lock-for-rt.patch b/patches/fs-replace-bh_uptodate_lock-for-rt.patch
new file mode 100644
index 0000000..466a0a3
--- /dev/null
+++ b/patches/fs-replace-bh_uptodate_lock-for-rt.patch
@@ -0,0 +1,161 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Mar 2011 09:18:52 +0100
+Subject: buffer_head: Replace bh_uptodate_lock for -rt
+
+Wrap the bit_spin_lock calls into a separate inline and add the RT
+replacements with a real spinlock.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/buffer.c                 |   21 +++++++--------------
+ fs/ntfs/aops.c              |   10 +++-------
+ include/linux/buffer_head.h |   34 ++++++++++++++++++++++++++++++++++
+ 3 files changed, 44 insertions(+), 21 deletions(-)
+
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -280,8 +280,7 @@ static void end_buffer_async_read(struct
+ 	 * decide that the page is now completely done.
+ 	 */
+ 	first = page_buffers(page);
+-	local_irq_save(flags);
+-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
++	flags = bh_uptodate_lock_irqsave(first);
+ 	clear_buffer_async_read(bh);
+ 	unlock_buffer(bh);
+ 	tmp = bh;
+@@ -294,8 +293,7 @@ static void end_buffer_async_read(struct
+ 		}
+ 		tmp = tmp->b_this_page;
+ 	} while (tmp != bh);
+-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+-	local_irq_restore(flags);
++	bh_uptodate_unlock_irqrestore(first, flags);
+ 
+ 	/*
+ 	 * If none of the buffers had errors and they are all
+@@ -307,9 +305,7 @@ static void end_buffer_async_read(struct
+ 	return;
+ 
+ still_busy:
+-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+-	local_irq_restore(flags);
+-	return;
++	bh_uptodate_unlock_irqrestore(first, flags);
+ }
+ 
+ /*
+@@ -343,8 +339,7 @@ void end_buffer_async_write(struct buffe
+ 	}
+ 
+ 	first = page_buffers(page);
+-	local_irq_save(flags);
+-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
++	flags = bh_uptodate_lock_irqsave(first);
+ 
+ 	clear_buffer_async_write(bh);
+ 	unlock_buffer(bh);
+@@ -356,15 +351,12 @@ void end_buffer_async_write(struct buffe
+ 		}
+ 		tmp = tmp->b_this_page;
+ 	}
+-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+-	local_irq_restore(flags);
++	bh_uptodate_unlock_irqrestore(first, flags);
+ 	end_page_writeback(page);
+ 	return;
+ 
+ still_busy:
+-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+-	local_irq_restore(flags);
+-	return;
++	bh_uptodate_unlock_irqrestore(first, flags);
+ }
+ EXPORT_SYMBOL(end_buffer_async_write);
+ 
+@@ -3256,6 +3248,7 @@ struct buffer_head *alloc_buffer_head(gf
+ 	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
+ 	if (ret) {
+ 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
++		buffer_head_init_locks(ret);
+ 		preempt_disable();
+ 		__this_cpu_inc(bh_accounting.nr);
+ 		recalc_bh_state();
+--- a/fs/ntfs/aops.c
++++ b/fs/ntfs/aops.c
+@@ -108,8 +108,7 @@ static void ntfs_end_buffer_async_read(s
+ 				"0x%llx.", (unsigned long long)bh->b_blocknr);
+ 	}
+ 	first = page_buffers(page);
+-	local_irq_save(flags);
+-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
++	flags = bh_uptodate_lock_irqsave(first);
+ 	clear_buffer_async_read(bh);
+ 	unlock_buffer(bh);
+ 	tmp = bh;
+@@ -124,8 +123,7 @@ static void ntfs_end_buffer_async_read(s
+ 		}
+ 		tmp = tmp->b_this_page;
+ 	} while (tmp != bh);
+-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+-	local_irq_restore(flags);
++	bh_uptodate_unlock_irqrestore(first, flags);
+ 	/*
+ 	 * If none of the buffers had errors then we can set the page uptodate,
+ 	 * but we first have to perform the post read mst fixups, if the
+@@ -160,9 +158,7 @@ static void ntfs_end_buffer_async_read(s
+ 	unlock_page(page);
+ 	return;
+ still_busy:
+-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+-	local_irq_restore(flags);
+-	return;
++	bh_uptodate_unlock_irqrestore(first, flags);
+ }
+ 
+ /**
+--- a/include/linux/buffer_head.h
++++ b/include/linux/buffer_head.h
+@@ -72,8 +72,42 @@ struct buffer_head {
+ 	struct address_space *b_assoc_map;	/* mapping this buffer is
+ 						   associated with */
+ 	atomic_t b_count;		/* users using this buffer_head */
++#ifdef CONFIG_PREEMPT_RT_BASE
++	spinlock_t b_uptodate_lock;
++#endif
+ };
+ 
++static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
++{
++	unsigned long flags;
++
++#ifndef CONFIG_PREEMPT_RT_BASE
++	local_irq_save(flags);
++	bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
++#else
++	spin_lock_irqsave(&bh->b_uptodate_lock, flags);
++#endif
++	return flags;
++}
++
++static inline void
++bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
++{
++#ifndef CONFIG_PREEMPT_RT_BASE
++	bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
++	local_irq_restore(flags);
++#else
++	spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
++#endif
++}
++
++static inline void buffer_head_init_locks(struct buffer_head *bh)
++{
++#ifdef CONFIG_PREEMPT_RT_BASE
++	spin_lock_init(&bh->b_uptodate_lock);
++#endif
++}
++
+ /*
+  * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
+  * and buffer_foo() functions.
diff --git a/patches/ftrace-migrate-disable-tracing.patch b/patches/ftrace-migrate-disable-tracing.patch
new file mode 100644
index 0000000..a85247d
--- /dev/null
+++ b/patches/ftrace-migrate-disable-tracing.patch
@@ -0,0 +1,73 @@
+Subject: ftrace-migrate-disable-tracing.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:56:42 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/ftrace_event.h |    3 ++-
+ kernel/trace/trace.c         |    9 ++++++---
+ kernel/trace/trace_events.c  |    1 +
+ kernel/trace/trace_output.c  |    5 +++++
+ 4 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/include/linux/ftrace_event.h
++++ b/include/linux/ftrace_event.h
+@@ -49,7 +49,8 @@ struct trace_entry {
+ 	unsigned char		flags;
+ 	unsigned char		preempt_count;
+ 	int			pid;
+-	int			padding;
++	unsigned short		migrate_disable;
++	unsigned short		padding;
+ };
+ 
+ #define FTRACE_MAX_EVENT						\
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -1177,6 +1177,8 @@ tracing_generic_entry_update(struct trac
+ 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+ 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+ 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
++
++	entry->migrate_disable	= (tsk) ? tsk->migrate_disable & 0xFF : 0;
+ }
+ EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
+ 
+@@ -2034,9 +2036,10 @@ static void print_lat_help_header(struct
+ 	seq_puts(m, "#                | / _----=> need-resched    \n");
+ 	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
+ 	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
+-	seq_puts(m, "#                |||| /     delay             \n");
+-	seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
+-	seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
++	seq_puts(m, "#                |||| / _--=> migrate-disable\n");
++	seq_puts(m, "#                ||||| /     delay           \n");
++	seq_puts(m, "#  cmd     pid   |||||| time  |   caller     \n");
++	seq_puts(m, "#     \\   /      |||||  \\   |   /          \n");
+ }
+ 
+ static void print_event_info(struct trace_array *tr, struct seq_file *m)
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -116,6 +116,7 @@ static int trace_define_common_fields(vo
+ 	__common_field(unsigned char, flags);
+ 	__common_field(unsigned char, preempt_count);
+ 	__common_field(int, pid);
++	__common_field(unsigned short, migrate_disable);
+ 	__common_field(int, padding);
+ 
+ 	return ret;
+--- a/kernel/trace/trace_output.c
++++ b/kernel/trace/trace_output.c
+@@ -593,6 +593,11 @@ int trace_print_lat_fmt(struct trace_seq
+ 	else
+ 		ret = trace_seq_putc(s, '.');
+ 
++	if (entry->migrate_disable)
++		ret = trace_seq_printf(s, "%x", entry->migrate_disable);
++	else
++		ret = trace_seq_putc(s, '.');
++
+ 	return ret;
+ }
+ 
diff --git a/patches/futex-requeue-pi-fix.patch b/patches/futex-requeue-pi-fix.patch
new file mode 100644
index 0000000..9e8e808
--- /dev/null
+++ b/patches/futex-requeue-pi-fix.patch
@@ -0,0 +1,114 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Subject: futex: Fix bug on when a requeued RT task times out
+
+Requeue with timeout causes a bug with PREEMPT_RT_FULL.
+
+The bug comes from a timed out condition.
+
+
+	TASK 1				TASK 2
+	------				------
+    futex_wait_requeue_pi()
+	futex_wait_queue_me()
+	<timed out>
+
+					double_lock_hb();
+
+	raw_spin_lock(pi_lock);
+	if (current->pi_blocked_on) { 
+	} else {
+	    current->pi_blocked_on = PI_WAKE_INPROGRESS;
+	    run_spin_unlock(pi_lock);
+	    spin_lock(hb->lock); <-- blocked!
+
+
+					plist_for_each_entry_safe(this) {
+					    rt_mutex_start_proxy_lock();
+						task_blocks_on_rt_mutex();
+						BUG_ON(task->pi_blocked_on)!!!!
+
+The BUG_ON() actually has a check for PI_WAKE_INPROGRESS, but the
+problem is that, after TASK 1 sets PI_WAKE_INPROGRESS, it then tries to
+grab the hb->lock, which it fails to do. As the hb->lock is a mutex,
+it will block and set "pi_blocked_on" to the hb->lock.
+
+When TASK 2 goes to requeue it, the check for PI_WAKE_INPROGRESS fails
+because task1's pi_blocked_on is no longer set to that, but instead is
+set to the hb->lock.
+
+The fix:
+
+When calling rt_mutex_start_proxy_lock() a check is made to see
+if the proxy task's pi_blocked_on is set. If so, exit out early.
+Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies
+the proxy task that it is being requeued, and will handle things
+appropriately.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+
+---
+ kernel/rtmutex.c        |   32 +++++++++++++++++++++++++++++++-
+ kernel/rtmutex_common.h |    1 +
+ 2 files changed, 32 insertions(+), 1 deletion(-)
+
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -69,7 +69,8 @@ static void fixup_rt_mutex_waiters(struc
+ 
+ static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
+ {
+-	return waiter && waiter != PI_WAKEUP_INPROGRESS;
++	return waiter && waiter != PI_WAKEUP_INPROGRESS &&
++		waiter != PI_REQUEUE_INPROGRESS;
+ }
+ 
+ /*
+@@ -981,6 +982,35 @@ int rt_mutex_start_proxy_lock(struct rt_
+ 		return 1;
+ 	}
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++	/*
++	 * In PREEMPT_RT there's an added race.
++	 * If the task, that we are about to requeue, times out,
++	 * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
++	 * to skip this task. But right after the task sets
++	 * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
++	 * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
++	 * This will replace the PI_WAKEUP_INPROGRESS with the actual
++	 * lock that it blocks on. We *must not* place this task
++	 * on this proxy lock in that case.
++	 *
++	 * To prevent this race, we first take the task's pi_lock
++	 * and check if it has updated its pi_blocked_on. If it has,
++	 * we assume that it woke up and we return -EAGAIN.
++	 * Otherwise, we set the task's pi_blocked_on to
++	 * PI_REQUEUE_INPROGRESS, so that if the task is waking up
++	 * it will know that we are in the process of requeuing it.
++	 */
++	raw_spin_lock_irq(&task->pi_lock);
++	if (task->pi_blocked_on) {
++		raw_spin_unlock_irq(&task->pi_lock);
++		raw_spin_unlock(&lock->wait_lock);
++		return -EAGAIN;
++	}
++	task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
++	raw_spin_unlock_irq(&task->pi_lock);
++#endif
++
+ 	ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
+ 
+ 	if (ret && !rt_mutex_owner(lock)) {
+--- a/kernel/rtmutex_common.h
++++ b/kernel/rtmutex_common.h
+@@ -104,6 +104,7 @@ static inline struct task_struct *rt_mut
+  * PI-futex support (proxy locking functions, etc.):
+  */
+ #define PI_WAKEUP_INPROGRESS	((struct rt_mutex_waiter *) 1)
++#define PI_REQUEUE_INPROGRESS	((struct rt_mutex_waiter *) 2)
+ 
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
diff --git a/patches/generic-cmpxchg-use-raw-local-irq.patch b/patches/generic-cmpxchg-use-raw-local-irq.patch
new file mode 100644
index 0000000..12d82a2
--- /dev/null
+++ b/patches/generic-cmpxchg-use-raw-local-irq.patch
@@ -0,0 +1,47 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:30 -0500
+Subject: generic: Use raw local irq variant for generic cmpxchg
+
+No point in tracing those.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/asm-generic/cmpxchg-local.h |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/include/asm-generic/cmpxchg-local.h
++++ b/include/asm-generic/cmpxchg-local.h
+@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_lo
+ 	if (size == 8 && sizeof(unsigned long) != 8)
+ 		wrong_size_cmpxchg(ptr);
+ 
+-	local_irq_save(flags);
++	raw_local_irq_save(flags);
+ 	switch (size) {
+ 	case 1: prev = *(u8 *)ptr;
+ 		if (prev == old)
+@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_lo
+ 	default:
+ 		wrong_size_cmpxchg(ptr);
+ 	}
+-	local_irq_restore(flags);
++	raw_local_irq_restore(flags);
+ 	return prev;
+ }
+ 
+@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_gene
+ 	u64 prev;
+ 	unsigned long flags;
+ 
+-	local_irq_save(flags);
++	raw_local_irq_save(flags);
+ 	prev = *(u64 *)ptr;
+ 	if (prev == old)
+ 		*(u64 *)ptr = new;
+-	local_irq_restore(flags);
++	raw_local_irq_restore(flags);
+ 	return prev;
+ }
+ 
diff --git a/patches/genirq-add-default-mask-cmdline-option.patch b/patches/genirq-add-default-mask-cmdline-option.patch
new file mode 100644
index 0000000..d452bf2
--- /dev/null
+++ b/patches/genirq-add-default-mask-cmdline-option.patch
@@ -0,0 +1,66 @@
+Subject: genirq: Add default affinity mask command line option
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 25 May 2012 16:59:47 +0200
+
+If we isolate CPUs, then we don't want random device interrupts on
+them. Even w/o the user space irq balancer enabled we can end up with
+irqs on non boot cpus.
+
+Allow restricting the default irq affinity mask.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ Documentation/kernel-parameters.txt |    9 +++++++++
+ kernel/irq/irqdesc.c                |   21 +++++++++++++++++++--
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1182,6 +1182,15 @@ bytes respectively. Such letter suffixes
+ 			See comment before ip2_setup() in
+ 			drivers/char/ip2/ip2base.c.
+ 
++	irqaffinity=	[SMP] Set the default irq affinity mask
++			Format:
++			<cpu number>,...,<cpu number>
++			or
++			<cpu number>-<cpu number>
++			(must be a positive range in ascending order)
++			or a mixture
++			<cpu number>,...,<cpu number>-<cpu number>
++
+ 	irqfixup	[HW]
+ 			When an interrupt is not handled search all handlers
+ 			for it. Intended to get systems with badly broken
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -23,10 +23,27 @@
+ static struct lock_class_key irq_desc_lock_class;
+ 
+ #if defined(CONFIG_SMP)
++static int __init irq_affinity_setup(char *str)
++{
++	zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
++	cpulist_parse(str, irq_default_affinity);
++	/*
++	 * Set at least the boot cpu. We don't want to end up with
++	 * bugreports caused by random comandline masks
++	 */
++	cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
++	return 1;
++}
++__setup("irqaffinity=", irq_affinity_setup);
++
+ static void __init init_irq_default_affinity(void)
+ {
+-	alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+-	cpumask_setall(irq_default_affinity);
++#ifdef CONFIG_CPUMASK_OFFSTACK
++	if (!irq_default_affinity)
++		zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
++#endif
++	if (cpumask_empty(irq_default_affinity))
++		cpumask_setall(irq_default_affinity);
+ }
+ #else
+ static void __init init_irq_default_affinity(void)
diff --git a/patches/genirq-disable-irqpoll-on-rt.patch b/patches/genirq-disable-irqpoll-on-rt.patch
new file mode 100644
index 0000000..b9072ea
--- /dev/null
+++ b/patches/genirq-disable-irqpoll-on-rt.patch
@@ -0,0 +1,39 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:57 -0500
+Subject: genirq: disable irqpoll on -rt
+
+Creates long latencies for no value
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/irq/spurious.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/kernel/irq/spurious.c
++++ b/kernel/irq/spurious.c
+@@ -340,6 +340,11 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir
+ 
+ static int __init irqfixup_setup(char *str)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++	printk(KERN_WARNING "irqfixup boot option not supported "
++		"w/ CONFIG_PREEMPT_RT_BASE\n");
++	return 1;
++#endif
+ 	irqfixup = 1;
+ 	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+ 	printk(KERN_WARNING "This may impact system performance.\n");
+@@ -352,6 +357,11 @@ module_param(irqfixup, int, 0644);
+ 
+ static int __init irqpoll_setup(char *str)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++	printk(KERN_WARNING "irqpoll boot option not supported "
++		"w/ CONFIG_PREEMPT_RT_BASE\n");
++	return 1;
++#endif
+ 	irqfixup = 2;
+ 	printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
+ 				"enabled\n");
diff --git a/patches/genirq-force-threading.patch b/patches/genirq-force-threading.patch
new file mode 100644
index 0000000..72fbf98
--- /dev/null
+++ b/patches/genirq-force-threading.patch
@@ -0,0 +1,46 @@
+Subject: genirq-force-threading.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 03 Apr 2011 11:57:29 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/interrupt.h |    8 ++++++--
+ kernel/irq/manage.c       |    2 ++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -383,9 +383,13 @@ static inline int disable_irq_wake(unsig
+ 
+ 
+ #ifdef CONFIG_IRQ_FORCED_THREADING
+-extern bool force_irqthreads;
++# ifndef CONFIG_PREEMPT_RT_BASE
++   extern bool force_irqthreads;
++# else
++#  define force_irqthreads	(true)
++# endif
+ #else
+-#define force_irqthreads	(0)
++#define force_irqthreads	(false)
+ #endif
+ 
+ #ifndef __ARCH_SET_SOFTIRQ_PENDING
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -21,6 +21,7 @@
+ #include "internals.h"
+ 
+ #ifdef CONFIG_IRQ_FORCED_THREADING
++# ifndef CONFIG_PREEMPT_RT_BASE
+ __read_mostly bool force_irqthreads;
+ 
+ static int __init setup_forced_irqthreads(char *arg)
+@@ -29,6 +30,7 @@ static int __init setup_forced_irqthread
+ 	return 0;
+ }
+ early_param("threadirqs", setup_forced_irqthreads);
++# endif
+ #endif
+ 
+ /**
diff --git a/patches/genirq-nodebug-shirq.patch b/patches/genirq-nodebug-shirq.patch
new file mode 100644
index 0000000..4885fb4
--- /dev/null
+++ b/patches/genirq-nodebug-shirq.patch
@@ -0,0 +1,20 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 18 Mar 2011 10:22:04 +0100
+Subject: genirq: Disable DEBUG_SHIRQ for rt
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/Kconfig.debug |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -164,7 +164,7 @@ config DEBUG_KERNEL
+ 
+ config DEBUG_SHIRQ
+ 	bool "Debug shared IRQ handlers"
+-	depends on DEBUG_KERNEL && GENERIC_HARDIRQS
++	depends on DEBUG_KERNEL && GENERIC_HARDIRQS && !PREEMPT_RT_BASE
+ 	help
+ 	  Enable this to generate a spurious interrupt as soon as a shared
+ 	  interrupt handler is registered, and just before one is deregistered.
diff --git a/patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch b/patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch
new file mode 100644
index 0000000..8e94d15
--- /dev/null
+++ b/patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch
@@ -0,0 +1,35 @@
+Subject: hardirq.h: Define softirq_count() as 0UL to kill build warning
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Thu, 13 Oct 2011 17:19:09 +0800
+
+kernel/lockdep.c: In function ‘print_bad_irq_dependency’:
+kernel/lockdep.c:1476:3: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 7 has type ‘unsigned int’
+kernel/lockdep.c: In function ‘print_usage_bug’:
+kernel/lockdep.c:2193:3: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 7 has type ‘unsigned int’
+
+kernel/lockdep.i shows this:
+ printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+  curr->comm, task_pid_nr(curr),
+  curr->hardirq_context, ((current_thread_info()->preempt_count) & (((1UL << (10))-1) << ((0 + 8) + 8))) >> ((0 + 8) + 8),
+  curr->softirq_context, (0U) >> (0 + 8),
+  curr->hardirqs_enabled,
+  curr->softirqs_enabled);
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/20111013091909.GA32739@zhy
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/hardirq.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/hardirq.h
++++ b/include/linux/hardirq.h
+@@ -85,7 +85,7 @@
+ # define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
+ # define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
+ #else
+-# define softirq_count()	(0U)
++# define softirq_count()	(0UL)
+ extern int in_serving_softirq(void);
+ #endif
+ 
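For illustration only (not from the patch): the class of -Wformat warning
silenced above comes from passing an unsigned int where %lu expects an
unsigned long; the 0UL constant matches the format, 0U does not.

/* Userspace illustration of the format mismatch; compile with
 * gcc -Wall -Wformat. The macro names are hypothetical stand-ins. */
#include <stdio.h>

#define softirq_count_bad()	(0U)	/* unsigned int: %lu would warn */
#define softirq_count_good()	(0UL)	/* unsigned long: matches %lu   */

int main(void)
{
	/* printf("SC[%lu]\n", softirq_count_bad()); -- warns: format '%lu'
	 * expects 'long unsigned int', argument has type 'unsigned int' */
	printf("SC[%lu]\n", softirq_count_good());
	return 0;
}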
diff --git a/patches/hotplug-call-cpu_unplug_begin-a-little-early.patch b/patches/hotplug-call-cpu_unplug_begin-a-little-early.patch
new file mode 100644
index 0000000..27f2ef3
--- /dev/null
+++ b/patches/hotplug-call-cpu_unplug_begin-a-little-early.patch
@@ -0,0 +1,59 @@
+Subject: hotplug: Call cpu_unplug_begin() before DOWN_PREPARE
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Sun, 16 Oct 2011 18:56:44 +0800
+
+cpu_unplug_begin() should be called before CPU_DOWN_PREPARE, because
+at CPU_DOWN_PREPARE cpu_active is cleared and sched_domain is
+rebuilt. Otherwise the 'sync_unplug' thread will be running on the cpu
+on which it's created and not bound to the cpu which is about to go
+down.
+
+I found this via an incorrect smp_processor_id() warning triggered by
+sync_unplug/1; the trace below shows it:
+(echo 1 > /sys/device/system/cpu/cpu1/online)
+  bash-1664  [000]    83.136620: _cpu_down: Bind sync_unplug to cpu 1
+  bash-1664  [000]    83.136623: sched_wait_task: comm=sync_unplug/1 pid=1724 prio=120
+  bash-1664  [000]    83.136624: _cpu_down: Wake sync_unplug
+  bash-1664  [000]    83.136629: sched_wakeup: comm=sync_unplug/1 pid=1724 prio=120 success=1 target_cpu=000
+
+Wants to be folded back....
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/1318762607-2261-3-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c |   16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -387,22 +387,20 @@ static int __ref _cpu_down(unsigned int
+ 		return -EBUSY;
+ 	}
+ 
+-	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
++	cpu_hotplug_begin();
++	err = cpu_unplug_begin(cpu);
+ 	if (err) {
+-		nr_calls--;
+-		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+-		printk("%s: attempt to take down CPU %u failed\n",
+-				__func__, cpu);
++		printk("cpu_unplug_begin(%d) failed\n", cpu);
+ 		goto out_cancel;
+ 	}
+ 
+-	cpu_hotplug_begin();
+-	err = cpu_unplug_begin(cpu);
++	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ 	if (err) {
+ 		nr_calls--;
+ 		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+-		printk("cpu_unplug_begin(%d) failed\n", cpu);
+-		goto out_cancel;
++		printk("%s: attempt to take down CPU %u failed\n",
++				__func__, cpu);
++		goto out_release;
+ 	}
+ 	smpboot_park_threads(cpu);
+ 
diff --git a/patches/hotplug-light-get-online-cpus.patch b/patches/hotplug-light-get-online-cpus.patch
new file mode 100644
index 0000000..5b05139
--- /dev/null
+++ b/patches/hotplug-light-get-online-cpus.patch
@@ -0,0 +1,208 @@
+Subject: hotplug: Lightweight get online cpus
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 15 Jun 2011 12:36:06 +0200
+
+get_online_cpus() is a heavyweight function which involves a global
+mutex. migrate_disable() wants a simpler construct which prevents only
+a CPU from going down while a task is in a migrate disabled section.
+
+Implement a per cpu lockless mechanism, which serializes only in the
+real unplug case on a global mutex. That serialization affects only
+tasks on the cpu which should be brought down.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/cpu.h |    4 +
+ kernel/cpu.c        |  127 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 128 insertions(+), 3 deletions(-)
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys;
+ 
+ extern void get_online_cpus(void);
+ extern void put_online_cpus(void);
++extern void pin_current_cpu(void);
++extern void unpin_current_cpu(void);
+ #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
+ #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
+ #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
+@@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_un
+ 
+ #define get_online_cpus()	do { } while (0)
+ #define put_online_cpus()	do { } while (0)
++static inline void pin_current_cpu(void) { }
++static inline void unpin_current_cpu(void) { }
+ #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
+ /* These aren't inline functions due to a GCC bug. */
+ #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -63,6 +63,102 @@ static struct {
+ 	.refcount = 0,
+ };
+ 
++struct hotplug_pcp {
++	struct task_struct *unplug;
++	int refcount;
++	struct completion synced;
++};
++
++static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
++
++/**
++ * pin_current_cpu - Prevent the current cpu from being unplugged
++ *
++ * Lightweight version of get_online_cpus() to prevent cpu from being
++ * unplugged when code runs in a migration disabled region.
++ *
++ * Must be called with preemption disabled (preempt_count = 1)!
++ */
++void pin_current_cpu(void)
++{
++	struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
++
++retry:
++	if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
++	    hp->unplug == current || (current->flags & PF_STOMPER)) {
++		hp->refcount++;
++		return;
++	}
++	preempt_enable();
++	mutex_lock(&cpu_hotplug.lock);
++	mutex_unlock(&cpu_hotplug.lock);
++	preempt_disable();
++	goto retry;
++}
++
++/**
++ * unpin_current_cpu - Allow unplug of current cpu
++ *
++ * Must be called with preemption or interrupts disabled!
++ */
++void unpin_current_cpu(void)
++{
++	struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
++
++	WARN_ON(hp->refcount <= 0);
++
++	/* This is safe. sync_unplug_thread is pinned to this cpu */
++	if (!--hp->refcount && hp->unplug && hp->unplug != current &&
++	    !(current->flags & PF_STOMPER))
++		wake_up_process(hp->unplug);
++}
++
++/*
++ * FIXME: Is this really correct under all circumstances ?
++ */
++static int sync_unplug_thread(void *data)
++{
++	struct hotplug_pcp *hp = data;
++
++	preempt_disable();
++	hp->unplug = current;
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	while (hp->refcount) {
++		schedule_preempt_disabled();
++		set_current_state(TASK_UNINTERRUPTIBLE);
++	}
++	set_current_state(TASK_RUNNING);
++	preempt_enable();
++	complete(&hp->synced);
++	return 0;
++}
++
++/*
++ * Start the sync_unplug_thread on the target cpu and wait for it to
++ * complete.
++ */
++static int cpu_unplug_begin(unsigned int cpu)
++{
++	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
++	struct task_struct *tsk;
++
++	init_completion(&hp->synced);
++	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
++	if (IS_ERR(tsk))
++		return (PTR_ERR(tsk));
++	kthread_bind(tsk, cpu);
++	wake_up_process(tsk);
++	wait_for_completion(&hp->synced);
++	return 0;
++}
++
++static void cpu_unplug_done(unsigned int cpu)
++{
++	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
++
++	hp->unplug = NULL;
++}
++
+ void get_online_cpus(void)
+ {
+ 	might_sleep();
+@@ -260,13 +356,14 @@ static int __ref take_cpu_down(void *_pa
+ /* Requires cpu_add_remove_lock to be held */
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+-	int err, nr_calls = 0;
++	int mycpu, err, nr_calls = 0;
+ 	void *hcpu = (void *)(long)cpu;
+ 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ 	struct take_cpu_down_param tcd_param = {
+ 		.mod = mod,
+ 		.hcpu = hcpu,
+ 	};
++	cpumask_var_t cpumask;
+ 
+ 	if (num_online_cpus() == 1)
+ 		return -EBUSY;
+@@ -274,7 +371,20 @@ static int __ref _cpu_down(unsigned int
+ 	if (!cpu_online(cpu))
+ 		return -EINVAL;
+ 
+-	cpu_hotplug_begin();
++	/* Move the downtaker off the unplug cpu */
++	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
++		return -ENOMEM;
++	cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
++	set_cpus_allowed_ptr(current, cpumask);
++	free_cpumask_var(cpumask);
++	preempt_disable();
++	mycpu = smp_processor_id();
++	if (mycpu == cpu) {
++		printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
++		preempt_enable();
++		return -EBUSY;
++	}
++	preempt_enable();
+ 
+ 	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ 	if (err) {
+@@ -282,7 +392,16 @@ static int __ref _cpu_down(unsigned int
+ 		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+ 		printk("%s: attempt to take down CPU %u failed\n",
+ 				__func__, cpu);
+-		goto out_release;
++		goto out_cancel;
++	}
++
++	cpu_hotplug_begin();
++	err = cpu_unplug_begin(cpu);
++	if (err) {
++		nr_calls--;
++		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
++		printk("cpu_unplug_begin(%d) failed\n", cpu);
++		goto out_cancel;
+ 	}
+ 	smpboot_park_threads(cpu);
+ 
+@@ -314,6 +433,8 @@ static int __ref _cpu_down(unsigned int
+ 	check_for_tasks(cpu);
+ 
+ out_release:
++	cpu_unplug_done(cpu);
++out_cancel:
+ 	cpu_hotplug_done();
+ 	if (!err)
+ 		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
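A rough userspace analogue of the pinning scheme (pthreads, illustration
only): pinners take a reference, and the unplug side waits until all
current references are dropped. The kernel code above keeps the pinner
fast path lockless and re-checks hp->unplug, which this sketch omits;
the mutex/condvar below merely stand in for the hotplug mutex and the
sync_unplug wakeup.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  drained = PTHREAD_COND_INITIALIZER;
static int refcount;			/* hp->refcount analogue */

static void pin_current_cpu_sketch(void)
{
	pthread_mutex_lock(&lock);
	refcount++;
	pthread_mutex_unlock(&lock);
}

static void unpin_current_cpu_sketch(void)
{
	pthread_mutex_lock(&lock);
	if (--refcount == 0)
		pthread_cond_signal(&drained);	/* wake_up_process(hp->unplug) */
	pthread_mutex_unlock(&lock);
}

static void *pinned_worker(void *arg)
{
	pin_current_cpu_sketch();
	usleep(100 * 1000);		/* migrate-disabled section */
	unpin_current_cpu_sketch();
	return NULL;
}

int main(void)
{
	pthread_t w;

	pthread_create(&w, NULL, pinned_worker, NULL);
	usleep(10 * 1000);		/* let the worker pin itself */

	/* sync_unplug side: wait until nobody is pinned any more */
	pthread_mutex_lock(&lock);
	while (refcount)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
	printf("no pinned tasks left, cpu could go down now\n");

	pthread_join(w, NULL);
	return 0;
}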
diff --git a/patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch b/patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch
new file mode 100644
index 0000000..c224c7c
--- /dev/null
+++ b/patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch
@@ -0,0 +1,24 @@
+Subject: hotplug: sync_unplug: No "\n" in task name
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Sun, 16 Oct 2011 18:56:43 +0800
+
+Otherwise the output will look a little odd.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/1318762607-2261-2-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -143,7 +143,7 @@ static int cpu_unplug_begin(unsigned int
+ 	struct task_struct *tsk;
+ 
+ 	init_completion(&hp->synced);
+-	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
++	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+ 	if (IS_ERR(tsk))
+ 		return (PTR_ERR(tsk));
+ 	kthread_bind(tsk, cpu);
diff --git a/patches/hotplug-use-migrate-disable.patch b/patches/hotplug-use-migrate-disable.patch
new file mode 100644
index 0000000..876d123
--- /dev/null
+++ b/patches/hotplug-use-migrate-disable.patch
@@ -0,0 +1,36 @@
+Subject: hotplug-use-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 19:35:29 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -379,14 +379,13 @@ static int __ref _cpu_down(unsigned int
+ 	cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
+ 	set_cpus_allowed_ptr(current, cpumask);
+ 	free_cpumask_var(cpumask);
+-	preempt_disable();
++	migrate_disable();
+ 	mycpu = smp_processor_id();
+ 	if (mycpu == cpu) {
+ 		printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
+-		preempt_enable();
++		migrate_enable();
+ 		return -EBUSY;
+ 	}
+-	preempt_enable();
+ 
+ 	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ 	if (err) {
+@@ -437,6 +436,7 @@ static int __ref _cpu_down(unsigned int
+ out_release:
+ 	cpu_unplug_done(cpu);
+ out_cancel:
++	migrate_enable();
+ 	cpu_hotplug_done();
+ 	if (!err)
+ 		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
diff --git a/patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch b/patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
new file mode 100644
index 0000000..930553a
--- /dev/null
+++ b/patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
@@ -0,0 +1,461 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:31 -0500
+Subject: hrtimer: fixup hrtimer callback changes for preempt-rt
+
+In preempt-rt we cannot call the callbacks which take sleeping locks
+from the timer interrupt context.
+
+Bring back the softirq split for now, until we have fixed the signal
+delivery problem for real.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+---
+ include/linux/hrtimer.h  |    3 
+ kernel/hrtimer.c         |  220 ++++++++++++++++++++++++++++++++++++++++-------
+ kernel/sched/core.c      |    1 
+ kernel/sched/rt.c        |    1 
+ kernel/time/tick-sched.c |    1 
+ kernel/watchdog.c        |    1 
+ 6 files changed, 198 insertions(+), 29 deletions(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -111,6 +111,8 @@ struct hrtimer {
+ 	enum hrtimer_restart		(*function)(struct hrtimer *);
+ 	struct hrtimer_clock_base	*base;
+ 	unsigned long			state;
++	struct list_head		cb_entry;
++	int				irqsafe;
+ #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ 	ktime_t				praecox;
+ #endif
+@@ -150,6 +152,7 @@ struct hrtimer_clock_base {
+ 	int			index;
+ 	clockid_t		clockid;
+ 	struct timerqueue_head	active;
++	struct list_head	expired;
+ 	ktime_t			resolution;
+ 	ktime_t			(*get_time)(void);
+ 	ktime_t			softirq_time;
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -589,8 +589,7 @@ static int hrtimer_reprogram(struct hrti
+ 	 * When the callback is running, we do not reprogram the clock event
+ 	 * device. The timer callback is either running on a different CPU or
+ 	 * the callback is executed in the hrtimer_interrupt context. The
+-	 * reprogramming is handled either by the softirq, which called the
+-	 * callback or at the end of the hrtimer_interrupt.
++	 * reprogramming is handled at the end of the hrtimer_interrupt.
+ 	 */
+ 	if (hrtimer_callback_running(timer))
+ 		return 0;
+@@ -625,6 +624,9 @@ static int hrtimer_reprogram(struct hrti
+ 	return res;
+ }
+ 
++static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
++static int hrtimer_rt_defer(struct hrtimer *timer);
++
+ /*
+  * Initialize the high resolution related parts of cpu_base
+  */
+@@ -641,9 +643,18 @@ static inline void hrtimer_init_hres(str
+  * and expiry check is done in the hrtimer_interrupt or in the softirq.
+  */
+ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+-					    struct hrtimer_clock_base *base)
++					    struct hrtimer_clock_base *base,
++					    int wakeup)
+ {
+-	return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
++	if (!(base->cpu_base->hres_active && hrtimer_reprogram(timer, base)))
++		return 0;
++	if (!wakeup)
++		return -ETIME;
++#ifdef CONFIG_PREEMPT_RT_BASE
++	if (!hrtimer_rt_defer(timer))
++		return -ETIME;
++#endif
++	return 1;
+ }
+ 
+ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+@@ -724,12 +735,18 @@ static inline int hrtimer_switch_to_hres
+ static inline void
+ hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
+ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+-					    struct hrtimer_clock_base *base)
++					    struct hrtimer_clock_base *base,
++					    int wakeup)
+ {
+ 	return 0;
+ }
+ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
+ static inline void retrigger_next_event(void *arg) { }
++static inline int hrtimer_reprogram(struct hrtimer *timer,
++				    struct hrtimer_clock_base *base)
++{
++	return 0;
++}
+ 
+ #endif /* CONFIG_HIGH_RES_TIMERS */
+ 
+@@ -861,9 +878,9 @@ void hrtimer_wait_for_timer(const struct
+ {
+ 	struct hrtimer_clock_base *base = timer->base;
+ 
+-	if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
++	if (base && base->cpu_base && !timer->irqsafe)
+ 		wait_event(base->cpu_base->wait,
+-				!(timer->state & HRTIMER_STATE_CALLBACK));
++			   !(timer->state & HRTIMER_STATE_CALLBACK));
+ }
+ 
+ #else
+@@ -913,6 +930,11 @@ static void __remove_hrtimer(struct hrti
+ 	if (!(timer->state & HRTIMER_STATE_ENQUEUED))
+ 		goto out;
+ 
++	if (unlikely(!list_empty(&timer->cb_entry))) {
++		list_del_init(&timer->cb_entry);
++		goto out;
++	}
++
+ 	next_timer = timerqueue_getnext(&base->active);
+ 	timerqueue_del(&base->active, &timer->node);
+ 	if (&timer->node == next_timer) {
+@@ -1020,9 +1042,19 @@ int __hrtimer_start_range_ns(struct hrti
+ 	 *
+ 	 * XXX send_remote_softirq() ?
+ 	 */
+-	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
+-		&& hrtimer_enqueue_reprogram(timer, new_base)) {
+-		if (wakeup) {
++	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) {
++		ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
++		if (ret < 0) {
++			/*
++			 * In case we failed to reprogram the timer (mostly
++			 * because our current timer has already elapsed),
++			 * remove it again and report a failure. This avoids
++			 * stale base->first entries.
++			 */
++			debug_deactivate(timer);
++			__remove_hrtimer(timer, new_base,
++				timer->state & HRTIMER_STATE_CALLBACK, 0);
++		} else if (ret > 0) {
+ 			/*
+ 			 * We need to drop cpu_base->lock to avoid a
+ 			 * lock ordering issue vs. rq->lock.
+@@ -1030,9 +1062,7 @@ int __hrtimer_start_range_ns(struct hrti
+ 			raw_spin_unlock(&new_base->cpu_base->lock);
+ 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ 			local_irq_restore(flags);
+-			return ret;
+-		} else {
+-			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++			return 0;
+ 		}
+ 	}
+ 
+@@ -1199,6 +1229,7 @@ static void __hrtimer_init(struct hrtime
+ 
+ 	base = hrtimer_clockid_to_base(clock_id);
+ 	timer->base = &cpu_base->clock_base[base];
++	INIT_LIST_HEAD(&timer->cb_entry);
+ 	timerqueue_init(&timer->node);
+ 
+ #ifdef CONFIG_TIMER_STATS
+@@ -1282,10 +1313,128 @@ static void __run_hrtimer(struct hrtimer
+ 	timer->state &= ~HRTIMER_STATE_CALLBACK;
+ }
+ 
+-#ifdef CONFIG_HIGH_RES_TIMERS
+-
+ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
+ 
++#ifdef CONFIG_PREEMPT_RT_BASE
++static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
++				 struct hrtimer_clock_base *base)
++{
++	/*
++	 * Note, we clear the callback flag before we requeue the
++	 * timer otherwise we trigger the callback_running() check
++	 * in hrtimer_reprogram().
++	 */
++	timer->state &= ~HRTIMER_STATE_CALLBACK;
++
++	if (restart != HRTIMER_NORESTART) {
++		BUG_ON(hrtimer_active(timer));
++		/*
++		 * Enqueue the timer, if it's the leftmost timer then
++		 * we need to reprogram it.
++		 */
++		if (!enqueue_hrtimer(timer, base))
++			return;
++
++#ifndef CONFIG_HIGH_RES_TIMERS
++	}
++#else
++		if (base->cpu_base->hres_active &&
++		    hrtimer_reprogram(timer, base))
++			goto requeue;
++
++	} else if (hrtimer_active(timer)) {
++		/*
++		 * If the timer was rearmed on another CPU, reprogram
++		 * the event device.
++		 */
++		if (&timer->node == base->active.next &&
++		    base->cpu_base->hres_active &&
++		    hrtimer_reprogram(timer, base))
++			goto requeue;
++	}
++	return;
++
++requeue:
++	/*
++	 * Timer is expired. Thus move it from tree to pending list
++	 * again.
++	 */
++	__remove_hrtimer(timer, base, timer->state, 0);
++	list_add_tail(&timer->cb_entry, &base->expired);
++#endif
++}
++
++/*
++ * The changes in mainline which removed the callback modes from
++ * hrtimer are not yet working with -rt. The non wakeup_process()
++ * based callbacks which involve sleeping locks need to be treated
++ * separately.
++ */
++static void hrtimer_rt_run_pending(void)
++{
++	enum hrtimer_restart (*fn)(struct hrtimer *);
++	struct hrtimer_cpu_base *cpu_base;
++	struct hrtimer_clock_base *base;
++	struct hrtimer *timer;
++	int index, restart;
++
++	local_irq_disable();
++	cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
++
++	raw_spin_lock(&cpu_base->lock);
++
++	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
++		base = &cpu_base->clock_base[index];
++
++		while (!list_empty(&base->expired)) {
++			timer = list_first_entry(&base->expired,
++						 struct hrtimer, cb_entry);
++
++			/*
++			 * Same as the above __run_hrtimer function
++			 * except that we run with interrupts enabled.
++			 */
++			debug_hrtimer_deactivate(timer);
++			__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
++			timer_stats_account_hrtimer(timer);
++			fn = timer->function;
++
++			raw_spin_unlock_irq(&cpu_base->lock);
++			restart = fn(timer);
++			raw_spin_lock_irq(&cpu_base->lock);
++
++			hrtimer_rt_reprogram(restart, timer, base);
++		}
++	}
++
++	raw_spin_unlock_irq(&cpu_base->lock);
++
++	wake_up_timer_waiters(cpu_base);
++}
++
++static int hrtimer_rt_defer(struct hrtimer *timer)
++{
++	if (timer->irqsafe)
++		return 0;
++
++	__remove_hrtimer(timer, timer->base, timer->state, 0);
++	list_add_tail(&timer->cb_entry, &timer->base->expired);
++	return 1;
++}
++
++#else
++
++static inline void hrtimer_rt_run_pending(void)
++{
++	hrtimer_peek_ahead_timers();
++}
++
++static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
++
++#endif
++
++#ifdef CONFIG_HIGH_RES_TIMERS
++
+ /*
+  * High resolution timer interrupt
+  * Called with interrupts disabled
+@@ -1294,7 +1443,7 @@ void hrtimer_interrupt(struct clock_even
+ {
+ 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ 	ktime_t expires_next, now, entry_time, delta;
+-	int i, retries = 0;
++	int i, retries = 0, raise = 0;
+ 
+ 	BUG_ON(!cpu_base->hres_active);
+ 	cpu_base->nr_events++;
+@@ -1361,7 +1510,10 @@ retry:
+ 				break;
+ 			}
+ 
+-			__run_hrtimer(timer, &basenow);
++			if (!hrtimer_rt_defer(timer))
++				__run_hrtimer(timer, &basenow);
++			else
++				raise = 1;
+ 		}
+ 	}
+ 
+@@ -1376,6 +1528,10 @@ retry:
+ 	if (expires_next.tv64 == KTIME_MAX ||
+ 	    !tick_program_event(expires_next, 0)) {
+ 		cpu_base->hang_detected = 0;
++
++		if (raise)
++			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++
+ 		return;
+ 	}
+ 
+@@ -1456,24 +1612,26 @@ void hrtimer_peek_ahead_timers(void)
+ 	local_irq_restore(flags);
+ }
+ 
++#else /* CONFIG_HIGH_RES_TIMERS */
++
++static inline void __hrtimer_peek_ahead_timers(void) { }
++
++#endif	/* !CONFIG_HIGH_RES_TIMERS */
++
+ static void run_hrtimer_softirq(struct softirq_action *h)
+ {
++#ifdef CONFIG_HIGH_RES_TIMERS
+ 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ 
+ 	if (cpu_base->clock_was_set) {
+ 		cpu_base->clock_was_set = 0;
+ 		clock_was_set();
+ 	}
++#endif
+ 
+-	hrtimer_peek_ahead_timers();
++	hrtimer_rt_run_pending();
+ }
+ 
+-#else /* CONFIG_HIGH_RES_TIMERS */
+-
+-static inline void __hrtimer_peek_ahead_timers(void) { }
+-
+-#endif	/* !CONFIG_HIGH_RES_TIMERS */
+-
+ /*
+  * Called from timer softirq every jiffy, expire hrtimers:
+  *
+@@ -1506,7 +1664,7 @@ void hrtimer_run_queues(void)
+ 	struct timerqueue_node *node;
+ 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ 	struct hrtimer_clock_base *base;
+-	int index, gettime = 1;
++	int index, gettime = 1, raise = 0;
+ 
+ 	if (hrtimer_hres_active())
+ 		return;
+@@ -1531,12 +1689,16 @@ void hrtimer_run_queues(void)
+ 					hrtimer_get_expires_tv64(timer))
+ 				break;
+ 
+-			__run_hrtimer(timer, &base->softirq_time);
++			if (!hrtimer_rt_defer(timer))
++				__run_hrtimer(timer, &base->softirq_time);
++			else
++				raise = 1;
+ 		}
+ 		raw_spin_unlock(&cpu_base->lock);
+ 	}
+ 
+-	wake_up_timer_waiters(cpu_base);
++	if (raise)
++		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ }
+ 
+ /*
+@@ -1558,6 +1720,7 @@ static enum hrtimer_restart hrtimer_wake
+ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+ {
+ 	sl->timer.function = hrtimer_wakeup;
++	sl->timer.irqsafe = 1;
+ 	sl->task = task;
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
+@@ -1696,6 +1859,7 @@ static void __cpuinit init_hrtimers_cpu(
+ 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+ 		cpu_base->clock_base[i].cpu_base = cpu_base;
+ 		timerqueue_init_head(&cpu_base->clock_base[i].active);
++		INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
+ 	}
+ 
+ 	hrtimer_init_hres(cpu_base);
+@@ -1814,9 +1978,7 @@ void __init hrtimers_init(void)
+ 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
+ 			  (void *)(long)smp_processor_id());
+ 	register_cpu_notifier(&hrtimers_nb);
+-#ifdef CONFIG_HIGH_RES_TIMERS
+ 	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
+-#endif
+ }
+ 
+ /**
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -489,6 +489,7 @@ static void init_rq_hrtick(struct rq *rq
+ 
+ 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ 	rq->hrtick_timer.function = hrtick;
++	rq->hrtick_timer.irqsafe = 1;
+ }
+ #else	/* CONFIG_SCHED_HRTICK */
+ static inline void hrtick_clear(struct rq *rq)
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -41,6 +41,7 @@ void init_rt_bandwidth(struct rt_bandwid
+ 
+ 	hrtimer_init(&rt_b->rt_period_timer,
+ 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++	rt_b->rt_period_timer.irqsafe = 1;
+ 	rt_b->rt_period_timer.function = sched_rt_period_timer;
+ }
+ 
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -867,6 +867,7 @@ void tick_setup_sched_timer(void)
+ 	 * Emulate tick processing via per-CPU hrtimers:
+ 	 */
+ 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
++	ts->sched_timer.irqsafe = 1;
+ 	ts->sched_timer.function = tick_sched_timer;
+ 
+ 	/* Get the next period (per cpu) */
+--- a/kernel/watchdog.c
++++ b/kernel/watchdog.c
+@@ -358,6 +358,7 @@ static void watchdog_enable(unsigned int
+ 	/* kick off the timer for the hardlockup detector */
+ 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ 	hrtimer->function = watchdog_timer_fn;
++	hrtimer->irqsafe = 1;
+ 
+ 	if (!watchdog_enabled) {
+ 		kthread_park(current);
diff --git a/patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch b/patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch
new file mode 100644
index 0000000..5c8bce5
--- /dev/null
+++ b/patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch
@@ -0,0 +1,37 @@
+Subject: hrtimer: Raise softirq if hrtimer irq stalled
+From: Watanabe <shunsuke.watanabe@tel.com>
+Date: Sun, 28 Oct 2012 11:13:44 +0100
+
+When the hrtimer stall detection hits, the softirq is not raised.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/hrtimer.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -1528,11 +1528,7 @@ retry:
+ 	if (expires_next.tv64 == KTIME_MAX ||
+ 	    !tick_program_event(expires_next, 0)) {
+ 		cpu_base->hang_detected = 0;
+-
+-		if (raise)
+-			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+-
+-		return;
++		goto out;
+ 	}
+ 
+ 	/*
+@@ -1576,6 +1572,9 @@ retry:
+ 	tick_program_event(expires_next, 1);
+ 	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+ 		    ktime_to_ns(delta));
++out:
++	if (raise)
++		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ }
+ 
+ /*
diff --git a/patches/hrtimers-prepare-full-preemption.patch b/patches/hrtimers-prepare-full-preemption.patch
new file mode 100644
index 0000000..be6ba1d
--- /dev/null
+++ b/patches/hrtimers-prepare-full-preemption.patch
@@ -0,0 +1,195 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:34 -0500
+Subject: hrtimers: prepare full preemption
+
+Make cancellation of a running callback in softirq context safe
+against preemption.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/hrtimer.h |   10 ++++++++++
+ kernel/hrtimer.c        |   33 ++++++++++++++++++++++++++++++++-
+ kernel/itimer.c         |    1 +
+ kernel/posix-timers.c   |   33 +++++++++++++++++++++++++++++++++
+ 4 files changed, 76 insertions(+), 1 deletion(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -192,6 +192,9 @@ struct hrtimer_cpu_base {
+ 	unsigned long			nr_hangs;
+ 	ktime_t				max_hang_time;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_BASE
++	wait_queue_head_t		wait;
++#endif
+ 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
+ };
+ 
+@@ -385,6 +388,13 @@ static inline int hrtimer_restart(struct
+ 	return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ }
+ 
++/* Softirq preemption could deadlock timer removal */
++#ifdef CONFIG_PREEMPT_RT_BASE
++  extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
++#else
++# define hrtimer_wait_for_timer(timer)	do { cpu_relax(); } while (0)
++#endif
++
+ /* Query timers: */
+ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+ extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -844,6 +844,32 @@ u64 hrtimer_forward(struct hrtimer *time
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_forward);
+ 
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define wake_up_timer_waiters(b)	wake_up(&(b)->wait)
++
++/**
++ * hrtimer_wait_for_timer - Wait for a running timer
++ *
++ * @timer:	timer to wait for
++ *
++ * The function waits on the waitqueue of the timer base in case the
++ * timer's callback function is currently executing. The
++ * waitqueue is woken up after the timer callback function has
++ * finished execution.
++ */
++void hrtimer_wait_for_timer(const struct hrtimer *timer)
++{
++	struct hrtimer_clock_base *base = timer->base;
++
++	if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
++		wait_event(base->cpu_base->wait,
++				!(timer->state & HRTIMER_STATE_CALLBACK));
++}
++
++#else
++# define wake_up_timer_waiters(b)	do { } while (0)
++#endif
++
+ /*
+  * enqueue_hrtimer - internal function to (re)start a timer
+  *
+@@ -1094,7 +1120,7 @@ int hrtimer_cancel(struct hrtimer *timer
+ 
+ 		if (ret >= 0)
+ 			return ret;
+-		cpu_relax();
++		hrtimer_wait_for_timer(timer);
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_cancel);
+@@ -1509,6 +1535,8 @@ void hrtimer_run_queues(void)
+ 		}
+ 		raw_spin_unlock(&cpu_base->lock);
+ 	}
++
++	wake_up_timer_waiters(cpu_base);
+ }
+ 
+ /*
+@@ -1671,6 +1699,9 @@ static void __cpuinit init_hrtimers_cpu(
+ 	}
+ 
+ 	hrtimer_init_hres(cpu_base);
++#ifdef CONFIG_PREEMPT_RT_BASE
++	init_waitqueue_head(&cpu_base->wait);
++#endif
+ }
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+--- a/kernel/itimer.c
++++ b/kernel/itimer.c
+@@ -213,6 +213,7 @@ again:
+ 		/* We are sharing ->siglock with it_real_fn() */
+ 		if (hrtimer_try_to_cancel(timer) < 0) {
+ 			spin_unlock_irq(&tsk->sighand->siglock);
++			hrtimer_wait_for_timer(&tsk->signal->real_timer);
+ 			goto again;
+ 		}
+ 		expires = timeval_to_ktime(value->it_value);
+--- a/kernel/posix-timers.c
++++ b/kernel/posix-timers.c
+@@ -773,6 +773,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_
+ 	return overrun;
+ }
+ 
++/*
++ * Protected by RCU!
++ */
++static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
++{
++#ifdef CONFIG_PREEMPT_RT_FULL
++	if (kc->timer_set == common_timer_set)
++		hrtimer_wait_for_timer(&timr->it.real.timer);
++	else
++		/* FIXME: Whacky hack for posix-cpu-timers */
++		schedule_timeout(1);
++#endif
++}
++
+ /* Set a POSIX.1b interval timer. */
+ /* timr->it_lock is taken. */
+ static int
+@@ -850,6 +864,7 @@ retry:
+ 	if (!timr)
+ 		return -EINVAL;
+ 
++	rcu_read_lock();
+ 	kc = clockid_to_kclock(timr->it_clock);
+ 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
+ 		error = -EINVAL;
+@@ -858,9 +873,12 @@ retry:
+ 
+ 	unlock_timer(timr, flag);
+ 	if (error == TIMER_RETRY) {
++		timer_wait_for_callback(kc, timr);
+ 		rtn = NULL;	// We already got the old time...
++		rcu_read_unlock();
+ 		goto retry;
+ 	}
++	rcu_read_unlock();
+ 
+ 	if (old_setting && !error &&
+ 	    copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
+@@ -898,10 +916,15 @@ retry_delete:
+ 	if (!timer)
+ 		return -EINVAL;
+ 
++	rcu_read_lock();
+ 	if (timer_delete_hook(timer) == TIMER_RETRY) {
+ 		unlock_timer(timer, flags);
++		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
++					timer);
++		rcu_read_unlock();
+ 		goto retry_delete;
+ 	}
++	rcu_read_unlock();
+ 
+ 	spin_lock(&current->sighand->siglock);
+ 	list_del(&timer->list);
+@@ -927,8 +950,18 @@ static void itimer_delete(struct k_itime
+ retry_delete:
+ 	spin_lock_irqsave(&timer->it_lock, flags);
+ 
++	/* On RT we can race with a deletion */
++	if (!timer->it_signal) {
++		unlock_timer(timer, flags);
++		return;
++	}
++
+ 	if (timer_delete_hook(timer) == TIMER_RETRY) {
++		rcu_read_lock();
+ 		unlock_timer(timer, flags);
++		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
++					timer);
++		rcu_read_unlock();
+ 		goto retry_delete;
+ 	}
+ 	list_del(&timer->list);
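A userspace analogue of the cancellation change (pthreads, illustration
only): instead of spinning with cpu_relax() until a running callback
completes, the canceller sleeps and is woken when the callback finishes.
The condition variable stands in for cpu_base->wait and the running flag
for HRTIMER_STATE_CALLBACK.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cb_done = PTHREAD_COND_INITIALIZER;
static int callback_running;

static void *timer_callback(void *arg)
{
	pthread_mutex_lock(&lock);
	callback_running = 1;
	pthread_mutex_unlock(&lock);

	usleep(100 * 1000);			/* the callback's work */

	pthread_mutex_lock(&lock);
	callback_running = 0;
	pthread_cond_broadcast(&cb_done);	/* wake_up_timer_waiters() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void timer_cancel_sync(void)
{
	pthread_mutex_lock(&lock);
	while (callback_running)		/* hrtimer_wait_for_timer() */
		pthread_cond_wait(&cb_done, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t cb;

	pthread_create(&cb, NULL, timer_callback, NULL);
	usleep(10 * 1000);			/* let the callback start */
	timer_cancel_sync();
	printf("callback finished, timer can be torn down\n");
	pthread_join(cb, NULL);
	return 0;
}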
diff --git a/patches/hwlatdetect.patch b/patches/hwlatdetect.patch
new file mode 100644
index 0000000..08045b3
--- /dev/null
+++ b/patches/hwlatdetect.patch
@@ -0,0 +1,1344 @@
+Subject: hwlatdetect.patch
+From: Carsten Emde <C.Emde@osadl.org>
+Date: Tue, 19 Jul 2011 13:53:12 +0100
+
+Jon Masters developed this wonderful SMI detector. For details please
+consult Documentation/hwlat_detector.txt. It could be ported to Linux
+3.0 RT without any major change.
+
+Signed-off-by: Carsten Emde <C.Emde@osadl.org>
+
+---
+ Documentation/hwlat_detector.txt |   64 ++
+ drivers/misc/Kconfig             |   29 
+ drivers/misc/Makefile            |    1 
+ drivers/misc/hwlat_detector.c    | 1212 +++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 1306 insertions(+)
+
+--- /dev/null
++++ b/Documentation/hwlat_detector.txt
+@@ -0,0 +1,64 @@
++Introduction:
++-------------
++
++The module hwlat_detector is a special purpose kernel module that is used to
++detect large system latencies induced by the behavior of certain underlying
++hardware or firmware, independent of Linux itself. The code was developed
++originally to detect SMIs (System Management Interrupts) on x86 systems,
++however there is nothing x86 specific about this patchset. It was
++originally written for use by the "RT" patch since the Real Time
++kernel is highly latency sensitive.
++
++SMIs are usually not serviced by the Linux kernel, which typically does not
++even know that they are occurring. SMIs are instead set up by BIOS code
++and are serviced by BIOS code, usually for "critical" events such as
++management of thermal sensors and fans. Sometimes though, SMIs are used for
++other tasks and those tasks can spend an inordinate amount of time in the
++handler (sometimes measured in milliseconds). Obviously this is a problem if
++you are trying to keep event service latencies down in the microsecond range.
++
++The hardware latency detector works by hogging all of the cpus for configurable
++amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
++for some period, then looking for gaps in the TSC data. Any gap indicates a
++time when the polling was interrupted, and since the machine is stopped and
++interrupts are turned off, the only thing that could do that would be an SMI.
++
++Note that the SMI detector should *NEVER* be used in a production environment.
++It is intended to be run manually to determine if the hardware platform has a
++problem with long system firmware service routines.
++
++Usage:
++------
++
++Loading the module hwlat_detector with the parameter "enabled=1" (or toggling
++on the "enable" entry in the "hwlat_detector" debugfs directory) is the only
++step required to start the hwlat_detector. It is possible to redefine the
++threshold in microseconds (us) above which latency spikes will be taken
++into account (parameter "threshold=").
++
++Example:
++
++	# modprobe hwlat_detector enabled=1 threshold=100
++
++After the module is loaded, it creates a directory named "hwlat_detector" under
++the debugfs mountpoint ("/debug/hwlat_detector" in this text). It is necessary
++to have debugfs mounted, which might be on /sys/debug on your system.
++
++The /debug/hwlat_detector interface contains the following files:
++
++count			- number of latency spikes observed since last reset
++enable			- a global enable/disable toggle (0/1), resets count
++max			- maximum hardware latency actually observed (usecs)
++sample			- a pipe from which to read current raw sample data
++			  in the format <timestamp> <latency observed usecs>
++			  (can be opened O_NONBLOCK for a single sample)
++threshold		- minimum latency value to be considered (usecs)
++width			- time period to sample with CPUs held (usecs)
++			  must be less than the total window size (enforced)
++window			- total period of sampling, width being inside (usecs)
++
++By default we will set width to 500,000 and window to 1,000,000, meaning that
++we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
++observe any latencies that exceed the threshold (initially 100 usecs),
++then we write to a global sample ring buffer of 8K samples, which is
++consumed by reading from the "sample" (pipe) debugfs file interface.
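The measurement loop described above can be sketched in userspace
(illustration only; the real detector additionally runs under
stop_machine() so that nothing but firmware can interrupt the loop):
read a clock back to back and record the largest gap seen during the
sample width.

/* Userspace sketch of the gap-detection idea; link with -lrt on older
 * glibc. Without stop_machine() the gaps also include ordinary
 * preemption and interrupts, not just SMIs. */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_us(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000ULL + ts.tv_nsec / 1000;
}

int main(void)
{
	uint64_t start = now_us(), t1, t2, max_gap = 0;

	do {
		t1 = now_us();
		t2 = now_us();
		if (t2 - t1 > max_gap)
			max_gap = t2 - t1;
	} while (t2 - start < 500000);	/* 0.5s sample width */

	printf("largest gap between consecutive reads: %llu us\n",
	       (unsigned long long)max_gap);
	return 0;
}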
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -121,6 +121,35 @@ config IBM_ASM
+ 	  for information on the specific driver level and support statement
+ 	  for your IBM server.
+ 
++config HWLAT_DETECTOR
++	tristate "Testing module to detect hardware-induced latencies"
++	depends on DEBUG_FS
++	depends on RING_BUFFER
++	default m
++	---help---
++	  A simple hardware latency detector. Use this module to detect
++	  large latencies introduced by the behavior of the underlying
++	  system firmware external to Linux. We do this using periodic
++	  use of stop_machine to grab all available CPUs and measure
++	  for unexplainable gaps in the CPU timestamp counter(s). By
++	  default, the module is not enabled until the "enable" file
++	  within the "hwlat_detector" debugfs directory is toggled.
++
++	  This module is often used to detect SMI (System Management
++	  Interrupts) on x86 systems, though is not x86 specific. To
++	  this end, we default to using a sample window of 1 second,
++	  during which we will sample for 0.5 seconds. If an SMI or
++	  similar event occurs during that time, it is recorded
++	  into an 8K-sample global ring buffer until retrieved.
++
++	  WARNING: This software should never be enabled (it can be built
++	  but should not be turned on after it is loaded) in a production
++	  environment where high latencies are a concern since the
++	  sampling mechanism actually introduces latencies for
++	  regular tasks while the CPU(s) are being held.
++
++	  If unsure, say N
++
+ config PHANTOM
+ 	tristate "Sensable PHANToM (PCI)"
+ 	depends on PCI
+--- a/drivers/misc/Makefile
++++ b/drivers/misc/Makefile
+@@ -49,3 +49,4 @@ obj-y				+= carma/
+ obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
+ obj-$(CONFIG_ALTERA_STAPL)	+=altera-stapl/
+ obj-$(CONFIG_INTEL_MEI)		+= mei/
++obj-$(CONFIG_HWLAT_DETECTOR)	+= hwlat_detector.o
+--- /dev/null
++++ b/drivers/misc/hwlat_detector.c
+@@ -0,0 +1,1212 @@
++/*
++ * hwlat_detector.c - A simple Hardware Latency detector.
++ *
++ * Use this module to detect large system latencies induced by the behavior of
++ * certain underlying system hardware or firmware, independent of Linux itself.
++ * The code was developed originally to detect the presence of SMIs on Intel
++ * and AMD systems, although there is no dependency upon x86 herein.
++ *
++ * The classical example usage of this module is in detecting the presence of
++ * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
++ * somewhat special form of hardware interrupt spawned from earlier CPU debug
++ * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
++ * LPC (or other device) to generate a special interrupt under certain
++ * circumstances, for example, upon expiration of a special SMI timer device,
++ * due to certain external thermal readings, on certain I/O address accesses,
++ * and other situations. An SMI hits a special CPU pin, triggers a special
++ * SMI mode (complete with special memory map), and the OS is unaware.
++ *
++ * Although certain hardware-inducing latencies are necessary (for example,
++ * a modern system often requires an SMI handler for correct thermal control
++ * and remote management) they can wreak havoc upon any OS-level performance
++ * guarantees toward low-latency, especially when the OS is not even made
++ * aware of the presence of these interrupts. For this reason, we need a
++ * somewhat brute force mechanism to detect these interrupts. In this case,
++ * we do it by hogging all of the CPU(s) for configurable timer intervals,
++ * sampling the built-in CPU timer, looking for discontiguous readings.
++ *
++ * WARNING: This implementation necessarily introduces latencies. Therefore,
++ *          you should NEVER use this module in a production environment
++ *          requiring any kind of low-latency performance guarantee(s).
++ *
++ * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
++ *
++ * Includes useful feedback from Clark Williams <clark@redhat.com>
++ *
++ * This file is licensed under the terms of the GNU General Public
++ * License version 2. This program is licensed "as is" without any
++ * warranty of any kind, whether express or implied.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/ring_buffer.h>
++#include <linux/stop_machine.h>
++#include <linux/time.h>
++#include <linux/hrtimer.h>
++#include <linux/kthread.h>
++#include <linux/debugfs.h>
++#include <linux/seq_file.h>
++#include <linux/uaccess.h>
++#include <linux/version.h>
++#include <linux/delay.h>
++#include <linux/slab.h>
++
++#define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
++#define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
++#define U64STR_SIZE		22			/* 20 digits max */
++
++#define VERSION			"1.0.0"
++#define BANNER			"hwlat_detector: "
++#define DRVNAME			"hwlat_detector"
++#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
++#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
++#define DEFAULT_LAT_THRESHOLD	10			/* 10us */
++
++/* Module metadata */
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
++MODULE_DESCRIPTION("A simple hardware latency detector");
++MODULE_VERSION(VERSION);
++
++/* Module parameters */
++
++static int debug;
++static int enabled;
++static int threshold;
++
++module_param(debug, int, 0);			/* enable debug */
++module_param(enabled, int, 0);			/* enable detector */
++module_param(threshold, int, 0);		/* latency threshold */
++
++/* Buffering and sampling */
++
++static struct ring_buffer *ring_buffer;		/* sample buffer */
++static DEFINE_MUTEX(ring_buffer_mutex);		/* lock changes */
++static unsigned long buf_size = BUF_SIZE_DEFAULT;
++static struct task_struct *kthread;		/* sampling thread */
++
++/* DebugFS filesystem entries */
++
++static struct dentry *debug_dir;		/* debugfs directory */
++static struct dentry *debug_max;		/* maximum TSC delta */
++static struct dentry *debug_count;		/* total detect count */
++static struct dentry *debug_sample_width;	/* sample width us */
++static struct dentry *debug_sample_window;	/* sample window us */
++static struct dentry *debug_sample;		/* raw samples us */
++static struct dentry *debug_threshold;		/* threshold us */
++static struct dentry *debug_enable;         	/* enable/disable */
++
++/* Individual samples and global state */
++
++struct sample;					/* latency sample */
++struct data;					/* Global state */
++
++/* Sampling functions */
++static int __buffer_add_sample(struct sample *sample);
++static struct sample *buffer_get_sample(struct sample *sample);
++static int get_sample(void *unused);
++
++/* Threading and state */
++static int kthread_fn(void *unused);
++static int start_kthread(void);
++static int stop_kthread(void);
++static void __reset_stats(void);
++static int init_stats(void);
++
++/* Debugfs interface */
++static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
++				size_t cnt, loff_t *ppos, const u64 *entry);
++static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
++				 size_t cnt, loff_t *ppos, u64 *entry);
++static int debug_sample_fopen(struct inode *inode, struct file *filp);
++static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
++				  size_t cnt, loff_t *ppos);
++static int debug_sample_release(struct inode *inode, struct file *filp);
++static int debug_enable_fopen(struct inode *inode, struct file *filp);
++static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
++				  size_t cnt, loff_t *ppos);
++static ssize_t debug_enable_fwrite(struct file *file,
++				   const char __user *user_buffer,
++				   size_t user_size, loff_t *offset);
++
++/* Initialization functions */
++static int init_debugfs(void);
++static void free_debugfs(void);
++static int detector_init(void);
++static void detector_exit(void);
++
++/* Individual latency samples are stored here when detected and packed into
++ * the ring_buffer circular buffer, where they are overwritten when
++ * more than buf_size/sizeof(sample) samples are received. */
++struct sample {
++	u64		seqnum;		/* unique sequence */
++	u64		duration;	/* ktime delta */
++	struct timespec	timestamp;	/* wall time */
++	unsigned long   lost;
++};
++
++/* keep the global state somewhere. Mostly used under stop_machine. */
++static struct data {
++
++	struct mutex lock;		/* protect changes */
++
++	u64	count;			/* total since reset */
++	u64	max_sample;		/* max hardware latency */
++	u64	threshold;		/* sample threshold level */
++
++	u64	sample_window;		/* total sampling window (on+off) */
++	u64	sample_width;		/* active sampling portion of window */
++
++	atomic_t sample_open;		/* whether the sample file is open */
++
++	wait_queue_head_t wq;		/* waitqueue for new sample values */
++
++} data;
++
++/**
++ * __buffer_add_sample - add a new latency sample recording to the ring buffer
++ * @sample: The new latency sample value
++ *
++ * This receives a new latency sample and records it in a global ring buffer.
++ * No additional locking is used in this case - suited for stop_machine use.
++ */
++static int __buffer_add_sample(struct sample *sample)
++{
++	return ring_buffer_write(ring_buffer,
++				 sizeof(struct sample), sample);
++}
++
++/**
++ * buffer_get_sample - remove a hardware latency sample from the ring buffer
++ * @sample: Pre-allocated storage for the sample
++ *
++ * This retrieves a hardware latency sample from the global circular buffer
++ */
++static struct sample *buffer_get_sample(struct sample *sample)
++{
++	struct ring_buffer_event *e = NULL;
++	struct sample *s = NULL;
++	unsigned int cpu = 0;
++
++	if (!sample)
++		return NULL;
++
++	mutex_lock(&ring_buffer_mutex);
++	for_each_online_cpu(cpu) {
++		e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
++		if (e)
++			break;
++	}
++
++	if (e) {
++		s = ring_buffer_event_data(e);
++		memcpy(sample, s, sizeof(struct sample));
++	} else
++		sample = NULL;
++	mutex_unlock(&ring_buffer_mutex);
++
++	return sample;
++}
++
++/**
++ * get_sample - sample the CPU TSC and look for likely hardware latencies
++ * @unused: This is not used but is a part of the stop_machine API
++ *
++ * Used to repeatedly capture the CPU TSC (or similar), looking for potential
++ * hardware-induced latency. Called under stop_machine, with data.lock held.
++ */
++static int get_sample(void *unused)
++{
++	ktime_t start, t1, t2;
++	s64 diff, total = 0;
++	u64 sample = 0;
++	int ret = 1;
++
++	start = ktime_get(); /* start timestamp */
++
++	do {
++
++		t1 = ktime_get();	/* we'll look for a discontinuity */
++		t2 = ktime_get();
++
++		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
++		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */
++
++		/* This shouldn't happen */
++		if (diff < 0) {
++			printk(KERN_ERR BANNER "time running backwards\n");
++			goto out;
++		}
++
++		if (diff > sample)
++			sample = diff; /* only want highest value */
++
++	} while (total <= data.sample_width);
++
++	/* If we exceed the threshold value, we have found a hardware latency */
++	if (sample > data.threshold) {
++		struct sample s;
++
++		data.count++;
++		s.seqnum = data.count;
++		s.duration = sample;
++		s.timestamp = CURRENT_TIME;
++		__buffer_add_sample(&s);
++
++		/* Keep a running maximum ever recorded hardware latency */
++		if (sample > data.max_sample)
++			data.max_sample = sample;
++	}
++
++	ret = 0;
++out:
++	return ret;
++}
++
++/*
++ * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
++ * @unused: A required part of the kthread API.
++ *
++ * Used to periodically sample the CPU TSC via a call to get_sample. We
++ * use stop_machine, which does (intentionally) introduce latency since we
++ * need to ensure nothing else might be running (and thus pre-empting).
++ * Obviously this should never be used in production environments.
++ *
++ * stop_machine will schedule us typically only on CPU0 which is fine for
++ * almost every real-world hardware latency situation - but we might later
++ * generalize this if we find there are any actual systems with alternate
++ * SMI delivery or other non CPU0 hardware latencies.
++ */
++static int kthread_fn(void *unused)
++{
++	int err = 0;
++	u64 interval = 0;
++
++	while (!kthread_should_stop()) {
++
++		mutex_lock(&data.lock);
++
++		err = stop_machine(get_sample, unused, 0);
++		if (err) {
++			/* Houston, we have a problem */
++			mutex_unlock(&data.lock);
++			goto err_out;
++		}
++
++		wake_up(&data.wq); /* wake up reader(s) */
++
++		interval = data.sample_window - data.sample_width;
++		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
++
++		mutex_unlock(&data.lock);
++
++		if (msleep_interruptible(interval))
++			goto out;
++	}
++		goto out;
++err_out:
++	printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
++	enabled = 0;
++out:
++	return err;
++
++}
++
++/**
++ * start_kthread - Kick off the hardware latency sampling/detector kthread
++ *
++ * This starts a kernel thread that will sit and sample the CPU timestamp
++ * counter (TSC or similar) and look for potential hardware latencies.
++ */
++static int start_kthread(void)
++{
++	kthread = kthread_run(kthread_fn, NULL,
++					DRVNAME);
++	if (IS_ERR(kthread)) {
++		printk(KERN_ERR BANNER "could not start sampling thread\n");
++		enabled = 0;
++		return -ENOMEM;
++	}
++
++	return 0;
++}
++
++/**
++ * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
++ *
++ * This kicks the running hardware latency sampling/detector kernel thread and
++ * tells it to stop sampling now. Use this on unload and at system shutdown.
++ */
++static int stop_kthread(void)
++{
++	int ret;
++
++	ret = kthread_stop(kthread);
++
++	return ret;
++}
++
++/**
++ * __reset_stats - Reset statistics for the hardware latency detector
++ *
++ * We use data to store various statistics and global state. We call this
++ * function in order to reset those when "enable" is toggled on or off, and
++ * also at initialization. Should be called with data.lock held.
++ */
++static void __reset_stats(void)
++{
++	data.count = 0;
++	data.max_sample = 0;
++	ring_buffer_reset(ring_buffer); /* flush out old sample entries */
++}
++
++/**
++ * init_stats - Setup global state statistics for the hardware latency detector
++ *
++ * We use data to store various statistics and global state. We also use
++ * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
++ * induced system latencies. This function initializes these structures and
++ * allocates the global ring buffer also.
++ */
++static int init_stats(void)
++{
++	int ret = -ENOMEM;
++
++	mutex_init(&data.lock);
++	init_waitqueue_head(&data.wq);
++	atomic_set(&data.sample_open, 0);
++
++	ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
++
++	if (WARN(!ring_buffer, KERN_ERR BANNER
++			       "failed to allocate ring buffer!\n"))
++		goto out;
++
++	__reset_stats();
++	data.threshold = DEFAULT_LAT_THRESHOLD;	    /* threshold us */
++	data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
++	data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */
++
++	ret = 0;
++
++out:
++	return ret;
++
++}
++
++/*
++ * simple_data_read - Wrapper read function for global state debugfs entries
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ * @entry: The entry to read from
++ *
++ * This function provides a generic read implementation for the global state
++ * "data" structure debugfs filesystem entries. It would be nice to use
++ * simple_attr_read directly, but we need to make sure that the data.lock
++ * mutex is held during the actual read (even though we likely won't ever
++ * actually race here as the updater runs under a stop_machine context).
++ */
++static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
++				size_t cnt, loff_t *ppos, const u64 *entry)
++{
++	char buf[U64STR_SIZE];
++	u64 val = 0;
++	int len = 0;
++
++	memset(buf, 0, sizeof(buf));
++
++	if (!entry)
++		return -EFAULT;
++
++	mutex_lock(&data.lock);
++	val = *entry;
++	mutex_unlock(&data.lock);
++
++	len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
++
++	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
++
++}
++
++/*
++ * simple_data_write - Wrapper write function for global state debugfs entries
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to write value from
++ * @cnt: The maximum number of bytes to write
++ * @ppos: The current "file" position
++ * @entry: The entry to write to
++ *
++ * This function provides a generic write implementation for the global state
++ * "data" structure debugfs filesystem entries. It would be nice to use
++ * simple_attr_write directly, but we need to make sure that the data.lock
++ * mutex is held during the actual write (even though we likely won't ever
++ * actually race here as the updater runs under a stop_machine context).
++ */
++static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
++				 size_t cnt, loff_t *ppos, u64 *entry)
++{
++	char buf[U64STR_SIZE];
++	int csize = min(cnt, sizeof(buf));
++	u64 val = 0;
++	int err = 0;
++
++	memset(buf, '\0', sizeof(buf));
++	if (copy_from_user(buf, ubuf, csize))
++		return -EFAULT;
++
++	buf[U64STR_SIZE-1] = '\0';			/* just in case */
++	err = strict_strtoull(buf, 10, &val);
++	if (err)
++		return -EINVAL;
++
++	mutex_lock(&data.lock);
++	*entry = val;
++	mutex_unlock(&data.lock);
++
++	return csize;
++}
++
++/**
++ * debug_count_fopen - Open function for "count" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "count" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_count_fopen(struct inode *inode, struct file *filp)
++{
++	return 0;
++}
++
++/**
++ * debug_count_fread - Read function for "count" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "count" debugfs
++ * interface to the hardware latency detector. Can be used to read the
++ * number of latency readings exceeding the configured threshold since
++ * the detector was last reset (e.g. by writing a zero into "count").
++ */
++static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
++				     size_t cnt, loff_t *ppos)
++{
++	return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
++}
++
++/**
++ * debug_count_fwrite - Write function for "count" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "count" debugfs
++ * interface to the hardware latency detector. Can be used to write a
++ * desired value, especially to zero the total count.
++ */
++static ssize_t  debug_count_fwrite(struct file *filp,
++				       const char __user *ubuf,
++				       size_t cnt,
++				       loff_t *ppos)
++{
++	return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
++}
++
++/**
++ * debug_enable_fopen - Dummy open function for "enable" debugfs interface
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "enable" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_enable_fopen(struct inode *inode, struct file *filp)
++{
++	return 0;
++}
++
++/**
++ * debug_enable_fread - Read function for "enable" debugfs interface
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "enable" debugfs
++ * interface to the hardware latency detector. Can be used to determine
++ * whether the detector is currently enabled ("0\n" or "1\n" returned).
++ */
++static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
++				      size_t cnt, loff_t *ppos)
++{
++	char buf[4];
++
++	if ((cnt < sizeof(buf)) || (*ppos))
++		return 0;
++
++	buf[0] = enabled ? '1' : '0';
++	buf[1] = '\n';
++	buf[2] = '\0';
++	if (copy_to_user(ubuf, buf, strlen(buf)))
++		return -EFAULT;
++	return *ppos = strlen(buf);
++}
++
++/**
++ * debug_enable_fwrite - Write function for "enable" debugfs interface
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "enable" debugfs
++ * interface to the hardware latency detector. Can be used to enable or
++ * disable the detector, which will have the side-effect of possibly
++ * also resetting the global stats and kicking off the measuring
++ * kthread (on an enable) or the converse (upon a disable).
++ */
++static ssize_t  debug_enable_fwrite(struct file *filp,
++					const char __user *ubuf,
++					size_t cnt,
++					loff_t *ppos)
++{
++	char buf[4];
++	int csize = min(cnt, sizeof(buf));
++	long val = 0;
++	int err = 0;
++
++	memset(buf, '\0', sizeof(buf));
++	if (copy_from_user(buf, ubuf, csize))
++		return -EFAULT;
++
++	buf[sizeof(buf)-1] = '\0';			/* just in case */
++	err = strict_strtoul(buf, 10, &val);
++	if (0 != err)
++		return -EINVAL;
++
++	if (val) {
++		if (enabled)
++			goto unlock;
++		enabled = 1;
++		__reset_stats();
++		if (start_kthread())
++			return -EFAULT;
++	} else {
++		if (!enabled)
++			goto unlock;
++		enabled = 0;
++		err = stop_kthread();
++		if (err) {
++			printk(KERN_ERR BANNER "cannot stop kthread\n");
++			return -EFAULT;
++		}
++		wake_up(&data.wq);		/* reader(s) should return */
++	}
++unlock:
++	return csize;
++}
++
++/**
++ * debug_max_fopen - Open function for "max" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "max" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_max_fopen(struct inode *inode, struct file *filp)
++{
++	return 0;
++}
++
++/**
++ * debug_max_fread - Read function for "max" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "max" debugfs
++ * interface to the hardware latency detector. Can be used to determine
++ * the maximum latency value observed since it was last reset.
++ */
++static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
++				   size_t cnt, loff_t *ppos)
++{
++	return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
++}
++
++/**
++ * debug_max_fwrite - Write function for "max" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "max" debugfs
++ * interface to the hardware latency detector. Can be used to reset the
++ * maximum or set it to some other desired value; if subsequent
++ * measurements exceed this value, the maximum will be updated.
++ */
++static ssize_t  debug_max_fwrite(struct file *filp,
++				     const char __user *ubuf,
++				     size_t cnt,
++				     loff_t *ppos)
++{
++	return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
++}
++
++
++/**
++ * debug_sample_fopen - An open function for "sample" debugfs interface
++ * @inode: The in-kernel inode representation of this debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function handles opening the "sample" file within the hardware
++ * latency detector debugfs directory interface. This file is used to read
++ * raw samples from the global ring_buffer and allows the user to see a
++ * running latency history. Can be opened blocking or non-blocking,
++ * affecting whether it behaves as a buffer read pipe, or does not.
++ * Implements simple locking to prevent multiple simultaneous use.
++ */
++static int debug_sample_fopen(struct inode *inode, struct file *filp)
++{
++	if (!atomic_add_unless(&data.sample_open, 1, 1))
++		return -EBUSY;
++	else
++		return 0;
++}
++
++/**
++ * debug_sample_fread - A read function for "sample" debugfs interface
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that will contain the samples read
++ * @cnt: The maximum bytes to read from the debugfs "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function handles reading from the "sample" file within the hardware
++ * latency detector debugfs directory interface. This file is used to read
++ * raw samples from the global ring_buffer and allows the user to see a
++ * running latency history. By default this will block pending a new
++ * value written into the sample buffer, unless there are already a
++ * number of value(s) waiting in the buffer, or the sample file was
++ * previously opened in a non-blocking mode of operation.
++ */
++static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
++					size_t cnt, loff_t *ppos)
++{
++	int len = 0;
++	char buf[64];
++	struct sample *sample = NULL;
++
++	if (!enabled)
++		return 0;
++
++	sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
++	if (!sample)
++		return -ENOMEM;
++
++	while (!buffer_get_sample(sample)) {
++
++		DEFINE_WAIT(wait);
++
++		if (filp->f_flags & O_NONBLOCK) {
++			len = -EAGAIN;
++			goto out;
++		}
++
++		prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
++		schedule();
++		finish_wait(&data.wq, &wait);
++
++		if (signal_pending(current)) {
++			len = -EINTR;
++			goto out;
++		}
++
++		if (!enabled) {			/* enable was toggled */
++			len = 0;
++			goto out;
++		}
++	}
++
++	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
++		      sample->timestamp.tv_sec,
++		      sample->timestamp.tv_nsec,
++		      sample->duration);
++
++
++	/* handling partial reads is more trouble than it's worth */
++	if (len > cnt)
++		goto out;
++
++	if (copy_to_user(ubuf, buf, len))
++		len = -EFAULT;
++
++out:
++	kfree(sample);
++	return len;
++}
++
++/**
++ * debug_sample_release - Release function for "sample" debugfs interface
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function completes the close of the debugfs interface "sample" file.
++ * Frees the sample_open "lock" so that other users may open the interface.
++ */
++static int debug_sample_release(struct inode *inode, struct file *filp)
++{
++	atomic_dec(&data.sample_open);
++
++	return 0;
++}
++
++/**
++ * debug_threshold_fopen - Open function for "threshold" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "threshold" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_threshold_fopen(struct inode *inode, struct file *filp)
++{
++	return 0;
++}
++
++/**
++ * debug_threshold_fread - Read function for "threshold" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "threshold" debugfs
++ * interface to the hardware latency detector. It can be used to determine
++ * the current threshold level at which a latency will be recorded in the
++ * global ring buffer, typically on the order of 10us.
++ */
++static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
++					 size_t cnt, loff_t *ppos)
++{
++	return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
++}
++
++/**
++ * debug_threshold_fwrite - Write function for "threshold" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "threshold" debugfs
++ * interface to the hardware latency detector. It can be used to configure
++ * the threshold level at which any subsequently detected latencies will
++ * be recorded into the global ring buffer.
++ */
++static ssize_t  debug_threshold_fwrite(struct file *filp,
++					const char __user *ubuf,
++					size_t cnt,
++					loff_t *ppos)
++{
++	int ret;
++
++	ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
++
++	if (enabled)
++		wake_up_process(kthread);
++
++	return ret;
++}
++
++/**
++ * debug_width_fopen - Open function for "width" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "width" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_width_fopen(struct inode *inode, struct file *filp)
++{
++	return 0;
++}
++
++/**
++ * debug_width_fread - Read function for "width" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "width" debugfs
++ * interface to the hardware latency detector. It can be used to determine
++ * for how many us of the total window us we will actively sample for any
++ * hardware-induced latency periods. Obviously, it is not possible to
++ * sample constantly and have the system respond to a sample reader, or,
++ * worse, without having the system appear to have gone out to lunch.
++ */
++static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
++				     size_t cnt, loff_t *ppos)
++{
++	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
++}
++
++/**
++ * debug_width_fwrite - Write function for "width" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "width" debugfs
++ * interface to the hardware latency detector. It can be used to configure
++ * for how many us of the total window us we will actively sample for any
++ * hardware-induced latency periods. Obviously, it is not possible to
++ * sample constantly and have the system respond to a sample reader, or,
++ * worse, without having the system appear to have gone out to lunch. It
++ * is enforced that width is less than the total window size.
++ */
++static ssize_t  debug_width_fwrite(struct file *filp,
++				       const char __user *ubuf,
++				       size_t cnt,
++				       loff_t *ppos)
++{
++	char buf[U64STR_SIZE];
++	int csize = min(cnt, sizeof(buf));
++	u64 val = 0;
++	int err = 0;
++
++	memset(buf, '\0', sizeof(buf));
++	if (copy_from_user(buf, ubuf, csize))
++		return -EFAULT;
++
++	buf[U64STR_SIZE-1] = '\0';			/* just in case */
++	err = strict_strtoull(buf, 10, &val);
++	if (0 != err)
++		return -EINVAL;
++
++	mutex_lock(&data.lock);
++	if (val < data.sample_window)
++		data.sample_width = val;
++	else {
++		mutex_unlock(&data.lock);
++		return -EINVAL;
++	}
++	mutex_unlock(&data.lock);
++
++	if (enabled)
++		wake_up_process(kthread);
++
++	return csize;
++}
++
++/**
++ * debug_window_fopen - Open function for "window" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "window" debugfs
++ * interface to the hardware latency detector. The window is the total time
++ * in us that will be considered one sample period. Conceptually, windows
++ * occur back-to-back and contain a sample width period during which
++ * actual sampling occurs.
++ */
++static int debug_window_fopen(struct inode *inode, struct file *filp)
++{
++	return 0;
++}
++
++/**
++ * debug_window_fread - Read function for "window" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "window" debugfs
++ * interface to the hardware latency detector. The window is the total time
++ * in us that will be considered one sample period. Conceptually, windows
++ * occur back-to-back and contain a sample width period during which
++ * actual sampling occurs. Can be used to read the total window size.
++ */
++static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
++				      size_t cnt, loff_t *ppos)
++{
++	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
++}
++
++/**
++ * debug_window_fwrite - Write function for "window" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "window" debufds
++ * interface to the hardware latency detetector. The window is the total time
++ * in us that will be considered one sample period. Conceptually, windows
++ * occur back-to-back and contain a sample width period during which
++ * actual sampling occurs. Can be used to write a new total window size. It
++ * is enforced that any value written must be greater than the sample width
++ * size, or an error results.
++ */
++static ssize_t  debug_window_fwrite(struct file *filp,
++					const char __user *ubuf,
++					size_t cnt,
++					loff_t *ppos)
++{
++	char buf[U64STR_SIZE];
++	int csize = min(cnt, sizeof(buf));
++	u64 val = 0;
++	int err = 0;
++
++	memset(buf, '\0', sizeof(buf));
++	if (copy_from_user(buf, ubuf, csize))
++		return -EFAULT;
++
++	buf[U64STR_SIZE-1] = '\0';			/* just in case */
++	err = strict_strtoull(buf, 10, &val);
++	if (0 != err)
++		return -EINVAL;
++
++	mutex_lock(&data.lock);
++	if (data.sample_width < val)
++		data.sample_window = val;
++	else {
++		mutex_unlock(&data.lock);
++		return -EINVAL;
++	}
++	mutex_unlock(&data.lock);
++
++	return csize;
++}
++
++/*
++ * Function pointers for the "count" debugfs file operations
++ */
++static const struct file_operations count_fops = {
++	.open		= debug_count_fopen,
++	.read		= debug_count_fread,
++	.write		= debug_count_fwrite,
++	.owner		= THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "enable" debugfs file operations
++ */
++static const struct file_operations enable_fops = {
++	.open		= debug_enable_fopen,
++	.read		= debug_enable_fread,
++	.write		= debug_enable_fwrite,
++	.owner		= THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "max" debugfs file operations
++ */
++static const struct file_operations max_fops = {
++	.open		= debug_max_fopen,
++	.read		= debug_max_fread,
++	.write		= debug_max_fwrite,
++	.owner		= THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "sample" debugfs file operations
++ */
++static const struct file_operations sample_fops = {
++	.open 		= debug_sample_fopen,
++	.read		= debug_sample_fread,
++	.release	= debug_sample_release,
++	.owner		= THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "threshold" debugfs file operations
++ */
++static const struct file_operations threshold_fops = {
++	.open		= debug_threshold_fopen,
++	.read		= debug_threshold_fread,
++	.write		= debug_threshold_fwrite,
++	.owner		= THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "width" debugfs file operations
++ */
++static const struct file_operations width_fops = {
++	.open		= debug_width_fopen,
++	.read		= debug_width_fread,
++	.write		= debug_width_fwrite,
++	.owner		= THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "window" debugfs file operations
++ */
++static const struct file_operations window_fops = {
++	.open		= debug_window_fopen,
++	.read		= debug_window_fread,
++	.write		= debug_window_fwrite,
++	.owner		= THIS_MODULE,
++};
++
++/**
++ * init_debugfs - A function to initialize the debugfs interface files
++ *
++ * This function creates entries in debugfs for "hwlat_detector", including
++ * files to read values from the detector, current samples, and the
++ * maximum sample that has been captured since the hardware latency
++ * detector was started.
++ */
++static int init_debugfs(void)
++{
++	int ret = -ENOMEM;
++
++	debug_dir = debugfs_create_dir(DRVNAME, NULL);
++	if (!debug_dir)
++		goto err_debug_dir;
++
++	debug_sample = debugfs_create_file("sample", 0444,
++					       debug_dir, NULL,
++					       &sample_fops);
++	if (!debug_sample)
++		goto err_sample;
++
++	debug_count = debugfs_create_file("count", 0444,
++					      debug_dir, NULL,
++					      &count_fops);
++	if (!debug_count)
++		goto err_count;
++
++	debug_max = debugfs_create_file("max", 0444,
++					    debug_dir, NULL,
++					    &max_fops);
++	if (!debug_max)
++		goto err_max;
++
++	debug_sample_window = debugfs_create_file("window", 0644,
++						      debug_dir, NULL,
++						      &window_fops);
++	if (!debug_sample_window)
++		goto err_window;
++
++	debug_sample_width = debugfs_create_file("width", 0644,
++						     debug_dir, NULL,
++						     &width_fops);
++	if (!debug_sample_width)
++		goto err_width;
++
++	debug_threshold = debugfs_create_file("threshold", 0644,
++						  debug_dir, NULL,
++						  &threshold_fops);
++	if (!debug_threshold)
++		goto err_threshold;
++
++	debug_enable = debugfs_create_file("enable", 0644,
++					       debug_dir, &enabled,
++					       &enable_fops);
++	if (!debug_enable)
++		goto err_enable;
++
++	else {
++		ret = 0;
++		goto out;
++	}
++
++err_enable:
++	debugfs_remove(debug_threshold);
++err_threshold:
++	debugfs_remove(debug_sample_width);
++err_width:
++	debugfs_remove(debug_sample_window);
++err_window:
++	debugfs_remove(debug_max);
++err_max:
++	debugfs_remove(debug_count);
++err_count:
++	debugfs_remove(debug_sample);
++err_sample:
++	debugfs_remove(debug_dir);
++err_debug_dir:
++out:
++	return ret;
++}
++
++/**
++ * free_debugfs - A function to cleanup the debugfs file interface
++ */
++static void free_debugfs(void)
++{
++	/* could also use a debugfs_remove_recursive */
++	debugfs_remove(debug_enable);
++	debugfs_remove(debug_threshold);
++	debugfs_remove(debug_sample_width);
++	debugfs_remove(debug_sample_window);
++	debugfs_remove(debug_max);
++	debugfs_remove(debug_count);
++	debugfs_remove(debug_sample);
++	debugfs_remove(debug_dir);
++}
++
++/**
++ * detector_init - Standard module initialization code
++ */
++static int detector_init(void)
++{
++	int ret = -ENOMEM;
++
++	printk(KERN_INFO BANNER "version %s\n", VERSION);
++
++	ret = init_stats();
++	if (0 != ret)
++		goto out;
++
++	ret = init_debugfs();
++	if (0 != ret)
++		goto err_stats;
++
++	if (enabled)
++		ret = start_kthread();
++
++	goto out;
++
++err_stats:
++	ring_buffer_free(ring_buffer);
++out:
++	return ret;
++
++}
++
++/**
++ * detector_exit - Standard module cleanup code
++ */
++static void detector_exit(void)
++{
++	int err;
++
++	if (enabled) {
++		enabled = 0;
++		err = stop_kthread();
++		if (err)
++			printk(KERN_ERR BANNER "cannot stop kthread\n");
++	}
++
++	free_debugfs();
++	ring_buffer_free(ring_buffer);	/* free up the ring buffer */
++
++}
++
++module_init(detector_init);
++module_exit(detector_exit);
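
The debugfs interface described in the comments above (window, width,
threshold, enable, sample) is driven entirely from user space. As a rough
illustration only -- not part of the patch queue -- here is a minimal
user-space sketch, assuming debugfs is mounted at /sys/kernel/debug and the
hwlat_detector module is loaded; all names below are taken from the file
names created by init_debugfs() above:

/*
 * Illustrative user-space sketch only -- not part of the patch queue.
 * Assumes debugfs is mounted at /sys/kernel/debug and the hwlat_detector
 * module is loaded, so the files used below exist.
 */
#include <stdio.h>
#include <stdlib.h>

#define HWLAT_DIR "/sys/kernel/debug/hwlat_detector/"

static void hwlat_write(const char *file, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), HWLAT_DIR "%s", file);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	char line[128];
	FILE *f;

	hwlat_write("window", "1000000");  /* 1s sampling window (usecs) */
	hwlat_write("width", "500000");    /* sample during 0.5s of it */
	hwlat_write("threshold", "10");    /* record gaps above 10 usecs */
	hwlat_write("enable", "1");        /* start the sampling kthread */

	/* "sample" blocks until a latency above the threshold is seen */
	f = fopen(HWLAT_DIR "sample", "r");
	if (!f) {
		perror(HWLAT_DIR "sample");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		printf("hwlat: %s", line);
	fclose(f);
	return 0;
}

The blocking read matches the behaviour documented in debug_sample_fread()
above; open the file with O_NONBLOCK to poll instead.
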
diff --git a/patches/i2c-omap-drop-the-lock-hard-irq-context.patch b/patches/i2c-omap-drop-the-lock-hard-irq-context.patch
new file mode 100644
index 0000000..fe35c0a
--- /dev/null
+++ b/patches/i2c-omap-drop-the-lock-hard-irq-context.patch
@@ -0,0 +1,34 @@
+From 5145351047b216cca13aaca99f939a9a594c6c4d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 21 Mar 2013 11:35:49 +0100
+Subject: [PATCH 2/3] i2c/omap: drop the lock hard irq context
+
+The lock is taken while reading two registers. On RT the lock is taken
+both in the hard irq handler, where it might sleep, and in the threaded
+irq handler. The threaded irq runs in oneshot mode, so the hard irq does
+not run until the thread completes; there is no reason to grab the lock.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/i2c/busses/i2c-omap.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-omap.c
++++ b/drivers/i2c/busses/i2c-omap.c
+@@ -881,15 +881,12 @@ omap_i2c_isr(int irq, void *dev_id)
+ 	u16 mask;
+ 	u16 stat;
+ 
+-	spin_lock(&dev->lock);
+-	mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
+ 	stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
++	mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
+ 
+ 	if (stat & mask)
+ 		ret = IRQ_WAKE_THREAD;
+ 
+-	spin_unlock(&dev->lock);
+-
+ 	return ret;
+ }
+ 
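
The removal above relies on how oneshot threaded interrupts are sequenced:
with IRQF_ONESHOT the interrupt line stays masked until the threaded handler
returns, so the hard handler and the thread never run concurrently for the
same device. A hypothetical driver fragment (not from this queue, "demo"
names are made up) showing that request_threaded_irq() pattern:

/* Hypothetical example of the oneshot-threaded IRQ pattern the changelog
 * relies on; the "demo" names are made up for illustration.
 */
#include <linux/interrupt.h>

static irqreturn_t demo_hardirq(int irq, void *dev_id)
{
	/* quick check in hard irq context; the thread below cannot run yet */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t demo_thread_fn(int irq, void *dev_id)
{
	/* heavy lifting in a schedulable context; the line is still masked */
	return IRQ_HANDLED;
}

static int demo_setup_irq(int irq, void *dev)
{
	return request_threaded_irq(irq, demo_hardirq, demo_thread_fn,
				    IRQF_ONESHOT, "demo", dev);
}
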
diff --git a/patches/ide-use-nort-local-irq-variants.patch b/patches/ide-use-nort-local-irq-variants.patch
new file mode 100644
index 0000000..21d4299
--- /dev/null
+++ b/patches/ide-use-nort-local-irq-variants.patch
@@ -0,0 +1,169 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:16 -0500
+Subject: ide: Do not disable interrupts for PREEMPT-RT
+
+Use the local_irq_*_nort variants.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/ide/alim15x3.c     |    4 ++--
+ drivers/ide/hpt366.c       |    4 ++--
+ drivers/ide/ide-io-std.c   |    8 ++++----
+ drivers/ide/ide-io.c       |    2 +-
+ drivers/ide/ide-iops.c     |    4 ++--
+ drivers/ide/ide-probe.c    |    4 ++--
+ drivers/ide/ide-taskfile.c |    6 +++---
+ 7 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/drivers/ide/alim15x3.c
++++ b/drivers/ide/alim15x3.c
+@@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct p
+ 
+ 	isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 
+ 	if (m5229_revision < 0xC2) {
+ 		/*
+@@ -325,7 +325,7 @@ out:
+ 	}
+ 	pci_dev_put(north);
+ 	pci_dev_put(isa_dev);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 	return 0;
+ }
+ 
+--- a/drivers/ide/hpt366.c
++++ b/drivers/ide/hpt366.c
+@@ -1241,7 +1241,7 @@ static int init_dma_hpt366(ide_hwif_t *h
+ 
+ 	dma_old = inb(base + 2);
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 
+ 	dma_new = dma_old;
+ 	pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
+@@ -1252,7 +1252,7 @@ static int init_dma_hpt366(ide_hwif_t *h
+ 	if (dma_new != dma_old)
+ 		outb(dma_new, base + 2);
+ 
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 
+ 	printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
+ 			 hwif->name, base, base + 7);
+--- a/drivers/ide/ide-io-std.c
++++ b/drivers/ide/ide-io-std.c
+@@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive,
+ 		unsigned long uninitialized_var(flags);
+ 
+ 		if ((io_32bit & 2) && !mmio) {
+-			local_irq_save(flags);
++			local_irq_save_nort(flags);
+ 			ata_vlb_sync(io_ports->nsect_addr);
+ 		}
+ 
+@@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive,
+ 			insl(data_addr, buf, words);
+ 
+ 		if ((io_32bit & 2) && !mmio)
+-			local_irq_restore(flags);
++			local_irq_restore_nort(flags);
+ 
+ 		if (((len + 1) & 3) < 2)
+ 			return;
+@@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive,
+ 		unsigned long uninitialized_var(flags);
+ 
+ 		if ((io_32bit & 2) && !mmio) {
+-			local_irq_save(flags);
++			local_irq_save_nort(flags);
+ 			ata_vlb_sync(io_ports->nsect_addr);
+ 		}
+ 
+@@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive,
+ 			outsl(data_addr, buf, words);
+ 
+ 		if ((io_32bit & 2) && !mmio)
+-			local_irq_restore(flags);
++			local_irq_restore_nort(flags);
+ 
+ 		if (((len + 1) & 3) < 2)
+ 			return;
+--- a/drivers/ide/ide-io.c
++++ b/drivers/ide/ide-io.c
+@@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long dat
+ 		/* disable_irq_nosync ?? */
+ 		disable_irq(hwif->irq);
+ 		/* local CPU only, as if we were handling an interrupt */
+-		local_irq_disable();
++		local_irq_disable_nort();
+ 		if (hwif->polling) {
+ 			startstop = handler(drive);
+ 		} else if (drive_is_ready(drive)) {
+--- a/drivers/ide/ide-iops.c
++++ b/drivers/ide/ide-iops.c
+@@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive,
+ 				if ((stat & ATA_BUSY) == 0)
+ 					break;
+ 
+-				local_irq_restore(flags);
++				local_irq_restore_nort(flags);
+ 				*rstat = stat;
+ 				return -EBUSY;
+ 			}
+ 		}
+-		local_irq_restore(flags);
++		local_irq_restore_nort(flags);
+ 	}
+ 	/*
+ 	 * Allow status to settle, then read it again.
+--- a/drivers/ide/ide-probe.c
++++ b/drivers/ide/ide-probe.c
+@@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *dri
+ 	int bswap = 1;
+ 
+ 	/* local CPU only; some systems need this */
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	/* read 512 bytes of id info */
+ 	hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 
+ 	drive->dev_flags |= IDE_DFLAG_ID_READ;
+ #ifdef DEBUG
+--- a/drivers/ide/ide-taskfile.c
++++ b/drivers/ide/ide-taskfile.c
+@@ -251,7 +251,7 @@ void ide_pio_bytes(ide_drive_t *drive, s
+ 
+ 		page_is_high = PageHighMem(page);
+ 		if (page_is_high)
+-			local_irq_save(flags);
++			local_irq_save_nort(flags);
+ 
+ 		buf = kmap_atomic(page) + offset;
+ 
+@@ -272,7 +272,7 @@ void ide_pio_bytes(ide_drive_t *drive, s
+ 		kunmap_atomic(buf);
+ 
+ 		if (page_is_high)
+-			local_irq_restore(flags);
++			local_irq_restore_nort(flags);
+ 
+ 		len -= nr_bytes;
+ 	}
+@@ -415,7 +415,7 @@ static ide_startstop_t pre_task_out_intr
+ 	}
+ 
+ 	if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
+-		local_irq_disable();
++		local_irq_disable_nort();
+ 
+ 	ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
+ 
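
All of these conversions use the *_nort() helpers provided elsewhere in the
RT queue; roughly, they behave like the ordinary local_irq_*() primitives on
a non-RT kernel and become (near) no-ops on PREEMPT_RT_FULL. A simplified
sketch of that convention -- the exact definitions in the queue may differ:

/*
 * Simplified sketch of the *_nort() convention used by the driver
 * patches in this queue; the real definitions may differ in detail.
 */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_disable_nort()	do { } while (0)
# define local_irq_enable_nort()	do { } while (0)
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif
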
diff --git a/patches/idle-state.patch b/patches/idle-state.patch
new file mode 100644
index 0000000..4f1255c
--- /dev/null
+++ b/patches/idle-state.patch
@@ -0,0 +1,19 @@
+Subject: sched: Init idle->on_rq in init_idle()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 23:03:29 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4941,6 +4941,7 @@ void __cpuinit init_idle(struct task_str
+ 	rcu_read_unlock();
+ 
+ 	rq->curr = rq->idle = idle;
++	idle->on_rq = 1;
+ #if defined(CONFIG_SMP)
+ 	idle->on_cpu = 1;
+ #endif
diff --git a/patches/infiniband-mellanox-ib-use-nort-irq.patch b/patches/infiniband-mellanox-ib-use-nort-irq.patch
new file mode 100644
index 0000000..8b2c7fd
--- /dev/null
+++ b/patches/infiniband-mellanox-ib-use-nort-irq.patch
@@ -0,0 +1,40 @@
+From: Sven-Thorsten Dietrich <sdietrich@novell.com>
+Date: Fri, 3 Jul 2009 08:30:35 -0500
+Subject: infiniband: Mellanox IB driver patch use _nort() primitives
+
+Fixes an in_atomic stack-dump when the Mellanox module is loaded into
+the RT kernel.
+
+Michael S. Tsirkin <mst@dev.mellanox.co.il> sayeth:
+"Basically, if you just make spin_lock_irqsave (and spin_lock_irq) not disable
+interrupts for non-raw spinlocks, I think all of infiniband will be fine without
+changes."
+
+Signed-off-by: Sven-Thorsten Dietrich <sven@thebigcorporation.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_multicast.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+@@ -783,7 +783,7 @@ void ipoib_mcast_restart_task(struct wor
+ 
+ 	ipoib_mcast_stop_thread(dev, 0);
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	netif_addr_lock(dev);
+ 	spin_lock(&priv->lock);
+ 
+@@ -865,7 +865,7 @@ void ipoib_mcast_restart_task(struct wor
+ 
+ 	spin_unlock(&priv->lock);
+ 	netif_addr_unlock(dev);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 
+ 	/* We have to cancel outside of the spinlock */
+ 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
diff --git a/patches/inpt-gameport-use-local-irq-nort.patch b/patches/inpt-gameport-use-local-irq-nort.patch
new file mode 100644
index 0000000..fc11de2
--- /dev/null
+++ b/patches/inpt-gameport-use-local-irq-nort.patch
@@ -0,0 +1,44 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:16 -0500
+Subject: input: gameport: Do not disable interrupts on PREEMPT_RT
+
+Use the _nort() primitives.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/input/gameport/gameport.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/input/gameport/gameport.c
++++ b/drivers/input/gameport/gameport.c
+@@ -87,12 +87,12 @@ static int gameport_measure_speed(struct
+ 	tx = 1 << 30;
+ 
+ 	for(i = 0; i < 50; i++) {
+-		local_irq_save(flags);
++		local_irq_save_nort(flags);
+ 		GET_TIME(t1);
+ 		for (t = 0; t < 50; t++) gameport_read(gameport);
+ 		GET_TIME(t2);
+ 		GET_TIME(t3);
+-		local_irq_restore(flags);
++		local_irq_restore_nort(flags);
+ 		udelay(i * 10);
+ 		if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
+ 	}
+@@ -111,11 +111,11 @@ static int gameport_measure_speed(struct
+ 	tx = 1 << 30;
+ 
+ 	for(i = 0; i < 50; i++) {
+-		local_irq_save(flags);
++		local_irq_save_nort(flags);
+ 		rdtscl(t1);
+ 		for (t = 0; t < 50; t++) gameport_read(gameport);
+ 		rdtscl(t2);
+-		local_irq_restore(flags);
++		local_irq_restore_nort(flags);
+ 		udelay(i * 10);
+ 		if (t2 - t1 < tx) tx = t2 - t1;
+ 	}
diff --git a/patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch b/patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch
new file mode 100644
index 0000000..2d46f5d
--- /dev/null
+++ b/patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch
@@ -0,0 +1,67 @@
+Subject: intel_idle: Convert i7300_idle_lock to raw spinlock
+From: Mike Galbraith <efault@gmx.de>
+Date: Wed, 07 Dec 2011 12:48:42 +0100
+
+24 core Intel box's first exposure to 3.0.12-rt30-rc3 didn't go well.
+
+[   27.104159] i7300_idle: loaded v1.55
+[   27.104192] BUG: scheduling while atomic: swapper/2/0/0x00000002
+[   27.104309] Pid: 0, comm: swapper/2 Tainted: G           N  3.0.12-rt30-rc3-rt #1
+[   27.104317] Call Trace:
+[   27.104338]  [<ffffffff810046a5>] dump_trace+0x85/0x2e0
+[   27.104372]  [<ffffffff8144eb00>] thread_return+0x12b/0x30b
+[   27.104381]  [<ffffffff8144f1b9>] schedule+0x29/0xb0
+[   27.104389]  [<ffffffff814506e5>] rt_spin_lock_slowlock+0xc5/0x240
+[   27.104401]  [<ffffffffa01f818f>] i7300_idle_notifier+0x3f/0x360 [i7300_idle]
+[   27.104415]  [<ffffffff814546c7>] notifier_call_chain+0x37/0x70
+[   27.104426]  [<ffffffff81454748>] __atomic_notifier_call_chain+0x48/0x70
+[   27.104439]  [<ffffffff81001a39>] cpu_idle+0x89/0xb0
+[   27.104449] bad: scheduling from the idle thread!
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1323258522.5057.73.camel@marge.simson.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/idle/i7300_idle.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/idle/i7300_idle.c
++++ b/drivers/idle/i7300_idle.c
+@@ -75,7 +75,7 @@ static unsigned long past_skip;
+ 
+ static struct pci_dev *fbd_dev;
+ 
+-static spinlock_t i7300_idle_lock;
++static raw_spinlock_t i7300_idle_lock;
+ static int i7300_idle_active;
+ 
+ static u8 i7300_idle_thrtctl_saved;
+@@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct no
+ 		idle_begin_time = ktime_get();
+ 	}
+ 
+-	spin_lock_irqsave(&i7300_idle_lock, flags);
++	raw_spin_lock_irqsave(&i7300_idle_lock, flags);
+ 	if (val == IDLE_START) {
+ 
+ 		cpumask_set_cpu(smp_processor_id(), idle_cpumask);
+@@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct no
+ 		}
+ 	}
+ end:
+-	spin_unlock_irqrestore(&i7300_idle_lock, flags);
++	raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
+ 	return 0;
+ }
+ 
+@@ -548,7 +548,7 @@ struct debugfs_file_info {
+ 
+ static int __init i7300_idle_init(void)
+ {
+-	spin_lock_init(&i7300_idle_lock);
++	raw_spin_lock_init(&i7300_idle_lock);
+ 	total_us = 0;
+ 
+ 	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
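
The conversion matters because on RT a plain spinlock_t becomes a sleeping
rt_mutex, which must not be taken from the idle notifier path shown in the
backtrace. A hypothetical fragment (not this driver) illustrating the general
rule -- raw_spinlock_t for locks taken in contexts that cannot sleep:

/* Hypothetical example: a lock taken from a context that must not sleep
 * (idle notifier, hard irq, ...) needs to be a raw_spinlock_t on RT,
 * since spinlock_t turns into a sleeping lock there.
 */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);
static unsigned long demo_events;

static void demo_from_atomic_context(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	demo_events++;		/* keep the critical section short */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}
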
diff --git a/patches/ipc-make-rt-aware.patch b/patches/ipc-make-rt-aware.patch
new file mode 100644
index 0000000..8b081cf
--- /dev/null
+++ b/patches/ipc-make-rt-aware.patch
@@ -0,0 +1,85 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:12 -0500
+Subject: ipc: Make the ipc code -rt aware
+
+RT serializes the code with the (rt)spinlock but keeps preemption
+enabled. Some parts of the code need to be atomic nevertheless.
+
+Protect it with preempt_disable/enable_rt pairs.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ ipc/mqueue.c |    5 +++++
+ ipc/msg.c    |   16 ++++++++++++++++
+ 2 files changed, 21 insertions(+)
+
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -912,12 +912,17 @@ static inline void pipelined_send(struct
+ 				  struct msg_msg *message,
+ 				  struct ext_wait_queue *receiver)
+ {
++	/*
++	 * Keep them in one critical section for PREEMPT_RT:
++	 */
++	preempt_disable_rt();
+ 	receiver->msg = message;
+ 	list_del(&receiver->list);
+ 	receiver->state = STATE_PENDING;
+ 	wake_up_process(receiver->task);
+ 	smp_wmb();
+ 	receiver->state = STATE_READY;
++	preempt_enable_rt();
+ }
+ 
+ /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -259,12 +259,20 @@ static void expunge_all(struct msg_queue
+ 	while (tmp != &msq->q_receivers) {
+ 		struct msg_receiver *msr;
+ 
++		/*
++		 * Make sure that the wakeup doesn't preempt
++		 * this CPU prematurely. (on PREEMPT_RT)
++		 */
++		preempt_disable_rt();
++
+ 		msr = list_entry(tmp, struct msg_receiver, r_list);
+ 		tmp = tmp->next;
+ 		msr->r_msg = NULL;
+ 		wake_up_process(msr->r_tsk);
+ 		smp_mb();
+ 		msr->r_msg = ERR_PTR(res);
++
++		preempt_enable_rt();
+ 	}
+ }
+ 
+@@ -614,6 +622,12 @@ static inline int pipelined_send(struct
+ 		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
+ 					       msr->r_msgtype, msr->r_mode)) {
+ 
++			/*
++			 * Make sure that the wakeup doesn't preempt
++			 * this CPU prematurely. (on PREEMPT_RT)
++			 */
++			preempt_disable_rt();
++
+ 			list_del(&msr->r_list);
+ 			if (msr->r_maxsize < msg->m_ts) {
+ 				msr->r_msg = NULL;
+@@ -627,9 +641,11 @@ static inline int pipelined_send(struct
+ 				wake_up_process(msr->r_tsk);
+ 				smp_mb();
+ 				msr->r_msg = msg;
++				preempt_enable_rt();
+ 
+ 				return 1;
+ 			}
++			preempt_enable_rt();
+ 		}
+ 	}
+ 	return 0;
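
The preempt_disable_rt()/preempt_enable_rt() pair is provided by the base RT
patches; roughly, it maps to preempt_disable()/preempt_enable() when
PREEMPT_RT_BASE is enabled and compiles away otherwise. A simplified sketch
(the exact definitions live elsewhere in the queue):

/*
 * Simplified sketch of the RT-only preemption helpers used above; the
 * real definitions come from the base RT patches and may differ.
 */
#ifdef CONFIG_PREEMPT_RT_BASE
# define preempt_disable_rt()	preempt_disable()
# define preempt_enable_rt()	preempt_enable()
#else
# define preempt_disable_rt()	do { } while (0)
# define preempt_enable_rt()	do { } while (0)
#endif
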
diff --git a/patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch b/patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch
new file mode 100644
index 0000000..4d65d5b
--- /dev/null
+++ b/patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch
@@ -0,0 +1,64 @@
+Subject: ipc/mqueue: Add a critical section to avoid a deadlock
+From: KOBAYASHI Yoshitake <yoshitake.kobayashi@toshiba.co.jp>
+Date: Sat, 23 Jul 2011 11:57:36 +0900
+
+(Repost for v3.0-rt1 and changed the destination addresses)
+I have tested the following patch on v3.0-rt1 with PREEMPT_RT_FULL.
+In POSIX message queue, if a sender process uses SCHED_FIFO and
+has a higher priority than a receiver process, the sender will
+be stuck at ipc/mqueue.c:452
+
+  452                 while (ewp->state == STATE_PENDING)
+  453                         cpu_relax();
+
+Description of the problem
+ (receiver process)
+   1. receiver changes sender's state to STATE_PENDING (mqueue.c:846)
+   2. wake up sender process and "switch to sender" (mqueue.c:847)
+      Note: This context switch only happens in PREEMPT_RT_FULL kernel.
+ (sender process)
+   3. sender check the own state in above loop (mqueue.c:452-453)
+   *. receiver will never wake up and cannot change sender's state to
+      STATE_READY because sender has higher priority
+
+
+Signed-off-by: Yoshitake Kobayashi <yoshitake.kobayashi@toshiba.co.jp>
+Cc: viro@zeniv.linux.org.uk
+Cc: dchinner@redhat.com
+Cc: npiggin@kernel.dk
+Cc: hch@lst.de
+Cc: arnd@arndb.de
+Link: http://lkml.kernel.org/r/4E2A38A0.1090601@toshiba.co.jp
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ ipc/mqueue.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -936,13 +936,18 @@ static inline void pipelined_receive(str
+ 		wake_up_interruptible(&info->wait_q);
+ 		return;
+ 	}
+-	if (msg_insert(sender->msg, info))
+-		return;
+-	list_del(&sender->list);
+-	sender->state = STATE_PENDING;
+-	wake_up_process(sender->task);
+-	smp_wmb();
+-	sender->state = STATE_READY;
++	/*
++	 * Keep them in one critical section for PREEMPT_RT:
++	 */
++	preempt_disable_rt();
++	if (!msg_insert(sender->msg, info)) {
++		list_del(&sender->list);
++		sender->state = STATE_PENDING;
++		wake_up_process(sender->task);
++		smp_wmb();
++		sender->state = STATE_READY;
++	}
++	preempt_enable_rt();
+ }
+ 
+ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
diff --git a/patches/ipc-sem-rework-semaphore-wakeups.patch b/patches/ipc-sem-rework-semaphore-wakeups.patch
new file mode 100644
index 0000000..a51262a
--- /dev/null
+++ b/patches/ipc-sem-rework-semaphore-wakeups.patch
@@ -0,0 +1,73 @@
+Subject: ipc/sem: Rework semaphore wakeups
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 14 Sep 2011 11:57:04 +0200
+
+Subject: ipc/sem: Rework semaphore wakeups
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Tue Sep 13 15:09:40 CEST 2011
+
+Current sysv sems have a weird ass wakeup scheme that involves keeping
+preemption disabled over a potential O(n^2) loop and busy waiting on
+that on other CPUs.
+
+Kill this and simply wake the task directly from under the sem_lock.
+
+This was discovered by a migrate_disable() debug feature that
+disallows:
+
+  spin_lock();
+  preempt_disable();
+  spin_unlock()
+  preempt_enable();
+
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Link: http://lkml.kernel.org/r/1315994224.5040.1.camel@twins
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ ipc/sem.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -461,6 +461,13 @@ undo:
+ static void wake_up_sem_queue_prepare(struct list_head *pt,
+ 				struct sem_queue *q, int error)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++	struct task_struct *p = q->sleeper;
++	get_task_struct(p);
++	q->status = error;
++	wake_up_process(p);
++	put_task_struct(p);
++#else
+ 	if (list_empty(pt)) {
+ 		/*
+ 		 * Hold preempt off so that we don't get preempted and have the
+@@ -472,6 +479,7 @@ static void wake_up_sem_queue_prepare(st
+ 	q->pid = error;
+ 
+ 	list_add_tail(&q->simple_list, pt);
++#endif
+ }
+ 
+ /**
+@@ -485,6 +493,7 @@ static void wake_up_sem_queue_prepare(st
+  */
+ static void wake_up_sem_queue_do(struct list_head *pt)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	struct sem_queue *q, *t;
+ 	int did_something;
+ 
+@@ -497,6 +506,7 @@ static void wake_up_sem_queue_do(struct
+ 	}
+ 	if (did_something)
+ 		preempt_enable();
++#endif
+ }
+ 
+ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
diff --git a/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch b/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
new file mode 100644
index 0000000..be85497
--- /dev/null
+++ b/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
@@ -0,0 +1,144 @@
+Subject: genirq: Allow disabling of softirq processing in irq thread context
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 31 Jan 2012 13:01:27 +0100
+
+The processing of softirqs in irq thread context is a performance gain
+for the non-rt workloads of a system, but it's counterproductive for
+interrupts which are explicitly related to the realtime
+workload. Allow such interrupts to prevent softirq processing in their
+thread context.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/interrupt.h |    2 ++
+ include/linux/irq.h       |    5 ++++-
+ kernel/irq/manage.c       |   13 ++++++++++++-
+ kernel/irq/settings.h     |   12 ++++++++++++
+ kernel/softirq.c          |    7 +++++++
+ 5 files changed, 37 insertions(+), 2 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -58,6 +58,7 @@
+  * IRQF_NO_THREAD - Interrupt cannot be threaded
+  * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
+  *                resume time.
++ * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
+  */
+ #define IRQF_DISABLED		0x00000020
+ #define IRQF_SHARED		0x00000080
+@@ -71,6 +72,7 @@
+ #define IRQF_FORCE_RESUME	0x00008000
+ #define IRQF_NO_THREAD		0x00010000
+ #define IRQF_EARLY_RESUME	0x00020000
++#define IRQF_NO_SOFTIRQ_CALL	0x00040000
+ 
+ #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
+ 
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -70,6 +70,7 @@ typedef	void (*irq_preflow_handler_t)(st
+  * IRQ_MOVE_PCNTXT		- Interrupt can be migrated from process context
+  * IRQ_NESTED_TRHEAD		- Interrupt nests into another thread
+  * IRQ_PER_CPU_DEVID		- Dev_id is a per-cpu variable
++ * IRQ_NO_SOFTIRQ_CALL		- No softirq processing in the irq thread context (RT)
+  */
+ enum {
+ 	IRQ_TYPE_NONE		= 0x00000000,
+@@ -94,12 +95,14 @@ enum {
+ 	IRQ_NESTED_THREAD	= (1 << 15),
+ 	IRQ_NOTHREAD		= (1 << 16),
+ 	IRQ_PER_CPU_DEVID	= (1 << 17),
++	IRQ_NO_SOFTIRQ_CALL	= (1 << 18),
+ };
+ 
+ #define IRQF_MODIFY_MASK	\
+ 	(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
+ 	 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
+-	 IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID)
++	 IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
++	 IRQ_NO_SOFTIRQ_CALL)
+ 
+ #define IRQ_NO_BALANCING_MASK	(IRQ_PER_CPU | IRQ_NO_BALANCING)
+ 
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -782,7 +782,15 @@ irq_forced_thread_fn(struct irq_desc *de
+ 	local_bh_disable();
+ 	ret = action->thread_fn(action->irq, action->dev_id);
+ 	irq_finalize_oneshot(desc, action);
+-	local_bh_enable();
++	/*
++	 * Interrupts which have real time requirements can be set up
++	 * to avoid softirq processing in the thread handler. This is
++	 * safe as these interrupts do not raise soft interrupts.
++	 */
++	if (irq_settings_no_softirq_call(desc))
++		_local_bh_enable();
++	else
++		local_bh_enable();
+ 	return ret;
+ }
+ 
+@@ -1127,6 +1135,9 @@ __setup_irq(unsigned int irq, struct irq
+ 			irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ 		}
+ 
++		if (new->flags & IRQF_NO_SOFTIRQ_CALL)
++			irq_settings_set_no_softirq_call(desc);
++
+ 		/* Set default affinity mask once everything is setup */
+ 		setup_affinity(irq, desc, mask);
+ 
+--- a/kernel/irq/settings.h
++++ b/kernel/irq/settings.h
+@@ -14,6 +14,7 @@ enum {
+ 	_IRQ_NO_BALANCING	= IRQ_NO_BALANCING,
+ 	_IRQ_NESTED_THREAD	= IRQ_NESTED_THREAD,
+ 	_IRQ_PER_CPU_DEVID	= IRQ_PER_CPU_DEVID,
++	_IRQ_NO_SOFTIRQ_CALL	= IRQ_NO_SOFTIRQ_CALL,
+ 	_IRQF_MODIFY_MASK	= IRQF_MODIFY_MASK,
+ };
+ 
+@@ -26,6 +27,7 @@ enum {
+ #define IRQ_NOAUTOEN		GOT_YOU_MORON
+ #define IRQ_NESTED_THREAD	GOT_YOU_MORON
+ #define IRQ_PER_CPU_DEVID	GOT_YOU_MORON
++#define IRQ_NO_SOFTIRQ_CALL	GOT_YOU_MORON
+ #undef IRQF_MODIFY_MASK
+ #define IRQF_MODIFY_MASK	GOT_YOU_MORON
+ 
+@@ -36,6 +38,16 @@ irq_settings_clr_and_set(struct irq_desc
+ 	desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
+ }
+ 
++static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
++{
++	return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
++}
++
++static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
++{
++	desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
++}
++
+ static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
+ {
+ 	return desc->status_use_accessors & _IRQ_PER_CPU;
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -417,6 +417,13 @@ void local_bh_enable_ip(unsigned long ip
+ }
+ EXPORT_SYMBOL(local_bh_enable_ip);
+ 
++void _local_bh_enable(void)
++{
++	current->softirq_nestcnt--;
++	migrate_enable();
++}
++EXPORT_SYMBOL(_local_bh_enable);
++
+ /* For tracing */
+ int notrace __in_softirq(void)
+ {
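
A driver with hard real-time requirements would opt in by passing the new
flag when requesting its interrupt. A hypothetical example ("rtdev" names are
made up); the flag is only safe for handlers that never raise softirqs
themselves, as the comment added to irq_forced_thread_fn() notes:

/* Hypothetical RT-sensitive driver opting out of softirq processing in
 * its irq thread via the IRQF_NO_SOFTIRQ_CALL flag introduced above.
 */
#include <linux/interrupt.h>

static irqreturn_t rtdev_irq_thread(int irq, void *dev_id)
{
	/* deterministic handling only; no softirq work is raised here */
	return IRQ_HANDLED;
}

static int rtdev_request_irq(int irq, void *dev)
{
	/* a NULL hard handler requires IRQF_ONESHOT for a threaded irq */
	return request_threaded_irq(irq, NULL, rtdev_irq_thread,
				    IRQF_ONESHOT | IRQF_NO_SOFTIRQ_CALL,
				    "rtdev", dev);
}
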
diff --git a/patches/jump-label-rt.patch b/patches/jump-label-rt.patch
new file mode 100644
index 0000000..b3c1ead
--- /dev/null
+++ b/patches/jump-label-rt.patch
@@ -0,0 +1,21 @@
+Subject: jump-label-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 13 Jul 2011 11:03:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/jump_label.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/linux/jump_label.h
++++ b/include/linux/jump_label.h
+@@ -50,7 +50,8 @@
+ #include <linux/compiler.h>
+ #include <linux/workqueue.h>
+ 
+-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
++#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) && \
++	!defined(CONFIG_PREEMPT_BASE)
+ 
+ struct static_key {
+ 	atomic_t enabled;
diff --git a/patches/kconfig-disable-a-few-options-rt.patch b/patches/kconfig-disable-a-few-options-rt.patch
new file mode 100644
index 0000000..17aee87
--- /dev/null
+++ b/patches/kconfig-disable-a-few-options-rt.patch
@@ -0,0 +1,44 @@
+Subject: kconfig-disable-a-few-options-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 24 Jul 2011 12:11:43 +0200
+
+Disable stuff which is known to have issues on RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/Kconfig        |    1 +
+ drivers/net/Kconfig |    1 +
+ mm/Kconfig          |    2 +-
+ 3 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -6,6 +6,7 @@ config OPROFILE
+ 	tristate "OProfile system profiling"
+ 	depends on PROFILING
+ 	depends on HAVE_OPROFILE
++	depends on !PREEMPT_RT_FULL
+ 	select RING_BUFFER
+ 	select RING_BUFFER_ALLOW_SWAP
+ 	help
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -164,6 +164,7 @@ config VXLAN
+ 
+ config NETCONSOLE
+ 	tristate "Network console logging support"
++	depends on !PREEMPT_RT_FULL
+ 	---help---
+ 	If you want to log kernel messages over the network, enable this.
+ 	See <file:Documentation/networking/netconsole.txt> for details.
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -353,7 +353,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
+ 
+ config TRANSPARENT_HUGEPAGE
+ 	bool "Transparent Hugepage Support"
+-	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
++	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
+ 	select COMPACTION
+ 	help
+ 	  Transparent Hugepages allows the kernel to use huge pages and
diff --git a/patches/kconfig-preempt-rt-full.patch b/patches/kconfig-preempt-rt-full.patch
new file mode 100644
index 0000000..62da6d7
--- /dev/null
+++ b/patches/kconfig-preempt-rt-full.patch
@@ -0,0 +1,56 @@
+Subject: kconfig-preempt-rt-full.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 14:58:57 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ init/Makefile          |    2 +-
+ kernel/Kconfig.preempt |    8 ++++++++
+ scripts/mkcompile_h    |    4 +++-
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/init/Makefile
++++ b/init/Makefile
+@@ -33,4 +33,4 @@ silent_chk_compile.h = :
+ include/generated/compile.h: FORCE
+ 	@$($(quiet)chk_compile.h)
+ 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
+-	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
++	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -73,6 +73,14 @@ config PREEMPT_RTB
+ 	  enables changes which are preliminary for the full preemptiple
+ 	  RT kernel.
+ 
++config PREEMPT_RT_FULL
++	bool "Fully Preemptible Kernel (RT)"
++	depends on IRQ_FORCED_THREADING
++	select PREEMPT_RT_BASE
++	select PREEMPT_RCU
++	help
++	  All and everything
++
+ endchoice
+ 
+ config PREEMPT_COUNT
+--- a/scripts/mkcompile_h
++++ b/scripts/mkcompile_h
+@@ -4,7 +4,8 @@ TARGET=$1
+ ARCH=$2
+ SMP=$3
+ PREEMPT=$4
+-CC=$5
++RT=$5
++CC=$6
+ 
+ vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
+ 
+@@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
+ CONFIG_FLAGS=""
+ if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
+ if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
++if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
+ UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
+ 
+ # Truncate to maximum length
diff --git a/patches/kgb-serial-hackaround.patch b/patches/kgb-serial-hackaround.patch
new file mode 100644
index 0000000..1a13d29
--- /dev/null
+++ b/patches/kgb-serial-hackaround.patch
@@ -0,0 +1,102 @@
+From: Jason Wessel <jason.wessel@windriver.com>
+Date: Thu, 28 Jul 2011 12:42:23 -0500
+Subject: kgdb/serial: Short term workaround
+
+On 07/27/2011 04:37 PM, Thomas Gleixner wrote:
+>  - KGDB (not yet disabled) is reportedly unusable on -rt right now due
+>    to missing hacks in the console locking which I dropped on purpose.
+>
+
+To work around this in the short term you can use this patch, in
+addition to the clocksource watchdog patch that Thomas brewed up.
+
+Comments are welcome of course.  Ultimately the right solution is to
+change the separation between the console and the HW to have a polled mode
++ work queue so as not to introduce any kind of latency.
+
+Thanks,
+Jason.
+
+---
+ drivers/tty/serial/8250/8250.c |    3 ++-
+ include/linux/kdb.h            |    3 ++-
+ kernel/debug/kdb/kdb_io.c      |    6 ++----
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/tty/serial/8250/8250.c
++++ b/drivers/tty/serial/8250/8250.c
+@@ -38,6 +38,7 @@
+ #include <linux/nmi.h>
+ #include <linux/mutex.h>
+ #include <linux/slab.h>
++#include <linux/kdb.h>
+ #ifdef CONFIG_SPARC
+ #include <linux/sunserialcore.h>
+ #endif
+@@ -2909,7 +2910,7 @@ serial8250_console_write(struct console
+ 
+ 	touch_nmi_watchdog();
+ 
+-	if (port->sysrq || oops_in_progress)
++	if (port->sysrq || oops_in_progress || in_kdb_printk())
+ 		locked = spin_trylock_irqsave(&port->lock, flags);
+ 	else
+ 		spin_lock_irqsave(&port->lock, flags);
+--- a/include/linux/kdb.h
++++ b/include/linux/kdb.h
+@@ -115,7 +115,7 @@ extern int kdb_trap_printk;
+ extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args);
+ extern __printf(1, 2) int kdb_printf(const char *, ...);
+ typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
+-
++#define in_kdb_printk() (kdb_trap_printk)
+ extern void kdb_init(int level);
+ 
+ /* Access to kdb specific polling devices */
+@@ -150,6 +150,7 @@ extern int kdb_register_repeat(char *, k
+ extern int kdb_unregister(char *);
+ #else /* ! CONFIG_KGDB_KDB */
+ static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
++#define in_kdb_printk() (0)
+ static inline void kdb_init(int level) {}
+ static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
+ 			       char *help, short minlen) { return 0; }
+--- a/kernel/debug/kdb/kdb_io.c
++++ b/kernel/debug/kdb/kdb_io.c
+@@ -554,7 +554,6 @@ int vkdb_printf(const char *fmt, va_list
+ 	int linecount;
+ 	int colcount;
+ 	int logging, saved_loglevel = 0;
+-	int saved_trap_printk;
+ 	int got_printf_lock = 0;
+ 	int retlen = 0;
+ 	int fnd, len;
+@@ -565,8 +564,6 @@ int vkdb_printf(const char *fmt, va_list
+ 	unsigned long uninitialized_var(flags);
+ 
+ 	preempt_disable();
+-	saved_trap_printk = kdb_trap_printk;
+-	kdb_trap_printk = 0;
+ 
+ 	/* Serialize kdb_printf if multiple cpus try to write at once.
+ 	 * But if any cpu goes recursive in kdb, just print the output,
+@@ -833,7 +830,6 @@ kdb_print_out:
+ 	} else {
+ 		__release(kdb_printf_lock);
+ 	}
+-	kdb_trap_printk = saved_trap_printk;
+ 	preempt_enable();
+ 	return retlen;
+ }
+@@ -843,9 +839,11 @@ int kdb_printf(const char *fmt, ...)
+ 	va_list ap;
+ 	int r;
+ 
++	kdb_trap_printk++;
+ 	va_start(ap, fmt);
+ 	r = vkdb_printf(fmt, ap);
+ 	va_end(ap);
++	kdb_trap_printk--;
+ 
+ 	return r;
+ }
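
The hunks above turn kdb_trap_printk from a flag that vkdb_printf() saved
and cleared into a counter that kdb_printf() holds elevated around
vkdb_printf(), so serial8250_console_write() can see kdb output in flight
and fall back to a trylock instead of spinning on the port lock. A rough
user-space sketch of that gating follows (plain C, illustrative only;
port_lock()/port_trylock() and console_write() are stand-ins, not the
kernel APIs):

/* Illustrative sketch of the counter-based kdb gating above; not kernel code. */
#include <stdarg.h>
#include <stdio.h>

static int kdb_trap_printk;		/* > 0 while kdb output is in flight */
#define in_kdb_printk()	(kdb_trap_printk)

/* Stand-in for spin_trylock_irqsave(): pretend the lock was free. */
static int port_trylock(void) { return 1; }
/* Stand-in for spin_lock_irqsave(). */
static void port_lock(void) { }

static void console_write(const char *s)
{
	int locked;

	/* While kdb is printing, never block on the port lock. */
	if (in_kdb_printk())
		locked = port_trylock();
	else {
		port_lock();
		locked = 1;
	}

	if (locked)
		fputs(s, stdout);	/* write only if we "own" the lock */
}

static int kdb_printf(const char *fmt, ...)
{
	va_list ap;
	int r;

	kdb_trap_printk++;		/* mark kdb output as in progress */
	va_start(ap, fmt);
	r = vprintf(fmt, ap);
	va_end(ap);
	kdb_trap_printk--;

	return r;
}

int main(void)
{
	kdb_printf("kdb says hi\n");
	console_write("normal console output\n");
	return 0;
}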
diff --git a/patches/latency-hist.patch b/patches/latency-hist.patch
new file mode 100644
index 0000000..6e646b3
--- /dev/null
+++ b/patches/latency-hist.patch
@@ -0,0 +1,1804 @@
+Subject: latency-hist.patch
+From: Carsten Emde <C.Emde@osadl.org>
+Date: Tue, 19 Jul 2011 14:03:41 +0100
+
+This patch provides a recording mechanism to store data of potential
+sources of system latencies. The recordings separately determine the
+latency caused by a delayed timer expiration, by a delayed wakeup of the
+related user space program and by the sum of both. The histograms can be
+enabled and reset individually. The data are accessible via the debug
+filesystem. For details please consult Documentation/trace/histograms.txt.
+
+Signed-off-by: Carsten Emde <C.Emde@osadl.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ Documentation/trace/histograms.txt  |  186 +++++
+ include/linux/hrtimer.h             |    3 
+ include/linux/sched.h               |    6 
+ include/trace/events/hist.h         |   69 ++
+ include/trace/events/latency_hist.h |   29 
+ kernel/hrtimer.c                    |   23 
+ kernel/trace/Kconfig                |  104 +++
+ kernel/trace/Makefile               |    4 
+ kernel/trace/latency_hist.c         | 1176 ++++++++++++++++++++++++++++++++++++
+ kernel/trace/trace_irqsoff.c        |   11 
+ 10 files changed, 1611 insertions(+)
+
+--- /dev/null
++++ b/Documentation/trace/histograms.txt
+@@ -0,0 +1,186 @@
++		Using the Linux Kernel Latency Histograms
++
++
++This document gives a short explanation of how to enable, configure and use
++latency histograms. Latency histograms are primarily relevant in the
++context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
++and are used in the quality management of the Linux real-time
++capabilities.
++
++
++* Purpose of latency histograms
++
++A latency histogram continuously accumulates the frequencies of latency
++data. There are two types of histograms:
++- potential sources of latencies
++- effective latencies
++
++
++* Potential sources of latencies
++
++Potential sources of latencies are code segments where interrupts,
++preemption or both are disabled (aka critical sections). To create
++histograms of potential sources of latency, the kernel stores the time
++stamp at the start of a critical section, determines the time elapsed
++when the end of the section is reached, and increments the frequency
++counter of that latency value - irrespective of whether any concurrently
++running process is affected by latency or not.
++- Configuration items (in the Kernel hacking/Tracers submenu)
++  CONFIG_INTERRUPT_OFF_HIST
++  CONFIG_PREEMPT_OFF_HIST
++
++
++* Effective latencies
++
++Effective latencies are those that actually occur during the wakeup of a process. To
++determine effective latencies, the kernel stores the time stamp when a
++process is scheduled to be woken up, and determines the duration of the
++wakeup time shortly before control is passed over to this process. Note
++that the apparent latency in user space may be somewhat longer, since the
++process may be interrupted after control is passed over to it but before
++the execution in user space takes place. Simply measuring the interval
++between enqueuing and wakeup may also not be appropriate in cases where a
++process is scheduled as a result of a timer expiration. The timer may have
++missed its deadline, e.g. due to disabled interrupts, but this latency
++would not be registered. Therefore, the offsets of missed timers are
++recorded in a separate histogram. If both wakeup latency and missed timer
++offsets are configured and enabled, a third histogram may be enabled that
++records the overall latency as a sum of the timer latency, if any, and the
++wakeup latency. This histogram is called "timerandwakeup".
++- Configuration items (in the Kernel hacking/Tracers submenu)
++  CONFIG_WAKEUP_LATENCY_HIST
++  CONFIG_MISSED_TIMER_OFFSETS_HIST
++
++
++* Usage
++
++The interface to the administration of the latency histograms is located
++in the debugfs file system. To mount it, either enter
++
++mount -t sysfs nodev /sys
++mount -t debugfs nodev /sys/kernel/debug
++
++from shell command line level, or add
++
++nodev	/sys			sysfs	defaults	0 0
++nodev	/sys/kernel/debug	debugfs	defaults	0 0
++
++to the file /etc/fstab. All latency histogram related files are then
++available in the directory /sys/kernel/debug/tracing/latency_hist. A
++particular histogram type is enabled by writing non-zero to the related
++variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
++Select "preemptirqsoff" for the histograms of potential sources of
++latencies and "wakeup" for histograms of effective latencies etc. The
++histogram data - one per CPU - are available in the files
++
++/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
++/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
++/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
++/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
++/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
++/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
++/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
++
++The histograms are reset by writing non-zero to the file "reset" in a
++particular latency directory. To reset all latency data, use
++
++#!/bin/sh
++
++TRACINGDIR=/sys/kernel/debug/tracing
++HISTDIR=$TRACINGDIR/latency_hist
++
++if test -d $HISTDIR
++then
++  cd $HISTDIR
++  for i in `find . | grep /reset$`
++  do
++    echo 1 >$i
++  done
++fi
++
++
++* Data format
++
++Latency data are stored with a resolution of one microsecond. The
++maximum latency is 10,240 microseconds. The data are only valid if the
++overflow register is empty. Every output line contains the latency in
++microseconds in the first column and the number of samples in the second
++column. To display only lines with a positive latency count, use, for
++example,
++
++grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
++
++#Minimum latency: 0 microseconds.
++#Average latency: 0 microseconds.
++#Maximum latency: 25 microseconds.
++#Total samples: 3104770694
++#There are 0 samples greater or equal than 10240 microseconds
++#usecs	         samples
++    0	      2984486876
++    1	        49843506
++    2	        58219047
++    3	         5348126
++    4	         2187960
++    5	         3388262
++    6	          959289
++    7	          208294
++    8	           40420
++    9	            4485
++   10	           14918
++   11	           18340
++   12	           25052
++   13	           19455
++   14	            5602
++   15	             969
++   16	              47
++   17	              18
++   18	              14
++   19	               1
++   20	               3
++   21	               2
++   22	               5
++   23	               2
++   25	               1
++
++
++* Wakeup latency of a selected process
++
++To only collect wakeup latency data of a particular process, write the
++PID of the requested process to
++
++/sys/kernel/debug/tracing/latency_hist/wakeup/pid
++
++PIDs are not considered if this variable is set to 0.
++
++
++* Details of the process with the highest wakeup latency so far
++
++Selected data of the process that suffered from the highest wakeup
++latency that occurred in a particular CPU are available in the file
++
++/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
++
++In addition, other relevant system data at the time when the
++latency occurred are given.
++
++The format of the data is (all in one line):
++<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
++<- <PID> <Priority> <Command> <Timestamp>
++
++The value of <Timeroffset> is only relevant in the combined timer
++and wakeup latency recording. In the wakeup recording, it is
++always 0, in the missed_timer_offsets recording, it is the same
++as <Latency>.
++
++When retrospectively searching for the origin of a latency and
++tracing was not enabled, it may be helpful to know the name and
++some basic data of the task that (finally) switched to the
++late real-time task. In addition to the victim's data, the
++data of the possible culprit are therefore also displayed after the
++"<-" symbol.
++
++Finally, the timestamp of the time when the latency occurred
++in <seconds>.<microseconds> after the most recent system boot
++is provided.
++
++These data are also reset when the wakeup histogram is reset.
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -111,6 +111,9 @@ struct hrtimer {
+ 	enum hrtimer_restart		(*function)(struct hrtimer *);
+ 	struct hrtimer_clock_base	*base;
+ 	unsigned long			state;
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++	ktime_t				praecox;
++#endif
+ #ifdef CONFIG_TIMER_STATS
+ 	int				start_pid;
+ 	void				*start_site;
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1598,6 +1598,12 @@ struct task_struct {
+ 	unsigned long trace;
+ 	/* bitmask and counter of trace recursion */
+ 	unsigned long trace_recursion;
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++	u64 preempt_timestamp_hist;
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++	long timer_offset;
++#endif
++#endif
+ #endif /* CONFIG_TRACING */
+ #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
+ 	struct memcg_batch_info {
+--- /dev/null
++++ b/include/trace/events/hist.h
+@@ -0,0 +1,69 @@
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM hist
++
++#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
++#define _TRACE_HIST_H
++
++#include "latency_hist.h"
++#include <linux/tracepoint.h>
++
++#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
++#define trace_preemptirqsoff_hist(a,b)
++#else
++TRACE_EVENT(preemptirqsoff_hist,
++
++	TP_PROTO(int reason, int starthist),
++
++	TP_ARGS(reason, starthist),
++
++	TP_STRUCT__entry(
++		__field(int,	reason	)
++		__field(int,	starthist	)
++	),
++
++	TP_fast_assign(
++		__entry->reason		= reason;
++		__entry->starthist	= starthist;
++	),
++
++	TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
++		  __entry->starthist ? "start" : "stop")
++);
++#endif
++
++#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
++#define trace_hrtimer_interrupt(a,b,c,d)
++#else
++TRACE_EVENT(hrtimer_interrupt,
++
++	TP_PROTO(int cpu, long long offset, struct task_struct *curr, struct task_struct *task),
++
++	TP_ARGS(cpu, offset, curr, task),
++
++	TP_STRUCT__entry(
++		__field(int,		cpu	)
++		__field(long long,	offset	)
++		__array(char,		ccomm,	TASK_COMM_LEN)
++		__field(int,		cprio	)
++		__array(char,		tcomm,	TASK_COMM_LEN)
++		__field(int,		tprio	)
++	),
++
++	TP_fast_assign(
++		__entry->cpu	= cpu;
++		__entry->offset	= offset;
++		memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
++		__entry->cprio  = curr->prio;
++		memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", task != NULL ? TASK_COMM_LEN : 7);
++		__entry->tprio  = task != NULL ? task->prio : -1;
++	),
++
++	TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
++		__entry->cpu, __entry->offset, __entry->ccomm, __entry->cprio, __entry->tcomm, __entry->tprio)
++);
++#endif
++
++#endif /* _TRACE_HIST_H */
++
++/* This part must be outside protection */
++#include <trace/define_trace.h>
+--- /dev/null
++++ b/include/trace/events/latency_hist.h
+@@ -0,0 +1,29 @@
++#ifndef _LATENCY_HIST_H
++#define _LATENCY_HIST_H
++
++enum hist_action {
++	IRQS_ON,
++	PREEMPT_ON,
++	TRACE_STOP,
++	IRQS_OFF,
++	PREEMPT_OFF,
++	TRACE_START,
++};
++
++static char *actions[] = {
++	"IRQS_ON",
++	"PREEMPT_ON",
++	"TRACE_STOP",
++	"IRQS_OFF",
++	"PREEMPT_OFF",
++	"TRACE_START",
++};
++
++static inline char *getaction(int action)
++{
++	if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
++		return(actions[action]);
++	return("unknown");
++}
++
++#endif /* _LATENCY_HIST_H */
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -49,6 +49,7 @@
+ #include <asm/uaccess.h>
+ 
+ #include <trace/events/timer.h>
++#include <trace/events/hist.h>
+ 
+ /*
+  * The timer bases:
+@@ -970,6 +971,17 @@ int __hrtimer_start_range_ns(struct hrti
+ #endif
+ 	}
+ 
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++	{
++		ktime_t now = new_base->get_time();
++
++		if (ktime_to_ns(tim) < ktime_to_ns(now))
++			timer->praecox = now;
++		else
++			timer->praecox = ktime_set(0, 0);
++	}
++#endif
++
+ 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
+ 
+ 	timer_stats_hrtimer_set_start_info(timer);
+@@ -1246,6 +1258,8 @@ static void __run_hrtimer(struct hrtimer
+ 
+ #ifdef CONFIG_HIGH_RES_TIMERS
+ 
++static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
++
+ /*
+  * High resolution timer interrupt
+  * Called with interrupts disabled
+@@ -1289,6 +1303,15 @@ retry:
+ 
+ 			timer = container_of(node, struct hrtimer, node);
+ 
++			trace_hrtimer_interrupt(raw_smp_processor_id(),
++			    ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
++				timer->praecox : hrtimer_get_expires(timer),
++				basenow)),
++			    current,
++			    timer->function == hrtimer_wakeup ?
++			    container_of(timer, struct hrtimer_sleeper,
++				timer)->task : NULL);
++
+ 			/*
+ 			 * The immediate goal for using the softexpires is
+ 			 * minimizing wakeups, not running timers at the
+--- a/kernel/trace/Kconfig
++++ b/kernel/trace/Kconfig
+@@ -202,6 +202,24 @@ config IRQSOFF_TRACER
+ 	  enabled. This option and the preempt-off timing option can be
+ 	  used together or separately.)
+ 
++config INTERRUPT_OFF_HIST
++	bool "Interrupts-off Latency Histogram"
++	depends on IRQSOFF_TRACER
++	help
++	  This option generates continuously updated histograms (one per cpu)
++	  of the duration of time periods with interrupts disabled. The
++	  histograms are disabled by default. To enable them, write a non-zero
++	  number to
++
++	      /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
++
++	  If PREEMPT_OFF_HIST is also selected, additional histograms (one
++	  per cpu) are generated that accumulate the duration of time periods
++	  when both interrupts and preemption are disabled. The histogram data
++	  will be located in the debug file system at
++
++	      /sys/kernel/debug/tracing/latency_hist/irqsoff
++
+ config PREEMPT_TRACER
+ 	bool "Preemption-off Latency Tracer"
+ 	default n
+@@ -224,6 +242,24 @@ config PREEMPT_TRACER
+ 	  enabled. This option and the irqs-off timing option can be
+ 	  used together or separately.)
+ 
++config PREEMPT_OFF_HIST
++	bool "Preemption-off Latency Histogram"
++	depends on PREEMPT_TRACER
++	help
++	  This option generates continuously updated histograms (one per cpu)
++	  of the duration of time periods with preemption disabled. The
++	  histograms are disabled by default. To enable them, write a non-zero
++	  number to
++
++	      /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
++
++	  If INTERRUPT_OFF_HIST is also selected, additional histograms (one
++	  per cpu) are generated that accumulate the duration of time periods
++	  when both interrupts and preemption are disabled. The histogram data
++	  will be located in the debug file system at
++
++	      /sys/kernel/debug/tracing/latency_hist/preemptoff
++
+ config SCHED_TRACER
+ 	bool "Scheduling Latency Tracer"
+ 	select GENERIC_TRACER
+@@ -233,6 +269,74 @@ config SCHED_TRACER
+ 	  This tracer tracks the latency of the highest priority task
+ 	  to be scheduled in, starting from the point it has woken up.
+ 
++config WAKEUP_LATENCY_HIST
++	bool "Scheduling Latency Histogram"
++	depends on SCHED_TRACER
++	help
++	  This option generates continuously updated histograms (one per cpu)
++	  of the scheduling latency of the highest priority task.
++	  The histograms are disabled by default. To enable them, write a
++	  non-zero number to
++
++	      /sys/kernel/debug/tracing/latency_hist/enable/wakeup
++
++	  Two different algorithms are used, one to determine the latency of
++	  processes that exclusively use the highest priority of the system and
++	  another one to determine the latency of processes that share the
++	  highest system priority with other processes. The former is used to
++	  improve hardware and system software, the latter to optimize the
++	  priority design of a given system. The histogram data will be
++	  located in the debug file system at
++
++	      /sys/kernel/debug/tracing/latency_hist/wakeup
++
++	  and
++
++	      /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
++
++	  If both Scheduling Latency Histogram and Missed Timer Offsets
++	  Histogram are selected, additional histogram data will be collected
++	  that contain, in addition to the wakeup latency, the timer latency, in
++	  case the wakeup was triggered by an expired timer. These histograms
++	  are available in the
++
++	      /sys/kernel/debug/tracing/latency_hist/timerandwakeup
++
++	  directory. They reflect the apparent interrupt and scheduling latency
++	  and are best suited to determine the worst-case latency of a given
++	  system. To enable these histograms, write a non-zero number to
++
++	      /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
++
++config MISSED_TIMER_OFFSETS_HIST
++	depends on HIGH_RES_TIMERS
++	select GENERIC_TRACER
++	bool "Missed Timer Offsets Histogram"
++	help
++	  Generate a histogram of missed timer offsets in microseconds. The
++	  histograms are disabled by default. To enable them, write a non-zero
++	  number to
++
++	      /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
++
++	  The histogram data will be located in the debug file system at
++
++	      /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
++
++	  If both Scheduling Latency Histogram and Missed Timer Offsets
++	  Histogram are selected, additional histogram data will be collected
++	  that contain, in addition to the wakeup latency, the timer latency, in
++	  case the wakeup was triggered by an expired timer. These histograms
++	  are available in the
++
++	      /sys/kernel/debug/tracing/latency_hist/timerandwakeup
++
++	  directory. They reflect the apparent interrupt and scheduling latency
++	  and are best suited to determine the worst-case latency of a given
++	  system. To enable these histograms, write a non-zero number to
++
++	      /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
++
+ config ENABLE_DEFAULT_TRACERS
+ 	bool "Trace process context switches and events"
+ 	depends on !GENERIC_TRACER
+--- a/kernel/trace/Makefile
++++ b/kernel/trace/Makefile
+@@ -34,6 +34,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_f
+ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
+ obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
+ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
++obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
++obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
++obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
++obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
+ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
+ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+--- /dev/null
++++ b/kernel/trace/latency_hist.c
+@@ -0,0 +1,1176 @@
++/*
++ * kernel/trace/latency_hist.c
++ *
++ * Add support for histograms of preemption-off latency and
++ * interrupt-off latency and wakeup latency; it depends on
++ * Real-Time Preemption Support.
++ *
++ *  Copyright (C) 2005 MontaVista Software, Inc.
++ *  Yi Yang <yyang@ch.mvista.com>
++ *
++ *  Converted to work with the new latency tracer.
++ *  Copyright (C) 2008 Red Hat, Inc.
++ *    Steven Rostedt <srostedt@redhat.com>
++ *
++ */
++#include <linux/module.h>
++#include <linux/debugfs.h>
++#include <linux/seq_file.h>
++#include <linux/percpu.h>
++#include <linux/kallsyms.h>
++#include <linux/uaccess.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <asm/atomic.h>
++#include <asm/div64.h>
++
++#include "trace.h"
++#include <trace/events/sched.h>
++
++#define NSECS_PER_USECS 1000L
++
++#define CREATE_TRACE_POINTS
++#include <trace/events/hist.h>
++
++enum {
++	IRQSOFF_LATENCY = 0,
++	PREEMPTOFF_LATENCY,
++	PREEMPTIRQSOFF_LATENCY,
++	WAKEUP_LATENCY,
++	WAKEUP_LATENCY_SHAREDPRIO,
++	MISSED_TIMER_OFFSETS,
++	TIMERANDWAKEUP_LATENCY,
++	MAX_LATENCY_TYPE,
++};
++
++#define MAX_ENTRY_NUM 10240
++
++struct hist_data {
++	atomic_t hist_mode; /* 0 log, 1 don't log */
++	long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
++	long min_lat;
++	long max_lat;
++	unsigned long long below_hist_bound_samples;
++	unsigned long long above_hist_bound_samples;
++	long long accumulate_lat;
++	unsigned long long total_samples;
++	unsigned long long hist_array[MAX_ENTRY_NUM];
++};
++
++struct enable_data {
++	int latency_type;
++	int enabled;
++};
++
++static char *latency_hist_dir_root = "latency_hist";
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
++static char *irqsoff_hist_dir = "irqsoff";
++static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
++static DEFINE_PER_CPU(int, hist_irqsoff_counting);
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
++static char *preemptoff_hist_dir = "preemptoff";
++static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
++static DEFINE_PER_CPU(int, hist_preemptoff_counting);
++#endif
++
++#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
++static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
++static char *preemptirqsoff_hist_dir = "preemptirqsoff";
++static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
++static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
++#endif
++
++#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
++static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
++static struct enable_data preemptirqsoff_enabled_data = {
++	.latency_type = PREEMPTIRQSOFF_LATENCY,
++	.enabled = 0,
++};
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++struct maxlatproc_data {
++	char comm[FIELD_SIZEOF(struct task_struct, comm)];
++	char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
++	int pid;
++	int current_pid;
++	int prio;
++	int current_prio;
++	long latency;
++	long timeroffset;
++	cycle_t timestamp;
++};
++#endif
++
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
++static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
++static char *wakeup_latency_hist_dir = "wakeup";
++static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
++static notrace void probe_wakeup_latency_hist_start(void *v,
++    struct task_struct *p, int success);
++static notrace void probe_wakeup_latency_hist_stop(void *v,
++    struct task_struct *prev, struct task_struct *next);
++static notrace void probe_sched_migrate_task(void *,
++    struct task_struct *task, int cpu);
++static struct enable_data wakeup_latency_enabled_data = {
++	.latency_type = WAKEUP_LATENCY,
++	.enabled = 0,
++};
++static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
++static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
++static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
++static DEFINE_PER_CPU(int, wakeup_sharedprio);
++static unsigned long wakeup_pid;
++#endif
++
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
++static char *missed_timer_offsets_dir = "missed_timer_offsets";
++static notrace void probe_hrtimer_interrupt(void *v, int cpu,
++    long long offset, struct task_struct *curr, struct task_struct *task);
++static struct enable_data missed_timer_offsets_enabled_data = {
++	.latency_type = MISSED_TIMER_OFFSETS,
++	.enabled = 0,
++};
++static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
++static unsigned long missed_timer_offsets_pid;
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
++static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
++static struct enable_data timerandwakeup_enabled_data = {
++	.latency_type = TIMERANDWAKEUP_LATENCY,
++	.enabled = 0,
++};
++static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
++#endif
++
++void notrace latency_hist(int latency_type, int cpu, long latency,
++			  long timeroffset, cycle_t stop,
++			  struct task_struct *p)
++{
++	struct hist_data *my_hist;
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++	struct maxlatproc_data *mp = NULL;
++#endif
++
++	if (cpu < 0 || cpu >= NR_CPUS || latency_type < 0 ||
++	    latency_type >= MAX_LATENCY_TYPE)
++		return;
++
++	switch (latency_type) {
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++	case IRQSOFF_LATENCY:
++		my_hist = &per_cpu(irqsoff_hist, cpu);
++		break;
++#endif
++#ifdef CONFIG_PREEMPT_OFF_HIST
++	case PREEMPTOFF_LATENCY:
++		my_hist = &per_cpu(preemptoff_hist, cpu);
++		break;
++#endif
++#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
++	case PREEMPTIRQSOFF_LATENCY:
++		my_hist = &per_cpu(preemptirqsoff_hist, cpu);
++		break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++	case WAKEUP_LATENCY:
++		my_hist = &per_cpu(wakeup_latency_hist, cpu);
++		mp = &per_cpu(wakeup_maxlatproc, cpu);
++		break;
++	case WAKEUP_LATENCY_SHAREDPRIO:
++		my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
++		mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
++		break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++	case MISSED_TIMER_OFFSETS:
++		my_hist = &per_cpu(missed_timer_offsets, cpu);
++		mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
++		break;
++#endif
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++	case TIMERANDWAKEUP_LATENCY:
++		my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
++		mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
++		break;
++#endif
++
++	default:
++		return;
++	}
++
++	latency += my_hist->offset;
++
++	if (atomic_read(&my_hist->hist_mode) == 0)
++		return;
++
++	if (latency < 0 || latency >= MAX_ENTRY_NUM) {
++		if (latency < 0)
++			my_hist->below_hist_bound_samples++;
++		else
++			my_hist->above_hist_bound_samples++;
++	} else
++		my_hist->hist_array[latency]++;
++
++	if (unlikely(latency > my_hist->max_lat ||
++	    my_hist->min_lat == LONG_MAX)) {
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++		if (latency_type == WAKEUP_LATENCY ||
++		    latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
++		    latency_type == MISSED_TIMER_OFFSETS ||
++		    latency_type == TIMERANDWAKEUP_LATENCY) {
++			strncpy(mp->comm, p->comm, sizeof(mp->comm));
++			strncpy(mp->current_comm, current->comm,
++			    sizeof(mp->current_comm));
++			mp->pid = task_pid_nr(p);
++			mp->current_pid = task_pid_nr(current);
++			mp->prio = p->prio;
++			mp->current_prio = current->prio;
++			mp->latency = latency;
++			mp->timeroffset = timeroffset;
++			mp->timestamp = stop;
++		}
++#endif
++		my_hist->max_lat = latency;
++	}
++	if (unlikely(latency < my_hist->min_lat))
++		my_hist->min_lat = latency;
++	my_hist->total_samples++;
++	my_hist->accumulate_lat += latency;
++}
++
++static void *l_start(struct seq_file *m, loff_t *pos)
++{
++	loff_t *index_ptr = NULL;
++	loff_t index = *pos;
++	struct hist_data *my_hist = m->private;
++
++	if (index == 0) {
++		char minstr[32], avgstr[32], maxstr[32];
++
++		atomic_dec(&my_hist->hist_mode);
++
++		if (likely(my_hist->total_samples)) {
++			long avg = (long) div64_s64(my_hist->accumulate_lat,
++			    my_hist->total_samples);
++			snprintf(minstr, sizeof(minstr), "%ld",
++			    my_hist->min_lat - my_hist->offset);
++			snprintf(avgstr, sizeof(avgstr), "%ld",
++			    avg - my_hist->offset);
++			snprintf(maxstr, sizeof(maxstr), "%ld",
++			    my_hist->max_lat - my_hist->offset);
++		} else {
++			strcpy(minstr, "<undef>");
++			strcpy(avgstr, minstr);
++			strcpy(maxstr, minstr);
++		}
++
++		seq_printf(m, "#Minimum latency: %s microseconds\n"
++			   "#Average latency: %s microseconds\n"
++			   "#Maximum latency: %s microseconds\n"
++			   "#Total samples: %llu\n"
++			   "#There are %llu samples lower than %ld"
++			   " microseconds.\n"
++			   "#There are %llu samples greater or equal"
++			   " than %ld microseconds.\n"
++			   "#usecs\t%16s\n",
++			   minstr, avgstr, maxstr,
++			   my_hist->total_samples,
++			   my_hist->below_hist_bound_samples,
++			   -my_hist->offset,
++			   my_hist->above_hist_bound_samples,
++			   MAX_ENTRY_NUM - my_hist->offset,
++			   "samples");
++	}
++	if (index < MAX_ENTRY_NUM) {
++		index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
++		if (index_ptr)
++			*index_ptr = index;
++	}
++
++	return index_ptr;
++}
++
++static void *l_next(struct seq_file *m, void *p, loff_t *pos)
++{
++	loff_t *index_ptr = p;
++	struct hist_data *my_hist = m->private;
++
++	if (++*pos >= MAX_ENTRY_NUM) {
++		atomic_inc(&my_hist->hist_mode);
++		return NULL;
++	}
++	*index_ptr = *pos;
++	return index_ptr;
++}
++
++static void l_stop(struct seq_file *m, void *p)
++{
++	kfree(p);
++}
++
++static int l_show(struct seq_file *m, void *p)
++{
++	int index = *(loff_t *) p;
++	struct hist_data *my_hist = m->private;
++
++	seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
++	    my_hist->hist_array[index]);
++	return 0;
++}
++
++static struct seq_operations latency_hist_seq_op = {
++	.start = l_start,
++	.next  = l_next,
++	.stop  = l_stop,
++	.show  = l_show
++};
++
++static int latency_hist_open(struct inode *inode, struct file *file)
++{
++	int ret;
++
++	ret = seq_open(file, &latency_hist_seq_op);
++	if (!ret) {
++		struct seq_file *seq = file->private_data;
++		seq->private = inode->i_private;
++	}
++	return ret;
++}
++
++static struct file_operations latency_hist_fops = {
++	.open = latency_hist_open,
++	.read = seq_read,
++	.llseek = seq_lseek,
++	.release = seq_release,
++};
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static void clear_maxlatprocdata(struct maxlatproc_data *mp)
++{
++	mp->comm[0] = mp->current_comm[0] = '\0';
++	mp->prio = mp->current_prio = mp->pid = mp->current_pid =
++	    mp->latency = mp->timeroffset = -1;
++	mp->timestamp = 0;
++}
++#endif
++
++static void hist_reset(struct hist_data *hist)
++{
++	atomic_dec(&hist->hist_mode);
++
++	memset(hist->hist_array, 0, sizeof(hist->hist_array));
++	hist->below_hist_bound_samples = 0ULL;
++	hist->above_hist_bound_samples = 0ULL;
++	hist->min_lat = LONG_MAX;
++	hist->max_lat = LONG_MIN;
++	hist->total_samples = 0ULL;
++	hist->accumulate_lat = 0LL;
++
++	atomic_inc(&hist->hist_mode);
++}
++
++static ssize_t
++latency_hist_reset(struct file *file, const char __user *a,
++		   size_t size, loff_t *off)
++{
++	int cpu;
++	struct hist_data *hist = NULL;
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++	struct maxlatproc_data *mp = NULL;
++#endif
++	off_t latency_type = (off_t) file->private_data;
++
++	for_each_online_cpu(cpu) {
++
++		switch (latency_type) {
++#ifdef CONFIG_PREEMPT_OFF_HIST
++		case PREEMPTOFF_LATENCY:
++			hist = &per_cpu(preemptoff_hist, cpu);
++			break;
++#endif
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++		case IRQSOFF_LATENCY:
++			hist = &per_cpu(irqsoff_hist, cpu);
++			break;
++#endif
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++		case PREEMPTIRQSOFF_LATENCY:
++			hist = &per_cpu(preemptirqsoff_hist, cpu);
++			break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++		case WAKEUP_LATENCY:
++			hist = &per_cpu(wakeup_latency_hist, cpu);
++			mp = &per_cpu(wakeup_maxlatproc, cpu);
++			break;
++		case WAKEUP_LATENCY_SHAREDPRIO:
++			hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
++			mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
++			break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++		case MISSED_TIMER_OFFSETS:
++			hist = &per_cpu(missed_timer_offsets, cpu);
++			mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
++			break;
++#endif
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++		case TIMERANDWAKEUP_LATENCY:
++			hist = &per_cpu(timerandwakeup_latency_hist, cpu);
++			mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
++			break;
++#endif
++		}
++
++		hist_reset(hist);
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++		if (latency_type == WAKEUP_LATENCY ||
++		    latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
++		    latency_type == MISSED_TIMER_OFFSETS ||
++		    latency_type == TIMERANDWAKEUP_LATENCY)
++			clear_maxlatprocdata(mp);
++#endif
++	}
++
++	return size;
++}
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static ssize_t
++show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++	char buf[64];
++	int r;
++	unsigned long *this_pid = file->private_data;
++
++	r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
++	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
++}
++
++static ssize_t do_pid(struct file *file, const char __user *ubuf,
++		      size_t cnt, loff_t *ppos)
++{
++	char buf[64];
++	unsigned long pid;
++	unsigned long *this_pid = file->private_data;
++
++	if (cnt >= sizeof(buf))
++		return -EINVAL;
++
++	if (copy_from_user(&buf, ubuf, cnt))
++		return -EFAULT;
++
++	buf[cnt] = '\0';
++
++	if (strict_strtoul(buf, 10, &pid))
++		return(-EINVAL);
++
++	*this_pid = pid;
++
++	return cnt;
++}
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static ssize_t
++show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++	int r;
++	struct maxlatproc_data *mp = file->private_data;
++	int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
++	unsigned long long t;
++	unsigned long usecs, secs;
++	char *buf;
++
++	if (mp->pid == -1 || mp->current_pid == -1) {
++		buf = "(none)\n";
++		return simple_read_from_buffer(ubuf, cnt, ppos, buf,
++		    strlen(buf));
++	}
++
++	buf = kmalloc(strmaxlen, GFP_KERNEL);
++	if (buf == NULL)
++		return -ENOMEM;
++
++	t = ns2usecs(mp->timestamp);
++	usecs = do_div(t, USEC_PER_SEC);
++	secs = (unsigned long) t;
++	r = snprintf(buf, strmaxlen,
++	    "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
++	    MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
++	    mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
++	    secs, usecs);
++	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
++	kfree(buf);
++	return r;
++}
++#endif
++
++static ssize_t
++show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++	char buf[64];
++	struct enable_data *ed = file->private_data;
++	int r;
++
++	r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
++	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
++}
++
++static ssize_t
++do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++	char buf[64];
++	long enable;
++	struct enable_data *ed = file->private_data;
++
++	if (cnt >= sizeof(buf))
++		return -EINVAL;
++
++	if (copy_from_user(&buf, ubuf, cnt))
++		return -EFAULT;
++
++	buf[cnt] = 0;
++
++	if (strict_strtol(buf, 10, &enable))
++		return(-EINVAL);
++
++	if ((enable && ed->enabled) || (!enable && !ed->enabled))
++		return cnt;
++
++	if (enable) {
++		int ret;
++
++		switch (ed->latency_type) {
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++		case PREEMPTIRQSOFF_LATENCY:
++			ret = register_trace_preemptirqsoff_hist(
++			    probe_preemptirqsoff_hist, NULL);
++			if (ret) {
++				pr_info("wakeup trace: Couldn't assign "
++				    "probe_preemptirqsoff_hist "
++				    "to trace_preemptirqsoff_hist\n");
++				return ret;
++			}
++			break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++		case WAKEUP_LATENCY:
++			ret = register_trace_sched_wakeup(
++			    probe_wakeup_latency_hist_start, NULL);
++			if (ret) {
++				pr_info("wakeup trace: Couldn't assign "
++				    "probe_wakeup_latency_hist_start "
++				    "to trace_sched_wakeup\n");
++				return ret;
++			}
++			ret = register_trace_sched_wakeup_new(
++			    probe_wakeup_latency_hist_start, NULL);
++			if (ret) {
++				pr_info("wakeup trace: Couldn't assign "
++				    "probe_wakeup_latency_hist_start "
++				    "to trace_sched_wakeup_new\n");
++				unregister_trace_sched_wakeup(
++				    probe_wakeup_latency_hist_start, NULL);
++				return ret;
++			}
++			ret = register_trace_sched_switch(
++			    probe_wakeup_latency_hist_stop, NULL);
++			if (ret) {
++				pr_info("wakeup trace: Couldn't assign "
++				    "probe_wakeup_latency_hist_stop "
++				    "to trace_sched_switch\n");
++				unregister_trace_sched_wakeup(
++				    probe_wakeup_latency_hist_start, NULL);
++				unregister_trace_sched_wakeup_new(
++				    probe_wakeup_latency_hist_start, NULL);
++				return ret;
++			}
++			ret = register_trace_sched_migrate_task(
++			    probe_sched_migrate_task, NULL);
++			if (ret) {
++				pr_info("wakeup trace: Couldn't assign "
++				    "probe_sched_migrate_task "
++				    "to trace_sched_migrate_task\n");
++				unregister_trace_sched_wakeup(
++				    probe_wakeup_latency_hist_start, NULL);
++				unregister_trace_sched_wakeup_new(
++				    probe_wakeup_latency_hist_start, NULL);
++				unregister_trace_sched_switch(
++				    probe_wakeup_latency_hist_stop, NULL);
++				return ret;
++			}
++			break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++		case MISSED_TIMER_OFFSETS:
++			ret = register_trace_hrtimer_interrupt(
++			    probe_hrtimer_interrupt, NULL);
++			if (ret) {
++				pr_info("wakeup trace: Couldn't assign "
++				    "probe_hrtimer_interrupt "
++				    "to trace_hrtimer_interrupt\n");
++				return ret;
++			}
++			break;
++#endif
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++		case TIMERANDWAKEUP_LATENCY:
++			if (!wakeup_latency_enabled_data.enabled ||
++			    !missed_timer_offsets_enabled_data.enabled)
++				return -EINVAL;
++			break;
++#endif
++		default:
++			break;
++		}
++	} else {
++		switch (ed->latency_type) {
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++		case PREEMPTIRQSOFF_LATENCY:
++			{
++				int cpu;
++
++				unregister_trace_preemptirqsoff_hist(
++				    probe_preemptirqsoff_hist, NULL);
++				for_each_online_cpu(cpu) {
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++					per_cpu(hist_irqsoff_counting,
++					    cpu) = 0;
++#endif
++#ifdef CONFIG_PREEMPT_OFF_HIST
++					per_cpu(hist_preemptoff_counting,
++					    cpu) = 0;
++#endif
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++					per_cpu(hist_preemptirqsoff_counting,
++					    cpu) = 0;
++#endif
++				}
++			}
++			break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++		case WAKEUP_LATENCY:
++			{
++				int cpu;
++
++				unregister_trace_sched_wakeup(
++				    probe_wakeup_latency_hist_start, NULL);
++				unregister_trace_sched_wakeup_new(
++				    probe_wakeup_latency_hist_start, NULL);
++				unregister_trace_sched_switch(
++				    probe_wakeup_latency_hist_stop, NULL);
++				unregister_trace_sched_migrate_task(
++				    probe_sched_migrate_task, NULL);
++
++				for_each_online_cpu(cpu) {
++					per_cpu(wakeup_task, cpu) = NULL;
++					per_cpu(wakeup_sharedprio, cpu) = 0;
++				}
++			}
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++			timerandwakeup_enabled_data.enabled = 0;
++#endif
++			break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++		case MISSED_TIMER_OFFSETS:
++			unregister_trace_hrtimer_interrupt(
++			    probe_hrtimer_interrupt, NULL);
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++			timerandwakeup_enabled_data.enabled = 0;
++#endif
++			break;
++#endif
++		default:
++			break;
++		}
++	}
++	ed->enabled = enable;
++	return cnt;
++}
++
++static const struct file_operations latency_hist_reset_fops = {
++	.open = tracing_open_generic,
++	.write = latency_hist_reset,
++};
++
++static const struct file_operations enable_fops = {
++	.open = tracing_open_generic,
++	.read = show_enable,
++	.write = do_enable,
++};
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static const struct file_operations pid_fops = {
++	.open = tracing_open_generic,
++	.read = show_pid,
++	.write = do_pid,
++};
++
++static const struct file_operations maxlatproc_fops = {
++	.open = tracing_open_generic,
++	.read = show_maxlatproc,
++};
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++static notrace void probe_preemptirqsoff_hist(void *v, int reason,
++    int starthist)
++{
++	int cpu = raw_smp_processor_id();
++	int time_set = 0;
++
++	if (starthist) {
++		cycle_t uninitialized_var(start);
++
++		if (!preempt_count() && !irqs_disabled())
++			return;
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++		if ((reason == IRQS_OFF || reason == TRACE_START) &&
++		    !per_cpu(hist_irqsoff_counting, cpu)) {
++			per_cpu(hist_irqsoff_counting, cpu) = 1;
++			start = ftrace_now(cpu);
++			time_set++;
++			per_cpu(hist_irqsoff_start, cpu) = start;
++		}
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++		if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
++		    !per_cpu(hist_preemptoff_counting, cpu)) {
++			per_cpu(hist_preemptoff_counting, cpu) = 1;
++			if (!(time_set++))
++				start = ftrace_now(cpu);
++			per_cpu(hist_preemptoff_start, cpu) = start;
++		}
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++		if (per_cpu(hist_irqsoff_counting, cpu) &&
++		    per_cpu(hist_preemptoff_counting, cpu) &&
++		    !per_cpu(hist_preemptirqsoff_counting, cpu)) {
++			per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
++			if (!time_set)
++				start = ftrace_now(cpu);
++			per_cpu(hist_preemptirqsoff_start, cpu) = start;
++		}
++#endif
++	} else {
++		cycle_t uninitialized_var(stop);
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++		if ((reason == IRQS_ON || reason == TRACE_STOP) &&
++		    per_cpu(hist_irqsoff_counting, cpu)) {
++			cycle_t start = per_cpu(hist_irqsoff_start, cpu);
++
++			stop = ftrace_now(cpu);
++			time_set++;
++			if (start) {
++				long latency = ((long) (stop - start)) /
++				    NSECS_PER_USECS;
++
++				latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
++				    stop, NULL);
++			}
++			per_cpu(hist_irqsoff_counting, cpu) = 0;
++		}
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++		if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
++		    per_cpu(hist_preemptoff_counting, cpu)) {
++			cycle_t start = per_cpu(hist_preemptoff_start, cpu);
++
++			if (!(time_set++))
++				stop = ftrace_now(cpu);
++			if (start) {
++				long latency = ((long) (stop - start)) /
++				    NSECS_PER_USECS;
++
++				latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
++				    0, stop, NULL);
++			}
++			per_cpu(hist_preemptoff_counting, cpu) = 0;
++		}
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++		if ((!per_cpu(hist_irqsoff_counting, cpu) ||
++		     !per_cpu(hist_preemptoff_counting, cpu)) &&
++		   per_cpu(hist_preemptirqsoff_counting, cpu)) {
++			cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
++
++			if (!time_set)
++				stop = ftrace_now(cpu);
++			if (start) {
++				long latency = ((long) (stop - start)) /
++				    NSECS_PER_USECS;
++
++				latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
++				    latency, 0, stop, NULL);
++			}
++			per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
++		}
++#endif
++	}
++}
++#endif
++
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++static DEFINE_RAW_SPINLOCK(wakeup_lock);
++static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
++    int cpu)
++{
++	int old_cpu = task_cpu(task);
++
++	if (cpu != old_cpu) {
++		unsigned long flags;
++		struct task_struct *cpu_wakeup_task;
++
++		raw_spin_lock_irqsave(&wakeup_lock, flags);
++
++		cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
++		if (task == cpu_wakeup_task) {
++			put_task_struct(cpu_wakeup_task);
++			per_cpu(wakeup_task, old_cpu) = NULL;
++			cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
++			get_task_struct(cpu_wakeup_task);
++		}
++
++		raw_spin_unlock_irqrestore(&wakeup_lock, flags);
++	}
++}
++
++static notrace void probe_wakeup_latency_hist_start(void *v,
++    struct task_struct *p, int success)
++{
++	unsigned long flags;
++	struct task_struct *curr = current;
++	int cpu = task_cpu(p);
++	struct task_struct *cpu_wakeup_task;
++
++	raw_spin_lock_irqsave(&wakeup_lock, flags);
++
++	cpu_wakeup_task = per_cpu(wakeup_task, cpu);
++
++	if (wakeup_pid) {
++		if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
++		    p->prio == curr->prio)
++			per_cpu(wakeup_sharedprio, cpu) = 1;
++		if (likely(wakeup_pid != task_pid_nr(p)))
++			goto out;
++	} else {
++		if (likely(!rt_task(p)) ||
++		    (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
++		    p->prio > curr->prio)
++			goto out;
++		if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
++		    p->prio == curr->prio)
++			per_cpu(wakeup_sharedprio, cpu) = 1;
++	}
++
++	if (cpu_wakeup_task)
++		put_task_struct(cpu_wakeup_task);
++	cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
++	get_task_struct(cpu_wakeup_task);
++	cpu_wakeup_task->preempt_timestamp_hist =
++		ftrace_now(raw_smp_processor_id());
++out:
++	raw_spin_unlock_irqrestore(&wakeup_lock, flags);
++}
++
++static notrace void probe_wakeup_latency_hist_stop(void *v,
++    struct task_struct *prev, struct task_struct *next)
++{
++	unsigned long flags;
++	int cpu = task_cpu(next);
++	long latency;
++	cycle_t stop;
++	struct task_struct *cpu_wakeup_task;
++
++	raw_spin_lock_irqsave(&wakeup_lock, flags);
++
++	cpu_wakeup_task = per_cpu(wakeup_task, cpu);
++
++	if (cpu_wakeup_task == NULL)
++		goto out;
++
++	/* Already running? */
++	if (unlikely(current == cpu_wakeup_task))
++		goto out_reset;
++
++	if (next != cpu_wakeup_task) {
++		if (next->prio < cpu_wakeup_task->prio)
++			goto out_reset;
++
++		if (next->prio == cpu_wakeup_task->prio)
++			per_cpu(wakeup_sharedprio, cpu) = 1;
++
++		goto out;
++	}
++
++	if (current->prio == cpu_wakeup_task->prio)
++		per_cpu(wakeup_sharedprio, cpu) = 1;
++
++	/*
++	 * The task we are waiting for is about to be switched to.
++	 * Calculate latency and store it in histogram.
++	 */
++	stop = ftrace_now(raw_smp_processor_id());
++
++	latency = ((long) (stop - next->preempt_timestamp_hist)) /
++	    NSECS_PER_USECS;
++
++	if (per_cpu(wakeup_sharedprio, cpu)) {
++		latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
++		    next);
++		per_cpu(wakeup_sharedprio, cpu) = 0;
++	} else {
++		latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++		if (timerandwakeup_enabled_data.enabled) {
++			latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
++			    next->timer_offset + latency, next->timer_offset,
++			    stop, next);
++		}
++#endif
++	}
++
++out_reset:
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++	next->timer_offset = 0;
++#endif
++	put_task_struct(cpu_wakeup_task);
++	per_cpu(wakeup_task, cpu) = NULL;
++out:
++	raw_spin_unlock_irqrestore(&wakeup_lock, flags);
++}
++#endif
++
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++static notrace void probe_hrtimer_interrupt(void *v, int cpu,
++    long long latency_ns, struct task_struct *curr, struct task_struct *task)
++{
++	if (latency_ns <= 0 && task != NULL && rt_task(task) &&
++	    (task->prio < curr->prio ||
++	    (task->prio == curr->prio &&
++	    !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
++		long latency;
++		cycle_t now;
++
++		if (missed_timer_offsets_pid) {
++			if (likely(missed_timer_offsets_pid !=
++			    task_pid_nr(task)))
++				return;
++		}
++
++		now = ftrace_now(cpu);
++		latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
++		latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
++		    task);
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++		task->timer_offset = latency;
++#endif
++	}
++}
++#endif
++
++static __init int latency_hist_init(void)
++{
++	struct dentry *latency_hist_root = NULL;
++	struct dentry *dentry;
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++	struct dentry *dentry_sharedprio;
++#endif
++	struct dentry *entry;
++	struct dentry *enable_root;
++	int i = 0;
++	struct hist_data *my_hist;
++	char name[64];
++	char *cpufmt = "CPU%d";
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++	char *cpufmt_maxlatproc = "max_latency-CPU%d";
++	struct maxlatproc_data *mp = NULL;
++#endif
++
++	dentry = tracing_init_dentry();
++	latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
++	enable_root = debugfs_create_dir("enable", latency_hist_root);
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++	dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
++	for_each_possible_cpu(i) {
++		sprintf(name, cpufmt, i);
++		entry = debugfs_create_file(name, 0444, dentry,
++		    &per_cpu(irqsoff_hist, i), &latency_hist_fops);
++		my_hist = &per_cpu(irqsoff_hist, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++	}
++	entry = debugfs_create_file("reset", 0644, dentry,
++	    (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++	dentry = debugfs_create_dir(preemptoff_hist_dir,
++	    latency_hist_root);
++	for_each_possible_cpu(i) {
++		sprintf(name, cpufmt, i);
++		entry = debugfs_create_file(name, 0444, dentry,
++		    &per_cpu(preemptoff_hist, i), &latency_hist_fops);
++		my_hist = &per_cpu(preemptoff_hist, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++	}
++	entry = debugfs_create_file("reset", 0644, dentry,
++	    (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++	dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
++	    latency_hist_root);
++	for_each_possible_cpu(i) {
++		sprintf(name, cpufmt, i);
++		entry = debugfs_create_file(name, 0444, dentry,
++		    &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
++		my_hist = &per_cpu(preemptirqsoff_hist, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++	}
++	entry = debugfs_create_file("reset", 0644, dentry,
++	    (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++	entry = debugfs_create_file("preemptirqsoff", 0644,
++	    enable_root, (void *)&preemptirqsoff_enabled_data,
++	    &enable_fops);
++#endif
++
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++	dentry = debugfs_create_dir(wakeup_latency_hist_dir,
++	    latency_hist_root);
++	dentry_sharedprio = debugfs_create_dir(
++	    wakeup_latency_hist_dir_sharedprio, dentry);
++	for_each_possible_cpu(i) {
++		sprintf(name, cpufmt, i);
++
++		entry = debugfs_create_file(name, 0444, dentry,
++		    &per_cpu(wakeup_latency_hist, i),
++		    &latency_hist_fops);
++		my_hist = &per_cpu(wakeup_latency_hist, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++
++		entry = debugfs_create_file(name, 0444, dentry_sharedprio,
++		    &per_cpu(wakeup_latency_hist_sharedprio, i),
++		    &latency_hist_fops);
++		my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++
++		sprintf(name, cpufmt_maxlatproc, i);
++
++		mp = &per_cpu(wakeup_maxlatproc, i);
++		entry = debugfs_create_file(name, 0444, dentry, mp,
++		    &maxlatproc_fops);
++		clear_maxlatprocdata(mp);
++
++		mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
++		entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
++		    &maxlatproc_fops);
++		clear_maxlatprocdata(mp);
++	}
++	entry = debugfs_create_file("pid", 0644, dentry,
++	    (void *)&wakeup_pid, &pid_fops);
++	entry = debugfs_create_file("reset", 0644, dentry,
++	    (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
++	entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
++	    (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
++	entry = debugfs_create_file("wakeup", 0644,
++	    enable_root, (void *)&wakeup_latency_enabled_data,
++	    &enable_fops);
++#endif
++
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++	dentry = debugfs_create_dir(missed_timer_offsets_dir,
++	    latency_hist_root);
++	for_each_possible_cpu(i) {
++		sprintf(name, cpufmt, i);
++		entry = debugfs_create_file(name, 0444, dentry,
++		    &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
++		my_hist = &per_cpu(missed_timer_offsets, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++
++		sprintf(name, cpufmt_maxlatproc, i);
++		mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
++		entry = debugfs_create_file(name, 0444, dentry, mp,
++		    &maxlatproc_fops);
++		clear_maxlatprocdata(mp);
++	}
++	entry = debugfs_create_file("pid", 0644, dentry,
++	    (void *)&missed_timer_offsets_pid, &pid_fops);
++	entry = debugfs_create_file("reset", 0644, dentry,
++	    (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
++	entry = debugfs_create_file("missed_timer_offsets", 0644,
++	    enable_root, (void *)&missed_timer_offsets_enabled_data,
++	    &enable_fops);
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++    defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++	dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
++	    latency_hist_root);
++	for_each_possible_cpu(i) {
++		sprintf(name, cpufmt, i);
++		entry = debugfs_create_file(name, 0444, dentry,
++		    &per_cpu(timerandwakeup_latency_hist, i),
++		    &latency_hist_fops);
++		my_hist = &per_cpu(timerandwakeup_latency_hist, i);
++		atomic_set(&my_hist->hist_mode, 1);
++		my_hist->min_lat = LONG_MAX;
++
++		sprintf(name, cpufmt_maxlatproc, i);
++		mp = &per_cpu(timerandwakeup_maxlatproc, i);
++		entry = debugfs_create_file(name, 0444, dentry, mp,
++		    &maxlatproc_fops);
++		clear_maxlatprocdata(mp);
++	}
++	entry = debugfs_create_file("reset", 0644, dentry,
++	    (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
++	entry = debugfs_create_file("timerandwakeup", 0644,
++	    enable_root, (void *)&timerandwakeup_enabled_data,
++	    &enable_fops);
++#endif
++	return 0;
++}
++
++__initcall(latency_hist_init);
+--- a/kernel/trace/trace_irqsoff.c
++++ b/kernel/trace/trace_irqsoff.c
+@@ -17,6 +17,7 @@
+ #include <linux/fs.h>
+ 
+ #include "trace.h"
++#include <trace/events/hist.h>
+ 
+ static struct trace_array		*irqsoff_trace __read_mostly;
+ static int				tracer_enabled __read_mostly;
+@@ -438,11 +439,13 @@ void start_critical_timings(void)
+ {
+ 	if (preempt_trace() || irq_trace())
+ 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
++	trace_preemptirqsoff_hist(TRACE_START, 1);
+ }
+ EXPORT_SYMBOL_GPL(start_critical_timings);
+ 
+ void stop_critical_timings(void)
+ {
++	trace_preemptirqsoff_hist(TRACE_STOP, 0);
+ 	if (preempt_trace() || irq_trace())
+ 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+ }
+@@ -452,6 +455,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings)
+ #ifdef CONFIG_PROVE_LOCKING
+ void time_hardirqs_on(unsigned long a0, unsigned long a1)
+ {
++	trace_preemptirqsoff_hist(IRQS_ON, 0);
+ 	if (!preempt_trace() && irq_trace())
+ 		stop_critical_timing(a0, a1);
+ }
+@@ -460,6 +464,7 @@ void time_hardirqs_off(unsigned long a0,
+ {
+ 	if (!preempt_trace() && irq_trace())
+ 		start_critical_timing(a0, a1);
++	trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ }
+ 
+ #else /* !CONFIG_PROVE_LOCKING */
+@@ -485,6 +490,7 @@ inline void print_irqtrace_events(struct
+  */
+ void trace_hardirqs_on(void)
+ {
++	trace_preemptirqsoff_hist(IRQS_ON, 0);
+ 	if (!preempt_trace() && irq_trace())
+ 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+ }
+@@ -494,11 +500,13 @@ void trace_hardirqs_off(void)
+ {
+ 	if (!preempt_trace() && irq_trace())
+ 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
++	trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ }
+ EXPORT_SYMBOL(trace_hardirqs_off);
+ 
+ void trace_hardirqs_on_caller(unsigned long caller_addr)
+ {
++	trace_preemptirqsoff_hist(IRQS_ON, 0);
+ 	if (!preempt_trace() && irq_trace())
+ 		stop_critical_timing(CALLER_ADDR0, caller_addr);
+ }
+@@ -508,6 +516,7 @@ void trace_hardirqs_off_caller(unsigned
+ {
+ 	if (!preempt_trace() && irq_trace())
+ 		start_critical_timing(CALLER_ADDR0, caller_addr);
++	trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ }
+ EXPORT_SYMBOL(trace_hardirqs_off_caller);
+ 
+@@ -517,12 +526,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller)
+ #ifdef CONFIG_PREEMPT_TRACER
+ void trace_preempt_on(unsigned long a0, unsigned long a1)
+ {
++	trace_preemptirqsoff_hist(PREEMPT_ON, 0);
+ 	if (preempt_trace() && !irq_trace())
+ 		stop_critical_timing(a0, a1);
+ }
+ 
+ void trace_preempt_off(unsigned long a0, unsigned long a1)
+ {
++	trace_preemptirqsoff_hist(PREEMPT_ON, 1);
+ 	if (preempt_trace() && !irq_trace())
+ 		start_critical_timing(a0, a1);
+ }
diff --git a/patches/lglocks-rt.patch b/patches/lglocks-rt.patch
new file mode 100644
index 0000000..f870ffc
--- /dev/null
+++ b/patches/lglocks-rt.patch
@@ -0,0 +1,173 @@
+Subject: lglocks-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 15 Jun 2011 11:02:21 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/lglock.h |   19 +++++++++++++++--
+ kernel/lglock.c        |   54 ++++++++++++++++++++++++++++++++-----------------
+ 2 files changed, 53 insertions(+), 20 deletions(-)
+
+--- a/include/linux/lglock.h
++++ b/include/linux/lglock.h
+@@ -42,22 +42,37 @@
+ #endif
+ 
+ struct lglock {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ 	arch_spinlock_t __percpu *lock;
++#else
++	struct rt_mutex __percpu *lock;
++#endif
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ 	struct lock_class_key lock_key;
+ 	struct lockdep_map    lock_dep_map;
+ #endif
+ };
+ 
+-#define DEFINE_LGLOCK(name)						\
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define DEFINE_LGLOCK(name)						\
+ 	static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)		\
+ 	= __ARCH_SPIN_LOCK_UNLOCKED;					\
+ 	struct lglock name = { .lock = &name ## _lock }
+ 
+-#define DEFINE_STATIC_LGLOCK(name)					\
++# define DEFINE_STATIC_LGLOCK(name)					\
+ 	static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)		\
+ 	= __ARCH_SPIN_LOCK_UNLOCKED;					\
+ 	static struct lglock name = { .lock = &name ## _lock }
++#else
++
++# define DEFINE_LGLOCK(name)						\
++	static DEFINE_PER_CPU(struct rt_mutex, name ## _lock);		\
++	struct lglock name = { .lock = &name ## _lock }
++
++# define DEFINE_STATIC_LGLOCK(name)					\
++	static DEFINE_PER_CPU(struct rt_mutex, name ## _lock);		\
++	static struct lglock name = { .lock = &name ## _lock }
++#endif
+ 
+ void lg_lock_init(struct lglock *lg, char *name);
+ void lg_local_lock(struct lglock *lg);
+--- a/kernel/lglock.c
++++ b/kernel/lglock.c
+@@ -4,6 +4,15 @@
+ #include <linux/cpu.h>
+ #include <linux/string.h>
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define lg_lock_ptr		arch_spinlock_t
++# define lg_do_lock(l)		arch_spin_lock(l)
++# define lg_do_unlock(l)	arch_spin_unlock(l)
++#else
++# define lg_lock_ptr		struct rt_mutex
++# define lg_do_lock(l)		__rt_spin_lock(l)
++# define lg_do_unlock(l)	__rt_spin_unlock(l)
++#endif
+ /*
+  * Note there is no uninit, so lglocks cannot be defined in
+  * modules (but it's fine to use them from there)
+@@ -12,51 +21,60 @@
+ 
+ void lg_lock_init(struct lglock *lg, char *name)
+ {
++#ifdef CONFIG_PREEMPT_RT_FULL
++	int i;
++
++	for_each_possible_cpu(i) {
++		struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
++
++		rt_mutex_init(lock);
++	}
++#endif
+ 	LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
+ }
+ EXPORT_SYMBOL(lg_lock_init);
+ 
+ void lg_local_lock(struct lglock *lg)
+ {
+-	arch_spinlock_t *lock;
++	lg_lock_ptr *lock;
+ 
+-	preempt_disable();
++	migrate_disable();
+ 	rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ 	lock = this_cpu_ptr(lg->lock);
+-	arch_spin_lock(lock);
++	lg_do_lock(lock);
+ }
+ EXPORT_SYMBOL(lg_local_lock);
+ 
+ void lg_local_unlock(struct lglock *lg)
+ {
+-	arch_spinlock_t *lock;
++	lg_lock_ptr *lock;
+ 
+ 	rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ 	lock = this_cpu_ptr(lg->lock);
+-	arch_spin_unlock(lock);
+-	preempt_enable();
++	lg_do_unlock(lock);
++	migrate_enable();
+ }
+ EXPORT_SYMBOL(lg_local_unlock);
+ 
+ void lg_local_lock_cpu(struct lglock *lg, int cpu)
+ {
+-	arch_spinlock_t *lock;
++	lg_lock_ptr *lock;
+ 
+-	preempt_disable();
++	preempt_disable_nort();
+ 	rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ 	lock = per_cpu_ptr(lg->lock, cpu);
+-	arch_spin_lock(lock);
++	lg_do_lock(lock);
+ }
+ EXPORT_SYMBOL(lg_local_lock_cpu);
+ 
+ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
+ {
+-	arch_spinlock_t *lock;
++	lg_lock_ptr *lock;
+ 
+ 	rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ 	lock = per_cpu_ptr(lg->lock, cpu);
+-	arch_spin_unlock(lock);
+-	preempt_enable();
++	lg_do_unlock(lock);
++	preempt_enable_nort();
+ }
+ EXPORT_SYMBOL(lg_local_unlock_cpu);
+ 
+@@ -64,12 +82,12 @@ void lg_global_lock(struct lglock *lg)
+ {
+ 	int i;
+ 
+-	preempt_disable();
++	preempt_disable_nort();
+ 	rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ 	for_each_possible_cpu(i) {
+-		arch_spinlock_t *lock;
++		lg_lock_ptr *lock;
+ 		lock = per_cpu_ptr(lg->lock, i);
+-		arch_spin_lock(lock);
++		lg_do_lock(lock);
+ 	}
+ }
+ EXPORT_SYMBOL(lg_global_lock);
+@@ -80,10 +98,10 @@ void lg_global_unlock(struct lglock *lg)
+ 
+ 	rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ 	for_each_possible_cpu(i) {
+-		arch_spinlock_t *lock;
++		lg_lock_ptr *lock;
+ 		lock = per_cpu_ptr(lg->lock, i);
+-		arch_spin_unlock(lock);
++		lg_do_unlock(lock);
+ 	}
+-	preempt_enable();
++	preempt_enable_nort();
+ }
+ EXPORT_SYMBOL(lg_global_unlock);
diff --git a/patches/list-add-list-last-entry.patch b/patches/list-add-list-last-entry.patch
new file mode 100644
index 0000000..60a4798
--- /dev/null
+++ b/patches/list-add-list-last-entry.patch
@@ -0,0 +1,29 @@
+Subject: list-add-list-last-entry.patch
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 21 Jun 2011 11:22:36 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/list.h |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -362,6 +362,17 @@ static inline void list_splice_tail_init
+ 	list_entry((ptr)->next, type, member)
+ 
+ /**
++ * list_last_entry - get the last element from a list
++ * @ptr:	the list head to take the element from.
++ * @type:	the type of the struct this is embedded in.
++ * @member:	the name of the list_struct within the struct.
++ *
++ * Note, that list is expected to be not empty.
++ */
++#define list_last_entry(ptr, type, member) \
++	list_entry((ptr)->prev, type, member)
++
++/**
+  * list_for_each	-	iterate over a list
+  * @pos:	the &struct list_head to use as a loop cursor.
+  * @head:	the head for your list.
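A short usage note: list_last_entry() mirrors list_first_entry() but follows ->prev instead of ->next. The sketch below uses a hypothetical element type that is not part of the patch, only to illustrate the call:

#include <linux/list.h>

/* Hypothetical element type embedding a list_head. */
struct item {
	int value;
	struct list_head node;
};

static int last_value(struct list_head *head)
{
	struct item *it;

	if (list_empty(head))
		return -1;	/* the helper requires a non-empty list */

	/* Picks the entry linked via head->prev, i.e. the tail element. */
	it = list_last_entry(head, struct item, node);
	return it->value;
}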
diff --git a/patches/local-irq-rt-depending-variants.patch b/patches/local-irq-rt-depending-variants.patch
new file mode 100644
index 0000000..e53e0b4
--- /dev/null
+++ b/patches/local-irq-rt-depending-variants.patch
@@ -0,0 +1,52 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 21 Jul 2009 22:34:14 +0200
+Subject: rt: local_irq_* variants depending on RT/!RT
+
+Add local_irq_*_(no)rt variants, which are mainly used to break
+interrupt-disabled sections on PREEMPT_RT or to explicitly disable
+interrupts on PREEMPT_RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/interrupt.h |    2 +-
+ include/linux/irqflags.h  |   19 +++++++++++++++++++
+ 2 files changed, 20 insertions(+), 1 deletion(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -211,7 +211,7 @@ extern void devm_free_irq(struct device
+ #ifdef CONFIG_LOCKDEP
+ # define local_irq_enable_in_hardirq()	do { } while (0)
+ #else
+-# define local_irq_enable_in_hardirq()	local_irq_enable()
++# define local_irq_enable_in_hardirq()	local_irq_enable_nort()
+ #endif
+ 
+ extern void disable_irq_nosync(unsigned int irq);
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -147,4 +147,23 @@
+ 
+ #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
+ 
++/*
++ * local_irq* variants depending on RT/!RT
++ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define local_irq_disable_nort()	do { } while (0)
++# define local_irq_enable_nort()	do { } while (0)
++# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
++# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
++# define local_irq_disable_rt()		local_irq_disable()
++# define local_irq_enable_rt()		local_irq_enable()
++#else
++# define local_irq_disable_nort()	local_irq_disable()
++# define local_irq_enable_nort()	local_irq_enable()
++# define local_irq_save_nort(flags)	local_irq_save(flags)
++# define local_irq_restore_nort(flags)	local_irq_restore(flags)
++# define local_irq_disable_rt()		do { } while (0)
++# define local_irq_enable_rt()		do { } while (0)
++#endif
++
+ #endif
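A rough usage sketch (hypothetical driver code; the _nort helpers only exist with this patch applied): a section that on mainline disabled interrupts merely to stay CPU-local can switch to local_irq_save_nort(), which is still a real local_irq_save() on !RT but only saves the flags on PREEMPT_RT_FULL, where the now-sleeping spinlock provides the serialization.

#include <linux/irqflags.h>
#include <linux/spinlock.h>

/* Hypothetical device state; not taken from the patch. */
struct dummy_dev {
	spinlock_t lock;
	unsigned long events;
};

static void dummy_count_event(struct dummy_dev *d)
{
	unsigned long flags;

	local_irq_save_nort(flags);	/* flags save only on RT */
	spin_lock(&d->lock);		/* rtmutex-based on RT, may sleep */
	d->events++;
	spin_unlock(&d->lock);
	local_irq_restore_nort(flags);
}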
diff --git a/patches/local-var.patch b/patches/local-var.patch
new file mode 100644
index 0000000..e39a312
--- /dev/null
+++ b/patches/local-var.patch
@@ -0,0 +1,23 @@
+Subject: local-var.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 24 Jun 2011 18:40:37 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/percpu.h |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/include/linux/percpu.h
++++ b/include/linux/percpu.h
+@@ -48,6 +48,11 @@
+ 	preempt_enable();				\
+ } while (0)
+ 
++#define get_local_var(var)	get_cpu_var(var)
++#define put_local_var(var)	put_cpu_var(var)
++#define get_local_ptr(var)	get_cpu_ptr(var)
++#define put_local_ptr(var)	put_cpu_ptr(var)
++
+ /* minimum unit size, also is the maximum supported allocation size */
+ #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(32 << 10)
+ 
diff --git a/patches/local-vars-migrate-disable.patch b/patches/local-vars-migrate-disable.patch
new file mode 100644
index 0000000..ebd6557
--- /dev/null
+++ b/patches/local-vars-migrate-disable.patch
@@ -0,0 +1,46 @@
+Subject: local-vars-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 20:42:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/percpu.h |   28 ++++++++++++++++++++++++----
+ 1 file changed, 24 insertions(+), 4 deletions(-)
+
+--- a/include/linux/percpu.h
++++ b/include/linux/percpu.h
+@@ -48,10 +48,30 @@
+ 	preempt_enable();				\
+ } while (0)
+ 
+-#define get_local_var(var)	get_cpu_var(var)
+-#define put_local_var(var)	put_cpu_var(var)
+-#define get_local_ptr(var)	get_cpu_ptr(var)
+-#define put_local_ptr(var)	put_cpu_ptr(var)
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define get_local_var(var)	get_cpu_var(var)
++# define put_local_var(var)	put_cpu_var(var)
++# define get_local_ptr(var)	get_cpu_ptr(var)
++# define put_local_ptr(var)	put_cpu_ptr(var)
++#else
++# define get_local_var(var) (*({			\
++	migrate_disable();				\
++	&__get_cpu_var(var); }))
++
++# define put_local_var(var) do {			\
++	(void)&(var);					\
++	migrate_enable();				\
++} while (0)
++
++# define get_local_ptr(var) ({				\
++	migrate_disable();				\
++	this_cpu_ptr(var); })
++
++# define put_local_ptr(var) do {			\
++	(void)(var);					\
++	migrate_enable();				\
++} while (0)
++#endif
+ 
+ /* minimum unit size, also is the maximum supported allocation size */
+ #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(32 << 10)
diff --git a/patches/localversion.patch b/patches/localversion.patch
new file mode 100644
index 0000000..56edefb
--- /dev/null
+++ b/patches/localversion.patch
@@ -0,0 +1,15 @@
+Subject: localversion.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 08 Jul 2011 20:25:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-8vdw4bfcsds27cvox6rpb334@git.kernel.org
+---
+ localversion-rt |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- /dev/null
++++ b/localversion-rt
+@@ -0,0 +1 @@
++-rt1
diff --git a/patches/lockdep-no-softirq-accounting-on-rt.patch b/patches/lockdep-no-softirq-accounting-on-rt.patch
new file mode 100644
index 0000000..dd739db
--- /dev/null
+++ b/patches/lockdep-no-softirq-accounting-on-rt.patch
@@ -0,0 +1,56 @@
+Subject: lockdep-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 18:51:23 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/irqflags.h |   10 +++++++---
+ kernel/lockdep.c         |    2 ++
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -25,8 +25,6 @@
+ # define trace_softirqs_enabled(p)	((p)->softirqs_enabled)
+ # define trace_hardirq_enter()	do { current->hardirq_context++; } while (0)
+ # define trace_hardirq_exit()	do { current->hardirq_context--; } while (0)
+-# define lockdep_softirq_enter()	do { current->softirq_context++; } while (0)
+-# define lockdep_softirq_exit()	do { current->softirq_context--; } while (0)
+ # define INIT_TRACE_IRQFLAGS	.softirqs_enabled = 1,
+ #else
+ # define trace_hardirqs_on()		do { } while (0)
+@@ -39,9 +37,15 @@
+ # define trace_softirqs_enabled(p)	0
+ # define trace_hardirq_enter()		do { } while (0)
+ # define trace_hardirq_exit()		do { } while (0)
++# define INIT_TRACE_IRQFLAGS
++#endif
++
++#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
++# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
++# define lockdep_softirq_exit()	 do { current->softirq_context--; } while (0)
++#else
+ # define lockdep_softirq_enter()	do { } while (0)
+ # define lockdep_softirq_exit()		do { } while (0)
+-# define INIT_TRACE_IRQFLAGS
+ #endif
+ 
+ #if defined(CONFIG_IRQSOFF_TRACER) || \
+--- a/kernel/lockdep.c
++++ b/kernel/lockdep.c
+@@ -3534,6 +3534,7 @@ static void check_flags(unsigned long fl
+ 		}
+ 	}
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ 	/*
+ 	 * We dont accurately track softirq state in e.g.
+ 	 * hardirq contexts (such as on 4KSTACKS), so only
+@@ -3548,6 +3549,7 @@ static void check_flags(unsigned long fl
+ 			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+ 		}
+ 	}
++#endif
+ 
+ 	if (!debug_locks)
+ 		print_irqtrace_events(current);
diff --git a/patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch b/patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch
new file mode 100644
index 0000000..8edb4d8
--- /dev/null
+++ b/patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch
@@ -0,0 +1,90 @@
+Subject: lockdep: Selftest: convert spinlock to raw spinlock
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Mon, 16 Apr 2012 15:01:55 +0800
+
+From: Yong Zhang <yong.zhang@windriver.com>
+
+spinlock is sleepable on -rt and therefore cannot be used in
+interrupt context.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Yong Zhang <yong.zhang@windriver.com>
+Link: http://lkml.kernel.org/r/1334559716-18447-2-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/locking-selftest.c |   34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_de
+  * Normal standalone locks, for the circular and irq-context
+  * dependency tests:
+  */
+-static DEFINE_SPINLOCK(lock_A);
+-static DEFINE_SPINLOCK(lock_B);
+-static DEFINE_SPINLOCK(lock_C);
+-static DEFINE_SPINLOCK(lock_D);
++static DEFINE_RAW_SPINLOCK(lock_A);
++static DEFINE_RAW_SPINLOCK(lock_B);
++static DEFINE_RAW_SPINLOCK(lock_C);
++static DEFINE_RAW_SPINLOCK(lock_D);
+ 
+ static DEFINE_RWLOCK(rwlock_A);
+ static DEFINE_RWLOCK(rwlock_B);
+@@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D);
+  * but X* and Y* are different classes. We do this so that
+  * we do not trigger a real lockup:
+  */
+-static DEFINE_SPINLOCK(lock_X1);
+-static DEFINE_SPINLOCK(lock_X2);
+-static DEFINE_SPINLOCK(lock_Y1);
+-static DEFINE_SPINLOCK(lock_Y2);
+-static DEFINE_SPINLOCK(lock_Z1);
+-static DEFINE_SPINLOCK(lock_Z2);
++static DEFINE_RAW_SPINLOCK(lock_X1);
++static DEFINE_RAW_SPINLOCK(lock_X2);
++static DEFINE_RAW_SPINLOCK(lock_Y1);
++static DEFINE_RAW_SPINLOCK(lock_Y2);
++static DEFINE_RAW_SPINLOCK(lock_Z1);
++static DEFINE_RAW_SPINLOCK(lock_Z2);
+ 
+ static DEFINE_RWLOCK(rwlock_X1);
+ static DEFINE_RWLOCK(rwlock_X2);
+@@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2);
+  */
+ #define INIT_CLASS_FUNC(class) 				\
+ static noinline void					\
+-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
+-		 struct rw_semaphore *rwsem)		\
++init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
++	struct mutex *mutex, struct rw_semaphore *rwsem)\
+ {							\
+-	spin_lock_init(lock);				\
++	raw_spin_lock_init(lock);			\
+ 	rwlock_init(rwlock);				\
+ 	mutex_init(mutex);				\
+ 	init_rwsem(rwsem);				\
+@@ -168,10 +168,10 @@ static void init_shared_classes(void)
+  * Shortcuts for lock/unlock API variants, to keep
+  * the testcases compact:
+  */
+-#define L(x)			spin_lock(&lock_##x)
+-#define U(x)			spin_unlock(&lock_##x)
++#define L(x)			raw_spin_lock(&lock_##x)
++#define U(x)			raw_spin_unlock(&lock_##x)
+ #define LU(x)			L(x); U(x)
+-#define SI(x)			spin_lock_init(&lock_##x)
++#define SI(x)			raw_spin_lock_init(&lock_##x)
+ 
+ #define WL(x)			write_lock(&rwlock_##x)
+ #define WU(x)			write_unlock(&rwlock_##x)
+@@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_
+ 
+ #define I2(x)					\
+ 	do {					\
+-		spin_lock_init(&lock_##x);	\
++		raw_spin_lock_init(&lock_##x);	\
+ 		rwlock_init(&rwlock_##x);	\
+ 		mutex_init(&mutex_##x);		\
+ 		init_rwsem(&rwsem_##x);		\
diff --git a/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch
new file mode 100644
index 0000000..f476b56
--- /dev/null
+++ b/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch
@@ -0,0 +1,56 @@
+Subject: lockdep: Selftest: Only do hardirq context test for raw spinlock
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Mon, 16 Apr 2012 15:01:56 +0800
+
+From: Yong Zhang <yong.zhang@windriver.com>
+
+On -rt there is no softirq context anymore and rwlock is sleepable,
+so disable the softirq context test and the rwlock+irq test.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Yong Zhang <yong.zhang@windriver.com>
+Link: http://lkml.kernel.org/r/1334559716-18447-3-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/locking-selftest.c |   23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -1175,6 +1175,7 @@ void locking_selftest(void)
+ 
+ 	printk("  --------------------------------------------------------------------------\n");
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ 	/*
+ 	 * irq-context testcases:
+ 	 */
+@@ -1187,6 +1188,28 @@ void locking_selftest(void)
+ 
+ 	DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
+ //	DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
++#else
++	/* On -rt, we only do hardirq context test for raw spinlock */
++	DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
++	DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
++
++	DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
++	DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
++
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
++
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
++	DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
++#endif
+ 
+ 	if (unexpected_testcase_failures) {
+ 		printk("-----------------------------------------------------------------\n");
diff --git a/patches/locking-various-init-fixes.patch b/patches/locking-various-init-fixes.patch
new file mode 100644
index 0000000..9ccea4d
--- /dev/null
+++ b/patches/locking-various-init-fixes.patch
@@ -0,0 +1,74 @@
+Subject: locking-various-init-fixes.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:25:03 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/char/random.c        |    6 +++---
+ drivers/usb/chipidea/debug.c |    2 +-
+ fs/file.c                    |    2 +-
+ include/linux/idr.h          |    2 +-
+ 4 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -445,7 +445,7 @@ static struct entropy_store input_pool =
+ 	.poolinfo = &poolinfo_table[0],
+ 	.name = "input",
+ 	.limit = 1,
+-	.lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
++	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+ 	.pool = input_pool_data
+ };
+ 
+@@ -454,7 +454,7 @@ static struct entropy_store blocking_poo
+ 	.name = "blocking",
+ 	.limit = 1,
+ 	.pull = &input_pool,
+-	.lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
++	.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
+ 	.pool = blocking_pool_data
+ };
+ 
+@@ -462,7 +462,7 @@ static struct entropy_store nonblocking_
+ 	.poolinfo = &poolinfo_table[1],
+ 	.name = "nonblocking",
+ 	.pull = &input_pool,
+-	.lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
++	.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
+ 	.pool = nonblocking_pool_data
+ };
+ 
+--- a/drivers/usb/chipidea/debug.c
++++ b/drivers/usb/chipidea/debug.c
+@@ -222,7 +222,7 @@ static struct {
+ } dbg_data = {
+ 	.idx = 0,
+ 	.tty = 0,
+-	.lck = __RW_LOCK_UNLOCKED(lck)
++	.lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
+ };
+ 
+ /**
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -516,7 +516,7 @@ struct files_struct init_files = {
+ 		.close_on_exec	= init_files.close_on_exec_init,
+ 		.open_fds	= init_files.open_fds_init,
+ 	},
+-	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
++	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
+ };
+ 
+ /*
+--- a/include/linux/idr.h
++++ b/include/linux/idr.h
+@@ -136,7 +136,7 @@ struct ida {
+ 	struct ida_bitmap	*free_bitmap;
+ };
+ 
+-#define IDA_INIT(name)		{ .idr = IDR_INIT(name), .free_bitmap = NULL, }
++#define IDA_INIT(name)		{ .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
+ #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
+ 
+ int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
diff --git a/patches/md-raid5-percpu-handling-rt-aware.patch b/patches/md-raid5-percpu-handling-rt-aware.patch
new file mode 100644
index 0000000..c4e56e5
--- /dev/null
+++ b/patches/md-raid5-percpu-handling-rt-aware.patch
@@ -0,0 +1,61 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 6 Apr 2010 16:51:31 +0200
+Subject: md: raid5: Make raid5_percpu handling RT aware
+
+__raid_run_ops() disables preemption with get_cpu() around the access
+to the raid5_percpu variables. That causes "scheduling while atomic"
+splats on RT.
+
+Serialize the access to the percpu data with a lock and keep the code
+preemptible.
+
+Reported-by: Udo van den Heuvel <udovdh@xs4all.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Udo van den Heuvel <udovdh@xs4all.nl>
+
+---
+ drivers/md/raid5.c |    7 +++++--
+ drivers/md/raid5.h |    1 +
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -1415,8 +1415,9 @@ static void __raid_run_ops(struct stripe
+ 	struct raid5_percpu *percpu;
+ 	unsigned long cpu;
+ 
+-	cpu = get_cpu();
++	cpu = get_cpu_light();
+ 	percpu = per_cpu_ptr(conf->percpu, cpu);
++	spin_lock(&percpu->lock);
+ 	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
+ 		ops_run_biofill(sh);
+ 		overlap_clear++;
+@@ -1468,7 +1469,8 @@ static void __raid_run_ops(struct stripe
+ 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
+ 				wake_up(&sh->raid_conf->wait_for_overlap);
+ 		}
+-	put_cpu();
++	spin_unlock(&percpu->lock);
++	put_cpu_light();
+ }
+ 
+ #ifdef CONFIG_MULTICORE_RAID456
+@@ -5093,6 +5095,7 @@ static int raid5_alloc_percpu(struct r5c
+ 			break;
+ 		}
+ 		per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
++		spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
+ 	}
+ #ifdef CONFIG_HOTPLUG_CPU
+ 	conf->cpu_notify.notifier_call = raid456_cpu_notify;
+--- a/drivers/md/raid5.h
++++ b/drivers/md/raid5.h
+@@ -428,6 +428,7 @@ struct r5conf {
+ 	int			recovery_disabled;
+ 	/* per cpu variables */
+ 	struct raid5_percpu {
++		spinlock_t	lock;	     /* Protection for -RT */
+ 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+ 		void		*scribble;   /* space for constructing buffer
+ 					      * lists and performing address
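The conversion above follows a recurring RT pattern: get_cpu()/put_cpu() become get_cpu_light()/put_cpu_light() and a per-CPU spinlock serializes access, so the section stays preemptible. A stripped-down sketch with a hypothetical per-CPU structure (get_cpu_light() and put_cpu_light() come from elsewhere in this series, and the lock must have been set up with spin_lock_init()):

#include <linux/percpu.h>
#include <linux/spinlock.h>

/* Hypothetical per-CPU scratch area, analogous to struct raid5_percpu. */
struct scratch {
	spinlock_t lock;	/* protection for -RT */
	void *buf;
};

static DEFINE_PER_CPU(struct scratch, scratch_area);

static void use_scratch(void)
{
	struct scratch *s;
	int cpu;

	cpu = get_cpu_light();	/* unlike get_cpu(), keeps preemption on RT */
	s = &per_cpu(scratch_area, cpu);
	spin_lock(&s->lock);	/* serializes other tasks on this CPU */
	/* ... operate on s->buf ... */
	spin_unlock(&s->lock);
	put_cpu_light();
}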
diff --git a/patches/might-sleep-check-for-idle.patch b/patches/might-sleep-check-for-idle.patch
new file mode 100644
index 0000000..be17e41
--- /dev/null
+++ b/patches/might-sleep-check-for-idle.patch
@@ -0,0 +1,23 @@
+Subject: sched: Check for idle task in might_sleep()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 23:34:08 +0100
+
+Idle is not allowed to call sleeping functions ever!
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7391,7 +7391,8 @@ void __might_sleep(const char *file, int
+ 	static unsigned long prev_jiffy;	/* ratelimiting */
+ 
+ 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
+-	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
++	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
++	     !is_idle_task(current)) ||
+ 	    system_state != SYSTEM_RUNNING || oops_in_progress)
+ 		return;
+ 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
diff --git a/patches/migrate-disable-rt-variant.patch b/patches/migrate-disable-rt-variant.patch
new file mode 100644
index 0000000..68958de
--- /dev/null
+++ b/patches/migrate-disable-rt-variant.patch
@@ -0,0 +1,27 @@
+Subject: migrate-disable-rt-variant.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 19:48:20 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/preempt.h |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -121,11 +121,15 @@ extern void migrate_enable(void);
+ # define preempt_enable_rt()		preempt_enable()
+ # define preempt_disable_nort()		do { } while (0)
+ # define preempt_enable_nort()		do { } while (0)
++# define migrate_disable_rt()		migrate_disable()
++# define migrate_enable_rt()		migrate_enable()
+ #else
+ # define preempt_disable_rt()		do { } while (0)
+ # define preempt_enable_rt()		do { } while (0)
+ # define preempt_disable_nort()		preempt_disable()
+ # define preempt_enable_nort()		preempt_enable()
++# define migrate_disable_rt()		do { } while (0)
++# define migrate_enable_rt()		do { } while (0)
+ #endif
+ 
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/patches/mips-disable-highmem-on-rt.patch b/patches/mips-disable-highmem-on-rt.patch
new file mode 100644
index 0000000..eb1312b
--- /dev/null
+++ b/patches/mips-disable-highmem-on-rt.patch
@@ -0,0 +1,20 @@
+Subject: mips-disable-highmem-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 17:10:12 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/mips/Kconfig |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -2102,7 +2102,7 @@ config CPU_R4400_WORKAROUNDS
+ #
+ config HIGHMEM
+ 	bool "High Memory Support"
+-	depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM
++	depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL
+ 
+ config CPU_SUPPORTS_HIGHMEM
+ 	bool
diff --git a/patches/mips-enable-interrupts-in-signal.patch b/patches/mips-enable-interrupts-in-signal.patch
new file mode 100644
index 0000000..571c43a
--- /dev/null
+++ b/patches/mips-enable-interrupts-in-signal.patch
@@ -0,0 +1,19 @@
+Subject: mips-enable-interrupts-in-signal.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 21:32:10 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/mips/kernel/signal.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/mips/kernel/signal.c
++++ b/arch/mips/kernel/signal.c
+@@ -601,6 +601,7 @@ asmlinkage void do_notify_resume(struct
+ 	__u32 thread_info_flags)
+ {
+ 	local_irq_enable();
++	preempt_check_resched();
+ 
+ 	/* deal with pending signal delivery */
+ 	if (thread_info_flags & _TIF_SIGPENDING)
diff --git a/patches/mm-allow-slab-rt.patch b/patches/mm-allow-slab-rt.patch
new file mode 100644
index 0000000..864280a
--- /dev/null
+++ b/patches/mm-allow-slab-rt.patch
@@ -0,0 +1,29 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:44:03 -0500
+Subject: mm: Allow only slab on RT
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ init/Kconfig |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1493,6 +1493,7 @@ config SLAB
+ 
+ config SLUB
+ 	bool "SLUB (Unqueued Allocator)"
++	depends on !PREEMPT_RT_FULL
+ 	help
+ 	   SLUB is a slab allocator that minimizes cache line usage
+ 	   instead of managing queues of cached objects (SLAB approach).
+@@ -1504,6 +1505,7 @@ config SLUB
+ config SLOB
+ 	depends on EXPERT
+ 	bool "SLOB (Simple Allocator)"
++	depends on !PREEMPT_RT_FULL
+ 	help
+ 	   SLOB replaces the stock allocator with a drastically simpler
+ 	   allocator. SLOB is generally more space efficient but
diff --git a/patches/mm-bounce-local-irq-save-nort.patch b/patches/mm-bounce-local-irq-save-nort.patch
new file mode 100644
index 0000000..7a0219d
--- /dev/null
+++ b/patches/mm-bounce-local-irq-save-nort.patch
@@ -0,0 +1,27 @@
+Subject: mm: bounce: Use local_irq_save_nort
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 10:33:09 +0100
+
+kmap_atomic() is preemptible on RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/bounce.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/bounce.c
++++ b/mm/bounce.c
+@@ -51,11 +51,11 @@ static void bounce_copy_vec(struct bio_v
+ 	unsigned long flags;
+ 	unsigned char *vto;
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 	vto = kmap_atomic(to->bv_page);
+ 	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
+ 	kunmap_atomic(vto);
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ }
+ 
+ #else /* CONFIG_HIGHMEM */
diff --git a/patches/mm-cgroup-page-bit-spinlock.patch b/patches/mm-cgroup-page-bit-spinlock.patch
new file mode 100644
index 0000000..3b3b3a7
--- /dev/null
+++ b/patches/mm-cgroup-page-bit-spinlock.patch
@@ -0,0 +1,91 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 19 Aug 2009 09:56:42 +0200
+Subject: mm: Replace cgroup_page bit spinlock
+
+Bit spinlocks are not working on RT. Replace them.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/page_cgroup.h |   15 +++++++++++++++
+ mm/page_cgroup.c            |   11 +++++++++++
+ 2 files changed, 26 insertions(+)
+
+--- a/include/linux/page_cgroup.h
++++ b/include/linux/page_cgroup.h
+@@ -24,6 +24,9 @@ enum {
+  */
+ struct page_cgroup {
+ 	unsigned long flags;
++#ifdef CONFIG_PREEMPT_RT_BASE
++	spinlock_t pcg_lock;
++#endif
+ 	struct mem_cgroup *mem_cgroup;
+ };
+ 
+@@ -74,12 +77,20 @@ static inline void lock_page_cgroup(stru
+ 	 * Don't take this lock in IRQ context.
+ 	 * This lock is for pc->mem_cgroup, USED, MIGRATION
+ 	 */
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	bit_spin_lock(PCG_LOCK, &pc->flags);
++#else
++	spin_lock(&pc->pcg_lock);
++#endif
+ }
+ 
+ static inline void unlock_page_cgroup(struct page_cgroup *pc)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	bit_spin_unlock(PCG_LOCK, &pc->flags);
++#else
++	spin_unlock(&pc->pcg_lock);
++#endif
+ }
+ 
+ #else /* CONFIG_MEMCG */
+@@ -102,6 +113,10 @@ static inline void __init page_cgroup_in
+ {
+ }
+ 
++static inline void page_cgroup_lock_init(struct page_cgroup *pc)
++{
++}
++
+ #endif /* CONFIG_MEMCG */
+ 
+ #include <linux/swap.h>
+--- a/mm/page_cgroup.c
++++ b/mm/page_cgroup.c
+@@ -13,6 +13,14 @@
+ 
+ static unsigned long total_usage;
+ 
++static void page_cgroup_lock_init(struct page_cgroup *pc, int nr_pages)
++{
++#ifdef CONFIG_PREEMPT_RT_BASE
++	for (; nr_pages; nr_pages--, pc++)
++		spin_lock_init(&pc->pcg_lock);
++#endif
++}
++
+ #if !defined(CONFIG_SPARSEMEM)
+ 
+ 
+@@ -60,6 +68,7 @@ static int __init alloc_node_page_cgroup
+ 		return -ENOMEM;
+ 	NODE_DATA(nid)->node_page_cgroup = base;
+ 	total_usage += table_size;
++	page_cgroup_lock_init(base, nr_pages);
+ 	return 0;
+ }
+ 
+@@ -150,6 +159,8 @@ static int __meminit init_section_page_c
+ 		return -ENOMEM;
+ 	}
+ 
++	page_cgroup_lock_init(base, PAGES_PER_SECTION);
++
+ 	/*
+ 	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
+ 	 * we need to apply a mask.
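The same idea — keep the space-saving bit spinlock on !RT and substitute a real spinlock (a sleeping lock on RT) when PREEMPT_RT_BASE is set — can be sketched with a hypothetical structure like this:

#include <linux/bit_spinlock.h>
#include <linux/spinlock.h>

#define MY_LOCK_BIT	0

struct tagged {
	unsigned long flags;
#ifdef CONFIG_PREEMPT_RT_BASE
	spinlock_t lock;	/* real lock, must be spin_lock_init()'d */
#endif
};

static inline void tagged_lock(struct tagged *t)
{
#ifndef CONFIG_PREEMPT_RT_BASE
	bit_spin_lock(MY_LOCK_BIT, &t->flags);
#else
	spin_lock(&t->lock);
#endif
}

static inline void tagged_unlock(struct tagged *t)
{
#ifndef CONFIG_PREEMPT_RT_BASE
	bit_spin_unlock(MY_LOCK_BIT, &t->flags);
#else
	spin_unlock(&t->lock);
#endif
}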
diff --git a/patches/mm-convert-swap-to-percpu-locked.patch b/patches/mm-convert-swap-to-percpu-locked.patch
new file mode 100644
index 0000000..0e8ac43
--- /dev/null
+++ b/patches/mm-convert-swap-to-percpu-locked.patch
@@ -0,0 +1,113 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:51 -0500
+Subject: mm: convert swap to percpu locked
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/swap.c |   30 ++++++++++++++++++------------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -30,6 +30,7 @@
+ #include <linux/backing-dev.h>
+ #include <linux/memcontrol.h>
+ #include <linux/gfp.h>
++#include <linux/locallock.h>
+ 
+ #include "internal.h"
+ 
+@@ -40,6 +41,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_
+ static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+ 
++static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
++static DEFINE_LOCAL_IRQ_LOCK(swap_lock);
++
+ /*
+  * This path almost never happens for VM activity - pages are normally
+  * freed via pagevecs.  But it gets used by networking.
+@@ -354,11 +358,11 @@ void rotate_reclaimable_page(struct page
+ 		unsigned long flags;
+ 
+ 		page_cache_get(page);
+-		local_irq_save(flags);
++		local_lock_irqsave(rotate_lock, flags);
+ 		pvec = &__get_cpu_var(lru_rotate_pvecs);
+ 		if (!pagevec_add(pvec, page))
+ 			pagevec_move_tail(pvec);
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(rotate_lock, flags);
+ 	}
+ }
+ 
+@@ -403,12 +407,13 @@ static void activate_page_drain(int cpu)
+ void activate_page(struct page *page)
+ {
+ 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+-		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
++		struct pagevec *pvec = &get_locked_var(swap_lock,
++						       activate_page_pvecs);
+ 
+ 		page_cache_get(page);
+ 		if (!pagevec_add(pvec, page))
+ 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
+-		put_cpu_var(activate_page_pvecs);
++		put_locked_var(swap_lock, activate_page_pvecs);
+ 	}
+ }
+ 
+@@ -456,13 +461,13 @@ EXPORT_SYMBOL(mark_page_accessed);
+  */
+ void __lru_cache_add(struct page *page, enum lru_list lru)
+ {
+-	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
++	struct pagevec *pvec = &get_locked_var(swap_lock, lru_add_pvecs)[lru];
+ 
+ 	page_cache_get(page);
+ 	if (!pagevec_space(pvec))
+ 		__pagevec_lru_add(pvec, lru);
+ 	pagevec_add(pvec, page);
+-	put_cpu_var(lru_add_pvecs);
++	put_locked_var(swap_lock, lru_add_pvecs);
+ }
+ EXPORT_SYMBOL(__lru_cache_add);
+ 
+@@ -597,9 +602,9 @@ void lru_add_drain_cpu(int cpu)
+ 		unsigned long flags;
+ 
+ 		/* No harm done if a racing interrupt already did this */
+-		local_irq_save(flags);
++		local_lock_irqsave(rotate_lock, flags);
+ 		pagevec_move_tail(pvec);
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(rotate_lock, flags);
+ 	}
+ 
+ 	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+@@ -627,18 +632,19 @@ void deactivate_page(struct page *page)
+ 		return;
+ 
+ 	if (likely(get_page_unless_zero(page))) {
+-		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
++		struct pagevec *pvec = &get_locked_var(swap_lock,
++						       lru_deactivate_pvecs);
+ 
+ 		if (!pagevec_add(pvec, page))
+ 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+-		put_cpu_var(lru_deactivate_pvecs);
++		put_locked_var(swap_lock, lru_deactivate_pvecs);
+ 	}
+ }
+ 
+ void lru_add_drain(void)
+ {
+-	lru_add_drain_cpu(get_cpu());
+-	put_cpu();
++	lru_add_drain_cpu(local_lock_cpu(swap_lock));
++	local_unlock_cpu(swap_lock);
+ }
+ 
+ static void lru_add_drain_per_cpu(struct work_struct *dummy)
diff --git a/patches/mm-enable-slub.patch b/patches/mm-enable-slub.patch
new file mode 100644
index 0000000..160b577
--- /dev/null
+++ b/patches/mm-enable-slub.patch
@@ -0,0 +1,402 @@
+Subject: mm: Enable SLUB for RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 25 Oct 2012 10:32:35 +0100
+
+Make SLUB RT aware and remove the restriction in Kconfig.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/slub_def.h |    2 
+ init/Kconfig             |    1 
+ mm/slub.c                |  119 +++++++++++++++++++++++++++++++++++------------
+ 3 files changed, 92 insertions(+), 30 deletions(-)
+
+--- a/include/linux/slub_def.h
++++ b/include/linux/slub_def.h
+@@ -54,7 +54,7 @@ struct kmem_cache_cpu {
+ };
+ 
+ struct kmem_cache_node {
+-	spinlock_t list_lock;	/* Protect partial list and nr_partial */
++	raw_spinlock_t list_lock;	/* Protect partial list and nr_partial */
+ 	unsigned long nr_partial;
+ 	struct list_head partial;
+ #ifdef CONFIG_SLUB_DEBUG
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1494,7 +1494,6 @@ config SLAB
+ 
+ config SLUB
+ 	bool "SLUB (Unqueued Allocator)"
+-	depends on !PREEMPT_RT_FULL
+ 	help
+ 	   SLUB is a slab allocator that minimizes cache line usage
+ 	   instead of managing queues of cached objects (SLAB approach).
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1070,7 +1070,7 @@ static noinline struct kmem_cache_node *
+ {
+ 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ 
+-	spin_lock_irqsave(&n->list_lock, *flags);
++	raw_spin_lock_irqsave(&n->list_lock, *flags);
+ 	slab_lock(page);
+ 
+ 	if (!check_slab(s, page))
+@@ -1118,7 +1118,7 @@ out:
+ 
+ fail:
+ 	slab_unlock(page);
+-	spin_unlock_irqrestore(&n->list_lock, *flags);
++	raw_spin_unlock_irqrestore(&n->list_lock, *flags);
+ 	slab_fix(s, "Object at 0x%p not freed", object);
+ 	return NULL;
+ }
+@@ -1253,6 +1253,12 @@ static inline void slab_free_hook(struct
+ 
+ #endif /* CONFIG_SLUB_DEBUG */
+ 
++struct slub_free_list {
++	raw_spinlock_t		lock;
++	struct list_head	list;
++};
++static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
++
+ /*
+  * Slab allocation and freeing
+  */
+@@ -1277,7 +1283,11 @@ static struct page *allocate_slab(struct
+ 
+ 	flags &= gfp_allowed_mask;
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++	if (system_state == SYSTEM_RUNNING)
++#else
+ 	if (flags & __GFP_WAIT)
++#endif
+ 		local_irq_enable();
+ 
+ 	flags |= s->allocflags;
+@@ -1317,7 +1327,11 @@ static struct page *allocate_slab(struct
+ 			kmemcheck_mark_unallocated_pages(page, pages);
+ 	}
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++	if (system_state == SYSTEM_RUNNING)
++#else
+ 	if (flags & __GFP_WAIT)
++#endif
+ 		local_irq_disable();
+ 	if (!page)
+ 		return NULL;
+@@ -1414,6 +1428,16 @@ static void __free_slab(struct kmem_cach
+ 	__free_memcg_kmem_pages(page, order);
+ }
+ 
++static void free_delayed(struct kmem_cache *s, struct list_head *h)
++{
++	while(!list_empty(h)) {
++		struct page *page = list_first_entry(h, struct page, lru);
++
++		list_del(&page->lru);
++		__free_slab(s, page);
++	}
++}
++
+ #define need_reserve_slab_rcu						\
+ 	(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
+ 
+@@ -1448,6 +1472,12 @@ static void free_slab(struct kmem_cache
+ 		}
+ 
+ 		call_rcu(head, rcu_free_slab);
++	} else if (irqs_disabled()) {
++		struct slub_free_list *f = &__get_cpu_var(slub_free_list);
++
++		raw_spin_lock(&f->lock);
++		list_add(&page->lru, &f->list);
++		raw_spin_unlock(&f->lock);
+ 	} else
+ 		__free_slab(s, page);
+ }
+@@ -1549,7 +1579,7 @@ static void *get_partial_node(struct kme
+ 	if (!n || !n->nr_partial)
+ 		return NULL;
+ 
+-	spin_lock(&n->list_lock);
++	raw_spin_lock(&n->list_lock);
+ 	list_for_each_entry_safe(page, page2, &n->partial, lru) {
+ 		void *t;
+ 		int available;
+@@ -1574,7 +1604,7 @@ static void *get_partial_node(struct kme
+ 			break;
+ 
+ 	}
+-	spin_unlock(&n->list_lock);
++	raw_spin_unlock(&n->list_lock);
+ 	return object;
+ }
+ 
+@@ -1816,7 +1846,7 @@ redo:
+ 			 * that acquire_slab() will see a slab page that
+ 			 * is frozen
+ 			 */
+-			spin_lock(&n->list_lock);
++			raw_spin_lock(&n->list_lock);
+ 		}
+ 	} else {
+ 		m = M_FULL;
+@@ -1827,7 +1857,7 @@ redo:
+ 			 * slabs from diagnostic functions will not see
+ 			 * any frozen slabs.
+ 			 */
+-			spin_lock(&n->list_lock);
++			raw_spin_lock(&n->list_lock);
+ 		}
+ 	}
+ 
+@@ -1862,7 +1892,7 @@ redo:
+ 		goto redo;
+ 
+ 	if (lock)
+-		spin_unlock(&n->list_lock);
++		raw_spin_unlock(&n->list_lock);
+ 
+ 	if (m == M_FREE) {
+ 		stat(s, DEACTIVATE_EMPTY);
+@@ -1893,10 +1923,10 @@ static void unfreeze_partials(struct kme
+ 		n2 = get_node(s, page_to_nid(page));
+ 		if (n != n2) {
+ 			if (n)
+-				spin_unlock(&n->list_lock);
++				raw_spin_unlock(&n->list_lock);
+ 
+ 			n = n2;
+-			spin_lock(&n->list_lock);
++			raw_spin_lock(&n->list_lock);
+ 		}
+ 
+ 		do {
+@@ -1925,7 +1955,7 @@ static void unfreeze_partials(struct kme
+ 	}
+ 
+ 	if (n)
+-		spin_unlock(&n->list_lock);
++		raw_spin_unlock(&n->list_lock);
+ 
+ 	while (discard_page) {
+ 		page = discard_page;
+@@ -1961,14 +1991,21 @@ static int put_cpu_partial(struct kmem_c
+ 			pobjects = oldpage->pobjects;
+ 			pages = oldpage->pages;
+ 			if (drain && pobjects > s->cpu_partial) {
++				struct slub_free_list *f;
+ 				unsigned long flags;
++				LIST_HEAD(tofree);
+ 				/*
+ 				 * partial array is full. Move the existing
+ 				 * set to the per node partial list.
+ 				 */
+ 				local_irq_save(flags);
+ 				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++				f = &__get_cpu_var(slub_free_list);
++				raw_spin_lock(&f->lock);
++				list_splice_init(&f->list, &tofree);
++				raw_spin_unlock(&f->lock);
+ 				local_irq_restore(flags);
++				free_delayed(s, &tofree);
+ 				oldpage = NULL;
+ 				pobjects = 0;
+ 				pages = 0;
+@@ -2031,7 +2068,22 @@ static bool has_cpu_slab(int cpu, void *
+ 
+ static void flush_all(struct kmem_cache *s)
+ {
++	LIST_HEAD(tofree);
++	int cpu;
++
+ 	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
++	for_each_online_cpu(cpu) {
++		struct slub_free_list *f;
++
++		if (!has_cpu_slab(cpu, s))
++			continue;
++
++		f = &per_cpu(slub_free_list, cpu);
++		raw_spin_lock_irq(&f->lock);
++		list_splice_init(&f->list, &tofree);
++		raw_spin_unlock_irq(&f->lock);
++		free_delayed(s, &tofree);
++	}
+ }
+ 
+ /*
+@@ -2059,10 +2111,10 @@ static unsigned long count_partial(struc
+ 	unsigned long x = 0;
+ 	struct page *page;
+ 
+-	spin_lock_irqsave(&n->list_lock, flags);
++	raw_spin_lock_irqsave(&n->list_lock, flags);
+ 	list_for_each_entry(page, &n->partial, lru)
+ 		x += get_count(page);
+-	spin_unlock_irqrestore(&n->list_lock, flags);
++	raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 	return x;
+ }
+ 
+@@ -2205,9 +2257,11 @@ static inline void *get_freelist(struct
+ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ 			  unsigned long addr, struct kmem_cache_cpu *c)
+ {
++	struct slub_free_list *f;
+ 	void *freelist;
+ 	struct page *page;
+ 	unsigned long flags;
++	LIST_HEAD(tofree);
+ 
+ 	local_irq_save(flags);
+ #ifdef CONFIG_PREEMPT
+@@ -2270,7 +2324,13 @@ load_freelist:
+ 	VM_BUG_ON(!c->page->frozen);
+ 	c->freelist = get_freepointer(s, freelist);
+ 	c->tid = next_tid(c->tid);
++out:
++	f = &__get_cpu_var(slub_free_list);
++	raw_spin_lock(&f->lock);
++	list_splice_init(&f->list, &tofree);
++	raw_spin_unlock(&f->lock);
+ 	local_irq_restore(flags);
++	free_delayed(s, &tofree);
+ 	return freelist;
+ 
+ new_slab:
+@@ -2288,9 +2348,7 @@ new_slab:
+ 	if (unlikely(!freelist)) {
+ 		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+ 			slab_out_of_memory(s, gfpflags, node);
+-
+-		local_irq_restore(flags);
+-		return NULL;
++		goto out;
+ 	}
+ 
+ 	page = c->page;
+@@ -2304,8 +2362,7 @@ new_slab:
+ 	deactivate_slab(s, page, get_freepointer(s, freelist));
+ 	c->page = NULL;
+ 	c->freelist = NULL;
+-	local_irq_restore(flags);
+-	return freelist;
++	goto out;
+ }
+ 
+ /*
+@@ -2477,7 +2534,7 @@ static void __slab_free(struct kmem_cach
+ 
+ 	do {
+ 		if (unlikely(n)) {
+-			spin_unlock_irqrestore(&n->list_lock, flags);
++			raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 			n = NULL;
+ 		}
+ 		prior = page->freelist;
+@@ -2507,7 +2564,7 @@ static void __slab_free(struct kmem_cach
+ 				 * Otherwise the list_lock will synchronize with
+ 				 * other processors updating the list of slabs.
+ 				 */
+-				spin_lock_irqsave(&n->list_lock, flags);
++				raw_spin_lock_irqsave(&n->list_lock, flags);
+ 
+ 			}
+ 		}
+@@ -2548,7 +2605,7 @@ static void __slab_free(struct kmem_cach
+ 		add_partial(n, page, DEACTIVATE_TO_TAIL);
+ 		stat(s, FREE_ADD_PARTIAL);
+ 	}
+-	spin_unlock_irqrestore(&n->list_lock, flags);
++	raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 	return;
+ 
+ slab_empty:
+@@ -2562,7 +2619,7 @@ slab_empty:
+ 		/* Slab must be on the full list */
+ 		remove_full(s, page);
+ 
+-	spin_unlock_irqrestore(&n->list_lock, flags);
++	raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 	stat(s, FREE_SLAB);
+ 	discard_slab(s, page);
+ }
+@@ -2764,7 +2821,7 @@ static void
+ init_kmem_cache_node(struct kmem_cache_node *n)
+ {
+ 	n->nr_partial = 0;
+-	spin_lock_init(&n->list_lock);
++	raw_spin_lock_init(&n->list_lock);
+ 	INIT_LIST_HEAD(&n->partial);
+ #ifdef CONFIG_SLUB_DEBUG
+ 	atomic_long_set(&n->nr_slabs, 0);
+@@ -3451,7 +3508,7 @@ int kmem_cache_shrink(struct kmem_cache
+ 		for (i = 0; i < objects; i++)
+ 			INIT_LIST_HEAD(slabs_by_inuse + i);
+ 
+-		spin_lock_irqsave(&n->list_lock, flags);
++		raw_spin_lock_irqsave(&n->list_lock, flags);
+ 
+ 		/*
+ 		 * Build lists indexed by the items in use in each slab.
+@@ -3472,7 +3529,7 @@ int kmem_cache_shrink(struct kmem_cache
+ 		for (i = objects - 1; i > 0; i--)
+ 			list_splice(slabs_by_inuse + i, n->partial.prev);
+ 
+-		spin_unlock_irqrestore(&n->list_lock, flags);
++		raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 
+ 		/* Release empty slabs */
+ 		list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
+@@ -3642,6 +3699,12 @@ void __init kmem_cache_init(void)
+ 		boot_kmem_cache_node;
+ 	int i;
+ 	int caches = 2;
++	int cpu;
++
++	for_each_possible_cpu(cpu) {
++		raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
++		INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
++	}
+ 
+ 	if (debug_guardpage_minorder())
+ 		slub_max_order = 0;
+@@ -4033,7 +4096,7 @@ static int validate_slab_node(struct kme
+ 	struct page *page;
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&n->list_lock, flags);
++	raw_spin_lock_irqsave(&n->list_lock, flags);
+ 
+ 	list_for_each_entry(page, &n->partial, lru) {
+ 		validate_slab_slab(s, page, map);
+@@ -4056,7 +4119,7 @@ static int validate_slab_node(struct kme
+ 			atomic_long_read(&n->nr_slabs));
+ 
+ out:
+-	spin_unlock_irqrestore(&n->list_lock, flags);
++	raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 	return count;
+ }
+ 
+@@ -4246,12 +4309,12 @@ static int list_locations(struct kmem_ca
+ 		if (!atomic_long_read(&n->nr_slabs))
+ 			continue;
+ 
+-		spin_lock_irqsave(&n->list_lock, flags);
++		raw_spin_lock_irqsave(&n->list_lock, flags);
+ 		list_for_each_entry(page, &n->partial, lru)
+ 			process_slab(&t, s, page, alloc, map);
+ 		list_for_each_entry(page, &n->full, lru)
+ 			process_slab(&t, s, page, alloc, map);
+-		spin_unlock_irqrestore(&n->list_lock, flags);
++		raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ 	}
+ 
+ 	for (i = 0; i < t.count; i++) {
diff --git a/patches/mm-make-vmstat-rt-aware.patch b/patches/mm-make-vmstat-rt-aware.patch
new file mode 100644
index 0000000..52da0b1
--- /dev/null
+++ b/patches/mm-make-vmstat-rt-aware.patch
@@ -0,0 +1,84 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:13 -0500
+Subject: [PATCH] mm: make vmstat -rt aware
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/vmstat.h |    4 ++++
+ mm/vmstat.c            |    6 ++++++
+ 2 files changed, 10 insertions(+)
+
+--- a/include/linux/vmstat.h
++++ b/include/linux/vmstat.h
+@@ -29,7 +29,9 @@ DECLARE_PER_CPU(struct vm_event_state, v
+ 
+ static inline void __count_vm_event(enum vm_event_item item)
+ {
++	preempt_disable_rt();
+ 	__this_cpu_inc(vm_event_states.event[item]);
++	preempt_enable_rt();
+ }
+ 
+ static inline void count_vm_event(enum vm_event_item item)
+@@ -39,7 +41,9 @@ static inline void count_vm_event(enum v
+ 
+ static inline void __count_vm_events(enum vm_event_item item, long delta)
+ {
++	preempt_disable_rt();
+ 	__this_cpu_add(vm_event_states.event[item], delta);
++	preempt_enable_rt();
+ }
+ 
+ static inline void count_vm_events(enum vm_event_item item, long delta)
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -216,6 +216,7 @@ void __mod_zone_page_state(struct zone *
+ 	long x;
+ 	long t;
+ 
++	preempt_disable_rt();
+ 	x = delta + __this_cpu_read(*p);
+ 
+ 	t = __this_cpu_read(pcp->stat_threshold);
+@@ -225,6 +226,7 @@ void __mod_zone_page_state(struct zone *
+ 		x = 0;
+ 	}
+ 	__this_cpu_write(*p, x);
++	preempt_enable_rt();
+ }
+ EXPORT_SYMBOL(__mod_zone_page_state);
+ 
+@@ -257,6 +259,7 @@ void __inc_zone_state(struct zone *zone,
+ 	s8 __percpu *p = pcp->vm_stat_diff + item;
+ 	s8 v, t;
+ 
++	preempt_disable_rt();
+ 	v = __this_cpu_inc_return(*p);
+ 	t = __this_cpu_read(pcp->stat_threshold);
+ 	if (unlikely(v > t)) {
+@@ -265,6 +268,7 @@ void __inc_zone_state(struct zone *zone,
+ 		zone_page_state_add(v + overstep, zone, item);
+ 		__this_cpu_write(*p, -overstep);
+ 	}
++	preempt_enable_rt();
+ }
+ 
+ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+@@ -279,6 +283,7 @@ void __dec_zone_state(struct zone *zone,
+ 	s8 __percpu *p = pcp->vm_stat_diff + item;
+ 	s8 v, t;
+ 
++	preempt_disable_rt();
+ 	v = __this_cpu_dec_return(*p);
+ 	t = __this_cpu_read(pcp->stat_threshold);
+ 	if (unlikely(v < - t)) {
+@@ -287,6 +292,7 @@ void __dec_zone_state(struct zone *zone,
+ 		zone_page_state_add(v - overstep, zone, item);
+ 		__this_cpu_write(*p, overstep);
+ 	}
++	preempt_enable_rt();
+ }
+ 
+ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
diff --git a/patches/mm-page-alloc-fix.patch b/patches/mm-page-alloc-fix.patch
new file mode 100644
index 0000000..2256353
--- /dev/null
+++ b/patches/mm-page-alloc-fix.patch
@@ -0,0 +1,22 @@
+Subject: mm-page-alloc-fix.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Jul 2011 16:47:49 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/page_alloc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2204,8 +2204,8 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+ 		struct page *page;
+ 
+ 		/* Page migration frees to the PCP lists but we want merging */
+-		drain_pages(get_cpu());
+-		put_cpu();
++		drain_pages(get_cpu_light());
++		put_cpu_light();
+ 
+ 		page = get_page_from_freelist(gfp_mask, nodemask,
+ 				order, zonelist, high_zoneidx,
diff --git a/patches/mm-page-alloc-use-list-last-entry.patch b/patches/mm-page-alloc-use-list-last-entry.patch
new file mode 100644
index 0000000..5704803
--- /dev/null
+++ b/patches/mm-page-alloc-use-list-last-entry.patch
@@ -0,0 +1,20 @@
+Subject: mm-page-alloc-use-list-last-entry.patch
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 21 Jun 2011 11:24:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/page_alloc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -658,7 +658,7 @@ static void free_pcppages_bulk(struct zo
+ 		do {
+ 			int mt;	/* migratetype of the to-be-freed page */
+ 
+-			page = list_entry(list->prev, struct page, lru);
++			page = list_last_entry(list, struct page, lru);
+ 			/* must delete as __free_one_page list manipulates */
+ 			list_del(&page->lru);
+ 			mt = get_freepage_migratetype(page);
diff --git a/patches/mm-page-alloc-use-local-lock-on-target-cpu.patch b/patches/mm-page-alloc-use-local-lock-on-target-cpu.patch
new file mode 100644
index 0000000..8d12bf7
--- /dev/null
+++ b/patches/mm-page-alloc-use-local-lock-on-target-cpu.patch
@@ -0,0 +1,55 @@
+Subject: mm: page_alloc: Use local_lock_on() instead of plain spinlock
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 27 Sep 2012 11:11:46 +0200
+
+The plain spinlock, while sufficient, does not update the local_lock
+internals. Use a proper local_lock function instead to ease debugging.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/locallock.h |   11 +++++++++++
+ mm/page_alloc.c           |    4 ++--
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/include/linux/locallock.h
++++ b/include/linux/locallock.h
+@@ -137,6 +137,12 @@ static inline int __local_lock_irqsave(s
+ 		_flags = __get_cpu_var(lvar).flags;			\
+ 	} while (0)
+ 
++#define local_lock_irqsave_on(lvar, _flags, cpu)			\
++	do {								\
++		__local_lock_irqsave(&per_cpu(lvar, cpu));		\
++		_flags = per_cpu(lvar, cpu).flags;			\
++	} while (0)
++
+ static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
+ 					    unsigned long flags)
+ {
+@@ -156,6 +162,11 @@ static inline int __local_unlock_irqrest
+ 			put_local_var(lvar);				\
+ 	} while (0)
+ 
++#define local_unlock_irqrestore_on(lvar, flags, cpu)			\
++	do {								\
++		__local_unlock_irqrestore(&per_cpu(lvar, cpu), flags);	\
++	} while (0)
++
+ #define local_spin_trylock_irq(lvar, lock)				\
+ 	({								\
+ 		int __locked;						\
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -224,9 +224,9 @@ static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+ 
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ # define cpu_lock_irqsave(cpu, flags)		\
+-	spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags)
++	local_lock_irqsave_on(pa_lock, flags, cpu)
+ # define cpu_unlock_irqrestore(cpu, flags)	\
+-	spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags)
++	local_unlock_irqrestore_on(pa_lock, flags, cpu)
+ #else
+ # define cpu_lock_irqsave(cpu, flags)		local_irq_save(flags)
+ # define cpu_unlock_irqrestore(cpu, flags)	local_irq_restore(flags)
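A usage sketch of the new *_on() helpers with a hypothetical local lock and per-CPU counter (the locallock machinery is RT-specific; on !RT, page_alloc.c keeps using plain local_irq_save()/restore()):

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(stats_lock);
static DEFINE_PER_CPU(unsigned long, stats_counter);

/* Bump another CPU's counter while holding that CPU's lock instance. */
static void bump_counter_on(int cpu)
{
	unsigned long flags;

	local_lock_irqsave_on(stats_lock, flags, cpu);
	per_cpu(stats_counter, cpu)++;
	local_unlock_irqrestore_on(stats_lock, flags, cpu);
}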
diff --git a/patches/mm-page_alloc-reduce-lock-sections-further.patch b/patches/mm-page_alloc-reduce-lock-sections-further.patch
new file mode 100644
index 0000000..072387c
--- /dev/null
+++ b/patches/mm-page_alloc-reduce-lock-sections-further.patch
@@ -0,0 +1,219 @@
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 3 Jul 2009 08:44:37 -0500
+Subject: mm: page_alloc reduce lock sections further
+
+Split out the pages which are to be freed into a separate list and
+call free_pages_bulk() outside of the percpu page allocator locks.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/page_alloc.c |   98 +++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 69 insertions(+), 29 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -625,7 +625,7 @@ static inline int free_pages_check(struc
+ }
+ 
+ /*
+- * Frees a number of pages from the PCP lists
++ * Frees a number of pages which have been collected from the pcp lists.
+  * Assumes all pages on list are in same zone, and of same order.
+  * count is the number of pages to free.
+  *
+@@ -636,16 +636,50 @@ static inline int free_pages_check(struc
+  * pinned" detection logic.
+  */
+ static void free_pcppages_bulk(struct zone *zone, int count,
+-					struct per_cpu_pages *pcp)
++			       struct list_head *list)
+ {
+-	int migratetype = 0;
+-	int batch_free = 0;
+ 	int to_free = count;
++	unsigned long flags;
+ 
+-	spin_lock(&zone->lock);
++	spin_lock_irqsave(&zone->lock, flags);
+ 	zone->all_unreclaimable = 0;
+ 	zone->pages_scanned = 0;
+ 
++	while (!list_empty(list)) {
++		struct page *page = list_first_entry(list, struct page, lru);
++		int mt;	/* migratetype of the to-be-freed page */
++
++		/* must delete as __free_one_page list manipulates */
++		list_del(&page->lru);
++
++		mt = get_freepage_migratetype(page);
++		/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
++		__free_one_page(page, zone, 0, mt);
++		trace_mm_page_pcpu_drain(page, 0, mt);
++		if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
++			__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
++			if (is_migrate_cma(mt))
++				__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
++		}
++
++		to_free--;
++	}
++	WARN_ON(to_free != 0);
++	spin_unlock_irqrestore(&zone->lock, flags);
++}
++
++/*
++ * Moves a number of pages from the PCP lists to free list which
++ * is freed outside of the locked region.
++ *
++ * Assumes all pages on list are in same zone, and of same order.
++ * count is the number of pages to free.
++ */
++static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
++			      struct list_head *dst)
++{
++	int migratetype = 0, batch_free = 0;
++
+ 	while (to_free) {
+ 		struct page *page;
+ 		struct list_head *list;
+@@ -661,7 +695,7 @@ static void free_pcppages_bulk(struct zo
+ 			batch_free++;
+ 			if (++migratetype == MIGRATE_PCPTYPES)
+ 				migratetype = 0;
+-			list = &pcp->lists[migratetype];
++			list = &src->lists[migratetype];
+ 		} while (list_empty(list));
+ 
+ 		/* This is the only non-empty list. Free them all. */
+@@ -669,36 +703,26 @@ static void free_pcppages_bulk(struct zo
+ 			batch_free = to_free;
+ 
+ 		do {
+-			int mt;	/* migratetype of the to-be-freed page */
+-
+ 			page = list_last_entry(list, struct page, lru);
+-			/* must delete as __free_one_page list manipulates */
+ 			list_del(&page->lru);
+-			mt = get_freepage_migratetype(page);
+-			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+-			__free_one_page(page, zone, 0, mt);
+-			trace_mm_page_pcpu_drain(page, 0, mt);
+-			if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+-				__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
+-				if (is_migrate_cma(mt))
+-					__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
+-			}
++			list_add(&page->lru, dst);
+ 		} while (--to_free && --batch_free && !list_empty(list));
+ 	}
+-	spin_unlock(&zone->lock);
+ }
+ 
+ static void free_one_page(struct zone *zone, struct page *page, int order,
+ 				int migratetype)
+ {
+-	spin_lock(&zone->lock);
++	unsigned long flags;
++
++	spin_lock_irqsave(&zone->lock, flags);
+ 	zone->all_unreclaimable = 0;
+ 	zone->pages_scanned = 0;
+ 
+ 	__free_one_page(page, zone, order, migratetype);
+ 	if (unlikely(migratetype != MIGRATE_ISOLATE))
+ 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
+-	spin_unlock(&zone->lock);
++	spin_unlock_irqrestore(&zone->lock, flags);
+ }
+ 
+ static bool free_pages_prepare(struct page *page, unsigned int order)
+@@ -1180,6 +1204,7 @@ static int rmqueue_bulk(struct zone *zon
+ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
+ {
+ 	unsigned long flags;
++	LIST_HEAD(dst);
+ 	int to_drain;
+ 
+ 	local_lock_irqsave(pa_lock, flags);
+@@ -1188,10 +1213,11 @@ void drain_zone_pages(struct zone *zone,
+ 	else
+ 		to_drain = pcp->count;
+ 	if (to_drain > 0) {
+-		free_pcppages_bulk(zone, to_drain, pcp);
++		isolate_pcp_pages(to_drain, pcp, &dst);
+ 		pcp->count -= to_drain;
+ 	}
+ 	local_unlock_irqrestore(pa_lock, flags);
++	free_pcppages_bulk(zone, to_drain, &dst);
+ }
+ #endif
+ 
+@@ -1210,16 +1236,21 @@ static void drain_pages(unsigned int cpu
+ 	for_each_populated_zone(zone) {
+ 		struct per_cpu_pageset *pset;
+ 		struct per_cpu_pages *pcp;
++		LIST_HEAD(dst);
++		int count;
+ 
+ 		cpu_lock_irqsave(cpu, flags);
+ 		pset = per_cpu_ptr(zone->pageset, cpu);
+ 
+ 		pcp = &pset->pcp;
+-		if (pcp->count) {
+-			free_pcppages_bulk(zone, pcp->count, pcp);
++		count = pcp->count;
++		if (count) {
++			isolate_pcp_pages(count, pcp, &dst);
+ 			pcp->count = 0;
+ 		}
+ 		cpu_unlock_irqrestore(cpu, flags);
++		if (count)
++			free_pcppages_bulk(zone, count, &dst);
+ 	}
+ }
+ 
+@@ -1357,8 +1388,15 @@ void free_hot_cold_page(struct page *pag
+ 		list_add(&page->lru, &pcp->lists[migratetype]);
+ 	pcp->count++;
+ 	if (pcp->count >= pcp->high) {
+-		free_pcppages_bulk(zone, pcp->batch, pcp);
++		LIST_HEAD(dst);
++		int count;
++
++		isolate_pcp_pages(pcp->batch, pcp, &dst);
+ 		pcp->count -= pcp->batch;
++		count = pcp->batch;
++		local_unlock_irqrestore(pa_lock, flags);
++		free_pcppages_bulk(zone, count, &dst);
++		return;
+ 	}
+ 
+ out:
+@@ -5977,20 +6015,22 @@ static int __meminit __zone_pcp_update(v
+ {
+ 	struct zone *zone = data;
+ 	int cpu;
+-	unsigned long batch = zone_batchsize(zone), flags;
++	unsigned long flags;
+ 
+ 	for_each_possible_cpu(cpu) {
+ 		struct per_cpu_pageset *pset;
+ 		struct per_cpu_pages *pcp;
++		LIST_HEAD(dst);
+ 
+ 		pset = per_cpu_ptr(zone->pageset, cpu);
+ 		pcp = &pset->pcp;
+ 
+ 		cpu_lock_irqsave(cpu, flags);
+-		if (pcp->count > 0)
+-			free_pcppages_bulk(zone, pcp->count, pcp);
++		if (pcp->count > 0) {
++			isolate_pcp_pages(pcp->count, pcp, &dst);
++			free_pcppages_bulk(zone, pcp->count, &dst);
++		}
+ 		drain_zonestat(zone, pset);
+-		setup_pageset(pset, batch);
+ 		cpu_unlock_irqrestore(cpu, flags);
+ 	}
+ 	return 0;
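
The pattern introduced above is isolate-then-free: isolate_pcp_pages() moves
pages onto a private list while the lock is held, and free_pcppages_bulk()
does the per-page work only after the lock has been dropped, so the locked
section stays short. A hedged userspace sketch of that pattern with a plain
linked list and a mutex; all names below are invented for illustration:

/*
 * Userspace sketch of the isolate-then-free pattern used above:
 * detach items onto a private list under the lock, process them unlocked.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
        struct item *next;
        int id;
};

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *pool;               /* shared list, protected by pool_lock */

/* Move up to @count items onto @dst while holding the lock (cheap). */
static int isolate_items(int count, struct item **dst)
{
        int moved = 0;

        pthread_mutex_lock(&pool_lock);
        while (pool && moved < count) {
                struct item *it = pool;

                pool = it->next;
                it->next = *dst;
                *dst = it;
                moved++;
        }
        pthread_mutex_unlock(&pool_lock);
        return moved;
}

/* Do the expensive per-item work outside the locked section. */
static void free_items(struct item *list)
{
        while (list) {
                struct item *next = list->next;

                printf("freeing item %d\n", list->id);
                free(list);
                list = next;
        }
}

int main(void)
{
        struct item *batch = NULL;
        int i;

        for (i = 0; i < 5; i++) {
                struct item *it = malloc(sizeof(*it));

                it->id = i;
                it->next = pool;
                pool = it;
        }

        while (isolate_items(2, &batch)) {      /* short critical sections */
                free_items(batch);              /* long work, no lock held */
                batch = NULL;
        }
        return 0;
}
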
diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
new file mode 100644
index 0000000..6dcb233
--- /dev/null
+++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
@@ -0,0 +1,213 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:37 -0500
+Subject: mm: page_alloc: rt-friendly per-cpu pages
+
+rt-friendly per-cpu pages: convert the irqs-off per-cpu locking
+method into a preemptible, explicit-per-cpu-locks method.
+
+Contains fixes from:
+	 Peter Zijlstra <a.p.zijlstra@chello.nl>
+	 Thomas Gleixner <tglx@linutronix.de>
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/page_alloc.c |   57 ++++++++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 39 insertions(+), 18 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -58,6 +58,7 @@
+ #include <linux/prefetch.h>
+ #include <linux/migrate.h>
+ #include <linux/page-debug-flags.h>
++#include <linux/locallock.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -219,6 +220,18 @@ EXPORT_SYMBOL(nr_node_ids);
+ EXPORT_SYMBOL(nr_online_nodes);
+ #endif
+ 
++static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
++
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define cpu_lock_irqsave(cpu, flags)		\
++	spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags)
++# define cpu_unlock_irqrestore(cpu, flags)	\
++	spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags)
++#else
++# define cpu_lock_irqsave(cpu, flags)		local_irq_save(flags)
++# define cpu_unlock_irqrestore(cpu, flags)	local_irq_restore(flags)
++#endif
++
+ int page_group_by_mobility_disabled __read_mostly;
+ 
+ void set_pageblock_migratetype(struct page *page, int migratetype)
+@@ -722,12 +735,12 @@ static void __free_pages_ok(struct page
+ 	if (!free_pages_prepare(page, order))
+ 		return;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(pa_lock, flags);
+ 	__count_vm_events(PGFREE, 1 << order);
+ 	migratetype = get_pageblock_migratetype(page);
+ 	set_freepage_migratetype(page, migratetype);
+ 	free_one_page(page_zone(page), page, order, migratetype);
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(pa_lock, flags);
+ }
+ 
+ /*
+@@ -1169,7 +1182,7 @@ void drain_zone_pages(struct zone *zone,
+ 	unsigned long flags;
+ 	int to_drain;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(pa_lock, flags);
+ 	if (pcp->count >= pcp->batch)
+ 		to_drain = pcp->batch;
+ 	else
+@@ -1178,7 +1191,7 @@ void drain_zone_pages(struct zone *zone,
+ 		free_pcppages_bulk(zone, to_drain, pcp);
+ 		pcp->count -= to_drain;
+ 	}
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(pa_lock, flags);
+ }
+ #endif
+ 
+@@ -1198,7 +1211,7 @@ static void drain_pages(unsigned int cpu
+ 		struct per_cpu_pageset *pset;
+ 		struct per_cpu_pages *pcp;
+ 
+-		local_irq_save(flags);
++		cpu_lock_irqsave(cpu, flags);
+ 		pset = per_cpu_ptr(zone->pageset, cpu);
+ 
+ 		pcp = &pset->pcp;
+@@ -1206,7 +1219,7 @@ static void drain_pages(unsigned int cpu
+ 			free_pcppages_bulk(zone, pcp->count, pcp);
+ 			pcp->count = 0;
+ 		}
+-		local_irq_restore(flags);
++		cpu_unlock_irqrestore(cpu, flags);
+ 	}
+ }
+ 
+@@ -1259,7 +1272,12 @@ void drain_all_pages(void)
+ 		else
+ 			cpumask_clear_cpu(cpu, &cpus_with_pcps);
+ 	}
++#ifndef CONFIG_PREEMPT_RT_BASE
+ 	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
++#else
++	for_each_cpu(cpu, &cpus_with_pcps)
++		drain_pages(cpu);
++#endif
+ }
+ 
+ #ifdef CONFIG_HIBERNATION
+@@ -1314,7 +1332,7 @@ void free_hot_cold_page(struct page *pag
+ 
+ 	migratetype = get_pageblock_migratetype(page);
+ 	set_freepage_migratetype(page, migratetype);
+-	local_irq_save(flags);
++	local_lock_irqsave(pa_lock, flags);
+ 	__count_vm_event(PGFREE);
+ 
+ 	/*
+@@ -1344,7 +1362,7 @@ void free_hot_cold_page(struct page *pag
+ 	}
+ 
+ out:
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(pa_lock, flags);
+ }
+ 
+ /*
+@@ -1473,7 +1491,7 @@ again:
+ 		struct per_cpu_pages *pcp;
+ 		struct list_head *list;
+ 
+-		local_irq_save(flags);
++		local_lock_irqsave(pa_lock, flags);
+ 		pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ 		list = &pcp->lists[migratetype];
+ 		if (list_empty(list)) {
+@@ -1505,18 +1523,20 @@ again:
+ 			 */
+ 			WARN_ON_ONCE(order > 1);
+ 		}
+-		spin_lock_irqsave(&zone->lock, flags);
++		local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
+ 		page = __rmqueue(zone, order, migratetype);
+-		spin_unlock(&zone->lock);
+-		if (!page)
++		if (!page) {
++			spin_unlock(&zone->lock);
+ 			goto failed;
++		}
+ 		__mod_zone_freepage_state(zone, -(1 << order),
+ 					  get_pageblock_migratetype(page));
++		spin_unlock(&zone->lock);
+ 	}
+ 
+ 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
+ 	zone_statistics(preferred_zone, zone, gfp_flags);
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(pa_lock, flags);
+ 
+ 	VM_BUG_ON(bad_range(zone, page));
+ 	if (prep_new_page(page, order, gfp_flags))
+@@ -1524,7 +1544,7 @@ again:
+ 	return page;
+ 
+ failed:
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(pa_lock, flags);
+ 	return NULL;
+ }
+ 
+@@ -5133,6 +5153,7 @@ static int page_alloc_cpu_notify(struct
+ void __init page_alloc_init(void)
+ {
+ 	hotcpu_notifier(page_alloc_cpu_notify, 0);
++	local_irq_lock_init(pa_lock);
+ }
+ 
+ /*
+@@ -5965,12 +5986,12 @@ static int __meminit __zone_pcp_update(v
+ 		pset = per_cpu_ptr(zone->pageset, cpu);
+ 		pcp = &pset->pcp;
+ 
+-		local_irq_save(flags);
++		cpu_lock_irqsave(cpu, flags);
+ 		if (pcp->count > 0)
+ 			free_pcppages_bulk(zone, pcp->count, pcp);
+ 		drain_zonestat(zone, pset);
+ 		setup_pageset(pset, batch);
+-		local_irq_restore(flags);
++		cpu_unlock_irqrestore(cpu, flags);
+ 	}
+ 	return 0;
+ }
+@@ -5988,7 +6009,7 @@ void zone_pcp_reset(struct zone *zone)
+ 	struct per_cpu_pageset *pset;
+ 
+ 	/* avoid races with drain_pages()  */
+-	local_irq_save(flags);
++	local_lock_irqsave(pa_lock, flags);
+ 	if (zone->pageset != &boot_pageset) {
+ 		for_each_online_cpu(cpu) {
+ 			pset = per_cpu_ptr(zone->pageset, cpu);
+@@ -5997,7 +6018,7 @@ void zone_pcp_reset(struct zone *zone)
+ 		free_percpu(zone->pageset);
+ 		zone->pageset = &boot_pageset;
+ 	}
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(pa_lock, flags);
+ }
+ 
+ #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/patches/mm-prepare-pf-disable-discoupling.patch b/patches/mm-prepare-pf-disable-discoupling.patch
new file mode 100644
index 0000000..d3a5458
--- /dev/null
+++ b/patches/mm-prepare-pf-disable-discoupling.patch
@@ -0,0 +1,118 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:37 -0500
+Subject: mm: Prepare decoupling the page fault disabling logic
+
+Add a pagefault_disabled variable to task_struct to allow decoupling
+the pagefault-disabled logic from the preempt count.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/sched.h   |    1 +
+ include/linux/uaccess.h |   33 +++------------------------------
+ kernel/fork.c           |    1 +
+ mm/memory.c             |   29 +++++++++++++++++++++++++++++
+ 4 files changed, 34 insertions(+), 30 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1452,6 +1452,7 @@ struct task_struct {
+ 	/* mutex deadlock detection */
+ 	struct mutex_waiter *blocked_on;
+ #endif
++	int pagefault_disabled;
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ 	unsigned int irq_events;
+ 	unsigned long hardirq_enable_ip;
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -6,37 +6,10 @@
+ 
+ /*
+  * These routines enable/disable the pagefault handler in that
+- * it will not take any locks and go straight to the fixup table.
+- *
+- * They have great resemblance to the preempt_disable/enable calls
+- * and in fact they are identical; this is because currently there is
+- * no other way to make the pagefault handlers do this. So we do
+- * disable preemption but we don't necessarily care about that.
++ * it will not take any MM locks and go straight to the fixup table.
+  */
+-static inline void pagefault_disable(void)
+-{
+-	inc_preempt_count();
+-	/*
+-	 * make sure to have issued the store before a pagefault
+-	 * can hit.
+-	 */
+-	barrier();
+-}
+-
+-static inline void pagefault_enable(void)
+-{
+-	/*
+-	 * make sure to issue those last loads/stores before enabling
+-	 * the pagefault handler again.
+-	 */
+-	barrier();
+-	dec_preempt_count();
+-	/*
+-	 * make sure we do..
+-	 */
+-	barrier();
+-	preempt_check_resched();
+-}
++extern void pagefault_disable(void);
++extern void pagefault_enable(void);
+ 
+ #ifndef ARCH_HAS_NOCACHE_UACCESS
+ 
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1285,6 +1285,7 @@ static struct task_struct *copy_process(
+ 	p->hardirq_context = 0;
+ 	p->softirq_context = 0;
+ #endif
++	p->pagefault_disabled = 0;
+ #ifdef CONFIG_LOCKDEP
+ 	p->lockdep_depth = 0; /* no locks held yet */
+ 	p->curr_chain_key = 0;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3669,6 +3669,35 @@ unlock:
+ 	return 0;
+ }
+ 
++void pagefault_disable(void)
++{
++	inc_preempt_count();
++	current->pagefault_disabled++;
++	/*
++	 * make sure to have issued the store before a pagefault
++	 * can hit.
++	 */
++	barrier();
++}
++EXPORT_SYMBOL(pagefault_disable);
++
++void pagefault_enable(void)
++{
++	/*
++	 * make sure to issue those last loads/stores before enabling
++	 * the pagefault handler again.
++	 */
++	barrier();
++	current->pagefault_disabled--;
++	dec_preempt_count();
++	/*
++	 * make sure we do..
++	 */
++	barrier();
++	preempt_check_resched();
++}
++EXPORT_SYMBOL(pagefault_enable);
++
+ /*
+  * By the time we get here, we already hold the mm semaphore
+  */
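
The decoupling works because pagefault_disable()/pagefault_enable() now keep
a per-task counter next to the preempt count, which lets a later patch drop
the preempt-count manipulation and lets the fault path test the counter
instead. A minimal userspace model with a thread-local depth counter and
compiler barriers; the names mirror the kernel ones but this is only an
illustrative sketch, not the kernel implementation:

/*
 * Userspace model of a per-task pagefault-disable depth counter, as
 * introduced by the patch above. Illustrative only, not the kernel code.
 */
#include <stdio.h>

#define barrier()       __asm__ __volatile__("" ::: "memory")

static __thread int pagefault_disabled;         /* one counter per "task" */

static void pagefault_disable(void)
{
        pagefault_disabled++;
        /* make the store visible before a fault could be taken */
        barrier();
}

static void pagefault_enable(void)
{
        /* finish the faulting-context accesses first */
        barrier();
        pagefault_disabled--;
}

/* What a fault handler would test instead of the preempt count. */
static int faults_allowed(void)
{
        return pagefault_disabled == 0;
}

int main(void)
{
        pagefault_disable();
        printf("faults allowed: %d\n", faults_allowed());       /* 0 */
        pagefault_enable();
        printf("faults allowed: %d\n", faults_allowed());       /* 1 */
        return 0;
}
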
diff --git a/patches/mm-protect-activate-switch-mm.patch b/patches/mm-protect-activate-switch-mm.patch
new file mode 100644
index 0000000..dcaa01f
--- /dev/null
+++ b/patches/mm-protect-activate-switch-mm.patch
@@ -0,0 +1,69 @@
+Subject:  mm: Protect activate_mm() by preempt_[disable&enable]_rt()
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Tue, 15 May 2012 13:53:56 +0800
+
+Use preempt_*_rt instead of local_irq_*_rt, otherwise there will be a
+warning on ARM like the one below:
+
+WARNING: at build/linux/kernel/smp.c:459 smp_call_function_many+0x98/0x264()
+Modules linked in:
+[<c0013bb4>] (unwind_backtrace+0x0/0xe4) from [<c001be94>] (warn_slowpath_common+0x4c/0x64)
+[<c001be94>] (warn_slowpath_common+0x4c/0x64) from [<c001bec4>] (warn_slowpath_null+0x18/0x1c)
+[<c001bec4>] (warn_slowpath_null+0x18/0x1c) from [<c0053ff8>](smp_call_function_many+0x98/0x264)
+[<c0053ff8>] (smp_call_function_many+0x98/0x264) from [<c0054364>] (smp_call_function+0x44/0x6c)
+[<c0054364>] (smp_call_function+0x44/0x6c) from [<c0017d50>] (__new_context+0xbc/0x124)
+[<c0017d50>] (__new_context+0xbc/0x124) from [<c009e49c>] (flush_old_exec+0x460/0x5e4)
+[<c009e49c>] (flush_old_exec+0x460/0x5e4) from [<c00d61ac>] (load_elf_binary+0x2e0/0x11ac)
+[<c00d61ac>] (load_elf_binary+0x2e0/0x11ac) from [<c009d060>] (search_binary_handler+0x94/0x2a4)
+[<c009d060>] (search_binary_handler+0x94/0x2a4) from [<c009e8fc>] (do_execve+0x254/0x364)
+[<c009e8fc>] (do_execve+0x254/0x364) from [<c0010e84>] (sys_execve+0x34/0x54)
+[<c0010e84>] (sys_execve+0x34/0x54) from [<c000da00>] (ret_fast_syscall+0x0/0x30)
+---[ end trace 0000000000000002 ]---
+
+The reason is that ARM needs irqs enabled when doing activate_mm().
+According to mm-protect-activate-switch-mm.patch,
+preempt_[disable|enable]_rt() is actually sufficient.
+
+Inspired-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1337061236-1766-1-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/exec.c        |    2 ++
+ mm/mmu_context.c |    2 ++
+ 2 files changed, 4 insertions(+)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -827,10 +827,12 @@ static int exec_mmap(struct mm_struct *m
+ 		}
+ 	}
+ 	task_lock(tsk);
++	preempt_disable_rt();
+ 	active_mm = tsk->active_mm;
+ 	tsk->mm = mm;
+ 	tsk->active_mm = mm;
+ 	activate_mm(active_mm, mm);
++	preempt_enable_rt();
+ 	task_unlock(tsk);
+ 	arch_pick_mmap_layout(mm);
+ 	if (old_mm) {
+--- a/mm/mmu_context.c
++++ b/mm/mmu_context.c
+@@ -26,6 +26,7 @@ void use_mm(struct mm_struct *mm)
+ 	struct task_struct *tsk = current;
+ 
+ 	task_lock(tsk);
++	preempt_disable_rt();
+ 	active_mm = tsk->active_mm;
+ 	if (active_mm != mm) {
+ 		atomic_inc(&mm->mm_count);
+@@ -33,6 +34,7 @@ void use_mm(struct mm_struct *mm)
+ 	}
+ 	tsk->mm = mm;
+ 	switch_mm(active_mm, mm, tsk);
++	preempt_enable_rt();
+ 	task_unlock(tsk);
+ 
+ 	if (active_mm != mm)
diff --git a/patches/mm-remove-preempt-count-from-pf.patch b/patches/mm-remove-preempt-count-from-pf.patch
new file mode 100644
index 0000000..a7ca61d
--- /dev/null
+++ b/patches/mm-remove-preempt-count-from-pf.patch
@@ -0,0 +1,34 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 25 Jul 2009 22:06:27 +0200
+Subject: mm: Remove preempt count from pagefault disable/enable
+
+Now that all users are cleaned up, we can remove the preemption count.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/memory.c |    7 -------
+ 1 file changed, 7 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3672,7 +3672,6 @@ unlock:
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ void pagefault_disable(void)
+ {
+-	inc_preempt_count();
+ 	current->pagefault_disabled++;
+ 	/*
+ 	 * make sure to have issued the store before a pagefault
+@@ -3690,12 +3689,6 @@ void pagefault_enable(void)
+ 	 */
+ 	barrier();
+ 	current->pagefault_disabled--;
+-	dec_preempt_count();
+-	/*
+-	 * make sure we do..
+-	 */
+-	barrier();
+-	preempt_check_resched();
+ }
+ EXPORT_SYMBOL(pagefault_enable);
+ #endif
diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch
new file mode 100644
index 0000000..6345236
--- /dev/null
+++ b/patches/mm-rt-kmap-atomic-scheduling.patch
@@ -0,0 +1,274 @@
+Subject: mm, rt: kmap_atomic scheduling
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 28 Jul 2011 10:43:51 +0200
+
+In fact, with migrate_disable() available one could play games with
+kmap_atomic. You could save/restore the kmap_atomic slots on context
+switch (if there are any in use, of course); this should be especially
+easy now that we have a kmap_atomic stack.
+
+Something like the below. It wants replacing all the preempt_disable()
+stuff with pagefault_disable() && migrate_disable() of course, but then
+you can flip kmaps around as shown below.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+[dvhart@linux.intel.com: build fix]
+Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins
+
+[tglx@linutronix.de: Get rid of the per cpu variable and store the idx
+		     and the pte content right away in the task struct.
+		     Shortens the context switch code. ]
+---
+ arch/x86/kernel/process_32.c |   32 ++++++++++++++++++++++++++++++++
+ arch/x86/mm/highmem_32.c     |    9 ++++++++-
+ arch/x86/mm/iomap_32.c       |    9 ++++++++-
+ include/linux/highmem.h      |   27 +++++++++++++++++++++++----
+ include/linux/sched.h        |    7 +++++++
+ mm/highmem.c                 |    6 ++++--
+ mm/memory.c                  |    2 ++
+ 7 files changed, 84 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -36,6 +36,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/io.h>
+ #include <linux/kdebug.h>
++#include <linux/highmem.h>
+ 
+ #include <asm/pgtable.h>
+ #include <asm/ldt.h>
+@@ -216,6 +217,35 @@ start_thread(struct pt_regs *regs, unsig
+ }
+ EXPORT_SYMBOL_GPL(start_thread);
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
++{
++	int i;
++
++	/*
++	 * Clear @prev's kmap_atomic mappings
++	 */
++	for (i = 0; i < prev_p->kmap_idx; i++) {
++		int idx = i + KM_TYPE_NR * smp_processor_id();
++		pte_t *ptep = kmap_pte - idx;
++
++		kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
++	}
++	/*
++	 * Restore @next_p's kmap_atomic mappings
++	 */
++	for (i = 0; i < next_p->kmap_idx; i++) {
++		int idx = i + KM_TYPE_NR * smp_processor_id();
++
++		if (!pte_none(next_p->kmap_pte[i]))
++			set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
++	}
++}
++#else
++static inline void
++switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
++#endif
++
+ 
+ /*
+  *	switch_to(x,y) should switch tasks from x to y.
+@@ -295,6 +325,8 @@ __switch_to(struct task_struct *prev_p,
+ 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
+ 		__switch_to_xtra(prev_p, next_p, tss);
+ 
++	switch_kmaps(prev_p, next_p);
++
+ 	/*
+ 	 * Leave lazy mode, flushing any hypercalls made here.
+ 	 * This must be done before restoring TLS segments so
+--- a/arch/x86/mm/highmem_32.c
++++ b/arch/x86/mm/highmem_32.c
+@@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap);
+  */
+ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+ {
++	pte_t pte = mk_pte(page, prot);
+ 	unsigned long vaddr;
+ 	int idx, type;
+ 
+@@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page
+ 	idx = type + KM_TYPE_NR*smp_processor_id();
+ 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ 	BUG_ON(!pte_none(*(kmap_pte-idx)));
+-	set_pte(kmap_pte-idx, mk_pte(page, prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++	current->kmap_pte[type] = pte;
++#endif
++	set_pte(kmap_pte-idx, pte);
+ 	arch_flush_lazy_mmu_mode();
+ 
+ 	return (void *)vaddr;
+@@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr)
+ 		 * is a bad idea also, in case the page changes cacheability
+ 		 * attributes or becomes a protected page in a hypervisor.
+ 		 */
++#ifdef CONFIG_PREEMPT_RT_FULL
++		current->kmap_pte[type] = __pte(0);
++#endif
+ 		kpte_clear_flush(kmap_pte-idx, vaddr);
+ 		kmap_atomic_idx_pop();
+ 		arch_flush_lazy_mmu_mode();
+--- a/arch/x86/mm/iomap_32.c
++++ b/arch/x86/mm/iomap_32.c
+@@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
+ 
+ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
+ {
++	pte_t pte = pfn_pte(pfn, prot);
+ 	unsigned long vaddr;
+ 	int idx, type;
+ 
+@@ -64,7 +65,10 @@ void *kmap_atomic_prot_pfn(unsigned long
+ 	type = kmap_atomic_idx_push();
+ 	idx = type + KM_TYPE_NR * smp_processor_id();
+ 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+-	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++	current->kmap_pte[type] = pte;
++#endif
++	set_pte(kmap_pte - idx, pte);
+ 	arch_flush_lazy_mmu_mode();
+ 
+ 	return (void *)vaddr;
+@@ -110,6 +114,9 @@ iounmap_atomic(void __iomem *kvaddr)
+ 		 * is a bad idea also, in case the page changes cacheability
+ 		 * attributes or becomes a protected page in a hypervisor.
+ 		 */
++#ifdef CONFIG_PREEMPT_RT_FULL
++		current->kmap_pte[type] = __pte(0);
++#endif
+ 		kpte_clear_flush(kmap_pte-idx, vaddr);
+ 		kmap_atomic_idx_pop();
+ 	}
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -85,32 +85,51 @@ static inline void __kunmap_atomic(void
+ 
+ #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ DECLARE_PER_CPU(int, __kmap_atomic_idx);
++#endif
+ 
+ static inline int kmap_atomic_idx_push(void)
+ {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ 	int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
+ 
+-#ifdef CONFIG_DEBUG_HIGHMEM
++# ifdef CONFIG_DEBUG_HIGHMEM
+ 	WARN_ON_ONCE(in_irq() && !irqs_disabled());
+ 	BUG_ON(idx > KM_TYPE_NR);
+-#endif
++# endif
+ 	return idx;
++#else
++	current->kmap_idx++;
++	BUG_ON(current->kmap_idx > KM_TYPE_NR);
++	return current->kmap_idx - 1;
++#endif
+ }
+ 
+ static inline int kmap_atomic_idx(void)
+ {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ 	return __this_cpu_read(__kmap_atomic_idx) - 1;
++#else
++	return current->kmap_idx - 1;
++#endif
+ }
+ 
+ static inline void kmap_atomic_idx_pop(void)
+ {
+-#ifdef CONFIG_DEBUG_HIGHMEM
++#ifndef CONFIG_PREEMPT_RT_FULL
++# ifdef CONFIG_DEBUG_HIGHMEM
+ 	int idx = __this_cpu_dec_return(__kmap_atomic_idx);
+ 
+ 	BUG_ON(idx < 0);
+-#else
++# else
+ 	__this_cpu_dec(__kmap_atomic_idx);
++# endif
++#else
++	current->kmap_idx--;
++# ifdef CONFIG_DEBUG_HIGHMEM
++	BUG_ON(current->kmap_idx < 0);
++# endif
+ #endif
+ }
+ 
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -23,6 +23,7 @@ struct sched_param {
+ #include <linux/nodemask.h>
+ #include <linux/mm_types.h>
+ 
++#include <asm/kmap_types.h>
+ #include <asm/page.h>
+ #include <asm/ptrace.h>
+ #include <asm/cputime.h>
+@@ -1635,6 +1636,12 @@ struct task_struct {
+ 	struct rcu_head put_rcu;
+ 	int softirq_nestcnt;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_FULL
++# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
++	int kmap_idx;
++	pte_t kmap_pte[KM_TYPE_NR];
++# endif
++#endif
+ };
+ 
+ #ifdef CONFIG_NUMA_BALANCING
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -29,10 +29,11 @@
+ #include <linux/kgdb.h>
+ #include <asm/tlbflush.h>
+ 
+-
++#ifndef CONFIG_PREEMPT_RT_FULL
+ #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
+ DEFINE_PER_CPU(int, __kmap_atomic_idx);
+ #endif
++#endif
+ 
+ /*
+  * Virtual_count is not a pure "count".
+@@ -47,8 +48,9 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
+ unsigned long totalhigh_pages __read_mostly;
+ EXPORT_SYMBOL(totalhigh_pages);
+ 
+-
++#ifndef CONFIG_PREEMPT_RT_FULL
+ EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
++#endif
+ 
+ unsigned int nr_free_highpages (void)
+ {
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3672,6 +3672,7 @@ unlock:
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ void pagefault_disable(void)
+ {
++	migrate_disable();
+ 	current->pagefault_disabled++;
+ 	/*
+ 	 * make sure to have issued the store before a pagefault
+@@ -3689,6 +3690,7 @@ void pagefault_enable(void)
+ 	 */
+ 	barrier();
+ 	current->pagefault_disabled--;
++	migrate_enable();
+ }
+ EXPORT_SYMBOL(pagefault_enable);
+ #endif
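
The core of the change above is that each task records the ptes of its live
kmap_atomic slots, so switch_kmaps() in __switch_to() can tear down the
outgoing task's fixmap entries and re-install the incoming task's. A small
userspace sketch of that save/restore bookkeeping; the pte values are plain
integers and everything here is invented for illustration:

/*
 * Sketch of per-task kmap_atomic slot save/restore, modelled on the
 * switch_kmaps() logic above. The "pte" values are plain integers.
 */
#include <stdio.h>
#include <string.h>

#define KM_TYPE_NR 4

struct task {
        const char *name;
        int kmap_idx;                   /* number of slots in use */
        int kmap_pte[KM_TYPE_NR];       /* saved "pte" for each slot */
};

static int cpu_fixmap[KM_TYPE_NR];      /* the CPU's installed slots */

static void switch_kmaps(struct task *prev, struct task *next)
{
        int i;

        /* clear the outgoing task's mappings */
        for (i = 0; i < prev->kmap_idx; i++)
                cpu_fixmap[i] = 0;

        /* re-install the incoming task's mappings */
        for (i = 0; i < next->kmap_idx; i++)
                cpu_fixmap[i] = next->kmap_pte[i];
}

int main(void)
{
        struct task a = { "A", 2, { 0x1000, 0x2000 } };
        struct task b = { "B", 1, { 0x3000 } };

        memcpy(cpu_fixmap, a.kmap_pte, sizeof(int) * a.kmap_idx);
        switch_kmaps(&a, &b);           /* "context switch" from A to B */
        printf("slot 0 after switch: %#x\n", cpu_fixmap[0]);    /* 0x3000 */
        printf("slot 1 after switch: %#x\n", cpu_fixmap[1]);    /* cleared: 0 */
        return 0;
}
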
diff --git a/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch b/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch
new file mode 100644
index 0000000..23a622c
--- /dev/null
+++ b/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch
@@ -0,0 +1,38 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:34 -0500
+Subject: mm: scatterlist don't disable irqs on RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/scatterlist.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/lib/scatterlist.c
++++ b/lib/scatterlist.c
+@@ -499,7 +499,7 @@ void sg_miter_stop(struct sg_mapping_ite
+ 			flush_kernel_dcache_page(miter->page);
+ 
+ 		if (miter->__flags & SG_MITER_ATOMIC) {
+-			WARN_ON_ONCE(preemptible());
++			WARN_ON_ONCE(!pagefault_disabled());
+ 			kunmap_atomic(miter->addr);
+ 		} else
+ 			kunmap(miter->page);
+@@ -539,7 +539,7 @@ static size_t sg_copy_buffer(struct scat
+ 
+ 	sg_miter_start(&miter, sgl, nents, sg_flags);
+ 
+-	local_irq_save(flags);
++	local_irq_save_nort(flags);
+ 
+ 	while (sg_miter_next(&miter) && offset < buflen) {
+ 		unsigned int len;
+@@ -556,7 +556,7 @@ static size_t sg_copy_buffer(struct scat
+ 
+ 	sg_miter_stop(&miter);
+ 
+-	local_irq_restore(flags);
++	local_irq_restore_nort(flags);
+ 	return offset;
+ }
+ 
diff --git a/patches/mm-shrink-the-page-frame-to-rt-size.patch b/patches/mm-shrink-the-page-frame-to-rt-size.patch
new file mode 100644
index 0000000..80f968b
--- /dev/null
+++ b/patches/mm-shrink-the-page-frame-to-rt-size.patch
@@ -0,0 +1,140 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 3 Jul 2009 08:44:54 -0500
+Subject: mm: shrink the page frame to !-rt size
+
+The below is a boot-tested hack to shrink the page frame size back to
+normal.
+
+Should be a net win, since there should be far fewer PTE-pages than
+page-frames.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/mm.h       |   46 +++++++++++++++++++++++++++++++++++++++-------
+ include/linux/mm_types.h |    4 ++++
+ mm/memory.c              |   32 ++++++++++++++++++++++++++++++++
+ 3 files changed, 75 insertions(+), 7 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1259,27 +1259,59 @@ static inline pmd_t *pmd_alloc(struct mm
+  * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+  * When freeing, reset page->mapping so free_pages_check won't complain.
+  */
++#ifndef CONFIG_PREEMPT_RT_FULL
++
+ #define __pte_lockptr(page)	&((page)->ptl)
+-#define pte_lock_init(_page)	do {					\
+-	spin_lock_init(__pte_lockptr(_page));				\
+-} while (0)
++
++static inline struct page *pte_lock_init(struct page *page)
++{
++	spin_lock_init(__pte_lockptr(page));
++	return page;
++}
++
+ #define pte_lock_deinit(page)	((page)->mapping = NULL)
++
++#else /* !PREEMPT_RT_FULL */
++
++/*
++ * On PREEMPT_RT_FULL the spinlock_t's are too large to embed in the
++ * page frame, hence it only has a pointer and we need to dynamically
++ * allocate the lock when we allocate PTE-pages.
++ *
++ * This is an overall win, since only a small fraction of the pages
++ * will be PTE pages under normal circumstances.
++ */
++
++#define __pte_lockptr(page)	((page)->ptl)
++
++extern struct page *pte_lock_init(struct page *page);
++extern void pte_lock_deinit(struct page *page);
++
++#endif /* PREEMPT_RT_FULL */
++
+ #define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+ #else	/* !USE_SPLIT_PTLOCKS */
+ /*
+  * We use mm->page_table_lock to guard all pagetable pages of the mm.
+  */
+-#define pte_lock_init(page)	do {} while (0)
++static inline struct page *pte_lock_init(struct page *page) { return page; }
+ #define pte_lock_deinit(page)	do {} while (0)
+ #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
+ #endif /* USE_SPLIT_PTLOCKS */
+ 
+-static inline void pgtable_page_ctor(struct page *page)
++static inline struct page *__pgtable_page_ctor(struct page *page)
+ {
+-	pte_lock_init(page);
+-	inc_zone_page_state(page, NR_PAGETABLE);
++	page = pte_lock_init(page);
++	if (page)
++		inc_zone_page_state(page, NR_PAGETABLE);
++	return page;
+ }
+ 
++#define pgtable_page_ctor(page)				\
++do {							\
++	page = __pgtable_page_ctor(page);		\
++} while (0)
++
+ static inline void pgtable_page_dtor(struct page *page)
+ {
+ 	pte_lock_deinit(page);
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -141,7 +141,11 @@ struct page {
+ 						 * system if PG_buddy is set.
+ 						 */
+ #if USE_SPLIT_PTLOCKS
++# ifndef CONFIG_PREEMPT_RT_FULL
+ 		spinlock_t ptl;
++# else
++		spinlock_t *ptl;
++# endif
+ #endif
+ 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
+ 		struct page *first_page;	/* Compound tail pages */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4264,3 +4264,35 @@ void copy_user_huge_page(struct page *ds
+ 	}
+ }
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
++
++#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
++/*
++ * Heinous hack, relies on the caller doing something like:
++ *
++ *   pte = alloc_pages(PGALLOC_GFP, 0);
++ *   if (pte)
++ *     pgtable_page_ctor(pte);
++ *   return pte;
++ *
++ * This ensures we release the page and return NULL when the
++ * lock allocation fails.
++ */
++struct page *pte_lock_init(struct page *page)
++{
++	page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
++	if (page->ptl) {
++		spin_lock_init(__pte_lockptr(page));
++	} else {
++		__free_page(page);
++		page = NULL;
++	}
++	return page;
++}
++
++void pte_lock_deinit(struct page *page)
++{
++	kfree(page->ptl);
++	page->mapping = NULL;
++}
++
++#endif
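
The behavioural point of the patch above is that on PREEMPT_RT_FULL the PTE
lock no longer fits in struct page, so pte_lock_init() allocates it
separately and may fail, which is why pgtable_page_ctor() now hands a
possibly NULL page back to the caller. A rough userspace analogue of an
embedded-versus-pointer lock with a fallible constructor; malloc stands in
for the lock allocation and the names are made up for illustration:

/*
 * Userspace analogue of the embedded-vs-pointer PTE lock: on "RT" the lock
 * is allocated separately and the constructor may fail. Names invented.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct desc {
#ifdef EMBED_LOCK
        pthread_mutex_t ptl;            /* small enough: embed directly */
#else
        pthread_mutex_t *ptl;           /* too large to embed: allocate on demand */
#endif
};

/* Constructor may fail when the lock has to be allocated separately. */
static struct desc *desc_ctor(struct desc *d)
{
#ifdef EMBED_LOCK
        pthread_mutex_init(&d->ptl, NULL);
        return d;
#else
        d->ptl = malloc(sizeof(*d->ptl));
        if (!d->ptl) {
                free(d);                /* caller sees NULL and backs out */
                return NULL;
        }
        pthread_mutex_init(d->ptl, NULL);
        return d;
#endif
}

int main(void)
{
        struct desc *d = malloc(sizeof(*d));

        if (d)
                d = desc_ctor(d);       /* may turn into NULL on failure */
        if (!d)
                return 1;
        printf("descriptor ready\n");
        return 0;
}
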
diff --git a/patches/mm-slab-more-lock-breaks.patch b/patches/mm-slab-more-lock-breaks.patch
new file mode 100644
index 0000000..455b7fd
--- /dev/null
+++ b/patches/mm-slab-more-lock-breaks.patch
@@ -0,0 +1,229 @@
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 3 Jul 2009 08:44:43 -0500
+Subject: mm: More lock breaks in slab.c
+
+Handle __free_pages outside of the locked regions. This reduces the
+lock contention on the percpu slab locks in -rt significantly.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/slab.c |   82 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 64 insertions(+), 18 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -697,6 +697,7 @@ static void slab_set_debugobj_lock_class
+ #endif
+ 
+ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
++static DEFINE_PER_CPU(struct list_head, slab_free_list);
+ static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
+ 
+ #ifndef CONFIG_PREEMPT_RT_BASE
+@@ -729,6 +730,34 @@ static void unlock_slab_on(unsigned int
+ }
+ #endif
+ 
++static void free_delayed(struct list_head *h)
++{
++	while(!list_empty(h)) {
++		struct page *page = list_first_entry(h, struct page, lru);
++
++		list_del(&page->lru);
++		__free_pages(page, page->index);
++	}
++}
++
++static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
++{
++	LIST_HEAD(tmp);
++
++	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
++	local_spin_unlock_irq(slab_lock, list_lock);
++	free_delayed(&tmp);
++}
++
++static void unlock_slab_and_free_delayed(unsigned long flags)
++{
++	LIST_HEAD(tmp);
++
++	list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
++	local_unlock_irqrestore(slab_lock, flags);
++	free_delayed(&tmp);
++}
++
+ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
+ {
+ 	return cachep->array[smp_processor_id()];
+@@ -1345,7 +1374,7 @@ static void __cpuinit cpuup_canceled(lon
+ 			free_block(cachep, nc->entry, nc->avail, node);
+ 
+ 		if (!cpumask_empty(mask)) {
+-			local_spin_unlock_irq(slab_lock, &l3->list_lock);
++			unlock_l3_and_free_delayed(&l3->list_lock);
+ 			goto free_array_cache;
+ 		}
+ 
+@@ -1359,7 +1388,7 @@ static void __cpuinit cpuup_canceled(lon
+ 		alien = l3->alien;
+ 		l3->alien = NULL;
+ 
+-		local_spin_unlock_irq(slab_lock, &l3->list_lock);
++		unlock_l3_and_free_delayed(&l3->list_lock);
+ 
+ 		kfree(shared);
+ 		if (alien) {
+@@ -1652,6 +1681,8 @@ void __init kmem_cache_init(void)
+ 		use_alien_caches = 0;
+ 
+ 	local_irq_lock_init(slab_lock);
++	for_each_possible_cpu(i)
++		INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
+ 
+ 	for (i = 0; i < NUM_INIT_LISTS; i++)
+ 		kmem_list3_init(&initkmem_list3[i]);
+@@ -1953,12 +1984,14 @@ static void *kmem_getpages(struct kmem_c
+ /*
+  * Interface to system's page release.
+  */
+-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
++static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
+ {
+ 	unsigned long i = (1 << cachep->gfporder);
+-	struct page *page = virt_to_page(addr);
++	struct page *page, *basepage = virt_to_page(addr);
+ 	const unsigned long nr_freed = i;
+ 
++	page = basepage;
++
+ 	kmemcheck_free_shadow(page, cachep->gfporder);
+ 
+ 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+@@ -1977,7 +2010,12 @@ static void kmem_freepages(struct kmem_c
+ 	memcg_release_pages(cachep, cachep->gfporder);
+ 	if (current->reclaim_state)
+ 		current->reclaim_state->reclaimed_slab += nr_freed;
+-	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
++	if (!delayed) {
++		free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
++	} else {
++		basepage->index = cachep->gfporder;
++		list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
++	}
+ }
+ 
+ static void kmem_rcu_free(struct rcu_head *head)
+@@ -1985,7 +2023,7 @@ static void kmem_rcu_free(struct rcu_hea
+ 	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
+ 	struct kmem_cache *cachep = slab_rcu->cachep;
+ 
+-	kmem_freepages(cachep, slab_rcu->addr);
++	kmem_freepages(cachep, slab_rcu->addr, false);
+ 	if (OFF_SLAB(cachep))
+ 		kmem_cache_free(cachep->slabp_cache, slab_rcu);
+ }
+@@ -2204,7 +2242,8 @@ static void slab_destroy_debugcheck(stru
+  * Before calling the slab must have been unlinked from the cache.  The
+  * cache-lock is not held/needed.
+  */
+-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
++static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
++			 bool delayed)
+ {
+ 	void *addr = slabp->s_mem - slabp->colouroff;
+ 
+@@ -2217,7 +2256,7 @@ static void slab_destroy(struct kmem_cac
+ 		slab_rcu->addr = addr;
+ 		call_rcu(&slab_rcu->head, kmem_rcu_free);
+ 	} else {
+-		kmem_freepages(cachep, addr);
++		kmem_freepages(cachep, addr, delayed);
+ 		if (OFF_SLAB(cachep))
+ 			kmem_cache_free(cachep->slabp_cache, slabp);
+ 	}
+@@ -2628,9 +2667,15 @@ static void do_drain(void *arg)
+ 	__do_drain(arg, smp_processor_id());
+ }
+ #else
+-static void do_drain(void *arg, int this_cpu)
++static void do_drain(void *arg, int cpu)
+ {
+-	__do_drain(arg, this_cpu);
++	LIST_HEAD(tmp);
++
++	lock_slab_on(cpu);
++	__do_drain(arg, cpu);
++	list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
++	unlock_slab_on(cpu);
++	free_delayed(&tmp);
+ }
+ #endif
+ 
+@@ -2688,7 +2733,7 @@ static int drain_freelist(struct kmem_ca
+ 		 */
+ 		l3->free_objects -= cache->num;
+ 		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+-		slab_destroy(cache, slabp);
++		slab_destroy(cache, slabp, false);
+ 		nr_freed++;
+ 	}
+ out:
+@@ -3003,7 +3048,7 @@ static int cache_grow(struct kmem_cache
+ 	spin_unlock(&l3->list_lock);
+ 	return 1;
+ opps1:
+-	kmem_freepages(cachep, objp);
++	kmem_freepages(cachep, objp, false);
+ failed:
+ 	if (local_flags & __GFP_WAIT)
+ 		local_lock_irq(slab_lock);
+@@ -3684,7 +3729,7 @@ static void free_block(struct kmem_cache
+ 				 * a different cache, refer to comments before
+ 				 * alloc_slabmgmt.
+ 				 */
+-				slab_destroy(cachep, slabp);
++				slab_destroy(cachep, slabp, true);
+ 			} else {
+ 				list_add(&slabp->list, &l3->slabs_free);
+ 			}
+@@ -3952,7 +3997,7 @@ void kmem_cache_free(struct kmem_cache *
+ 		debug_check_no_obj_freed(objp, cachep->object_size);
+ 	local_lock_irqsave(slab_lock, flags);
+ 	__cache_free(cachep, objp, _RET_IP_);
+-	local_unlock_irqrestore(slab_lock, flags);
++	unlock_slab_and_free_delayed(flags);
+ 
+ 	trace_kmem_cache_free(_RET_IP_, objp);
+ }
+@@ -3983,7 +4028,7 @@ void kfree(const void *objp)
+ 	debug_check_no_obj_freed(objp, c->object_size);
+ 	local_lock_irqsave(slab_lock, flags);
+ 	__cache_free(c, (void *)objp, _RET_IP_);
+-	local_unlock_irqrestore(slab_lock, flags);
++	unlock_slab_and_free_delayed(flags);
+ }
+ EXPORT_SYMBOL(kfree);
+ 
+@@ -4033,7 +4078,8 @@ static int alloc_kmemlist(struct kmem_ca
+ 			}
+ 			l3->free_limit = (1 + nr_cpus_node(node)) *
+ 					cachep->batchcount + cachep->num;
+-			local_spin_unlock_irq(slab_lock, &l3->list_lock);
++			unlock_l3_and_free_delayed(&l3->list_lock);
++
+ 			kfree(shared);
+ 			free_alien_cache(new_alien);
+ 			continue;
+@@ -4141,8 +4187,8 @@ static int __do_tune_cpucache(struct kme
+ 		local_spin_lock_irq(slab_lock,
+ 				    &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ 		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
+-		local_spin_unlock_irq(slab_lock,
+-				      &cachep->nodelists[cpu_to_mem(i)]->list_lock);
++
++		unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ 		kfree(ccold);
+ 	}
+ 	kfree(new);
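
The slab change reuses the same trick as the page allocator: pages headed for
__free_pages() are parked on a per-CPU list while the slab lock is held, and
the new helpers splice that list away and free it only after the lock is
dropped. A compact userspace sketch of that splice-then-free helper; names
and types below are invented for illustration:

/*
 * Userspace sketch of the "park under the lock, free after unlocking"
 * helpers added above (unlock_slab_and_free_delayed() and friends).
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *next;
};

static pthread_mutex_t slab_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending_free;       /* objects parked while the lock is held */

/* Called with slab_lock held: just park the object, do not free it yet. */
static void defer_free(struct node *n)
{
        n->next = pending_free;
        pending_free = n;
}

/* Splice the pending list away, drop the lock, then do the real freeing. */
static void unlock_and_free_delayed(void)
{
        struct node *batch = pending_free;

        pending_free = NULL;
        pthread_mutex_unlock(&slab_lock);

        while (batch) {
                struct node *next = batch->next;

                free(batch);            /* expensive work, lock not held */
                batch = next;
        }
}

int main(void)
{
        pthread_mutex_lock(&slab_lock);
        defer_free(malloc(sizeof(struct node)));
        defer_free(malloc(sizeof(struct node)));
        unlock_and_free_delayed();
        printf("deferred objects freed outside the lock\n");
        return 0;
}
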
diff --git a/patches/mm-slab-move-debug-out.patch b/patches/mm-slab-move-debug-out.patch
new file mode 100644
index 0000000..5f2ff42
--- /dev/null
+++ b/patches/mm-slab-move-debug-out.patch
@@ -0,0 +1,37 @@
+Subject: mm-slab-move-debug-out.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 20 Jun 2011 10:42:04 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/slab.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -3895,10 +3895,10 @@ void kmem_cache_free(struct kmem_cache *
+ 	if (!cachep)
+ 		return;
+ 
+-	local_irq_save(flags);
+ 	debug_check_no_locks_freed(objp, cachep->object_size);
+ 	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
+ 		debug_check_no_obj_freed(objp, cachep->object_size);
++	local_irq_save(flags);
+ 	__cache_free(cachep, objp, _RET_IP_);
+ 	local_irq_restore(flags);
+ 
+@@ -3924,12 +3924,12 @@ void kfree(const void *objp)
+ 
+ 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
+ 		return;
+-	local_irq_save(flags);
+ 	kfree_debugcheck(objp);
+ 	c = virt_to_cache(objp);
+ 	debug_check_no_locks_freed(objp, c->object_size);
+ 
+ 	debug_check_no_obj_freed(objp, c->object_size);
++	local_irq_save(flags);
+ 	__cache_free(c, (void *)objp, _RET_IP_);
+ 	local_irq_restore(flags);
+ }
diff --git a/patches/mm-slab-wrap-functions.patch b/patches/mm-slab-wrap-functions.patch
new file mode 100644
index 0000000..b6f823a
--- /dev/null
+++ b/patches/mm-slab-wrap-functions.patch
@@ -0,0 +1,458 @@
+Subject: mm-slab-wrap-functions.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 18 Jun 2011 19:44:43 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/slab.c |  163 +++++++++++++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 114 insertions(+), 49 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -116,6 +116,7 @@
+ #include	<linux/kmemcheck.h>
+ #include	<linux/memory.h>
+ #include	<linux/prefetch.h>
++#include	<linux/locallock.h>
+ 
+ #include	<net/sock.h>
+ 
+@@ -696,12 +697,49 @@ static void slab_set_debugobj_lock_class
+ #endif
+ 
+ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
++static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
++
++#ifndef CONFIG_PREEMPT_RT_BASE
++# define slab_on_each_cpu(func, cp)	on_each_cpu(func, cp, 1)
++#else
++/*
++ * execute func() for all CPUs. On PREEMPT_RT we dont actually have
++ * to run on the remote CPUs - we only have to take their CPU-locks.
++ * (This is a rare operation, so cacheline bouncing is not an issue.)
++ */
++static void
++slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
++{
++	unsigned int i;
++
++	get_cpu_light();
++	for_each_online_cpu(i)
++		func(arg, i);
++	put_cpu_light();
++}
++
++static void lock_slab_on(unsigned int cpu)
++{
++	local_lock_irq_on(slab_lock, cpu);
++}
++
++static void unlock_slab_on(unsigned int cpu)
++{
++	local_unlock_irq_on(slab_lock, cpu);
++}
++#endif
+ 
+ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
+ {
+ 	return cachep->array[smp_processor_id()];
+ }
+ 
++static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
++						       int cpu)
++{
++	return cachep->array[cpu];
++}
++
+ static inline struct kmem_cache *__find_general_cachep(size_t size,
+ 							gfp_t gfpflags)
+ {
+@@ -1171,9 +1209,10 @@ static void reap_alien(struct kmem_cache
+ 	if (l3->alien) {
+ 		struct array_cache *ac = l3->alien[node];
+ 
+-		if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
++		if (ac && ac->avail &&
++		    local_spin_trylock_irq(slab_lock, &ac->lock)) {
+ 			__drain_alien_cache(cachep, ac, node);
+-			spin_unlock_irq(&ac->lock);
++			local_spin_unlock_irq(slab_lock, &ac->lock);
+ 		}
+ 	}
+ }
+@@ -1188,9 +1227,9 @@ static void drain_alien_cache(struct kme
+ 	for_each_online_node(i) {
+ 		ac = alien[i];
+ 		if (ac) {
+-			spin_lock_irqsave(&ac->lock, flags);
++			local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
+ 			__drain_alien_cache(cachep, ac, i);
+-			spin_unlock_irqrestore(&ac->lock, flags);
++			local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
+ 		}
+ 	}
+ }
+@@ -1269,11 +1308,11 @@ static int init_cache_nodelists_node(int
+ 			cachep->nodelists[node] = l3;
+ 		}
+ 
+-		spin_lock_irq(&cachep->nodelists[node]->list_lock);
++		local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
+ 		cachep->nodelists[node]->free_limit =
+ 			(1 + nr_cpus_node(node)) *
+ 			cachep->batchcount + cachep->num;
+-		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
++		local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
+ 	}
+ 	return 0;
+ }
+@@ -1298,7 +1337,7 @@ static void __cpuinit cpuup_canceled(lon
+ 		if (!l3)
+ 			goto free_array_cache;
+ 
+-		spin_lock_irq(&l3->list_lock);
++		local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 
+ 		/* Free limit for this kmem_list3 */
+ 		l3->free_limit -= cachep->batchcount;
+@@ -1306,7 +1345,7 @@ static void __cpuinit cpuup_canceled(lon
+ 			free_block(cachep, nc->entry, nc->avail, node);
+ 
+ 		if (!cpumask_empty(mask)) {
+-			spin_unlock_irq(&l3->list_lock);
++			local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 			goto free_array_cache;
+ 		}
+ 
+@@ -1320,7 +1359,7 @@ static void __cpuinit cpuup_canceled(lon
+ 		alien = l3->alien;
+ 		l3->alien = NULL;
+ 
+-		spin_unlock_irq(&l3->list_lock);
++		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 
+ 		kfree(shared);
+ 		if (alien) {
+@@ -1394,7 +1433,7 @@ static int __cpuinit cpuup_prepare(long
+ 		l3 = cachep->nodelists[node];
+ 		BUG_ON(!l3);
+ 
+-		spin_lock_irq(&l3->list_lock);
++		local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 		if (!l3->shared) {
+ 			/*
+ 			 * We are serialised from CPU_DEAD or
+@@ -1409,7 +1448,7 @@ static int __cpuinit cpuup_prepare(long
+ 			alien = NULL;
+ 		}
+ #endif
+-		spin_unlock_irq(&l3->list_lock);
++		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 		kfree(shared);
+ 		free_alien_cache(alien);
+ 		if (cachep->flags & SLAB_DEBUG_OBJECTS)
+@@ -1612,6 +1651,8 @@ void __init kmem_cache_init(void)
+ 	if (num_possible_nodes() == 1)
+ 		use_alien_caches = 0;
+ 
++	local_irq_lock_init(slab_lock);
++
+ 	for (i = 0; i < NUM_INIT_LISTS; i++)
+ 		kmem_list3_init(&initkmem_list3[i]);
+ 
+@@ -2533,7 +2574,7 @@ __kmem_cache_create (struct kmem_cache *
+ #if DEBUG
+ static void check_irq_off(void)
+ {
+-	BUG_ON(!irqs_disabled());
++	BUG_ON_NONRT(!irqs_disabled());
+ }
+ 
+ static void check_irq_on(void)
+@@ -2568,26 +2609,37 @@ static void drain_array(struct kmem_cach
+ 			struct array_cache *ac,
+ 			int force, int node);
+ 
+-static void do_drain(void *arg)
++static void __do_drain(void *arg, unsigned int cpu)
+ {
+ 	struct kmem_cache *cachep = arg;
+ 	struct array_cache *ac;
+-	int node = numa_mem_id();
++	int node = cpu_to_mem(cpu);
+ 
+-	check_irq_off();
+-	ac = cpu_cache_get(cachep);
++	ac = cpu_cache_get_on_cpu(cachep, cpu);
+ 	spin_lock(&cachep->nodelists[node]->list_lock);
+ 	free_block(cachep, ac->entry, ac->avail, node);
+ 	spin_unlock(&cachep->nodelists[node]->list_lock);
+ 	ac->avail = 0;
+ }
+ 
++#ifndef CONFIG_PREEMPT_RT_BASE
++static void do_drain(void *arg)
++{
++	__do_drain(arg, smp_processor_id());
++}
++#else
++static void do_drain(void *arg, int this_cpu)
++{
++	__do_drain(arg, this_cpu);
++}
++#endif
++
+ static void drain_cpu_caches(struct kmem_cache *cachep)
+ {
+ 	struct kmem_list3 *l3;
+ 	int node;
+ 
+-	on_each_cpu(do_drain, cachep, 1);
++	slab_on_each_cpu(do_drain, cachep);
+ 	check_irq_on();
+ 	for_each_online_node(node) {
+ 		l3 = cachep->nodelists[node];
+@@ -2618,10 +2670,10 @@ static int drain_freelist(struct kmem_ca
+ 	nr_freed = 0;
+ 	while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
+ 
+-		spin_lock_irq(&l3->list_lock);
++		local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 		p = l3->slabs_free.prev;
+ 		if (p == &l3->slabs_free) {
+-			spin_unlock_irq(&l3->list_lock);
++			local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 			goto out;
+ 		}
+ 
+@@ -2635,7 +2687,7 @@ static int drain_freelist(struct kmem_ca
+ 		 * to the cache.
+ 		 */
+ 		l3->free_objects -= cache->num;
+-		spin_unlock_irq(&l3->list_lock);
++		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 		slab_destroy(cache, slabp);
+ 		nr_freed++;
+ 	}
+@@ -2910,7 +2962,7 @@ static int cache_grow(struct kmem_cache
+ 	offset *= cachep->colour_off;
+ 
+ 	if (local_flags & __GFP_WAIT)
+-		local_irq_enable();
++		local_unlock_irq(slab_lock);
+ 
+ 	/*
+ 	 * The test for missing atomic flag is performed here, rather than
+@@ -2940,7 +2992,7 @@ static int cache_grow(struct kmem_cache
+ 	cache_init_objs(cachep, slabp);
+ 
+ 	if (local_flags & __GFP_WAIT)
+-		local_irq_disable();
++		local_lock_irq(slab_lock);
+ 	check_irq_off();
+ 	spin_lock(&l3->list_lock);
+ 
+@@ -2954,7 +3006,7 @@ opps1:
+ 	kmem_freepages(cachep, objp);
+ failed:
+ 	if (local_flags & __GFP_WAIT)
+-		local_irq_disable();
++		local_lock_irq(slab_lock);
+ 	return 0;
+ }
+ 
+@@ -3368,11 +3420,11 @@ retry:
+ 		 * set and go into memory reserves if necessary.
+ 		 */
+ 		if (local_flags & __GFP_WAIT)
+-			local_irq_enable();
++			local_unlock_irq(slab_lock);
+ 		kmem_flagcheck(cache, flags);
+ 		obj = kmem_getpages(cache, local_flags, numa_mem_id());
+ 		if (local_flags & __GFP_WAIT)
+-			local_irq_disable();
++			local_lock_irq(slab_lock);
+ 		if (obj) {
+ 			/*
+ 			 * Insert into the appropriate per node queues
+@@ -3492,7 +3544,7 @@ slab_alloc_node(struct kmem_cache *cache
+ 	cachep = memcg_kmem_get_cache(cachep, flags);
+ 
+ 	cache_alloc_debugcheck_before(cachep, flags);
+-	local_irq_save(save_flags);
++	local_lock_irqsave(slab_lock, save_flags);
+ 
+ 	if (nodeid == NUMA_NO_NODE)
+ 		nodeid = slab_node;
+@@ -3517,7 +3569,7 @@ slab_alloc_node(struct kmem_cache *cache
+ 	/* ___cache_alloc_node can fall back to other nodes */
+ 	ptr = ____cache_alloc_node(cachep, flags, nodeid);
+   out:
+-	local_irq_restore(save_flags);
++	local_unlock_irqrestore(slab_lock, save_flags);
+ 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+ 	kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
+ 				 flags);
+@@ -3579,9 +3631,9 @@ slab_alloc(struct kmem_cache *cachep, gf
+ 	cachep = memcg_kmem_get_cache(cachep, flags);
+ 
+ 	cache_alloc_debugcheck_before(cachep, flags);
+-	local_irq_save(save_flags);
++	local_lock_irqsave(slab_lock, save_flags);
+ 	objp = __do_cache_alloc(cachep, flags);
+-	local_irq_restore(save_flags);
++	local_unlock_irqrestore(slab_lock, save_flags);
+ 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ 	kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
+ 				 flags);
+@@ -3898,9 +3950,9 @@ void kmem_cache_free(struct kmem_cache *
+ 	debug_check_no_locks_freed(objp, cachep->object_size);
+ 	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
+ 		debug_check_no_obj_freed(objp, cachep->object_size);
+-	local_irq_save(flags);
++	local_lock_irqsave(slab_lock, flags);
+ 	__cache_free(cachep, objp, _RET_IP_);
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(slab_lock, flags);
+ 
+ 	trace_kmem_cache_free(_RET_IP_, objp);
+ }
+@@ -3929,9 +3981,9 @@ void kfree(const void *objp)
+ 	debug_check_no_locks_freed(objp, c->object_size);
+ 
+ 	debug_check_no_obj_freed(objp, c->object_size);
+-	local_irq_save(flags);
++	local_lock_irqsave(slab_lock, flags);
+ 	__cache_free(c, (void *)objp, _RET_IP_);
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(slab_lock, flags);
+ }
+ EXPORT_SYMBOL(kfree);
+ 
+@@ -3968,7 +4020,7 @@ static int alloc_kmemlist(struct kmem_ca
+ 		if (l3) {
+ 			struct array_cache *shared = l3->shared;
+ 
+-			spin_lock_irq(&l3->list_lock);
++			local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 
+ 			if (shared)
+ 				free_block(cachep, shared->entry,
+@@ -3981,7 +4033,7 @@ static int alloc_kmemlist(struct kmem_ca
+ 			}
+ 			l3->free_limit = (1 + nr_cpus_node(node)) *
+ 					cachep->batchcount + cachep->num;
+-			spin_unlock_irq(&l3->list_lock);
++			local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 			kfree(shared);
+ 			free_alien_cache(new_alien);
+ 			continue;
+@@ -4028,17 +4080,28 @@ struct ccupdate_struct {
+ 	struct array_cache *new[0];
+ };
+ 
+-static void do_ccupdate_local(void *info)
++static void __do_ccupdate_local(void *info, int cpu)
+ {
+ 	struct ccupdate_struct *new = info;
+ 	struct array_cache *old;
+ 
+-	check_irq_off();
+-	old = cpu_cache_get(new->cachep);
++	old = cpu_cache_get_on_cpu(new->cachep, cpu);
+ 
+-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
+-	new->new[smp_processor_id()] = old;
++	new->cachep->array[cpu] = new->new[cpu];
++	new->new[cpu] = old;
++}
++
++#ifndef CONFIG_PREEMPT_RT_BASE
++static void do_ccupdate_local(void *info)
++{
++	__do_ccupdate_local(info, smp_processor_id());
+ }
++#else
++static void do_ccupdate_local(void *info, int cpu)
++{
++	__do_ccupdate_local(info, cpu);
++}
++#endif
+ 
+ /* Always called with the slab_mutex held */
+ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
+@@ -4064,7 +4127,7 @@ static int __do_tune_cpucache(struct kme
+ 	}
+ 	new->cachep = cachep;
+ 
+-	on_each_cpu(do_ccupdate_local, (void *)new, 1);
++	slab_on_each_cpu(do_ccupdate_local, (void *)new);
+ 
+ 	check_irq_on();
+ 	cachep->batchcount = batchcount;
+@@ -4075,9 +4138,11 @@ static int __do_tune_cpucache(struct kme
+ 		struct array_cache *ccold = new->new[i];
+ 		if (!ccold)
+ 			continue;
+-		spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
++		local_spin_lock_irq(slab_lock,
++				    &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ 		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
+-		spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
++		local_spin_unlock_irq(slab_lock,
++				      &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ 		kfree(ccold);
+ 	}
+ 	kfree(new);
+@@ -4192,7 +4257,7 @@ static void drain_array(struct kmem_cach
+ 	if (ac->touched && !force) {
+ 		ac->touched = 0;
+ 	} else {
+-		spin_lock_irq(&l3->list_lock);
++		local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 		if (ac->avail) {
+ 			tofree = force ? ac->avail : (ac->limit + 4) / 5;
+ 			if (tofree > ac->avail)
+@@ -4202,7 +4267,7 @@ static void drain_array(struct kmem_cach
+ 			memmove(ac->entry, &(ac->entry[tofree]),
+ 				sizeof(void *) * ac->avail);
+ 		}
+-		spin_unlock_irq(&l3->list_lock);
++		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 	}
+ }
+ 
+@@ -4295,7 +4360,7 @@ void get_slabinfo(struct kmem_cache *cac
+ 			continue;
+ 
+ 		check_irq_on();
+-		spin_lock_irq(&l3->list_lock);
++		local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 
+ 		list_for_each_entry(slabp, &l3->slabs_full, list) {
+ 			if (slabp->inuse != cachep->num && !error)
+@@ -4320,7 +4385,7 @@ void get_slabinfo(struct kmem_cache *cac
+ 		if (l3->shared)
+ 			shared_avail += l3->shared->avail;
+ 
+-		spin_unlock_irq(&l3->list_lock);
++		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 	}
+ 	num_slabs += active_slabs;
+ 	num_objs = num_slabs * cachep->num;
+@@ -4520,13 +4585,13 @@ static int leaks_show(struct seq_file *m
+ 			continue;
+ 
+ 		check_irq_on();
+-		spin_lock_irq(&l3->list_lock);
++		local_spin_lock_irq(slab_lock, &l3->list_lock);
+ 
+ 		list_for_each_entry(slabp, &l3->slabs_full, list)
+ 			handle_slab(n, cachep, slabp);
+ 		list_for_each_entry(slabp, &l3->slabs_partial, list)
+ 			handle_slab(n, cachep, slabp);
+-		spin_unlock_irq(&l3->list_lock);
++		local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ 	}
+ 	name = cachep->name;
+ 	if (n[0] == n[1]) {
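
The slab_on_each_cpu() comment above captures the RT strategy: rather than
sending an IPI so the callback runs on every CPU, the calling CPU takes each
CPU's slab lock in turn and runs the callback locally. A minimal userspace
model of that idea with one mutex per simulated CPU; the names are invented
for illustration:

/*
 * Userspace model of the RT variant of slab_on_each_cpu(): no IPIs, the
 * caller takes each CPU's lock and runs the callback locally. Illustrative.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

static pthread_mutex_t slab_lock[NR_CPUS];      /* one "local lock" per CPU */
static int cache_count[NR_CPUS];                /* per-CPU state the callback touches */

static void slab_on_each_cpu(void (*func)(void *arg, int cpu), void *arg)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                pthread_mutex_lock(&slab_lock[cpu]);
                func(arg, cpu);                 /* runs here, under that CPU's lock */
                pthread_mutex_unlock(&slab_lock[cpu]);
        }
}

static void do_drain(void *arg, int cpu)
{
        (void)arg;
        cache_count[cpu] = 0;                   /* drain this CPU's cache */
}

int main(void)
{
        int i;

        for (i = 0; i < NR_CPUS; i++) {
                pthread_mutex_init(&slab_lock[i], NULL);
                cache_count[i] = 10;
        }
        slab_on_each_cpu(do_drain, NULL);
        printf("cpu0 cache count after drain: %d\n", cache_count[0]);
        return 0;
}
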
diff --git a/patches/mm-vmalloc-use-get-cpu-light.patch b/patches/mm-vmalloc-use-get-cpu-light.patch
new file mode 100644
index 0000000..61f6d7f
--- /dev/null
+++ b/patches/mm-vmalloc-use-get-cpu-light.patch
@@ -0,0 +1,64 @@
+Subject: mm-vmalloc.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2011 11:39:36 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/vmalloc.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -782,7 +782,7 @@ static struct vmap_block *new_vmap_block
+ 	struct vmap_block *vb;
+ 	struct vmap_area *va;
+ 	unsigned long vb_idx;
+-	int node, err;
++	int node, err, cpu;
+ 
+ 	node = numa_node_id();
+ 
+@@ -821,12 +821,13 @@ static struct vmap_block *new_vmap_block
+ 	BUG_ON(err);
+ 	radix_tree_preload_end();
+ 
+-	vbq = &get_cpu_var(vmap_block_queue);
++	cpu = get_cpu_light();
++	vbq = &__get_cpu_var(vmap_block_queue);
+ 	vb->vbq = vbq;
+ 	spin_lock(&vbq->lock);
+ 	list_add_rcu(&vb->free_list, &vbq->free);
+ 	spin_unlock(&vbq->lock);
+-	put_cpu_var(vmap_block_queue);
++	put_cpu_light();
+ 
+ 	return vb;
+ }
+@@ -900,7 +901,7 @@ static void *vb_alloc(unsigned long size
+ 	struct vmap_block *vb;
+ 	unsigned long addr = 0;
+ 	unsigned int order;
+-	int purge = 0;
++	int purge = 0, cpu;
+ 
+ 	BUG_ON(size & ~PAGE_MASK);
+ 	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+@@ -916,7 +917,8 @@ static void *vb_alloc(unsigned long size
+ 
+ again:
+ 	rcu_read_lock();
+-	vbq = &get_cpu_var(vmap_block_queue);
++	cpu = get_cpu_light();
++	vbq = &__get_cpu_var(vmap_block_queue);
+ 	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+ 		int i;
+ 
+@@ -953,7 +955,7 @@ next:
+ 	if (purge)
+ 		purge_fragmented_blocks_thiscpu();
+ 
+-	put_cpu_var(vmap_block_queue);
++	put_cpu_light();
+ 	rcu_read_unlock();
+ 
+ 	if (!addr) {
diff --git a/patches/mmci-remove-bogus-irq-save.patch b/patches/mmci-remove-bogus-irq-save.patch
new file mode 100644
index 0000000..9250895
--- /dev/null
+++ b/patches/mmci-remove-bogus-irq-save.patch
@@ -0,0 +1,39 @@
+Subject: mmci: Remove bogus local_irq_save()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 12:11:12 +0100
+
+On !RT the interrupt handler runs with interrupts disabled. On RT it
+runs in a thread, so there is no need to disable interrupts at all.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/mmc/host/mmci.c |    5 -----
+ 1 file changed, 5 deletions(-)
+
+--- a/drivers/mmc/host/mmci.c
++++ b/drivers/mmc/host/mmci.c
+@@ -930,15 +930,12 @@ static irqreturn_t mmci_pio_irq(int irq,
+ 	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+ 	struct variant_data *variant = host->variant;
+ 	void __iomem *base = host->base;
+-	unsigned long flags;
+ 	u32 status;
+ 
+ 	status = readl(base + MMCISTATUS);
+ 
+ 	dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
+ 
+-	local_irq_save(flags);
+-
+ 	do {
+ 		unsigned int remain, len;
+ 		char *buffer;
+@@ -978,8 +975,6 @@ static irqreturn_t mmci_pio_irq(int irq,
+ 
+ 	sg_miter_stop(sg_miter);
+ 
+-	local_irq_restore(flags);
+-
+ 	/*
+ 	 * If we have less than the fifo 'half-full' threshold to transfer,
+ 	 * trigger a PIO interrupt as soon as any data is available.
diff --git a/patches/mutex-no-spin-on-rt.patch b/patches/mutex-no-spin-on-rt.patch
new file mode 100644
index 0000000..23b771d
--- /dev/null
+++ b/patches/mutex-no-spin-on-rt.patch
@@ -0,0 +1,17 @@
+Subject: mutex-no-spin-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:51:45 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/Kconfig.locks |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/Kconfig.locks
++++ b/kernel/Kconfig.locks
+@@ -222,4 +222,4 @@ endif
+ 
+ config MUTEX_SPIN_ON_OWNER
+ 	def_bool y
+-	depends on SMP && !DEBUG_MUTEXES
++	depends on SMP && !DEBUG_MUTEXES && !PREEMPT_RT_FULL
diff --git a/patches/net-another-local-irq-disable-alloc-atomic-headache.patch b/patches/net-another-local-irq-disable-alloc-atomic-headache.patch
new file mode 100644
index 0000000..e14da83
--- /dev/null
+++ b/patches/net-another-local-irq-disable-alloc-atomic-headache.patch
@@ -0,0 +1,47 @@
+Subject: net: Another local_irq_disable/kmalloc headache
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 26 Sep 2012 16:21:08 +0200
+
+Replace it with a local lock, though that's pretty inefficient :(
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/skbuff.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -60,6 +60,7 @@
+ #include <linux/scatterlist.h>
+ #include <linux/errqueue.h>
+ #include <linux/prefetch.h>
++#include <linux/locallock.h>
+ 
+ #include <net/protocol.h>
+ #include <net/dst.h>
+@@ -347,6 +348,7 @@ struct netdev_alloc_cache {
+ 	unsigned int		pagecnt_bias;
+ };
+ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
++static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
+ 
+ #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+ #define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+@@ -359,7 +361,7 @@ static void *__netdev_alloc_frag(unsigne
+ 	int order;
+ 	unsigned long flags;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(netdev_alloc_lock, flags);
+ 	nc = &__get_cpu_var(netdev_alloc_cache);
+ 	if (unlikely(!nc->frag.page)) {
+ refill:
+@@ -393,7 +395,7 @@ recycle:
+ 	nc->frag.offset += fragsz;
+ 	nc->pagecnt_bias--;
+ end:
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(netdev_alloc_lock, flags);
+ 	return data;
+ }
+ 
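For readers new to the locallock primitives used above: local_lock_irqsave()
maps to local_irq_save() on mainline but takes a per-CPU sleeping lock on RT,
so the per-CPU data stays serialized without hard-disabling interrupts. A
minimal sketch of that pattern, using only primitives that appear in this
queue; the cache structure and function names are hypothetical:

	#include <linux/locallock.h>
	#include <linux/percpu.h>

	struct example_cache {			/* hypothetical per-CPU cache */
		void *frag;
	};

	static DEFINE_PER_CPU(struct example_cache, example_cache);
	static DEFINE_LOCAL_IRQ_LOCK(example_lock);

	static void *example_alloc_frag(void)
	{
		struct example_cache *nc;
		unsigned long flags;
		void *data;

		/* was: local_irq_save(flags); */
		local_lock_irqsave(example_lock, flags);
		nc = &__get_cpu_var(example_cache);
		data = nc->frag;		/* touch per-CPU state */
		/* was: local_irq_restore(flags); */
		local_unlock_irqrestore(example_lock, flags);

		return data;
	}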
diff --git a/patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch b/patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch
new file mode 100644
index 0000000..4234945
--- /dev/null
+++ b/patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch
@@ -0,0 +1,94 @@
+Subject: net: netfilter: Serialize xt_write_recseq sections on RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 28 Oct 2012 11:18:08 +0100
+
+The netfilter code relies only on the implicit semantics of
+local_bh_disable() for serializing xt_write_recseq sections. RT breaks
+that and needs explicit serialization here.
+
+Reported-by: Peter LaDow <petela@gocougs.wsu.edu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/locallock.h          |    4 ++++
+ include/linux/netfilter/x_tables.h |    7 +++++++
+ net/netfilter/core.c               |    6 ++++++
+ 3 files changed, 17 insertions(+)
+
+--- a/include/linux/locallock.h
++++ b/include/linux/locallock.h
+@@ -25,6 +25,9 @@ struct local_irq_lock {
+ 	DEFINE_PER_CPU(struct local_irq_lock, lvar) = {			\
+ 		.lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
+ 
++#define DECLARE_LOCAL_IRQ_LOCK(lvar)					\
++	DECLARE_PER_CPU(struct local_irq_lock, lvar)
++
+ #define local_irq_lock_init(lvar)					\
+ 	do {								\
+ 		int __cpu;						\
+@@ -220,6 +223,7 @@ static inline int __local_unlock_irqrest
+ #else /* PREEMPT_RT_BASE */
+ 
+ #define DEFINE_LOCAL_IRQ_LOCK(lvar)		__typeof__(const int) lvar
++#define DECLARE_LOCAL_IRQ_LOCK(lvar)		extern __typeof__(const int) lvar
+ 
+ static inline void local_irq_lock_init(int lvar) { }
+ 
+--- a/include/linux/netfilter/x_tables.h
++++ b/include/linux/netfilter/x_tables.h
+@@ -3,6 +3,7 @@
+ 
+ 
+ #include <linux/netdevice.h>
++#include <linux/locallock.h>
+ #include <uapi/linux/netfilter/x_tables.h>
+ 
+ /**
+@@ -284,6 +285,8 @@ extern void xt_free_table_info(struct xt
+  */
+ DECLARE_PER_CPU(seqcount_t, xt_recseq);
+ 
++DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
++
+ /**
+  * xt_write_recseq_begin - start of a write section
+  *
+@@ -298,6 +301,9 @@ static inline unsigned int xt_write_recs
+ {
+ 	unsigned int addend;
+ 
++	/* RT protection */
++	local_lock(xt_write_lock);
++
+ 	/*
+ 	 * Low order bit of sequence is set if we already
+ 	 * called xt_write_recseq_begin().
+@@ -328,6 +334,7 @@ static inline void xt_write_recseq_end(u
+ 	/* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
+ 	smp_wmb();
+ 	__this_cpu_add(xt_recseq.sequence, addend);
++	local_unlock(xt_write_lock);
+ }
+ 
+ /*
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -20,11 +20,17 @@
+ #include <linux/proc_fs.h>
+ #include <linux/mutex.h>
+ #include <linux/slab.h>
++#include <linux/locallock.h>
+ #include <net/net_namespace.h>
+ #include <net/sock.h>
+ 
+ #include "nf_internals.h"
+ 
++#ifdef CONFIG_PREEMPT_RT_BASE
++DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
++EXPORT_PER_CPU_SYMBOL(xt_write_lock);
++#endif
++
+ static DEFINE_MUTEX(afinfo_mutex);
+ 
+ const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
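The callers of these helpers stay unchanged: a ruleset traversal still
brackets its counter updates with xt_write_recseq_begin()/_end(), and on RT
the local lock taken inside the helpers now provides the serialization that
local_bh_disable() alone no longer guarantees. Roughly the caller-side
pattern, sketched here rather than copied from the tree:

	#include <linux/netfilter/x_tables.h>

	static void example_table_walk(void)	/* hypothetical caller */
	{
		unsigned int addend;

		local_bh_disable();
		/* on RT this also takes xt_write_lock via local_lock() */
		addend = xt_write_recseq_begin();

		/* ... walk the ruleset and update per-CPU counters ... */

		xt_write_recseq_end(addend);
		local_bh_enable();
	}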
diff --git a/patches/net-flip-lock-dep-thingy.patch b/patches/net-flip-lock-dep-thingy.patch
new file mode 100644
index 0000000..f4fed61
--- /dev/null
+++ b/patches/net-flip-lock-dep-thingy.patch
@@ -0,0 +1,111 @@
+Subject: net-flip-lock-dep-thingy.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 10:59:58 +0200
+
+=======================================================
+[ INFO: possible circular locking dependency detected ]
+3.0.0-rc3+ #26
+-------------------------------------------------------
+ip/1104 is trying to acquire lock:
+ (local_softirq_lock){+.+...}, at: [<ffffffff81056d12>] __local_lock+0x25/0x68
+
+but task is already holding lock:
+ (sk_lock-AF_INET){+.+...}, at: [<ffffffff81433308>] lock_sock+0x10/0x12
+
+which lock already depends on the new lock.
+
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (sk_lock-AF_INET){+.+...}:
+       [<ffffffff810836e5>] lock_acquire+0x103/0x12e
+       [<ffffffff813e2781>] lock_sock_nested+0x82/0x92
+       [<ffffffff81433308>] lock_sock+0x10/0x12
+       [<ffffffff81433afa>] tcp_close+0x1b/0x355
+       [<ffffffff81453c99>] inet_release+0xc3/0xcd
+       [<ffffffff813dff3f>] sock_release+0x1f/0x74
+       [<ffffffff813dffbb>] sock_close+0x27/0x2b
+       [<ffffffff81129c63>] fput+0x11d/0x1e3
+       [<ffffffff81126577>] filp_close+0x70/0x7b
+       [<ffffffff8112667a>] sys_close+0xf8/0x13d
+       [<ffffffff814ae882>] system_call_fastpath+0x16/0x1b
+
+-> #0 (local_softirq_lock){+.+...}:
+       [<ffffffff81082ecc>] __lock_acquire+0xacc/0xdc8
+       [<ffffffff810836e5>] lock_acquire+0x103/0x12e
+       [<ffffffff814a7e40>] _raw_spin_lock+0x3b/0x4a
+       [<ffffffff81056d12>] __local_lock+0x25/0x68
+       [<ffffffff81056d8b>] local_bh_disable+0x36/0x3b
+       [<ffffffff814a7fc4>] _raw_write_lock_bh+0x16/0x4f
+       [<ffffffff81433c38>] tcp_close+0x159/0x355
+       [<ffffffff81453c99>] inet_release+0xc3/0xcd
+       [<ffffffff813dff3f>] sock_release+0x1f/0x74
+       [<ffffffff813dffbb>] sock_close+0x27/0x2b
+       [<ffffffff81129c63>] fput+0x11d/0x1e3
+       [<ffffffff81126577>] filp_close+0x70/0x7b
+       [<ffffffff8112667a>] sys_close+0xf8/0x13d
+       [<ffffffff814ae882>] system_call_fastpath+0x16/0x1b
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(sk_lock-AF_INET);
+                               lock(local_softirq_lock);
+                               lock(sk_lock-AF_INET);
+  lock(local_softirq_lock);
+
+ *** DEADLOCK ***
+
+1 lock held by ip/1104:
+ #0:  (sk_lock-AF_INET){+.+...}, at: [<ffffffff81433308>] lock_sock+0x10/0x12
+
+stack backtrace:
+Pid: 1104, comm: ip Not tainted 3.0.0-rc3+ #26
+Call Trace:
+ [<ffffffff81081649>] print_circular_bug+0x1f8/0x209
+ [<ffffffff81082ecc>] __lock_acquire+0xacc/0xdc8
+ [<ffffffff81056d12>] ? __local_lock+0x25/0x68
+ [<ffffffff810836e5>] lock_acquire+0x103/0x12e
+ [<ffffffff81056d12>] ? __local_lock+0x25/0x68
+ [<ffffffff81046c75>] ? get_parent_ip+0x11/0x41
+ [<ffffffff814a7e40>] _raw_spin_lock+0x3b/0x4a
+ [<ffffffff81056d12>] ? __local_lock+0x25/0x68
+ [<ffffffff81046c8c>] ? get_parent_ip+0x28/0x41
+ [<ffffffff81056d12>] __local_lock+0x25/0x68
+ [<ffffffff81056d8b>] local_bh_disable+0x36/0x3b
+ [<ffffffff81433308>] ? lock_sock+0x10/0x12
+ [<ffffffff814a7fc4>] _raw_write_lock_bh+0x16/0x4f
+ [<ffffffff81433c38>] tcp_close+0x159/0x355
+ [<ffffffff81453c99>] inet_release+0xc3/0xcd
+ [<ffffffff813dff3f>] sock_release+0x1f/0x74
+ [<ffffffff813dffbb>] sock_close+0x27/0x2b
+ [<ffffffff81129c63>] fput+0x11d/0x1e3
+ [<ffffffff81126577>] filp_close+0x70/0x7b
+ [<ffffffff8112667a>] sys_close+0xf8/0x13d
+ [<ffffffff814ae882>] system_call_fastpath+0x16/0x1b
+
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/sock.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2287,12 +2287,11 @@ void lock_sock_nested(struct sock *sk, i
+ 	if (sk->sk_lock.owned)
+ 		__lock_sock(sk);
+ 	sk->sk_lock.owned = 1;
+-	spin_unlock(&sk->sk_lock.slock);
++	spin_unlock_bh(&sk->sk_lock.slock);
+ 	/*
+ 	 * The sk_lock has mutex_lock() semantics here:
+ 	 */
+ 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+-	local_bh_enable();
+ }
+ EXPORT_SYMBOL(lock_sock_nested);
+ 
diff --git a/patches/net-make-devnet_rename_seq-a-mutex.patch b/patches/net-make-devnet_rename_seq-a-mutex.patch
new file mode 100644
index 0000000..f4f675a
--- /dev/null
+++ b/patches/net-make-devnet_rename_seq-a-mutex.patch
@@ -0,0 +1,150 @@
+From b83c29516a334722b4c38dbc2bc3caf58ce46b88 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 20 Mar 2013 18:06:20 +0100
+Subject: [PATCH] net: make devnet_rename_seq a mutex
+
+On RT write_seqcount_begin() disables preemption, while device_rename()
+allocates memory with GFP_KERNEL and later grabs the sysfs_mutex.
+Since I don't see a reason why this can't be a mutex, make it one. We
+probably don't have that many reads at the same time in the hot path.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/netdevice.h |    2 +-
+ net/core/dev.c            |   23 +++++++++++------------
+ net/core/sock.c           |    8 +++-----
+ 3 files changed, 15 insertions(+), 18 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1579,7 +1579,7 @@ extern int call_netdevice_notifiers(unsi
+ 
+ extern rwlock_t				dev_base_lock;		/* Device list lock */
+ 
+-extern seqcount_t	devnet_rename_seq;	/* Device rename seq */
++extern struct mutex devnet_rename_mutex;
+ 
+ 
+ #define for_each_netdev(net, d)		\
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -203,7 +203,7 @@ static struct list_head offload_base __r
+ DEFINE_RWLOCK(dev_base_lock);
+ EXPORT_SYMBOL(dev_base_lock);
+ 
+-seqcount_t devnet_rename_seq;
++DEFINE_MUTEX(devnet_rename_mutex);
+ 
+ static inline void dev_base_seq_inc(struct net *net)
+ {
+@@ -1093,10 +1093,11 @@ int dev_change_name(struct net_device *d
+ 	if (dev->flags & IFF_UP)
+ 		return -EBUSY;
+ 
+-	write_seqcount_begin(&devnet_rename_seq);
++
++	mutex_lock(&devnet_rename_mutex);
+ 
+ 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+-		write_seqcount_end(&devnet_rename_seq);
++		mutex_unlock(&devnet_rename_mutex);
+ 		return 0;
+ 	}
+ 
+@@ -1104,7 +1105,7 @@ int dev_change_name(struct net_device *d
+ 
+ 	err = dev_get_valid_name(net, dev, newname);
+ 	if (err < 0) {
+-		write_seqcount_end(&devnet_rename_seq);
++		mutex_unlock(&devnet_rename_mutex);
+ 		return err;
+ 	}
+ 
+@@ -1112,11 +1113,11 @@ rollback:
+ 	ret = device_rename(&dev->dev, dev->name);
+ 	if (ret) {
+ 		memcpy(dev->name, oldname, IFNAMSIZ);
+-		write_seqcount_end(&devnet_rename_seq);
++		mutex_unlock(&devnet_rename_mutex);
+ 		return ret;
+ 	}
+ 
+-	write_seqcount_end(&devnet_rename_seq);
++	mutex_unlock(&devnet_rename_mutex);
+ 
+ 	write_lock_bh(&dev_base_lock);
+ 	hlist_del_rcu(&dev->name_hlist);
+@@ -1135,7 +1136,7 @@ rollback:
+ 		/* err >= 0 after dev_alloc_name() or stores the first errno */
+ 		if (err >= 0) {
+ 			err = ret;
+-			write_seqcount_begin(&devnet_rename_seq);
++			mutex_lock(&devnet_rename_mutex);
+ 			memcpy(dev->name, oldname, IFNAMSIZ);
+ 			goto rollback;
+ 		} else {
+@@ -4214,7 +4215,6 @@ static int dev_ifname(struct net *net, s
+ {
+ 	struct net_device *dev;
+ 	struct ifreq ifr;
+-	unsigned seq;
+ 
+ 	/*
+ 	 *	Fetch the caller's info block.
+@@ -4223,19 +4223,18 @@ static int dev_ifname(struct net *net, s
+ 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ 		return -EFAULT;
+ 
+-retry:
+-	seq = read_seqcount_begin(&devnet_rename_seq);
++	mutex_lock(&devnet_rename_mutex);
+ 	rcu_read_lock();
+ 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
+ 	if (!dev) {
+ 		rcu_read_unlock();
++		mutex_unlock(&devnet_rename_mutex);
+ 		return -ENODEV;
+ 	}
+ 
+ 	strcpy(ifr.ifr_name, dev->name);
+ 	rcu_read_unlock();
+-	if (read_seqcount_retry(&devnet_rename_seq, seq))
+-		goto retry;
++	mutex_unlock(&devnet_rename_mutex);
+ 
+ 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ 		return -EFAULT;
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -571,7 +571,6 @@ static int sock_getbindtodevice(struct s
+ 	struct net *net = sock_net(sk);
+ 	struct net_device *dev;
+ 	char devname[IFNAMSIZ];
+-	unsigned seq;
+ 
+ 	if (sk->sk_bound_dev_if == 0) {
+ 		len = 0;
+@@ -582,20 +581,19 @@ static int sock_getbindtodevice(struct s
+ 	if (len < IFNAMSIZ)
+ 		goto out;
+ 
+-retry:
+-	seq = read_seqcount_begin(&devnet_rename_seq);
++	mutex_lock(&devnet_rename_mutex);
+ 	rcu_read_lock();
+ 	dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ 	ret = -ENODEV;
+ 	if (!dev) {
+ 		rcu_read_unlock();
++		mutex_unlock(&devnet_rename_mutex);
+ 		goto out;
+ 	}
+ 
+ 	strcpy(devname, dev->name);
+ 	rcu_read_unlock();
+-	if (read_seqcount_retry(&devnet_rename_seq, seq))
+-		goto retry;
++	mutex_unlock(&devnet_rename_mutex);
+ 
+ 	len = strlen(devname) + 1;
+ 
diff --git a/patches/net-netif-rx-ni-use-local-bh-disable.patch b/patches/net-netif-rx-ni-use-local-bh-disable.patch
new file mode 100644
index 0000000..5d96fc4
--- /dev/null
+++ b/patches/net-netif-rx-ni-use-local-bh-disable.patch
@@ -0,0 +1,31 @@
+Subject: net: Use local_bh_disable in netif_rx_ni()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 28 Oct 2012 15:12:49 +0000
+
+This code triggers the new WARN in __raise_softirq_irqsoff(): it does
+look at the softirq pending bit and calls into the softirq code, but
+that does not fit well with the context-related softirq model of RT.
+It is correct on mainline, but going through local_bh_disable/enable
+here is not going to hurt badly.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/dev.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3118,11 +3118,9 @@ int netif_rx_ni(struct sk_buff *skb)
+ {
+ 	int err;
+ 
+-	migrate_disable();
++	local_bh_disable();
+ 	err = netif_rx(skb);
+-	if (local_softirq_pending())
+-		thread_do_softirq();
+-	migrate_enable();
++	local_bh_enable();
+ 
+ 	return err;
+ }
diff --git a/patches/net-netif_rx_ni-migrate-disable.patch b/patches/net-netif_rx_ni-migrate-disable.patch
new file mode 100644
index 0000000..b5b5e75
--- /dev/null
+++ b/patches/net-netif_rx_ni-migrate-disable.patch
@@ -0,0 +1,25 @@
+Subject: net-netif_rx_ni-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 16:29:27 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/dev.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3115,11 +3115,11 @@ int netif_rx_ni(struct sk_buff *skb)
+ {
+ 	int err;
+ 
+-	preempt_disable();
++	migrate_disable();
+ 	err = netif_rx(skb);
+ 	if (local_softirq_pending())
+ 		thread_do_softirq();
+-	preempt_enable();
++	migrate_enable();
+ 
+ 	return err;
+ }
diff --git a/patches/net-tx-action-avoid-livelock-on-rt.patch b/patches/net-tx-action-avoid-livelock-on-rt.patch
new file mode 100644
index 0000000..92a988a
--- /dev/null
+++ b/patches/net-tx-action-avoid-livelock-on-rt.patch
@@ -0,0 +1,92 @@
+Subject: net: Avoid livelock in net_tx_action() on RT
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Thu, 06 Oct 2011 10:48:39 -0400
+
+qdisc_lock is taken w/o disabling interrupts or bottom halves. So code
+holding a qdisc_lock() can be interrupted and softirqs can run on
+return from interrupt in !RT.
+
+The spin_trylock() in net_tx_action() makes sure, that the softirq
+does not deadlock. When the lock can't be acquired q is requeued and
+the NET_TX softirq is raised. That causes the softirq to run over and
+over.
+
+That works in mainline as do_softirq() has a retry loop limit and
+leaves the softirq processing in the interrupt return path and
+schedules ksoftirqd. The task which holds qdisc_lock cannot be
+preempted, so the lock is released and either ksoftirqd or the next
+softirq in the return from interrupt path can proceed. Though it's a
+bit strange to actually run MAX_SOFTIRQ_RESTART (10) loops before it
+decides to bail out even if it's clear in the first iteration :)
+
+On RT all softirq processing is done in a FIFO thread and we don't
+have a loop limit, so ksoftirqd preempts the lock holder forever and
+unqueues and requeues until the reset button is hit.
+
+Due to the forced threading of ksoftirqd on RT we actually cannot
+deadlock on qdisc_lock because it's a "sleeping lock". So it's safe to
+replace the spin_trylock() with a spin_lock(). When contended,
+ksoftirqd is scheduled out and the lock holder can proceed.
+
+[ tglx: Massaged changelog and code comments ]
+
+Solved-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Tested-by: Carsten Emde <cbe@osadl.org>
+Cc: Clark Williams <williams@redhat.com>
+Cc: John Kacur <jkacur@redhat.com>
+Cc: Luis Claudio R. Goncalves <lclaudio@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ net/core/dev.c |   32 +++++++++++++++++++++++++++++++-
+ 1 file changed, 31 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3125,6 +3125,36 @@ int netif_rx_ni(struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL(netif_rx_ni);
+ 
++#ifdef CONFIG_PREEMPT_RT_FULL
++/*
++ * RT runs ksoftirqd as a real time thread and the root_lock is a
++ * "sleeping spinlock". If the trylock fails then we can go into an
++ * infinite loop when ksoftirqd preempted the task which actually
++ * holds the lock, because we requeue q and raise NET_TX softirq
++ * causing ksoftirqd to loop forever.
++ *
++ * It's safe to use spin_lock on RT here as softirqs run in thread
++ * context and cannot deadlock against the thread which is holding
++ * root_lock.
++ *
++ * On !RT the trylock might fail, but there we bail out from the
++ * softirq loop after 10 attempts which we can't do on RT. And the
++ * task holding root_lock cannot be preempted, so the only downside of
++ * that trylock is that we need 10 loops to decide that we should have
++ * given up in the first one :)
++ */
++static inline int take_root_lock(spinlock_t *lock)
++{
++	spin_lock(lock);
++	return 1;
++}
++#else
++static inline int take_root_lock(spinlock_t *lock)
++{
++	return spin_trylock(lock);
++}
++#endif
++
+ static void net_tx_action(struct softirq_action *h)
+ {
+ 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+@@ -3163,7 +3193,7 @@ static void net_tx_action(struct softirq
+ 			head = head->next_sched;
+ 
+ 			root_lock = qdisc_lock(q);
+-			if (spin_trylock(root_lock)) {
++			if (take_root_lock(root_lock)) {
+ 				smp_mb__before_clear_bit();
+ 				clear_bit(__QDISC_STATE_SCHED,
+ 					  &q->state);
diff --git a/patches/net-use-cpu-chill.patch b/patches/net-use-cpu-chill.patch
new file mode 100644
index 0000000..d0451fb
--- /dev/null
+++ b/patches/net-use-cpu-chill.patch
@@ -0,0 +1,62 @@
+Subject: net: Use cpu_chill() instead of cpu_relax()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 07 Mar 2012 21:10:04 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ net/packet/af_packet.c |    5 +++--
+ net/rds/ib_rdma.c      |    3 ++-
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -88,6 +88,7 @@
+ #include <linux/virtio_net.h>
+ #include <linux/errqueue.h>
+ #include <linux/net_tstamp.h>
++#include <linux/delay.h>
+ 
+ #ifdef CONFIG_INET
+ #include <net/inet_common.h>
+@@ -553,7 +554,7 @@ static void prb_retire_rx_blk_timer_expi
+ 	if (BLOCK_NUM_PKTS(pbd)) {
+ 		while (atomic_read(&pkc->blk_fill_in_prog)) {
+ 			/* Waiting for skb_copy_bits to finish... */
+-			cpu_relax();
++			cpu_chill();
+ 		}
+ 	}
+ 
+@@ -807,7 +808,7 @@ static void prb_retire_current_block(str
+ 		if (!(status & TP_STATUS_BLK_TMO)) {
+ 			while (atomic_read(&pkc->blk_fill_in_prog)) {
+ 				/* Waiting for skb_copy_bits to finish... */
+-				cpu_relax();
++				cpu_chill();
+ 			}
+ 		}
+ 		prb_close_block(pkc, pbd, po, status);
+--- a/net/rds/ib_rdma.c
++++ b/net/rds/ib_rdma.c
+@@ -34,6 +34,7 @@
+ #include <linux/slab.h>
+ #include <linux/rculist.h>
+ #include <linux/llist.h>
++#include <linux/delay.h>
+ 
+ #include "rds.h"
+ #include "ib.h"
+@@ -286,7 +287,7 @@ static inline void wait_clean_list_grace
+ 	for_each_online_cpu(cpu) {
+ 		flag = &per_cpu(clean_list_grace, cpu);
+ 		while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
+-			cpu_relax();
++			cpu_chill();
+ 	}
+ }
+ 
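For context, cpu_chill() is introduced by an earlier patch in this queue;
on RT it sleeps briefly instead of spinning, so the preempted updater can
run and the wait loops above can terminate. A sketch of the definition the
<linux/delay.h> include above is expected to provide (assuming it matches
that patch):

	#ifdef CONFIG_PREEMPT_RT_FULL
	/* sleep so the preempted writer gets CPU time and clears the flag */
	# define cpu_chill()	msleep(1)
	#else
	# define cpu_chill()	cpu_relax()
	#endif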
diff --git a/patches/net-use-cpu-light-in-ip-send-unicast-reply.patch b/patches/net-use-cpu-light-in-ip-send-unicast-reply.patch
new file mode 100644
index 0000000..c934904
--- /dev/null
+++ b/patches/net-use-cpu-light-in-ip-send-unicast-reply.patch
@@ -0,0 +1,30 @@
+Subject: net: Use get_cpu_light() in ip_send_unicast_reply()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 01 Oct 2012 17:12:35 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/ipv4/ip_output.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1508,7 +1508,8 @@ void ip_send_unicast_reply(struct net *n
+ 	if (IS_ERR(rt))
+ 		return;
+ 
+-	inet = &get_cpu_var(unicast_sock);
++	get_cpu_light();
++	inet = &__get_cpu_var(unicast_sock);
+ 
+ 	inet->tos = arg->tos;
+ 	sk = &inet->sk;
+@@ -1532,7 +1533,7 @@ void ip_send_unicast_reply(struct net *n
+ 		ip_push_pending_frames(sk, &fl4);
+ 	}
+ 
+-	put_cpu_var(unicast_sock);
++	put_cpu_light();
+ 
+ 	ip_rt_put(rt);
+ }
diff --git a/patches/net-wireless-warn-nort.patch b/patches/net-wireless-warn-nort.patch
new file mode 100644
index 0000000..005ecd7
--- /dev/null
+++ b/patches/net-wireless-warn-nort.patch
@@ -0,0 +1,20 @@
+Subject: net-wireless-warn-nort.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Jul 2011 21:05:33 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/mac80211/rx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -3144,7 +3144,7 @@ void ieee80211_rx(struct ieee80211_hw *h
+ 	struct ieee80211_supported_band *sband;
+ 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ 
+-	WARN_ON_ONCE(softirq_count() == 0);
++	WARN_ON_ONCE_NONRT(softirq_count() == 0);
+ 
+ 	if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
+ 		goto drop;
diff --git a/patches/ntp-make-ntp-lock-raw-sigh.patch b/patches/ntp-make-ntp-lock-raw-sigh.patch
new file mode 100644
index 0000000..a3ab0e7
--- /dev/null
+++ b/patches/ntp-make-ntp-lock-raw-sigh.patch
@@ -0,0 +1,125 @@
+Subject: ntp: Make ntp_lock raw.
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 10 Apr 2012 11:14:55 +0200
+
+This needs to be revisited. Not sure whether we can avoid making this
+lock raw, but I'd really like to.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/ntp.c |   26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/kernel/time/ntp.c
++++ b/kernel/time/ntp.c
+@@ -22,7 +22,7 @@
+  * NTP timekeeping variables:
+  */
+ 
+-DEFINE_SPINLOCK(ntp_lock);
++DEFINE_RAW_SPINLOCK(ntp_lock);
+ 
+ 
+ /* USER_HZ period (usecs): */
+@@ -347,7 +347,7 @@ void ntp_clear(void)
+ {
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&ntp_lock, flags);
++	raw_spin_lock_irqsave(&ntp_lock, flags);
+ 
+ 	time_adjust	= 0;		/* stop active adjtime() */
+ 	time_status	|= STA_UNSYNC;
+@@ -361,7 +361,7 @@ void ntp_clear(void)
+ 
+ 	/* Clear PPS state variables */
+ 	pps_clear();
+-	spin_unlock_irqrestore(&ntp_lock, flags);
++	raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ 
+ }
+ 
+@@ -371,9 +371,9 @@ u64 ntp_tick_length(void)
+ 	unsigned long flags;
+ 	s64 ret;
+ 
+-	spin_lock_irqsave(&ntp_lock, flags);
++	raw_spin_lock_irqsave(&ntp_lock, flags);
+ 	ret = tick_length;
+-	spin_unlock_irqrestore(&ntp_lock, flags);
++	raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ 	return ret;
+ }
+ 
+@@ -394,7 +394,7 @@ int second_overflow(unsigned long secs)
+ 	int leap = 0;
+ 	unsigned long flags;
+ 
+-	spin_lock_irqsave(&ntp_lock, flags);
++	raw_spin_lock_irqsave(&ntp_lock, flags);
+ 
+ 	/*
+ 	 * Leap second processing. If in leap-insert state at the end of the
+@@ -478,7 +478,7 @@ int second_overflow(unsigned long secs)
+ 	time_adjust = 0;
+ 
+ out:
+-	spin_unlock_irqrestore(&ntp_lock, flags);
++	raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ 
+ 	return leap;
+ }
+@@ -660,7 +660,7 @@ int do_adjtimex(struct timex *txc)
+ 
+ 	getnstimeofday(&ts);
+ 
+-	spin_lock_irq(&ntp_lock);
++	raw_spin_lock_irq(&ntp_lock);
+ 
+ 	if (txc->modes & ADJ_ADJTIME) {
+ 		long save_adjust = time_adjust;
+@@ -702,7 +702,7 @@ int do_adjtimex(struct timex *txc)
+ 	/* fill PPS status fields */
+ 	pps_fill_timex(txc);
+ 
+-	spin_unlock_irq(&ntp_lock);
++	raw_spin_unlock_irq(&ntp_lock);
+ 
+ 	txc->time.tv_sec = ts.tv_sec;
+ 	txc->time.tv_usec = ts.tv_nsec;
+@@ -900,7 +900,7 @@ void hardpps(const struct timespec *phas
+ 
+ 	pts_norm = pps_normalize_ts(*phase_ts);
+ 
+-	spin_lock_irqsave(&ntp_lock, flags);
++	raw_spin_lock_irqsave(&ntp_lock, flags);
+ 
+ 	/* clear the error bits, they will be set again if needed */
+ 	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
+@@ -913,7 +913,7 @@ void hardpps(const struct timespec *phas
+ 	 * just start the frequency interval */
+ 	if (unlikely(pps_fbase.tv_sec == 0)) {
+ 		pps_fbase = *raw_ts;
+-		spin_unlock_irqrestore(&ntp_lock, flags);
++		raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ 		return;
+ 	}
+ 
+@@ -928,7 +928,7 @@ void hardpps(const struct timespec *phas
+ 		time_status |= STA_PPSJITTER;
+ 		/* restart the frequency calibration interval */
+ 		pps_fbase = *raw_ts;
+-		spin_unlock_irqrestore(&ntp_lock, flags);
++		raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ 		pr_err("hardpps: PPSJITTER: bad pulse\n");
+ 		return;
+ 	}
+@@ -945,7 +945,7 @@ void hardpps(const struct timespec *phas
+ 
+ 	hardpps_update_phase(pts_norm.nsec);
+ 
+-	spin_unlock_irqrestore(&ntp_lock, flags);
++	raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ }
+ EXPORT_SYMBOL(hardpps);
+ 
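The reason the lock must be raw: on RT a plain spinlock_t is backed by a
sleeping rtmutex, and ntp_lock is taken from contexts such as
second_overflow() in the timekeeping path that must not sleep. A raw
spinlock keeps the mainline spinning behaviour. A minimal sketch of the
resulting pattern; the lock and function names are hypothetical:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(example_time_lock);

	static void example_time_update(void)
	{
		unsigned long flags;

		/* raw_spin_lock_* never sleeps, even with PREEMPT_RT_FULL */
		raw_spin_lock_irqsave(&example_time_lock, flags);
		/* ... update state that is also read from atomic context ... */
		raw_spin_unlock_irqrestore(&example_time_lock, flags);
	}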
diff --git a/patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch b/patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch
new file mode 100644
index 0000000..38caa00
--- /dev/null
+++ b/patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch
@@ -0,0 +1,449 @@
+Subject: OF: Convert devtree lock from rw_lock to raw spinlock
+From: Paul Gortmaker <paul.gortmaker@windriver.com>
+Date: Wed, 6 Feb 2013 15:30:56 -0500
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+With the locking cleanup in place (from "OF: Fixup resursive
+locking code paths"), we can now do the conversion from the
+rw_lock to a raw spinlock as required for preempt-rt.
+
+The previous cleanup and this conversion were originally
+separate since they predated when mainline got raw spinlock (in
+commit c2f21ce2e31286a "locking: Implement new raw_spinlock").
+
+So, at that point in time, the cleanup was considered plausible
+for mainline, but not this conversion.  In any case, we've kept
+them separate as it makes for easier review and better bisection.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Grant Likely <grant.likely@secretlab.ca>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: <devicetree-discuss@lists.ozlabs.org>
+Cc: Rob Herring <rob.herring@calxeda.com>
+Link: http://lkml.kernel.org/r/1360182656-15898-1-git-send-email-paul.gortmaker@windriver.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[PG: taken from preempt-rt, update subject & add a commit log]
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+---
+
+[v2: recent commit e81b329 ("powerpc+of: Add /proc device tree
+ updating to of node add/remove") added two more instances of
+ write_unlock that also needed converting to raw_spin_unlock.
+ Retested (boot) on sbc8548, defconfig builds on arm/sparc; no
+ new warnings observed.]
+
+ arch/sparc/kernel/prom_common.c |    4 -
+ drivers/of/base.c               |  100 ++++++++++++++++++++++------------------
+ include/linux/of.h              |    2 
+ 3 files changed, 59 insertions(+), 47 deletions(-)
+
+--- a/arch/sparc/kernel/prom_common.c
++++ b/arch/sparc/kernel/prom_common.c
+@@ -64,7 +64,7 @@ int of_set_property(struct device_node *
+ 	err = -ENODEV;
+ 
+ 	mutex_lock(&of_set_property_mutex);
+-	write_lock(&devtree_lock);
++	raw_spin_lock(&devtree_lock);
+ 	prevp = &dp->properties;
+ 	while (*prevp) {
+ 		struct property *prop = *prevp;
+@@ -91,7 +91,7 @@ int of_set_property(struct device_node *
+ 		}
+ 		prevp = &(*prevp)->next;
+ 	}
+-	write_unlock(&devtree_lock);
++	raw_spin_unlock(&devtree_lock);
+ 	mutex_unlock(&of_set_property_mutex);
+ 
+ 	/* XXX Upate procfs if necessary... */
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -55,7 +55,7 @@ static DEFINE_MUTEX(of_aliases_mutex);
+ /* use when traversing tree through the allnext, child, sibling,
+  * or parent members of struct device_node.
+  */
+-DEFINE_RWLOCK(devtree_lock);
++DEFINE_RAW_SPINLOCK(devtree_lock);
+ 
+ int of_n_addr_cells(struct device_node *np)
+ {
+@@ -188,10 +188,11 @@ struct property *of_find_property(const
+ 				  int *lenp)
+ {
+ 	struct property *pp;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	pp = __of_find_property(np, name, lenp);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 
+ 	return pp;
+ }
+@@ -209,13 +210,13 @@ struct device_node *of_find_all_nodes(st
+ {
+ 	struct device_node *np;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock(&devtree_lock);
+ 	np = prev ? prev->allnext : of_allnodes;
+ 	for (; np != NULL; np = np->allnext)
+ 		if (of_node_get(np))
+ 			break;
+ 	of_node_put(prev);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock(&devtree_lock);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_find_all_nodes);
+@@ -274,11 +275,12 @@ static int __of_device_is_compatible(con
+ int of_device_is_compatible(const struct device_node *device,
+ 		const char *compat)
+ {
++	unsigned long flags;
+ 	int res;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	res = __of_device_is_compatible(device, compat);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return res;
+ }
+ EXPORT_SYMBOL(of_device_is_compatible);
+@@ -340,13 +342,14 @@ EXPORT_SYMBOL(of_device_is_available);
+ struct device_node *of_get_parent(const struct device_node *node)
+ {
+ 	struct device_node *np;
++	unsigned long flags;
+ 
+ 	if (!node)
+ 		return NULL;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	np = of_node_get(node->parent);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_get_parent);
+@@ -365,14 +368,15 @@ EXPORT_SYMBOL(of_get_parent);
+ struct device_node *of_get_next_parent(struct device_node *node)
+ {
+ 	struct device_node *parent;
++	unsigned long flags;
+ 
+ 	if (!node)
+ 		return NULL;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	parent = of_node_get(node->parent);
+ 	of_node_put(node);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return parent;
+ }
+ 
+@@ -388,14 +392,15 @@ struct device_node *of_get_next_child(co
+ 	struct device_node *prev)
+ {
+ 	struct device_node *next;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	next = prev ? prev->sibling : node->child;
+ 	for (; next; next = next->sibling)
+ 		if (of_node_get(next))
+ 			break;
+ 	of_node_put(prev);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return next;
+ }
+ EXPORT_SYMBOL(of_get_next_child);
+@@ -413,7 +418,7 @@ struct device_node *of_get_next_availabl
+ {
+ 	struct device_node *next;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock(&devtree_lock);
+ 	next = prev ? prev->sibling : node->child;
+ 	for (; next; next = next->sibling) {
+ 		if (!of_device_is_available(next))
+@@ -422,7 +427,7 @@ struct device_node *of_get_next_availabl
+ 			break;
+ 	}
+ 	of_node_put(prev);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock(&devtree_lock);
+ 	return next;
+ }
+ EXPORT_SYMBOL(of_get_next_available_child);
+@@ -460,14 +465,15 @@ EXPORT_SYMBOL(of_get_child_by_name);
+ struct device_node *of_find_node_by_path(const char *path)
+ {
+ 	struct device_node *np = of_allnodes;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	for (; np; np = np->allnext) {
+ 		if (np->full_name && (of_node_cmp(np->full_name, path) == 0)
+ 		    && of_node_get(np))
+ 			break;
+ 	}
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_path);
+@@ -487,15 +493,16 @@ struct device_node *of_find_node_by_name
+ 	const char *name)
+ {
+ 	struct device_node *np;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	np = from ? from->allnext : of_allnodes;
+ 	for (; np; np = np->allnext)
+ 		if (np->name && (of_node_cmp(np->name, name) == 0)
+ 		    && of_node_get(np))
+ 			break;
+ 	of_node_put(from);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_name);
+@@ -516,15 +523,16 @@ struct device_node *of_find_node_by_type
+ 	const char *type)
+ {
+ 	struct device_node *np;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	np = from ? from->allnext : of_allnodes;
+ 	for (; np; np = np->allnext)
+ 		if (np->type && (of_node_cmp(np->type, type) == 0)
+ 		    && of_node_get(np))
+ 			break;
+ 	of_node_put(from);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_type);
+@@ -547,8 +555,9 @@ struct device_node *of_find_compatible_n
+ 	const char *type, const char *compatible)
+ {
+ 	struct device_node *np;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	np = from ? from->allnext : of_allnodes;
+ 	for (; np; np = np->allnext) {
+ 		if (type
+@@ -559,7 +568,7 @@ struct device_node *of_find_compatible_n
+ 			break;
+ 	}
+ 	of_node_put(from);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_find_compatible_node);
+@@ -581,8 +590,9 @@ struct device_node *of_find_node_with_pr
+ {
+ 	struct device_node *np;
+ 	struct property *pp;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	np = from ? from->allnext : of_allnodes;
+ 	for (; np; np = np->allnext) {
+ 		for (pp = np->properties; pp; pp = pp->next) {
+@@ -594,7 +604,7 @@ struct device_node *of_find_node_with_pr
+ 	}
+ out:
+ 	of_node_put(from);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return np;
+ }
+ EXPORT_SYMBOL(of_find_node_with_property);
+@@ -635,10 +645,11 @@ const struct of_device_id *of_match_node
+ 					 const struct device_node *node)
+ {
+ 	const struct of_device_id *match;
++	unsigned long flags;
+ 
+-	read_lock(&devtree_lock);
++	raw_spin_lock_irqsave(&devtree_lock, flags);
+ 	match = __of_match_node(matches, node);
+-	read_unlock(&devtree_lock);
++	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ 	return match;
+ }
+ EXPORT_SYMBOL(of_match_node);
+@@ -661,11 +672,12 @@ struct device_node *of_find_matching_nod