Merge tag 'armsoc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc

Pull ARM SoC fixes from Kevin Hilman:
 "A fairly random colletion of fixes based on -rc1 for OMAP, sunxi and
  prima2 as well as a few arm64-specific DT fixes.

  This series also includes a late to support a new Allwinner (sunxi)
  SoC, but since it's rather simple and isolated to the
  platform-specific code, it's included it for this -rc"

* tag 'armsoc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc:
  arm64: dts: add device tree for ARM SMM-A53x2 on LogicTile Express 20MG
  arm: dts: vexpress: add missing CCI PMU device node to TC2
  arm: dts: vexpress: describe all PMUs in TC2 dts
  GICv3: Add ITS entry to THUNDER dts
  arm64: dts: Add poweroff button device node for APM X-Gene platform
  ARM: dts: am4372.dtsi: disable rfbi
  ARM: dts: am57xx-beagle-x15: Provide supply for usb2_phy2
  ARM: dts: am4372: Add emif node
  Revert "ARM: dts: am335x-boneblack: disable RTC-only sleep"
  ARM: sunxi: Enable simplefb in the defconfig
  ARM: Remove deprecated symbol from defconfig files
  ARM: sunxi: Add Machine support for A33
  ARM: sunxi: Introduce Allwinner H3 support
  Documentation: sunxi: Update Allwinner SoC documentation
  ARM: prima2: move to use REGMAP APIs for rtciobrg
  ARM: dts: atlas7: add pinctrl and gpio descriptions
  ARM: OMAP2+: Remove unnessary return statement from the void function, omap2_show_dma_caps
  memory: omap-gpmc: Fix parsing of devices
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index f732a83..8cc17ca 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -410,8 +410,17 @@
 
 Q: Can I suspend-to-disk using a swap partition under LVM?
 
-A: No. You can suspend successfully, but you'll not be able to
-resume. uswsusp should be able to work with LVM. See suspend.sf.net.
+A: Yes and No.  You can suspend successfully, but the kernel will not be able
+to resume on its own.  You need an initramfs that can recognize the resume
+situation, activate the logical volume containing the swap volume (but not
+touch any filesystems!), and eventually call
+
+echo -n "$major:$minor" > /sys/power/resume
+
+where $major and $minor are the respective major and minor device numbers of
+the swap volume.
+
+uswsusp works with LVM, too.  See http://suspend.sourceforge.net/
 
 Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
 compiled with the similar configuration files. Anyway I found that
diff --git a/MAINTAINERS b/MAINTAINERS
index 3729a4a..b3ea3b6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2563,19 +2563,31 @@
 F:	arch/powerpc/oprofile/*cell*
 F:	arch/powerpc/platforms/cell/
 
-CEPH DISTRIBUTED FILE SYSTEM CLIENT
+CEPH COMMON CODE (LIBCEPH)
+M:	Ilya Dryomov <idryomov@gmail.com>
 M:	"Yan, Zheng" <zyan@redhat.com>
 M:	Sage Weil <sage@redhat.com>
 L:	ceph-devel@vger.kernel.org
 W:	http://ceph.com/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
 S:	Supported
-F:	Documentation/filesystems/ceph.txt
-F:	fs/ceph/
 F:	net/ceph/
 F:	include/linux/ceph/
 F:	include/linux/crush/
 
+CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
+M:	"Yan, Zheng" <zyan@redhat.com>
+M:	Sage Weil <sage@redhat.com>
+M:	Ilya Dryomov <idryomov@gmail.com>
+L:	ceph-devel@vger.kernel.org
+W:	http://ceph.com/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
+S:	Supported
+F:	Documentation/filesystems/ceph.txt
+F:	fs/ceph/
+
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 L:	linux-usb@vger.kernel.org
 S:	Orphan
@@ -6162,7 +6174,7 @@
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@lists.ozlabs.org
 Q:	http://patchwork.ozlabs.org/project/linuxppc-dev/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
 S:	Supported
 F:	Documentation/powerpc/
 F:	arch/powerpc/
@@ -8367,10 +8379,12 @@
 M:	Ilya Dryomov <idryomov@gmail.com>
 M:	Sage Weil <sage@redhat.com>
 M:	Alex Elder <elder@kernel.org>
-M:	ceph-devel@vger.kernel.org
+L:	ceph-devel@vger.kernel.org
 W:	http://ceph.com/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
 S:	Supported
+F:	Documentation/ABI/testing/sysfs-bus-rbd
 F:	drivers/block/rbd.c
 F:	drivers/block/rbd_types.h
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a750c14..1c50210 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1693,6 +1693,12 @@
 config HIGHPTE
 	bool "Allocate 2nd-level pagetables from highmem"
 	depends on HIGHMEM
+	help
+	  The VM uses one page of physical memory for each page table.
+	  For systems with a lot of processes, this can use a lot of
+	  precious low memory, eventually leading to low memory being
+	  consumed by page tables.  Setting this option will allow
+	  user-space 2nd level page tables to reside in high memory.
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index f1b1579..a2e16f9 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1635,7 +1635,7 @@
 
 config DEBUG_SET_MODULE_RONX
 	bool "Set loadable kernel module data as NX and text as RO"
-	depends on MODULES
+	depends on MODULES && MMU
 	---help---
 	  This option helps catch unintended modifications to loadable
 	  kernel module's text and read-only data. It also prevents execution
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1c3938f..4859820 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -140,16 +140,11 @@
  * The _caller variety takes a __builtin_return_address(0) value for
  * /proc/vmalloc to use - and should only be used in non-inline functions.
  */
-extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long,
-	size_t, unsigned int, void *);
 extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
 	void *);
-
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int);
 extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
 extern void __iounmap(volatile void __iomem *addr);
-extern void __arm_iounmap(volatile void __iomem *addr);
 
 extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 	unsigned int, void *);
@@ -321,21 +316,24 @@
 static inline void memset_io(volatile void __iomem *dst, unsigned c,
 	size_t count)
 {
-	memset((void __force *)dst, c, count);
+	extern void mmioset(void *, unsigned int, size_t);
+	mmioset((void __force *)dst, c, count);
 }
 #define memset_io(dst,c,count) memset_io(dst,c,count)
 
 static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
 	size_t count)
 {
-	memcpy(to, (const void __force *)from, count);
+	extern void mmiocpy(void *, const void *, size_t);
+	mmiocpy(to, (const void __force *)from, count);
 }
 #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
 
 static inline void memcpy_toio(volatile void __iomem *to, const void *from,
 	size_t count)
 {
-	memcpy((void __force *)to, from, count);
+	extern void mmiocpy(void *, const void *, size_t);
+	mmiocpy((void __force *)to, from, count);
 }
 #define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
 
@@ -348,18 +346,61 @@
 #endif	/* readl */
 
 /*
- * ioremap and friends.
+ * ioremap() and friends.
  *
- * ioremap takes a PCI memory address, as specified in
- * Documentation/io-mapping.txt.
+ * ioremap() takes a resource address, and size.  Due to the ARM memory
+ * types, it is important to use the correct ioremap() function as each
+ * mapping has specific properties.
  *
+ * Function		Memory type	Cacheability	Cache hint
+ * ioremap()		Device		n/a		n/a
+ * ioremap_nocache()	Device		n/a		n/a
+ * ioremap_cache()	Normal		Writeback	Read allocate
+ * ioremap_wc()		Normal		Non-cacheable	n/a
+ * ioremap_wt()		Normal		Non-cacheable	n/a
+ *
+ * All device mappings have the following properties:
+ * - no access speculation
+ * - no repetition (eg, on return from an exception)
+ * - number, order and size of accesses are maintained
+ * - unaligned accesses are "unpredictable"
+ * - writes may be delayed before they hit the endpoint device
+ *
+ * ioremap_nocache() is the same as ioremap() as there are too many device
+ * drivers using this for device registers, and documentation which tells
+ * people to use it for such for this to be any different.  This is not a
+ * safe fallback for memory-like mappings, or memory regions where the
+ * compiler may generate unaligned accesses - eg, via inlining its own
+ * memcpy.
+ *
+ * All normal memory mappings have the following properties:
+ * - reads can be repeated with no side effects
+ * - repeated reads return the last value written
+ * - reads can fetch additional locations without side effects
+ * - writes can be repeated (in certain cases) with no side effects
+ * - writes can be merged before accessing the target
+ * - unaligned accesses can be supported
+ * - ordering is not guaranteed without explicit dependencies or barrier
+ *   instructions
+ * - writes may be delayed before they hit the endpoint memory
+ *
+ * The cache hint is only a performance hint: CPUs may alias these hints.
+ * Eg, a CPU not implementing read allocate but implementing write allocate
+ * will provide a write allocate mapping instead.
  */
-#define ioremap(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_nocache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_cache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
-#define ioremap_wc(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE_WC)
-#define ioremap_wt(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
-#define iounmap				__arm_iounmap
+void __iomem *ioremap(resource_size_t res_cookie, size_t size);
+#define ioremap ioremap
+#define ioremap_nocache ioremap
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size);
+#define ioremap_cache ioremap_cache
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
+#define ioremap_wc ioremap_wc
+#define ioremap_wt ioremap_wc
+
+void iounmap(volatile void __iomem *iomem_cookie);
+#define iounmap iounmap
 
 /*
  * io{read,write}{16,32}be() macros
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 3a72d69..6f225ac 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -275,7 +275,7 @@
  */
 #define __pa(x)			__virt_to_phys((unsigned long)(x))
 #define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))
-#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)	__va((phys_addr_t)(pfn) << PAGE_SHIFT)
 
 extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);
 
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index bfd662e..aeddd28 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -129,7 +129,36 @@
 
 /*
  * These are the memory types, defined to be compatible with
- * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
+ * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B
+ * ARMv6+ without TEX remapping, they are a table index.
+ * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B
+ *
+ * MT type		Pre-ARMv6	ARMv6+ type / cacheable status
+ * UNCACHED		Uncached	Strongly ordered
+ * BUFFERABLE		Bufferable	Normal memory / non-cacheable
+ * WRITETHROUGH		Writethrough	Normal memory / write through
+ * WRITEBACK		Writeback	Normal memory / write back, read alloc
+ * MINICACHE		Minicache	N/A
+ * WRITEALLOC		Writeback	Normal memory / write back, write alloc
+ * DEV_SHARED		Uncached	Device memory (shared)
+ * DEV_NONSHARED	Uncached	Device memory (non-shared)
+ * DEV_WC		Bufferable	Normal memory / non-cacheable
+ * DEV_CACHED		Writeback	Normal memory / write back, read alloc
+ * VECTORS		Variable	Normal memory / variable
+ *
+ * All normal memory mappings have the following properties:
+ * - reads can be repeated with no side effects
+ * - repeated reads return the last value written
+ * - reads can fetch additional locations without side effects
+ * - writes can be repeated (in certain cases) with no side effects
+ * - writes can be merged before accessing the target
+ * - unaligned accesses can be supported
+ *
+ * All device mappings have the following properties:
+ * - no access speculation
+ * - no repetition (eg, on return from an exception)
+ * - number, order and size of accesses are maintained
+ * - unaligned accesses are "unpredictable"
  */
 #define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
 #define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index a88671c..5e5a51a 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -50,6 +50,9 @@
 
 extern void fpundefinstr(void);
 
+void mmioset(void *, unsigned int, size_t);
+void mmiocpy(void *, const void *, size_t);
+
 	/* platform dependent support */
 EXPORT_SYMBOL(arm_delay_ops);
 
@@ -88,6 +91,9 @@
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(__memzero);
 
+EXPORT_SYMBOL(mmioset);
+EXPORT_SYMBOL(mmiocpy);
+
 #ifdef CONFIG_MMU
 EXPORT_SYMBOL(copy_page);
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7dac308..cb4fb1e 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -410,7 +410,7 @@
 	zero_fp
 
 	.if	\trace
-#ifdef CONFIG_IRQSOFF_TRACER
+#ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
 	ct_user_exit save = 0
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 90dfbed..3d6b782 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -578,7 +578,7 @@
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	if ((unsigned)ipinr < NR_IPI) {
-		trace_ipi_entry(ipi_types[ipinr]);
+		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
 		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
 	}
 
@@ -637,7 +637,7 @@
 	}
 
 	if ((unsigned)ipinr < NR_IPI)
-		trace_ipi_exit(ipi_types[ipinr]);
+		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
 	set_irq_regs(old_regs);
 }
 
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 7797e81..64111bd 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -61,8 +61,10 @@
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
+ENTRY(mmiocpy)
 ENTRY(memcpy)
 
 #include "copy_template.S"
 
 ENDPROC(memcpy)
+ENDPROC(mmiocpy)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index a4ee97b..3c65e3b 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -16,6 +16,7 @@
 	.text
 	.align	5
 
+ENTRY(mmioset)
 ENTRY(memset)
 UNWIND( .fnstart         )
 	ands	r3, r0, #3		@ 1 unaligned?
@@ -133,3 +134,4 @@
 	b	1b
 UNWIND( .fnend   )
 ENDPROC(memset)
+ENDPROC(mmioset)
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index d1e5ad7..0c81056 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -255,7 +255,7 @@
 }
 #endif
 
-void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
+static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 	unsigned long offset, size_t size, unsigned int mtype, void *caller)
 {
 	const struct mem_type *type;
@@ -363,7 +363,7 @@
 		  unsigned int mtype)
 {
 	return __arm_ioremap_pfn_caller(pfn, offset, size, mtype,
-			__builtin_return_address(0));
+					__builtin_return_address(0));
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
@@ -371,13 +371,26 @@
 				      unsigned int, void *) =
 	__arm_ioremap_caller;
 
-void __iomem *
-__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype)
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
 {
-	return arch_ioremap_caller(phys_addr, size, mtype,
-		__builtin_return_address(0));
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE,
+				   __builtin_return_address(0));
 }
-EXPORT_SYMBOL(__arm_ioremap);
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
 
 /*
  * Remap an arbitrary physical address space into the kernel virtual
@@ -431,11 +444,11 @@
 
 void (*arch_iounmap)(volatile void __iomem *) = __iounmap;
 
-void __arm_iounmap(volatile void __iomem *io_addr)
+void iounmap(volatile void __iomem *cookie)
 {
-	arch_iounmap(io_addr);
+	arch_iounmap(cookie);
 }
-EXPORT_SYMBOL(__arm_iounmap);
+EXPORT_SYMBOL(iounmap);
 
 #ifdef CONFIG_PCI
 static int pci_ioremap_mem_type = MT_DEVICE;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6ca7d9a..870838a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1072,6 +1072,7 @@
 	int highmem = 0;
 	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
 	struct memblock_region *reg;
+	bool should_use_highmem = false;
 
 	for_each_memblock(memory, reg) {
 		phys_addr_t block_start = reg->base;
@@ -1090,6 +1091,7 @@
 				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
 					  &block_start, &block_end);
 				memblock_remove(reg->base, reg->size);
+				should_use_highmem = true;
 				continue;
 			}
 
@@ -1100,6 +1102,7 @@
 					  &block_start, &block_end, &vmalloc_limit);
 				memblock_remove(vmalloc_limit, overlap_size);
 				block_end = vmalloc_limit;
+				should_use_highmem = true;
 			}
 		}
 
@@ -1134,6 +1137,9 @@
 		}
 	}
 
+	if (should_use_highmem)
+		pr_notice("Consider using a HIGHMEM enabled kernel.\n");
+
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
 	/*
@@ -1494,6 +1500,7 @@
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
+	memblock_set_current_limit(arm_lowmem_limit);
 	dma_contiguous_remap();
 	devicemaps_init(mdesc);
 	kmap_init();
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index afd7e05..1dd1093 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -351,30 +351,43 @@
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
-void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset,
-			   size_t size, unsigned int mtype, void *caller)
-{
-	return __arm_ioremap_pfn(pfn, offset, size, mtype);
-}
-
-void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size,
-			    unsigned int mtype)
-{
-	return (void __iomem *)phys_addr;
-}
-EXPORT_SYMBOL(__arm_ioremap);
-
-void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
-
 void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
 				   unsigned int mtype, void *caller)
 {
-	return __arm_ioremap(phys_addr, size, mtype);
+	return (void __iomem *)phys_addr;
 }
 
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
+
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iounmap(volatile void __iomem *addr)
+{
+}
+EXPORT_SYMBOL(__iounmap);
+
 void (*arch_iounmap)(volatile void __iomem *);
 
-void __arm_iounmap(volatile void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
 {
 }
-EXPORT_SYMBOL(__arm_iounmap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
index 9005b07..aedec81 100644
--- a/arch/arm/vdso/vdsomunge.c
+++ b/arch/arm/vdso/vdsomunge.c
@@ -45,13 +45,11 @@
  * it does.
  */
 
-#define _GNU_SOURCE
-
 #include <byteswap.h>
 #include <elf.h>
 #include <errno.h>
-#include <error.h>
 #include <fcntl.h>
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -82,11 +80,25 @@
 #define EF_ARM_ABI_FLOAT_HARD 0x400
 #endif
 
+static int failed;
+static const char *argv0;
 static const char *outfile;
 
+static void fail(const char *fmt, ...)
+{
+	va_list ap;
+
+	failed = 1;
+	fprintf(stderr, "%s: ", argv0);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
 static void cleanup(void)
 {
-	if (error_message_count > 0 && outfile != NULL)
+	if (failed && outfile != NULL)
 		unlink(outfile);
 }
 
@@ -119,68 +131,66 @@
 	int infd;
 
 	atexit(cleanup);
+	argv0 = argv[0];
 
 	if (argc != 3)
-		error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]);
+		fail("Usage: %s [infile] [outfile]\n", argv[0]);
 
 	infile = argv[1];
 	outfile = argv[2];
 
 	infd = open(infile, O_RDONLY);
 	if (infd < 0)
-		error(EXIT_FAILURE, errno, "Cannot open %s", infile);
+		fail("Cannot open %s: %s\n", infile, strerror(errno));
 
 	if (fstat(infd, &stat) != 0)
-		error(EXIT_FAILURE, errno, "Failed stat for %s", infile);
+		fail("Failed stat for %s: %s\n", infile, strerror(errno));
 
 	inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0);
 	if (inbuf == MAP_FAILED)
-		error(EXIT_FAILURE, errno, "Failed to map %s", infile);
+		fail("Failed to map %s: %s\n", infile, strerror(errno));
 
 	close(infd);
 
 	inhdr = inbuf;
 
 	if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0)
-		error(EXIT_FAILURE, 0, "Not an ELF file");
+		fail("Not an ELF file\n");
 
 	if (inhdr->e_ident[EI_CLASS] != ELFCLASS32)
-		error(EXIT_FAILURE, 0, "Unsupported ELF class");
+		fail("Unsupported ELF class\n");
 
 	swap = inhdr->e_ident[EI_DATA] != HOST_ORDER;
 
 	if (read_elf_half(inhdr->e_type, swap) != ET_DYN)
-		error(EXIT_FAILURE, 0, "Not a shared object");
+		fail("Not a shared object\n");
 
-	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) {
-		error(EXIT_FAILURE, 0, "Unsupported architecture %#x",
-		      inhdr->e_machine);
-	}
+	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM)
+		fail("Unsupported architecture %#x\n", inhdr->e_machine);
 
 	e_flags = read_elf_word(inhdr->e_flags, swap);
 
 	if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) {
-		error(EXIT_FAILURE, 0, "Unsupported EABI version %#x",
-		      EF_ARM_EABI_VERSION(e_flags));
+		fail("Unsupported EABI version %#x\n",
+		     EF_ARM_EABI_VERSION(e_flags));
 	}
 
 	if (e_flags & EF_ARM_ABI_FLOAT_HARD)
-		error(EXIT_FAILURE, 0,
-		      "Unexpected hard-float flag set in e_flags");
+		fail("Unexpected hard-float flag set in e_flags\n");
 
 	clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT);
 
 	outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
 	if (outfd < 0)
-		error(EXIT_FAILURE, errno, "Cannot open %s", outfile);
+		fail("Cannot open %s: %s\n", outfile, strerror(errno));
 
 	if (ftruncate(outfd, stat.st_size) != 0)
-		error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile);
+		fail("Cannot truncate %s: %s\n", outfile, strerror(errno));
 
 	outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 		      outfd, 0);
 	if (outbuf == MAP_FAILED)
-		error(EXIT_FAILURE, errno, "Failed to map %s", outfile);
+		fail("Failed to map %s: %s\n", outfile, strerror(errno));
 
 	close(outfd);
 
@@ -195,7 +205,7 @@
 	}
 
 	if (msync(outbuf, stat.st_size, MS_SYNC) != 0)
-		error(EXIT_FAILURE, errno, "Failed to sync %s", outfile);
+		fail("Failed to sync %s: %s\n", outfile, strerror(errno));
 
 	return EXIT_SUCCESS;
 }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0f6edb1..318175f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,9 +23,9 @@
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select EDAC_SUPPORT
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS
+	select EDAC_SUPPORT
 	select GENERIC_ALLOCATOR
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f38c94f..4e17e7e 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -83,6 +83,7 @@
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_CEVA=y
 CONFIG_AHCI_XGENE=y
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 39248d3..406485e 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -19,6 +19,14 @@
 #include <asm/psci.h>
 #include <asm/smp_plat.h>
 
+/* Macros for consistency checks of the GICC subtable of MADT */
+#define ACPI_MADT_GICC_LENGTH	\
+	(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
+
+#define BAD_MADT_GICC_ENTRY(entry, end)						\
+	(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||	\
+	 (entry)->header.length != ACPI_MADT_GICC_LENGTH)
+
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
 /* ACPI table mapping after acpi_gbl_permanent_mmap is set */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a7691a3..f860bfd 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -352,8 +352,8 @@
 	// TODO: add support for undefined instructions in kernel mode
 	enable_dbg
 	mov	x0, sp
+	mov	x2, x1
 	mov	x1, #BAD_SYNC
-	mrs	x2, esr_el1
 	b	bad_mode
 ENDPROC(el1_sync)
 
@@ -553,7 +553,7 @@
 	ct_user_exit
 	mov	x0, sp
 	mov	x1, #BAD_SYNC
-	mrs	x2, esr_el1
+	mov	x2, x25
 	bl	bad_mode
 	b	ret_to_user
 ENDPROC(el0_sync)
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
index bd9bfaa..f332d5d 100644
--- a/arch/arm64/kernel/entry32.S
+++ b/arch/arm64/kernel/entry32.S
@@ -32,13 +32,11 @@
 
 ENTRY(compat_sys_sigreturn_wrapper)
 	mov	x0, sp
-	mov	x27, #0		// prevent syscall restart handling (why)
 	b	compat_sys_sigreturn
 ENDPROC(compat_sys_sigreturn_wrapper)
 
 ENTRY(compat_sys_rt_sigreturn_wrapper)
 	mov	x0, sp
-	mov	x27, #0		// prevent syscall restart handling (why)
 	b	compat_sys_rt_sigreturn
 ENDPROC(compat_sys_rt_sigreturn_wrapper)
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 695801a..50fb469 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -438,7 +438,7 @@
 	struct acpi_madt_generic_interrupt *processor;
 
 	processor = (struct acpi_madt_generic_interrupt *)header;
-	if (BAD_MADT_ENTRY(processor, end))
+	if (BAD_MADT_GICC_ENTRY(processor, end))
 		return -EINVAL;
 
 	acpi_table_print_madt_entry(header);
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 9d84feb..773d37a 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,5 +4,3 @@
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
-
-CFLAGS_mmu.o			:= -I$(srctree)/scripts/dtc/libfdt/
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 0a18375..f93c4a4 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-extern spinlock_t pa_dbit_lock;
+extern spinlock_t pa_tlb_lock;
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -33,6 +33,19 @@
  */
 #define kern_addr_valid(addr)	(1)
 
+/* Purge data and instruction TLB entries.  Must be called holding
+ * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+
+static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+{
+	mtsp(mm->context, 1);
+	pdtlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
+}
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -42,15 +55,20 @@
                 *(pteptr) = (pteval);                           \
         } while(0)
 
-extern void purge_tlb_entries(struct mm_struct *, unsigned long);
+#define pte_inserted(x)						\
+	((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED))		\
+	 == (_PAGE_PRESENT|_PAGE_ACCESSED))
 
-#define set_pte_at(mm, addr, ptep, pteval)                      \
-	do {                                                    \
+#define set_pte_at(mm, addr, ptep, pteval)			\
+	do {							\
+		pte_t old_pte;					\
 		unsigned long flags;				\
-		spin_lock_irqsave(&pa_dbit_lock, flags);	\
-		set_pte(ptep, pteval);                          \
-		purge_tlb_entries(mm, addr);                    \
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);	\
+		spin_lock_irqsave(&pa_tlb_lock, flags);		\
+		old_pte = *ptep;				\
+		set_pte(ptep, pteval);				\
+		if (pte_inserted(old_pte))			\
+			purge_tlb_entries(mm, addr);		\
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);	\
 	} while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -268,7 +286,7 @@
 
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
+#define pte_clear(mm, addr, xp)  set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -435,15 +453,15 @@
 	if (!pte_young(*ptep))
 		return 0;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);
 		return 0;
 	}
 	set_pte(ptep, pte_mkold(pte));
 	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 	return 1;
 }
 
@@ -453,11 +471,12 @@
 	pte_t old_pte;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	old_pte = *ptep;
-	pte_clear(mm,addr,ptep);
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	set_pte(ptep, __pte(0));
+	if (pte_inserted(old_pte))
+		purge_tlb_entries(mm, addr);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 
 	return old_pte;
 }
@@ -465,10 +484,10 @@
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	set_pte(ptep, pte_wrprotect(*ptep));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 9d086a5..e84b964 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -13,6 +13,9 @@
  * active at any one time on the Merced bus.  This tlb purge
  * synchronisation is fairly lightweight and harmless so we activate
  * it on all systems not just the N class.
+
+ * It is also used to ensure PTE updates are atomic and consistent
+ * with the TLB.
  */
 extern spinlock_t pa_tlb_lock;
 
@@ -24,20 +27,24 @@
 
 #define smp_flush_tlb_all()	flush_tlb_all()
 
+int __flush_tlb_range(unsigned long sid,
+	unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma, start, end) \
+	__flush_tlb_range((vma)->vm_mm->context, start, end)
+
+#define flush_tlb_kernel_range(start, end) \
+	__flush_tlb_range(0, start, end)
+
 /*
  * flush_tlb_mm()
  *
- * XXX This code is NOT valid for HP-UX compatibility processes,
- * (although it will probably work 99% of the time). HP-UX
- * processes are free to play with the space id's and save them
- * over long periods of time, etc. so we have to preserve the
- * space and just flush the entire tlb. We need to check the
- * personality in order to do that, but the personality is not
- * currently being set correctly.
- *
- * Of course, Linux processes could do the same thing, but
- * we don't support that (and the compilers, dynamic linker,
- * etc. do not do that).
+ * The code to switch to a new context is NOT valid for processes
+ * which play with the space id's.  Thus, we have to preserve the
+ * space and just flush the entire tlb.  However, the compilers,
+ * dynamic linker, etc, do not manipulate space id's, so there
+ * could be a significant performance benefit in switching contexts
+ * and not flushing the whole tlb.
  */
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
@@ -45,10 +52,18 @@
 	BUG_ON(mm == &init_mm); /* Should never happen */
 
 #if 1 || defined(CONFIG_SMP)
+	/* Except for very small threads, flushing the whole TLB is
+	 * faster than using __flush_tlb_range.  The pdtlb and pitlb
+	 * instructions are very slow because of the TLB broadcast.
+	 * It might be faster to do local range flushes on all CPUs
+	 * on PA 2.0 systems.
+	 */
 	flush_tlb_all();
 #else
 	/* FIXME: currently broken, causing space id and protection ids
-	 *  to go out of sync, resulting in faults on userspace accesses.
+	 * to go out of sync, resulting in faults on userspace accesses.
+	 * This approach needs further investigation since running many
+	 * small applications (e.g., GCC testsuite) is faster on HP-UX.
 	 */
 	if (mm) {
 		if (mm->context != 0)
@@ -65,22 +80,12 @@
 {
 	unsigned long flags, sid;
 
-	/* For one page, it's not worth testing the split_tlb variable */
-
-	mb();
 	sid = vma->vm_mm->context;
 	purge_tlb_start(flags);
 	mtsp(sid, 1);
 	pdtlb(addr);
-	pitlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
 	purge_tlb_end(flags);
 }
-
-void __flush_tlb_range(unsigned long sid,
-	unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
-
 #endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index f6448c7..cda6dbb 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -342,12 +342,15 @@
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
 #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
-int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+
+#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */
+static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD;
 
 void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
-	unsigned long size;
+	unsigned long size, start;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -364,14 +367,43 @@
 	/* Racy, but if we see an intermediate value, it's ok too... */
 	parisc_cache_flush_threshold = size * alltime / rangetime;
 
-	parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); 
+	parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
 	if (!parisc_cache_flush_threshold)
 		parisc_cache_flush_threshold = FLUSH_THRESHOLD;
 
 	if (parisc_cache_flush_threshold > cache_info.dc_size)
 		parisc_cache_flush_threshold = cache_info.dc_size;
 
-	printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus());
+	printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+		parisc_cache_flush_threshold/1024);
+
+	/* calculate TLB flush threshold */
+
+	alltime = mfctl(16);
+	flush_tlb_all();
+	alltime = mfctl(16) - alltime;
+
+	size = PAGE_SIZE;
+	start = (unsigned long) _text;
+	rangetime = mfctl(16);
+	while (start < (unsigned long) _end) {
+		flush_tlb_kernel_range(start, start + PAGE_SIZE);
+		start += PAGE_SIZE;
+		size += PAGE_SIZE;
+	}
+	rangetime = mfctl(16) - rangetime;
+
+	printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
+		alltime, size, rangetime);
+
+	parisc_tlb_flush_threshold = size * alltime / rangetime;
+	parisc_tlb_flush_threshold *= num_online_cpus();
+	parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
+	if (!parisc_tlb_flush_threshold)
+		parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
+
+	printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+		parisc_tlb_flush_threshold/1024);
 }
 
 extern void purge_kernel_dcache_page_asm(unsigned long);
@@ -403,48 +435,45 @@
 }
 EXPORT_SYMBOL(copy_user_page);
 
-void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+/* __flush_tlb_range()
+ *
+ * returns 1 if all TLBs were flushed.
+ */
+int __flush_tlb_range(unsigned long sid, unsigned long start,
+		      unsigned long end)
 {
-	unsigned long flags;
+	unsigned long flags, size;
 
-	/* Note: purge_tlb_entries can be called at startup with
-	   no context.  */
-
-	purge_tlb_start(flags);
-	mtsp(mm->context, 1);
-	pdtlb(addr);
-	pitlb(addr);
-	purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(purge_tlb_entries);
-
-void __flush_tlb_range(unsigned long sid, unsigned long start,
-		       unsigned long end)
-{
-	unsigned long npages;
-
-	npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-	if (npages >= 512)  /* 2MB of space: arbitrary, should be tuned */
+	size = (end - start);
+	if (size >= parisc_tlb_flush_threshold) {
 		flush_tlb_all();
-	else {
-		unsigned long flags;
+		return 1;
+	}
 
+	/* Purge TLB entries for small ranges using the pdtlb and
+	   pitlb instructions.  These instructions execute locally
+	   but cause a purge request to be broadcast to other TLBs.  */
+	if (likely(!split_tlb)) {
+		while (start < end) {
+			purge_tlb_start(flags);
+			mtsp(sid, 1);
+			pdtlb(start);
+			purge_tlb_end(flags);
+			start += PAGE_SIZE;
+		}
+		return 0;
+	}
+
+	/* split TLB case */
+	while (start < end) {
 		purge_tlb_start(flags);
 		mtsp(sid, 1);
-		if (split_tlb) {
-			while (npages--) {
-				pdtlb(start);
-				pitlb(start);
-				start += PAGE_SIZE;
-			}
-		} else {
-			while (npages--) {
-				pdtlb(start);
-				start += PAGE_SIZE;
-			}
-		}
+		pdtlb(start);
+		pitlb(start);
 		purge_tlb_end(flags);
+		start += PAGE_SIZE;
 	}
+	return 0;
 }
 
 static void cacheflush_h_tmp_function(void *dummy)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 7581961..c5ef408 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -45,7 +45,7 @@
 	.level 2.0
 #endif
 
-	.import         pa_dbit_lock,data
+	.import		pa_tlb_lock,data
 
 	/* space_to_prot macro creates a prot id from a space id */
 
@@ -420,8 +420,8 @@
 	SHLREG		%r9,PxD_VALUE_SHIFT,\pmd
 	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
-	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd
-	LDREG		%r0(\pmd),\pte		/* pmd is now pte */
+	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
+	LDREG		%r0(\pmd),\pte
 	bb,>=,n		\pte,_PAGE_PRESENT_BIT,\fault
 	.endm
 
@@ -453,57 +453,53 @@
 	L2_ptep		\pgd,\pte,\index,\va,\fault
 	.endm
 
-	/* Acquire pa_dbit_lock lock. */
-	.macro		dbit_lock	spc,tmp,tmp1
+	/* Acquire pa_tlb_lock lock and recheck page is still present. */
+	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 	cmpib,COND(=),n	0,\spc,2f
-	load32		PA(pa_dbit_lock),\tmp
+	load32		PA(pa_tlb_lock),\tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
+	LDREG		0(\ptp),\pte
+	bb,<,n		\pte,_PAGE_PRESENT_BIT,2f
+	b		\fault
+	stw		 \spc,0(\tmp)
 2:
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock without reloading lock address. */
-	.macro		dbit_unlock0	spc,tmp
+	/* Release pa_tlb_lock lock without reloading lock address. */
+	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
 	or,COND(=)	%r0,\spc,%r0
 	stw             \spc,0(\tmp)
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock. */
-	.macro		dbit_unlock1	spc,tmp
+	/* Release pa_tlb_lock lock. */
+	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load32		PA(pa_dbit_lock),\tmp
-	dbit_unlock0	\spc,\tmp
+	load32		PA(pa_tlb_lock),\tmp
+	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
 
 	/* Set the _PAGE_ACCESSED bit of the PTE.  Be clever and
 	 * don't needlessly dirty the cache line if it was already set */
-	.macro		update_ptep	spc,ptep,pte,tmp,tmp1
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_accessed	ptp,pte,tmp,tmp1
 	ldi		_PAGE_ACCESSED,\tmp1
 	or		\tmp1,\pte,\tmp
 	and,COND(<>)	\tmp1,\pte,%r0
-	STREG		\tmp,0(\ptep)
+	STREG		\tmp,0(\ptp)
 	.endm
 
 	/* Set the dirty bit (and accessed bit).  No need to be
 	 * clever, this is only used from the dirty fault */
-	.macro		update_dirty	spc,ptep,pte,tmp
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_dirty	ptp,pte,tmp
 	ldi		_PAGE_ACCESSED|_PAGE_DIRTY,\tmp
 	or		\tmp,\pte,\pte
-	STREG		\pte,0(\ptep)
+	STREG		\pte,0(\ptp)
 	.endm
 
 	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
@@ -1148,14 +1144,14 @@
 
 	L3_ptep		ptp,pte,t0,va,dtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1174,14 +1170,14 @@
 
 	L3_ptep		ptp,pte,t0,va,nadtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1202,20 +1198,20 @@
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1235,21 +1231,20 @@
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1269,16 +1264,16 @@
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1297,16 +1292,16 @@
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock1	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1406,14 +1401,14 @@
 
 	L3_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1430,14 +1425,14 @@
 
 	L3_ptep		ptp,pte,t0,va,naitlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1458,20 +1453,20 @@
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1482,20 +1477,20 @@
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1516,16 +1511,16 @@
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0	
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1536,16 +1531,16 @@
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1568,14 +1563,14 @@
 
 	L3_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 		
 	idtlbt          pte,prot
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #else
@@ -1588,8 +1583,8 @@
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1600,8 +1595,8 @@
 	idtlbp		prot,(%sr1,va)
 
 	mtsp            t1, %sr1     /* Restore sr1 */
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 
@@ -1612,16 +1607,16 @@
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock0	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 6548fd1..b99b39f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -43,10 +43,6 @@
 
 #include "../math-emu/math-emu.h"	/* for handle_fpe() */
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-DEFINE_SPINLOCK(pa_dbit_lock);
-#endif
-
 static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
 	struct pt_regs *regs);
 
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index ccde8f0..112ccf4 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -52,6 +52,22 @@
 	.text
 
 /*
+ * Used by threads when the lock bit of core_idle_state is set.
+ * Threads will spin in HMT_LOW until the lock bit is cleared.
+ * r14 - pointer to core_idle_state
+ * r15 - used to load contents of core_idle_state
+ */
+
+core_idle_lock_held:
+	HMT_LOW
+3:	lwz	r15,0(r14)
+	andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	3b
+	HMT_MEDIUM
+	lwarx	r15,0,r14
+	blr
+
+/*
  * Pass requested state in r3:
  *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
@@ -150,6 +166,10 @@
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
 lwarx_loop1:
 	lwarx	r15,0,r14
+
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bnel	core_idle_lock_held
+
 	andc	r15,r15,r7			/* Clear thread bit */
 
 	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
@@ -294,7 +314,7 @@
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
-	bne	core_idle_lock_held
+	bnel	core_idle_lock_held
 
 	cmpwi	cr2,r15,0
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
@@ -319,15 +339,6 @@
 	isync
 	b	common_exit
 
-core_idle_lock_held:
-	HMT_LOW
-core_idle_lock_loop:
-	lwz	r15,0(14)
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bne	core_idle_lock_loop
-	HMT_MEDIUM
-	b	lwarx_loop2
-
 first_thread_in_subcore:
 	/* First thread in subcore to wakeup */
 	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6530f1b..37de90f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -297,6 +297,8 @@
 
 	__this_cpu_inc(irq_stat.mce_exceptions);
 
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6d53597..a67c6d7 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -529,6 +529,10 @@
 		printk(KERN_ALERT "Unable to handle kernel paging request for "
 			"instruction fetch\n");
 		break;
+	case 0x600:
+		printk(KERN_ALERT "Unable to handle kernel paging request for "
+			"unaligned access at address 0x%08lx\n", regs->dar);
+		break;
 	default:
 		printk(KERN_ALERT "Unable to handle kernel paging request for "
 			"unknown fault\n");
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index ec2eb20..df95629 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -320,6 +320,8 @@
 	if (!attr)
 		return NULL;
 
+	sysfs_attr_init(&attr->attr.attr);
+
 	attr->var = str;
 	attr->attr.attr.name = name;
 	attr->attr.attr.mode = 0444;
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 4949ef0..37f959b 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -237,7 +237,7 @@
 	return elog;
 }
 
-static void elog_work_fn(struct work_struct *work)
+static irqreturn_t elog_event(int irq, void *data)
 {
 	__be64 size;
 	__be64 id;
@@ -251,7 +251,7 @@
 	rc = opal_get_elog_size(&id, &size, &type);
 	if (rc != OPAL_SUCCESS) {
 		pr_err("ELOG: OPAL log info read failed\n");
-		return;
+		return IRQ_HANDLED;
 	}
 
 	elog_size = be64_to_cpu(size);
@@ -270,16 +270,10 @@
 	 * entries.
 	 */
 	if (kset_find_obj(elog_kset, name))
-		return;
+		return IRQ_HANDLED;
 
 	create_elog_obj(log_id, elog_size, elog_type);
-}
 
-static DECLARE_WORK(elog_work, elog_work_fn);
-
-static irqreturn_t elog_event(int irq, void *data)
-{
-	schedule_work(&elog_work);
 	return IRQ_HANDLED;
 }
 
@@ -304,8 +298,8 @@
 		return irq;
 	}
 
-	rc = request_irq(irq, elog_event,
-			IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL);
+	rc = request_threaded_irq(irq, NULL, elog_event,
+			IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL);
 	if (rc) {
 		pr_err("%s: Can't request OPAL event irq (%d)\n",
 		       __func__, rc);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 46cb3fe..4ece8e4 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -112,6 +112,7 @@
 static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	size_t addr, size;
+	pgprot_t page_prot;
 	int rc;
 
 	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
@@ -125,13 +126,11 @@
 	if (!opal_prd_range_is_valid(addr, size))
 		return -EINVAL;
 
-	vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file,
-						vma->vm_pgoff,
-						 size, vma->vm_page_prot))
-					| _PAGE_SPECIAL);
+	page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
+					 size, vma->vm_page_prot);
 
 	rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
-			vma->vm_page_prot);
+				page_prot);
 
 	return rc;
 }
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
index 2bc3367..87f9623 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
@@ -18,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/semaphore.h>
 #include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
 
 struct ppc4xx_hsta_msi {
 	struct device *dev;
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 88c7016..97bbb60 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -28,7 +28,7 @@
 #define _ST(p, inst, v)						\
 	({							\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
@@ -41,7 +41,7 @@
 	({							\
 		unsigned long __v;				\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index ddf9ecb..e342586 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -20,7 +20,7 @@
 	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return 0;
+		return n;
 
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 569ee09..46b58ab 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -352,13 +352,16 @@
 				pdata->mmio_size = resource_size(rentry->res);
 			pdata->mmio_base = ioremap(rentry->res->start,
 						   pdata->mmio_size);
-			if (!pdata->mmio_base)
-				goto err_out;
 			break;
 		}
 
 	acpi_dev_free_resource_list(&resource_list);
 
+	if (!pdata->mmio_base) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
 	pdata->dev_desc = dev_desc;
 
 	if (dev_desc->setup)
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index c262e4a..3b8963f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -175,10 +175,14 @@
 	if (!addr || !length)
 		return;
 
-	acpi_reserve_region(addr, length, gas->space_id, 0, desc);
+	/* Resources are never freed */
+	if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+		request_region(addr, length, desc);
+	else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		request_mem_region(addr, length, desc);
 }
 
-static void __init acpi_reserve_resources(void)
+static int __init acpi_reserve_resources(void)
 {
 	acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length,
 		"ACPI PM1a_EVT_BLK");
@@ -207,7 +211,10 @@
 	if (!(acpi_gbl_FADT.gpe1_block_length & 0x1))
 		acpi_request_region(&acpi_gbl_FADT.xgpe1_block,
 			       acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK");
+
+	return 0;
 }
+fs_initcall_sync(acpi_reserve_resources);
 
 void acpi_os_printf(const char *fmt, ...)
 {
@@ -1862,7 +1869,6 @@
 
 acpi_status __init acpi_os_initialize1(void)
 {
-	acpi_reserve_resources();
 	kacpid_wq = alloc_workqueue("kacpid", 0, 1);
 	kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1);
 	kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0);
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 10561ce..8244f01 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -26,7 +26,6 @@
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
-#include <linux/list.h>
 #include <linux/slab.h>
 
 #ifdef CONFIG_X86
@@ -622,164 +621,3 @@
 	return (type & types) ? 0 : 1;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type);
-
-struct reserved_region {
-	struct list_head node;
-	u64 start;
-	u64 end;
-};
-
-static LIST_HEAD(reserved_io_regions);
-static LIST_HEAD(reserved_mem_regions);
-
-static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags,
-			 char *desc)
-{
-	unsigned int length = end - start + 1;
-	struct resource *res;
-
-	res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ?
-		request_region(start, length, desc) :
-		request_mem_region(start, length, desc);
-	if (!res)
-		return -EIO;
-
-	res->flags &= ~flags;
-	return 0;
-}
-
-static int add_region_before(u64 start, u64 end, u8 space_id,
-			     unsigned long flags, char *desc,
-			     struct list_head *head)
-{
-	struct reserved_region *reg;
-	int error;
-
-	reg = kmalloc(sizeof(*reg), GFP_KERNEL);
-	if (!reg)
-		return -ENOMEM;
-
-	error = request_range(start, end, space_id, flags, desc);
-	if (error) {
-		kfree(reg);
-		return error;
-	}
-
-	reg->start = start;
-	reg->end = end;
-	list_add_tail(&reg->node, head);
-	return 0;
-}
-
-/**
- * acpi_reserve_region - Reserve an I/O or memory region as a system resource.
- * @start: Starting address of the region.
- * @length: Length of the region.
- * @space_id: Identifier of address space to reserve the region from.
- * @flags: Resource flags to clear for the region after requesting it.
- * @desc: Region description (for messages).
- *
- * Reserve an I/O or memory region as a system resource to prevent others from
- * using it.  If the new region overlaps with one of the regions (in the given
- * address space) already reserved by this routine, only the non-overlapping
- * parts of it will be reserved.
- *
- * Returned is either 0 (success) or a negative error code indicating a resource
- * reservation problem.  It is the code of the first encountered error, but the
- * routine doesn't abort until it has attempted to request all of the parts of
- * the new region that don't overlap with other regions reserved previously.
- *
- * The resources requested by this routine are never released.
- */
-int acpi_reserve_region(u64 start, unsigned int length, u8 space_id,
-			unsigned long flags, char *desc)
-{
-	struct list_head *regions;
-	struct reserved_region *reg;
-	u64 end = start + length - 1;
-	int ret = 0, error = 0;
-
-	if (space_id == ACPI_ADR_SPACE_SYSTEM_IO)
-		regions = &reserved_io_regions;
-	else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
-		regions = &reserved_mem_regions;
-	else
-		return -EINVAL;
-
-	if (list_empty(regions))
-		return add_region_before(start, end, space_id, flags, desc, regions);
-
-	list_for_each_entry(reg, regions, node)
-		if (reg->start == end + 1) {
-			/* The new region can be prepended to this one. */
-			ret = request_range(start, end, space_id, flags, desc);
-			if (!ret)
-				reg->start = start;
-
-			return ret;
-		} else if (reg->start > end) {
-			/* No overlap.  Add the new region here and get out. */
-			return add_region_before(start, end, space_id, flags,
-						 desc, &reg->node);
-		} else if (reg->end == start - 1) {
-			goto combine;
-		} else if (reg->end >= start) {
-			goto overlap;
-		}
-
-	/* The new region goes after the last existing one. */
-	return add_region_before(start, end, space_id, flags, desc, regions);
-
- overlap:
-	/*
-	 * The new region overlaps an existing one.
-	 *
-	 * The head part of the new region immediately preceding the existing
-	 * overlapping one can be combined with it right away.
-	 */
-	if (reg->start > start) {
-		error = request_range(start, reg->start - 1, space_id, flags, desc);
-		if (error)
-			ret = error;
-		else
-			reg->start = start;
-	}
-
- combine:
-	/*
-	 * The new region is adjacent to an existing one.  If it extends beyond
-	 * that region all the way to the next one, it is possible to combine
-	 * all three of them.
-	 */
-	while (reg->end < end) {
-		struct reserved_region *next = NULL;
-		u64 a = reg->end + 1, b = end;
-
-		if (!list_is_last(&reg->node, regions)) {
-			next = list_next_entry(reg, node);
-			if (next->start <= end)
-				b = next->start - 1;
-		}
-		error = request_range(a, b, space_id, flags, desc);
-		if (!error) {
-			if (next && next->start == b + 1) {
-				reg->end = next->end;
-				list_del(&next->node);
-				kfree(next);
-			} else {
-				reg->end = end;
-				break;
-			}
-		} else if (next) {
-			if (!ret)
-				ret = error;
-
-			reg = next;
-		} else {
-			break;
-		}
-	}
-
-	return ret ? ret : error;
-}
-EXPORT_SYMBOL_GPL(acpi_reserve_region);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 2649a06..ec25635 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1019,6 +1019,29 @@
 	return false;
 }
 
+static bool __acpi_match_device_cls(const struct acpi_device_id *id,
+				    struct acpi_hardware_id *hwid)
+{
+	int i, msk, byte_shift;
+	char buf[3];
+
+	if (!id->cls)
+		return false;
+
+	/* Apply class-code bitmask, before checking each class-code byte */
+	for (i = 1; i <= 3; i++) {
+		byte_shift = 8 * (3 - i);
+		msk = (id->cls_msk >> byte_shift) & 0xFF;
+		if (!msk)
+			continue;
+
+		sprintf(buf, "%02x", (id->cls >> byte_shift) & msk);
+		if (strncmp(buf, &hwid->id[(i - 1) * 2], 2))
+			return false;
+	}
+	return true;
+}
+
 static const struct acpi_device_id *__acpi_match_device(
 	struct acpi_device *device,
 	const struct acpi_device_id *ids,
@@ -1036,9 +1059,12 @@
 
 	list_for_each_entry(hwid, &device->pnp.ids, list) {
 		/* First, check the ACPI/PNP IDs provided by the caller. */
-		for (id = ids; id->id[0]; id++)
-			if (!strcmp((char *) id->id, hwid->id))
+		for (id = ids; id->id[0] || id->cls; id++) {
+			if (id->id[0] && !strcmp((char *) id->id, hwid->id))
 				return id;
+			else if (id->cls && __acpi_match_device_cls(id, hwid))
+				return id;
+		}
 
 		/*
 		 * Next, check ACPI_DT_NAMESPACE_HID and try to match the
@@ -2101,6 +2127,8 @@
 		if (info->valid & ACPI_VALID_UID)
 			pnp->unique_id = kstrdup(info->unique_id.string,
 							GFP_KERNEL);
+		if (info->valid & ACPI_VALID_CLS)
+			acpi_add_id(pnp, info->class_code.string);
 
 		kfree(info);
 
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 6d17a3b..15e40ee 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -48,7 +48,7 @@
 
 config ATA_ACPI
 	bool "ATA ACPI Support"
-	depends on ACPI && PCI
+	depends on ACPI
 	default y
 	help
 	  This option adds support for ATA-related ACPI objects.
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 614c78f..1befb11 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -20,6 +20,8 @@
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include <linux/ahci_platform.h>
+#include <linux/acpi.h>
+#include <linux/pci_ids.h>
 #include "ahci.h"
 
 #define DRV_NAME "ahci"
@@ -79,12 +81,19 @@
 };
 MODULE_DEVICE_TABLE(of, ahci_of_match);
 
+static const struct acpi_device_id ahci_acpi_match[] = {
+	{ ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, ahci_acpi_match);
+
 static struct platform_driver ahci_driver = {
 	.probe = ahci_probe,
 	.remove = ata_platform_remove_one,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
+		.acpi_match_table = ahci_acpi_match,
 		.pm = &ahci_pm_ops,
 	},
 };
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 9c42883..894bda1 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -563,10 +563,8 @@
 	kfree(fw_priv);
 }
 
-static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env)
 {
-	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-
 	if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id))
 		return -ENOMEM;
 	if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout))
@@ -577,6 +575,18 @@
 	return 0;
 }
 
+static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct firmware_priv *fw_priv = to_firmware_priv(dev);
+	int err = 0;
+
+	mutex_lock(&fw_lock);
+	if (fw_priv->buf)
+		err = do_firmware_uevent(fw_priv, env);
+	mutex_unlock(&fw_lock);
+	return err;
+}
+
 static struct class firmware_class = {
 	.name		= "firmware",
 	.class_attrs	= firmware_class_attrs,
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index cdd547b..0ee43c1 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -6,6 +6,7 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
@@ -19,6 +20,8 @@
 #include <linux/suspend.h>
 #include <linux/export.h>
 
+#define GENPD_RETRY_MAX_MS	250		/* Approximate */
+
 #define GENPD_DEV_CALLBACK(genpd, type, callback, dev)		\
 ({								\
 	type (*__routine)(struct device *__d); 			\
@@ -2131,6 +2134,7 @@
 static void genpd_dev_pm_detach(struct device *dev, bool power_off)
 {
 	struct generic_pm_domain *pd;
+	unsigned int i;
 	int ret = 0;
 
 	pd = pm_genpd_lookup_dev(dev);
@@ -2139,10 +2143,12 @@
 
 	dev_dbg(dev, "removing from PM domain %s\n", pd->name);
 
-	while (1) {
+	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
 		ret = pm_genpd_remove_device(pd, dev);
 		if (ret != -EAGAIN)
 			break;
+
+		mdelay(i);
 		cond_resched();
 	}
 
@@ -2183,6 +2189,7 @@
 {
 	struct of_phandle_args pd_args;
 	struct generic_pm_domain *pd;
+	unsigned int i;
 	int ret;
 
 	if (!dev->of_node)
@@ -2218,10 +2225,12 @@
 
 	dev_dbg(dev, "adding to PM domain %s\n", pd->name);
 
-	while (1) {
+	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
 		ret = pm_genpd_add_device(pd, dev);
 		if (ret != -EAGAIN)
 			break;
+
+		mdelay(i);
 		cond_resched();
 	}
 
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 7470004..eb6e674 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -45,14 +45,12 @@
 		return -EEXIST;
 	}
 
-	dev->power.wakeirq = wirq;
-	spin_unlock_irqrestore(&dev->power.lock, flags);
-
 	err = device_wakeup_attach_irq(dev, wirq);
-	if (err)
-		return err;
+	if (!err)
+		dev->power.wakeirq = wirq;
 
-	return 0;
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+	return err;
 }
 
 /**
@@ -105,10 +103,10 @@
 		return;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
+	device_wakeup_detach_irq(dev);
 	dev->power.wakeirq = NULL;
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 
-	device_wakeup_detach_irq(dev);
 	if (wirq->dedicated_irq)
 		free_irq(wirq->irq, wirq);
 	kfree(wirq);
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 40f7160..51f15bc 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -281,32 +281,25 @@
  * Attach a device wakeirq to the wakeup source so the device
  * wake IRQ can be configured automatically for suspend and
  * resume.
+ *
+ * Call under the device's power.lock lock.
  */
 int device_wakeup_attach_irq(struct device *dev,
 			     struct wake_irq *wakeirq)
 {
 	struct wakeup_source *ws;
-	int ret = 0;
 
-	spin_lock_irq(&dev->power.lock);
 	ws = dev->power.wakeup;
 	if (!ws) {
 		dev_err(dev, "forgot to call call device_init_wakeup?\n");
-		ret = -EINVAL;
-		goto unlock;
+		return -EINVAL;
 	}
 
-	if (ws->wakeirq) {
-		ret = -EEXIST;
-		goto unlock;
-	}
+	if (ws->wakeirq)
+		return -EEXIST;
 
 	ws->wakeirq = wakeirq;
-
-unlock:
-	spin_unlock_irq(&dev->power.lock);
-
-	return ret;
+	return 0;
 }
 
 /**
@@ -314,20 +307,16 @@
  * @dev: Device to handle
  *
  * Removes a device wakeirq from the wakeup source.
+ *
+ * Call under the device's power.lock lock.
  */
 void device_wakeup_detach_irq(struct device *dev)
 {
 	struct wakeup_source *ws;
 
-	spin_lock_irq(&dev->power.lock);
 	ws = dev->power.wakeup;
-	if (!ws)
-		goto unlock;
-
-	ws->wakeirq = NULL;
-
-unlock:
-	spin_unlock_irq(&dev->power.lock);
+	if (ws)
+		ws->wakeirq = NULL;
 }
 
 /**
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 8d7e1c8..4dd8826 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1055,7 +1055,7 @@
 
 	processor = (struct acpi_madt_generic_interrupt *)header;
 
-	if (BAD_MADT_ENTRY(processor, end))
+	if (BAD_MADT_GICC_ENTRY(processor, end))
 		return -EINVAL;
 
 	/*
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 0c77240..729e085 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -23,6 +23,7 @@
 
 	afu = cxl_pci_to_afu(dev);
 
+	get_device(&afu->dev);
 	ctx = cxl_context_alloc();
 	if (IS_ERR(ctx))
 		return ctx;
@@ -31,6 +32,7 @@
 	rc = cxl_context_init(ctx, afu, false, NULL);
 	if (rc) {
 		kfree(ctx);
+		put_device(&afu->dev);
 		return ERR_PTR(-ENOMEM);
 	}
 	cxl_assign_psn_space(ctx);
@@ -60,6 +62,8 @@
 	if (ctx->status != CLOSED)
 		return -EBUSY;
 
+	put_device(&ctx->afu->dev);
+
 	cxl_context_free(ctx);
 
 	return 0;
@@ -159,7 +163,6 @@
 	}
 
 	ctx->status = STARTED;
-	get_device(&ctx->afu->dev);
 out:
 	mutex_unlock(&ctx->status_mutex);
 	return rc;
@@ -175,12 +178,7 @@
 /* Stop a context.  Returns 0 on success, otherwise -Errno */
 int cxl_stop_context(struct cxl_context *ctx)
 {
-	int rc;
-
-	rc = __detach_context(ctx);
-	if (!rc)
-		put_device(&ctx->afu->dev);
-	return rc;
+	return __detach_context(ctx);
 }
 EXPORT_SYMBOL_GPL(cxl_stop_context);
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 2a4c80a..1287148 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -113,11 +113,11 @@
 
 	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
 		area = ctx->afu->psn_phys;
-		if (offset > ctx->afu->adapter->ps_size)
+		if (offset >= ctx->afu->adapter->ps_size)
 			return VM_FAULT_SIGBUS;
 	} else {
 		area = ctx->psn_phys;
-		if (offset > ctx->psn_size)
+		if (offset >= ctx->psn_size)
 			return VM_FAULT_SIGBUS;
 	}
 
@@ -145,8 +145,16 @@
  */
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
 {
+	u64 start = vma->vm_pgoff << PAGE_SHIFT;
 	u64 len = vma->vm_end - vma->vm_start;
-	len = min(len, ctx->psn_size);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		if (start + len > ctx->afu->adapter->ps_size)
+			return -EINVAL;
+	} else {
+		if (start + len > ctx->psn_size)
+			return -EINVAL;
+	}
 
 	if (ctx->afu->current_mode != CXL_MODE_DEDICATED) {
 		/* make sure there is a valid per process space for this AFU */
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 833348e..4a164ab 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -73,7 +73,7 @@
 		spin_lock(&adapter->afu_list_lock);
 		for (slice = 0; slice < adapter->slices; slice++) {
 			afu = adapter->afu[slice];
-			if (!afu->enabled)
+			if (!afu || !afu->enabled)
 				continue;
 			rcu_read_lock();
 			idr_for_each_entry(&afu->contexts_idr, ctx, id)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index c68ef58..32ad097 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -539,7 +539,7 @@
 
 static void cxl_unmap_slice_regs(struct cxl_afu *afu)
 {
-	if (afu->p1n_mmio)
+	if (afu->p2n_mmio)
 		iounmap(afu->p2n_mmio);
 	if (afu->p1n_mmio)
 		iounmap(afu->p1n_mmio);
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index b1d1983a..2eba002 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -112,9 +112,10 @@
 	unsigned long addr;
 
 	phb = pci_bus_to_host(bus);
-	afu = (struct cxl_afu *)phb->private_data;
 	if (phb == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
+	afu = (struct cxl_afu *)phb->private_data;
+
 	if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 	if (offset >= (unsigned long)phb->cfg_data)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 357b6ae..458aa5a 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -552,22 +552,6 @@
 	schedule_work(&device->event_work);
 }
 
-void mei_cl_bus_remove_devices(struct mei_device *dev)
-{
-	struct mei_cl *cl, *next;
-
-	mutex_lock(&dev->device_lock);
-	list_for_each_entry_safe(cl, next, &dev->device_list, device_link) {
-		if (cl->device)
-			mei_cl_remove_device(cl->device);
-
-		list_del(&cl->device_link);
-		mei_cl_unlink(cl);
-		kfree(cl);
-	}
-	mutex_unlock(&dev->device_lock);
-}
-
 int __init mei_cl_bus_init(void)
 {
 	return bus_register(&mei_cl_bus_type);
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index 94514b2..00c3865 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -333,8 +333,6 @@
 
 	mei_nfc_host_exit(dev);
 
-	mei_cl_bus_remove_devices(dev);
-
 	mutex_lock(&dev->device_lock);
 
 	mei_wd_stop(dev);
diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c
index b983c4e..290ef30 100644
--- a/drivers/misc/mei/nfc.c
+++ b/drivers/misc/mei/nfc.c
@@ -402,11 +402,12 @@
 
 	cldev->priv_data = NULL;
 
-	mutex_lock(&dev->device_lock);
 	/* Need to remove the device here
 	 * since mei_nfc_free will unlink the clients
 	 */
 	mei_cl_remove_device(cldev);
+
+	mutex_lock(&dev->device_lock);
 	mei_nfc_free(ndev);
 	mutex_unlock(&dev->device_lock);
 }
diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c
index 515f338..49c1720 100644
--- a/drivers/pnp/system.c
+++ b/drivers/pnp/system.c
@@ -7,7 +7,6 @@
  *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  */
 
-#include <linux/acpi.h>
 #include <linux/pnp.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -23,41 +22,25 @@
 	{"", 0}
 };
 
-#ifdef CONFIG_ACPI
-static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc)
-{
-	u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY;
-	return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc);
-}
-#else
-static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc)
-{
-	struct resource *res;
-
-	res = io ? request_region(start, length, desc) :
-		request_mem_region(start, length, desc);
-	if (res) {
-		res->flags &= ~IORESOURCE_BUSY;
-		return true;
-	}
-	return false;
-}
-#endif
-
 static void reserve_range(struct pnp_dev *dev, struct resource *r, int port)
 {
 	char *regionid;
 	const char *pnpid = dev_name(&dev->dev);
 	resource_size_t start = r->start, end = r->end;
-	bool reserved;
+	struct resource *res;
 
 	regionid = kmalloc(16, GFP_KERNEL);
 	if (!regionid)
 		return;
 
 	snprintf(regionid, 16, "pnp %s", pnpid);
-	reserved = __reserve_range(start, end - start + 1, !!port, regionid);
-	if (!reserved)
+	if (port)
+		res = request_region(start, end - start + 1, regionid);
+	else
+		res = request_mem_region(start, end - start + 1, regionid);
+	if (res)
+		res->flags &= ~IORESOURCE_BUSY;
+	else
 		kfree(regionid);
 
 	/*
@@ -66,7 +49,7 @@
 	 * have double reservations.
 	 */
 	dev_info(&dev->dev, "%pR %s reserved\n", r,
-		 reserved ? "has been" : "could not be");
+		 res ? "has been" : "could not be");
 }
 
 static void reserve_resources_of_dev(struct pnp_dev *dev)
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 86621fa..735355b 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -121,6 +121,7 @@
 #define REG_3		0x0004a0
 #define REG_4		0x000600
 #define REG_6		0x000800
+#define REG_7		0x000804
 #define REG_8		0x000820
 #define REG_9		0x000a04
 #define REG_10		0x018000
@@ -135,6 +136,8 @@
 #define REG_21		0x200218
 #define REG_22		0x0005a0
 #define REG_23		0x0005c0
+#define REG_24		0x000808
+#define REG_25		0x000b00
 #define REG_26		0x200118
 #define REG_27		0x200308
 #define REG_32		0x21003c
@@ -429,6 +432,9 @@
 #define SET_LENXY_START_RECFILL(fb, lenxy) \
 	WRITE_WORD(lenxy, fb, REG_9)
 
+#define SETUP_COPYAREA(fb) \
+	WRITE_BYTE(0, fb, REG_16b1)
+
 static void
 HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable)
 {
@@ -1004,6 +1010,36 @@
 	return 0;
 }
 
+static void
+stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+	struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+	SETUP_COPYAREA(fb);
+
+	SETUP_HW(fb);
+	if (fb->info.var.bits_per_pixel == 32) {
+		WRITE_WORD(0xBBA0A000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
+	} else {
+		WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
+	}
+
+	NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb,
+		IBOvals(RopSrc, MaskAddrOffset(0),
+		BitmapExtent08, StaticReg(1),
+		DataDynamic, MaskOtc, BGx(0), FGx(0)));
+
+	WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24);
+	WRITE_WORD(((area->width << 16) | area->height), fb, REG_7);
+	WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25);
+
+	SETUP_FB(fb);
+}
+
 static void __init
 stifb_init_display(struct stifb_info *fb)
 {
@@ -1069,7 +1105,7 @@
 	.fb_setcolreg	= stifb_setcolreg,
 	.fb_blank	= stifb_blank,
 	.fb_fillrect	= cfb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
+	.fb_copyarea	= stifb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
 };
 
@@ -1258,7 +1294,7 @@
 	info->fbops = &stifb_ops;
 	info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len);
 	info->screen_size = fix->smem_len;
-	info->flags = FBINFO_DEFAULT;
+	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
 	info->pseudo_palette = &fb->pseudo_palette;
 
 	/* This has to be done !!! */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6b8e2f0..48851f6 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -896,6 +896,7 @@
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FITRIM)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 72afcc6..feef8a9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -325,7 +325,6 @@
 		return rc;
 
 	switch (cmd) {
-	case FITRIM:
 	case FS_IOC32_GETFLAGS:
 	case FS_IOC32_SETFLAGS:
 	case FS_IOC32_GETVERSION:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aadb728..2553aa8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -504,7 +504,7 @@
 	struct buffer_head		*bh;
 	int				err;
 
-	bh = sb_getblk(inode->i_sb, pblk);
+	bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
 	if (unlikely(!bh))
 		return ERR_PTR(-ENOMEM);
 
@@ -1089,7 +1089,7 @@
 		err = -EIO;
 		goto cleanup;
 	}
-	bh = sb_getblk(inode->i_sb, newblock);
+	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
 	if (unlikely(!bh)) {
 		err = -ENOMEM;
 		goto cleanup;
@@ -1283,7 +1283,7 @@
 	if (newblock == 0)
 		return err;
 
-	bh = sb_getblk(inode->i_sb, newblock);
+	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
 	if (unlikely(!bh))
 		return -ENOMEM;
 	lock_buffer(bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 41f8e55..cecf9aa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1323,7 +1323,7 @@
 					     unsigned int offset,
 					     unsigned int length)
 {
-	int to_release = 0;
+	int to_release = 0, contiguous_blks = 0;
 	struct buffer_head *head, *bh;
 	unsigned int curr_off = 0;
 	struct inode *inode = page->mapping->host;
@@ -1344,14 +1344,23 @@
 
 		if ((offset <= curr_off) && (buffer_delay(bh))) {
 			to_release++;
+			contiguous_blks++;
 			clear_buffer_delay(bh);
+		} else if (contiguous_blks) {
+			lblk = page->index <<
+			       (PAGE_CACHE_SHIFT - inode->i_blkbits);
+			lblk += (curr_off >> inode->i_blkbits) -
+				contiguous_blks;
+			ext4_es_remove_extent(inode, lblk, contiguous_blks);
+			contiguous_blks = 0;
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
 
-	if (to_release) {
+	if (contiguous_blks) {
 		lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-		ext4_es_remove_extent(inode, lblk, to_release);
+		lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
+		ext4_es_remove_extent(inode, lblk, contiguous_blks);
 	}
 
 	/* If we have released all the blocks belonging to a cluster, then we
@@ -4344,7 +4353,12 @@
 	int inode_size = EXT4_INODE_SIZE(sb);
 
 	oi.orig_ino = orig_ino;
-	ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
+	/*
+	 * Calculate the first inode in the inode table block.  Inode
+	 * numbers are one-based.  That is, the first inode in a block
+	 * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
+	 */
+	ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
 	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
 		if (ino == orig_ino)
 			continue;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index cb84512..1346cfa 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -755,7 +755,6 @@
 		return err;
 	}
 	case EXT4_IOC_MOVE_EXT:
-	case FITRIM:
 	case EXT4_IOC_RESIZE_FS:
 	case EXT4_IOC_PRECACHE_EXTENTS:
 	case EXT4_IOC_SET_ENCRYPTION_POLICY:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f6aedf8..34b610e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4816,18 +4816,12 @@
 		/*
 		 * blocks being freed are metadata. these blocks shouldn't
 		 * be used until this transaction is committed
+		 *
+		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
+		 * to fail.
 		 */
-	retry:
-		new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
-		if (!new_entry) {
-			/*
-			 * We use a retry loop because
-			 * ext4_free_blocks() is not allowed to fail.
-			 */
-			cond_resched();
-			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
-		}
+		new_entry = kmem_cache_alloc(ext4_free_data_cachep,
+				GFP_NOFS|__GFP_NOFAIL);
 		new_entry->efd_start_cluster = bit;
 		new_entry->efd_group = block_group;
 		new_entry->efd_count = count_clusters;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b52374e..6163ad2 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -620,6 +620,7 @@
 	struct ext4_inode_info		*ei = EXT4_I(inode);
 	struct ext4_extent		*ex;
 	unsigned int			i, len;
+	ext4_lblk_t			start, end;
 	ext4_fsblk_t			blk;
 	handle_t			*handle;
 	int				ret;
@@ -633,6 +634,14 @@
 				       EXT4_FEATURE_RO_COMPAT_BIGALLOC))
 		return -EOPNOTSUPP;
 
+	/*
+	 * In order to get correct extent info, force all delayed allocation
+	 * blocks to be allocated, otherwise delayed allocation blocks may not
+	 * be reflected and bypass the checks on extent header.
+	 */
+	if (test_opt(inode->i_sb, DELALLOC))
+		ext4_alloc_da_blocks(inode);
+
 	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -650,11 +659,13 @@
 		goto errout;
 	}
 	if (eh->eh_entries == 0)
-		blk = len = 0;
+		blk = len = start = end = 0;
 	else {
 		len = le16_to_cpu(ex->ee_len);
 		blk = ext4_ext_pblock(ex);
-		if (len > EXT4_NDIR_BLOCKS) {
+		start = le32_to_cpu(ex->ee_block);
+		end = start + len - 1;
+		if (end >= EXT4_NDIR_BLOCKS) {
 			ret = -EOPNOTSUPP;
 			goto errout;
 		}
@@ -662,7 +673,7 @@
 
 	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
 	memset(ei->i_data, 0, sizeof(ei->i_data));
-	for (i=0; i < len; i++)
+	for (i = start; i <= end; i++)
 		ei->i_data[i] = cpu_to_le32(blk++);
 	ext4_mark_inode_dirty(handle, inode);
 errout:
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index f005046..d6a4b55 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -484,3 +484,98 @@
 	a->btree.first_free = cpu_to_le16(8);
 	return a;
 }
+
+static unsigned find_run(__le32 *bmp, unsigned *idx)
+{
+	unsigned len;
+	while (tstbits(bmp, *idx, 1)) {
+		(*idx)++;
+		if (unlikely(*idx >= 0x4000))
+			return 0;
+	}
+	len = 1;
+	while (!tstbits(bmp, *idx + len, 1))
+		len++;
+	return len;
+}
+
+static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result)
+{
+	int err;
+	secno end;
+	if (fatal_signal_pending(current))
+		return -EINTR;
+	end = start + len;
+	if (start < limit_start)
+		start = limit_start;
+	if (end > limit_end)
+		end = limit_end;
+	if (start >= end)
+		return 0;
+	if (end - start < minlen)
+		return 0;
+	err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0);
+	if (err)
+		return err;
+	*result += end - start;
+	return 0;
+}
+
+int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result)
+{
+	int err = 0;
+	struct hpfs_sb_info *sbi = hpfs_sb(s);
+	unsigned idx, len, start_bmp, end_bmp;
+	__le32 *bmp;
+	struct quad_buffer_head qbh;
+
+	*result = 0;
+	if (!end || end > sbi->sb_fs_size)
+		end = sbi->sb_fs_size;
+	if (start >= sbi->sb_fs_size)
+		return 0;
+	if (minlen > 0x4000)
+		return 0;
+	if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) {
+		hpfs_lock(s);
+		if (s->s_flags & MS_RDONLY) {
+			err = -EROFS;
+			goto unlock_1;
+		}
+		if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
+			err = -EIO;
+			goto unlock_1;
+		}
+		idx = 0;
+		while ((len = find_run(bmp, &idx)) && !err) {
+			err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result);
+			idx += len;
+		}
+		hpfs_brelse4(&qbh);
+unlock_1:
+		hpfs_unlock(s);
+	}
+	start_bmp = start >> 14;
+	end_bmp = (end + 0x3fff) >> 14;
+	while (start_bmp < end_bmp && !err) {
+		hpfs_lock(s);
+		if (s->s_flags & MS_RDONLY) {
+			err = -EROFS;
+			goto unlock_2;
+		}
+		if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) {
+			err = -EIO;
+			goto unlock_2;
+		}
+		idx = 0;
+		while ((len = find_run(bmp, &idx)) && !err) {
+			err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result);
+			idx += len;
+		}
+		hpfs_brelse4(&qbh);
+unlock_2:
+		hpfs_unlock(s);
+		start_bmp++;
+	}
+	return err;
+}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 2a8e074..dc540bf 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -327,4 +327,5 @@
 	.iterate	= hpfs_readdir,
 	.release	= hpfs_dir_release,
 	.fsync		= hpfs_file_fsync,
+	.unlocked_ioctl	= hpfs_ioctl,
 };
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 6d8cfe9..7ca28d6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -203,6 +203,7 @@
 	.release	= hpfs_file_release,
 	.fsync		= hpfs_file_fsync,
 	.splice_read	= generic_file_splice_read,
+	.unlocked_ioctl	= hpfs_ioctl,
 };
 
 const struct inode_operations hpfs_file_iops =
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index bb04b58..c4867b5 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -18,6 +18,8 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
 #include <asm/unaligned.h>
 
 #include "hpfs.h"
@@ -200,6 +202,7 @@
 struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
 struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
 struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
+int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *);
 
 /* anode.c */
 
@@ -318,6 +321,7 @@
 void hpfs_error(struct super_block *, const char *, ...);
 int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
 unsigned hpfs_get_free_dnodes(struct super_block *);
+long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 /*
  * local time (HPFS) to GMT (Unix)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 7cd00d3..68a9bed 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -52,17 +52,20 @@
 }
 
 /* Filesystem error... */
-static char err_buf[1024];
-
 void hpfs_error(struct super_block *s, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_err("filesystem error: %pV", &vaf);
+
 	va_end(args);
 
-	pr_err("filesystem error: %s", err_buf);
 	if (!hpfs_sb(s)->sb_was_error) {
 		if (hpfs_sb(s)->sb_err == 2) {
 			pr_cont("; crashing the system because you wanted it\n");
@@ -196,12 +199,39 @@
 	return 0;
 }
 
+
+long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+	switch (cmd) {
+		case FITRIM: {
+			struct fstrim_range range;
+			secno n_trimmed;
+			int r;
+			if (!capable(CAP_SYS_ADMIN))
+				return -EPERM;
+			if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
+				return -EFAULT;
+			r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed);
+			if (r)
+				return r;
+			range.len = (u64)n_trimmed << 9;
+			if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range)))
+				return -EFAULT;
+			return 0;
+		}
+		default: {
+			return -ENOIOCTLCMD;
+		}
+	}
+}
+
+
 static struct kmem_cache * hpfs_inode_cachep;
 
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
 	struct hpfs_inode_info *ei;
-	ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
+	ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
@@ -424,11 +454,14 @@
 	int o;
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 	char *new_opts = kstrdup(data, GFP_KERNEL);
-	
+
+	if (!new_opts)
+		return -ENOMEM;
+
 	sync_filesystem(s);
 
 	*flags |= MS_NOATIME;
-	
+
 	hpfs_lock(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 93a1232..8db8b7d 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -180,9 +180,6 @@
 	case JFS_IOC_SETFLAGS32:
 		cmd = JFS_IOC_SETFLAGS;
 		break;
-	case FITRIM:
-		cmd = FITRIM;
-		break;
 	}
 	return jfs_ioctl(filp, cmd, arg);
 }
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9a20e51..aba4381 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1369,7 +1369,6 @@
 	case NILFS_IOCTL_SYNC:
 	case NILFS_IOCTL_RESIZE:
 	case NILFS_IOCTL_SET_ALLOC_RANGE:
-	case FITRIM:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 53e6c40..3cb097c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -980,7 +980,6 @@
 	case OCFS2_IOC_GROUP_EXTEND:
 	case OCFS2_IOC_GROUP_ADD:
 	case OCFS2_IOC_GROUP_ADD64:
-	case FITRIM:
 		break;
 	case OCFS2_IOC_REFLINK:
 		if (copy_from_user(&args, argp, sizeof(args)))
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c471dfc..d2445fa 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -58,6 +58,19 @@
 	acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+/**
+ * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with
+ * the PCI-defined class-code information
+ *
+ * @_cls : the class, subclass, prog-if triple for this device
+ * @_msk : the class mask for this device
+ *
+ * This macro is used to create a struct acpi_device_id that matches a
+ * specific PCI class. The .id and .driver_data fields will be left
+ * initialized with the default value.
+ */
+#define ACPI_DEVICE_CLASS(_cls, _msk)	.cls = (_cls), .cls_msk = (_msk),
+
 static inline bool has_acpi_companion(struct device *dev)
 {
 	return is_acpi_node(dev->fwnode);
@@ -309,9 +322,6 @@
 
 int acpi_resources_are_enforced(void);
 
-int acpi_reserve_region(u64 start, unsigned int length, u8 space_id,
-			unsigned long flags, char *desc);
-
 #ifdef CONFIG_HIBERNATION
 void __init acpi_no_s4_hw_signature(void);
 #endif
@@ -446,6 +456,7 @@
 #define ACPI_COMPANION(dev)		(NULL)
 #define ACPI_COMPANION_SET(dev, adev)	do { } while (0)
 #define ACPI_HANDLE(dev)		(NULL)
+#define ACPI_DEVICE_CLASS(_cls, _msk)	.cls = (0), .cls_msk = (0),
 
 struct fwnode_handle;
 
@@ -507,13 +518,6 @@
 	return 0;
 }
 
-static inline int acpi_reserve_region(u64 start, unsigned int length,
-				      u8 space_id, unsigned long flags,
-				      char *desc)
-{
-	return -ENXIO;
-}
-
 struct acpi_table_header;
 static inline int acpi_table_parse(char *id,
 				int (*handler)(struct acpi_table_header *))
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 73b4522..e6797de 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -317,6 +317,13 @@
 	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
 }
 
+
+static inline struct buffer_head *
+sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp)
+{
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp);
+}
+
 static inline struct buffer_head *
 sb_find_get_block(struct super_block *sb, sector_t block)
 {
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index e154994..3775327 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -8,6 +8,7 @@
 #include <linux/radix-tree.h>
 #include <linux/uio.h>
 #include <linux/workqueue.h>
+#include <net/net_namespace.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/buffer.h>
@@ -56,6 +57,7 @@
 	struct ceph_entity_addr my_enc_addr;
 
 	atomic_t stopping;
+	possible_net_t net;
 	bool nocrc;
 	bool tcp_nodelay;
 
@@ -267,6 +269,7 @@
 			u64 required_features,
 			bool nocrc,
 			bool tcp_nodelay);
+extern void ceph_messenger_fini(struct ceph_messenger *msgr);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
 			const struct ceph_connection_operations *ops,
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 8183d66..34f25b7 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -189,6 +189,8 @@
 struct acpi_device_id {
 	__u8 id[ACPI_ID_LEN];
 	kernel_ulong_t driver_data;
+	__u32 cls;
+	__u32 cls_msk;
 };
 
 #define PNP_ID_LEN	8
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 09c6564..e85bdfd 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1021,8 +1021,7 @@
 	 * for strings that are too long, we should not have created
 	 * any.
 	 */
-	if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) {
-		WARN_ON(1);
+	if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
 		send_sig(SIGKILL, current, 0);
 		return -1;
 	}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e965cfa..d3dae34 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4358,14 +4358,6 @@
 	rcu_read_unlock();
 }
 
-static void rb_free_rcu(struct rcu_head *rcu_head)
-{
-	struct ring_buffer *rb;
-
-	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
-	rb_free(rb);
-}
-
 struct ring_buffer *ring_buffer_get(struct perf_event *event)
 {
 	struct ring_buffer *rb;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2deb24c..2bbad9c 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,6 +11,7 @@
 struct ring_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
+	struct irq_work			irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
@@ -55,6 +56,15 @@
 };
 
 extern void rb_free(struct ring_buffer *rb);
+
+static inline void rb_free_rcu(struct rcu_head *rcu_head)
+{
+	struct ring_buffer *rb;
+
+	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+	rb_free(rb);
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 9647282..b2be01b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -221,6 +221,8 @@
 	rcu_read_unlock();
 }
 
+static void rb_irq_work(struct irq_work *work);
+
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -241,6 +243,16 @@
 
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
+	init_irq_work(&rb->irq_work, rb_irq_work);
+}
+
+static void ring_buffer_put_async(struct ring_buffer *rb)
+{
+	if (!atomic_dec_and_test(&rb->refcount))
+		return;
+
+	rb->rcu_head.next = (void *)rb;
+	irq_work_queue(&rb->irq_work);
 }
 
 /*
@@ -319,7 +331,7 @@
 	rb_free_aux(rb);
 
 err:
-	ring_buffer_put(rb);
+	ring_buffer_put_async(rb);
 	handle->event = NULL;
 
 	return NULL;
@@ -370,7 +382,7 @@
 
 	local_set(&rb->aux_nest, 0);
 	rb_free_aux(rb);
-	ring_buffer_put(rb);
+	ring_buffer_put_async(rb);
 }
 
 /*
@@ -557,7 +569,18 @@
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
+		irq_work_queue(&rb->irq_work);
+}
+
+static void rb_irq_work(struct irq_work *work)
+{
+	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
+
+	if (!atomic_read(&rb->aux_refcount))
 		__rb_free_aux(rb);
+
+	if (rb->rcu_head.next == (void *)rb)
+		call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/module.c b/kernel/module.c
index 3e0e197..4d2b82e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3557,6 +3557,7 @@
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	wake_up_all(&module_wq);
 	/* Wait for RCU-sched synchronizing before releasing mod->list. */
 	synchronize_sched();
diff --git a/mm/memory.c b/mm/memory.c
index a84fbb7..388dcf9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2670,6 +2670,10 @@
 
 	pte_unmap(page_table);
 
+	/* File mapping without ->vm_ops ? */
+	if (vma->vm_flags & VM_SHARED)
+		return VM_FAULT_SIGBUS;
+
 	/* Check if we need to add a guard page to the stack */
 	if (check_stack_guard_page(vma, address) < 0)
 		return VM_FAULT_SIGSEGV;
@@ -3099,6 +3103,9 @@
 			- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 
 	pte_unmap(page_table);
+	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+	if (!vma->vm_ops->fault)
+		return VM_FAULT_SIGBUS;
 	if (!(flags & FAULT_FLAG_WRITE))
 		return do_read_fault(mm, vma, address, pmd, pgoff, flags,
 				orig_pte);
@@ -3244,13 +3251,12 @@
 	barrier();
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (vma->vm_ops) {
-				if (likely(vma->vm_ops->fault))
-					return do_fault(mm, vma, address, pte,
-							pmd, flags, entry);
-			}
-			return do_anonymous_page(mm, vma, address,
-						 pte, pmd, flags);
+			if (vma->vm_ops)
+				return do_fault(mm, vma, address, pte, pmd,
+						flags, entry);
+
+			return do_anonymous_page(mm, vma, address, pte, pmd,
+					flags);
 		}
 		return do_swap_page(mm, vma, address,
 					pte, pmd, flags, entry);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index cb7db32..f30329f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -9,6 +9,7 @@
 #include <keys/ceph-type.h>
 #include <linux/module.h>
 #include <linux/mount.h>
+#include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
@@ -16,8 +17,6 @@
 #include <linux/statfs.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
 
 
 #include <linux/ceph/ceph_features.h>
@@ -131,6 +130,13 @@
 	int i;
 	int ret;
 
+	/*
+	 * Don't bother comparing options if network namespaces don't
+	 * match.
+	 */
+	if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net)))
+		return -1;
+
 	ret = memcmp(opt1, opt2, ofs);
 	if (ret)
 		return ret;
@@ -335,9 +341,6 @@
 	int err = -ENOMEM;
 	substring_t argstr[MAX_OPT_ARGS];
 
-	if (current->nsproxy->net_ns != &init_net)
-		return ERR_PTR(-EINVAL);
-
 	opt = kzalloc(sizeof(*opt), GFP_KERNEL);
 	if (!opt)
 		return ERR_PTR(-ENOMEM);
@@ -608,6 +611,7 @@
 fail_monc:
 	ceph_monc_stop(&client->monc);
 fail:
+	ceph_messenger_fini(&client->msgr);
 	kfree(client);
 	return ERR_PTR(err);
 }
@@ -621,8 +625,8 @@
 
 	/* unmount */
 	ceph_osdc_stop(&client->osdc);
-
 	ceph_monc_stop(&client->monc);
+	ceph_messenger_fini(&client->msgr);
 
 	ceph_debugfs_client_cleanup(client);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1679f47..e3be1d2 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -6,6 +6,7 @@
 #include <linux/inet.h>
 #include <linux/kthread.h>
 #include <linux/net.h>
+#include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
@@ -479,7 +480,7 @@
 	int ret;
 
 	BUG_ON(con->sock);
-	ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family,
+	ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
 			       SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (ret)
 		return ret;
@@ -1731,17 +1732,17 @@
 
 static bool addr_is_blank(struct sockaddr_storage *ss)
 {
+	struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
+	struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
+
 	switch (ss->ss_family) {
 	case AF_INET:
-		return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0;
+		return addr->s_addr == htonl(INADDR_ANY);
 	case AF_INET6:
-		return
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 &&
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 &&
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
+		return ipv6_addr_any(addr6);
+	default:
+		return true;
 	}
-	return false;
 }
 
 static int addr_port(struct sockaddr_storage *ss)
@@ -2944,11 +2945,18 @@
 	msgr->tcp_nodelay = tcp_nodelay;
 
 	atomic_set(&msgr->stopping, 0);
+	write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
 
 	dout("%s %p\n", __func__, msgr);
 }
 EXPORT_SYMBOL(ceph_messenger_init);
 
+void ceph_messenger_fini(struct ceph_messenger *msgr)
+{
+	put_net(read_pnet(&msgr->net));
+}
+EXPORT_SYMBOL(ceph_messenger_fini);
+
 static void clear_standby(struct ceph_connection *con)
 {
 	/* come back from STANDBY? */
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index eff7de1..e70fcd1 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -63,6 +63,8 @@
 
 	DEVID(acpi_device_id);
 	DEVID_FIELD(acpi_device_id, id);
+	DEVID_FIELD(acpi_device_id, cls);
+	DEVID_FIELD(acpi_device_id, cls_msk);
 
 	DEVID(pnp_device_id);
 	DEVID_FIELD(pnp_device_id, id);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 84c86f3..5f20882 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -523,12 +523,40 @@
 }
 ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry);
 
-/* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */
+/* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or
+ *             "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if)
+ *
+ * NOTE: Each driver should use one of the following : _HID, _CIDs
+ *       or _CLS. Also, bb, ss, and pp can be substituted with ??
+ *       as don't care byte.
+ */
 static int do_acpi_entry(const char *filename,
 			void *symval, char *alias)
 {
 	DEF_FIELD_ADDR(symval, acpi_device_id, id);
-	sprintf(alias, "acpi*:%s:*", *id);
+	DEF_FIELD_ADDR(symval, acpi_device_id, cls);
+	DEF_FIELD_ADDR(symval, acpi_device_id, cls_msk);
+
+	if (id && strlen((const char *)*id))
+		sprintf(alias, "acpi*:%s:*", *id);
+	else if (cls) {
+		int i, byte_shift, cnt = 0;
+		unsigned int msk;
+
+		sprintf(&alias[cnt], "acpi*:");
+		cnt = 6;
+		for (i = 1; i <= 3; i++) {
+			byte_shift = 8 * (3-i);
+			msk = (*cls_msk >> byte_shift) & 0xFF;
+			if (msk)
+				sprintf(&alias[cnt], "%02x",
+					(*cls >> byte_shift) & 0xFF);
+			else
+				sprintf(&alias[cnt], "??");
+			cnt += 2;
+		}
+		sprintf(&alias[cnt], ":*");
+	}
 	return 1;
 }
 ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry);
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 91ee1b2..12d3db3 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -886,7 +886,8 @@
 #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
 		".kprobes.text"
 #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
-		".fixup", ".entry.text", ".exception.text", ".text.*"
+		".fixup", ".entry.text", ".exception.text", ".text.*", \
+		".coldtext"
 
 #define INIT_SECTIONS      ".init.*"
 #define MEM_INIT_SECTIONS  ".meminit.*"
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index f0e7267..9098083 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -41,4 +41,62 @@
 
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 
+#include <linux/types.h>
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
+	case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
+	case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
+	case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)res, (const void *)p, size);
+		barrier();
+	}
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering.  One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define WRITE_ONCE(x, val) \
+	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
 #endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h
deleted file mode 100644
index d07e586..0000000
--- a/tools/include/linux/export.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _TOOLS_LINUX_EXPORT_H_
-#define _TOOLS_LINUX_EXPORT_H_
-
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
-
-#endif
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
new file mode 100644
index 0000000..1125822
--- /dev/null
+++ b/tools/include/linux/rbtree.h
@@ -0,0 +1,104 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  See Documentation/rbtree.txt for documentation and samples.
+*/
+
+#ifndef __TOOLS_LINUX_PERF_RBTREE_H
+#define __TOOLS_LINUX_PERF_RBTREE_H
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+
+struct rb_node {
+	unsigned long  __rb_parent_color;
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)  ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node)  \
+	((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node)  \
+	((node)->__rb_parent_color = (unsigned long)(node))
+
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+				struct rb_node **rb_link)
+{
+	node->__rb_parent_color = (unsigned long)parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#define rb_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+	})
+
+
+/*
+ * Handy for checking that we are not deleting an entry that is
+ * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
+ * probably should be moved to lib/rbtree.c...
+ */
+static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+	rb_erase(n, root);
+	RB_CLEAR_NODE(n);
+}
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
new file mode 100644
index 0000000..43be941
--- /dev/null
+++ b/tools/include/linux/rbtree_augmented.h
@@ -0,0 +1,245 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  tools/linux/include/linux/rbtree_augmented.h
+
+  Copied from:
+  linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H
+#define _TOOLS_LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+	void (*propagate)(struct rb_node *node, struct rb_node *stop);
+	void (*copy)(struct rb_node *old, struct rb_node *new);
+	void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+/*
+ * Fixup the rbtree and update the augmented information when rebalancing.
+ *
+ * On insertion, the user must update the augmented information on the path
+ * leading to the inserted node, then call rb_link_node() as usual and
+ * rb_augment_inserted() instead of the usual rb_insert_color() call.
+ * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * a user provided function to update the augmented information on the
+ * affected subtrees.
+ */
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+		    const struct rb_augment_callbacks *augment)
+{
+	__rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	\
+			     rbtype, rbaugmented, rbcompute)		\
+static inline void							\
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
+{									\
+	while (rb != stop) {						\
+		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	\
+		rbtype augmented = rbcompute(node);			\
+		if (node->rbaugmented == augmented)			\
+			break;						\
+		node->rbaugmented = augmented;				\
+		rb = rb_parent(&node->rbfield);				\
+	}								\
+}									\
+static inline void							\
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+}									\
+static void								\
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+	old->rbaugmented = rbcompute(old);				\
+}									\
+rbstatic const struct rb_augment_callbacks rbname = {			\
+	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	\
+};
+
+
+#define	RB_RED		0
+#define	RB_BLACK	1
+
+#define __rb_parent(pc)    ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc)     ((pc) & 1)
+#define __rb_is_black(pc)  __rb_color(pc)
+#define __rb_is_red(pc)    (!__rb_color(pc))
+#define rb_color(rb)       __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb)      __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb)    __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+				       struct rb_node *p, int color)
+{
+	rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+		  struct rb_node *parent, struct rb_root *root)
+{
+	if (parent) {
+		if (parent->rb_left == old)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else
+		root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		     const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+	struct rb_node *parent, *rebalance;
+	unsigned long pc;
+
+	if (!tmp) {
+		/*
+		 * Case 1: node to erase has no more than 1 child (easy!)
+		 *
+		 * Note that if there is one child it must be red due to 5)
+		 * and node must be black due to 4). We adjust colors locally
+		 * so as to bypass __rb_erase_color() later on.
+		 */
+		pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, child, parent, root);
+		if (child) {
+			child->__rb_parent_color = pc;
+			rebalance = NULL;
+		} else
+			rebalance = __rb_is_black(pc) ? parent : NULL;
+		tmp = parent;
+	} else if (!child) {
+		/* Still case 1, but this time the child is node->rb_left */
+		tmp->__rb_parent_color = pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, tmp, parent, root);
+		rebalance = NULL;
+		tmp = parent;
+	} else {
+		struct rb_node *successor = child, *child2;
+		tmp = child->rb_left;
+		if (!tmp) {
+			/*
+			 * Case 2: node's successor is its right child
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (s)  ->  (x) (c)
+			 *        \
+			 *        (c)
+			 */
+			parent = successor;
+			child2 = successor->rb_right;
+			augment->copy(node, successor);
+		} else {
+			/*
+			 * Case 3: node's successor is leftmost under
+			 * node's right child subtree
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (y)  ->  (x) (y)
+			 *      /            /
+			 *    (p)          (p)
+			 *    /            /
+			 *  (s)          (c)
+			 *    \
+			 *    (c)
+			 */
+			do {
+				parent = successor;
+				successor = tmp;
+				tmp = tmp->rb_left;
+			} while (tmp);
+			parent->rb_left = child2 = successor->rb_right;
+			successor->rb_right = child;
+			rb_set_parent(child, successor);
+			augment->copy(node, successor);
+			augment->propagate(parent, successor);
+		}
+
+		successor->rb_left = tmp = node->rb_left;
+		rb_set_parent(tmp, successor);
+
+		pc = node->__rb_parent_color;
+		tmp = __rb_parent(pc);
+		__rb_change_child(node, successor, tmp, root);
+		if (child2) {
+			successor->__rb_parent_color = pc;
+			rb_set_parent_color(child2, parent, RB_BLACK);
+			rebalance = NULL;
+		} else {
+			unsigned long pc2 = successor->__rb_parent_color;
+			successor->__rb_parent_color = pc;
+			rebalance = __rb_is_black(pc2) ? parent : NULL;
+		}
+		tmp = successor;
+	}
+
+	augment->propagate(tmp, NULL);
+	return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+	if (rebalance)
+		__rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif	/* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
new file mode 100644
index 0000000..17c2b59
--- /dev/null
+++ b/tools/lib/rbtree.c
@@ -0,0 +1,548 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree_augmented.h>
+
+/*
+ * red-black trees properties:  http://en.wikipedia.org/wiki/Rbtree
+ *
+ *  1) A node is either red or black
+ *  2) The root is black
+ *  3) All leaves (NULL) are black
+ *  4) Both children of every red node are black
+ *  5) Every simple path from root to leaves contains the same number
+ *     of black nodes.
+ *
+ *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ *  consecutive red nodes in a path and every red node is therefore followed by
+ *  a black. So if B is the number of black nodes on every simple path (as per
+ *  5), then the longest possible path due to 4 is 2B.
+ *
+ *  We shall indicate color with case, where black nodes are uppercase and red
+ *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ *  parentheses and have some accompanying text comment.
+ */
+
+static inline void rb_set_black(struct rb_node *rb)
+{
+	rb->__rb_parent_color |= RB_BLACK;
+}
+
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+	return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+			struct rb_root *root, int color)
+{
+	struct rb_node *parent = rb_parent(old);
+	new->__rb_parent_color = old->__rb_parent_color;
+	rb_set_parent_color(old, new, color);
+	__rb_change_child(old, new, parent, root);
+}
+
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root,
+	    void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+	while (true) {
+		/*
+		 * Loop invariant: node is red
+		 *
+		 * If there is a black parent, we are done.
+		 * Otherwise, take some corrective action as we don't
+		 * want a red root or two consecutive red nodes.
+		 */
+		if (!parent) {
+			rb_set_parent_color(node, NULL, RB_BLACK);
+			break;
+		} else if (rb_is_black(parent))
+			break;
+
+		gparent = rb_red_parent(parent);
+
+		tmp = gparent->rb_right;
+		if (parent != tmp) {	/* parent == gparent->rb_left */
+			if (tmp && rb_is_red(tmp)) {
+				/*
+				 * Case 1 - color flips
+				 *
+				 *       G            g
+				 *      / \          / \
+				 *     p   u  -->   P   U
+				 *    /            /
+				 *   n            n
+				 *
+				 * However, since g's parent might be red, and
+				 * 4) does not allow this, we need to recurse
+				 * at g.
+				 */
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+				rb_set_parent_color(parent, gparent, RB_BLACK);
+				node = gparent;
+				parent = rb_parent(node);
+				rb_set_parent_color(node, parent, RB_RED);
+				continue;
+			}
+
+			tmp = parent->rb_right;
+			if (node == tmp) {
+				/*
+				 * Case 2 - left rotate at parent
+				 *
+				 *      G             G
+				 *     / \           / \
+				 *    p   U  -->    n   U
+				 *     \           /
+				 *      n         p
+				 *
+				 * This still leaves us in violation of 4), the
+				 * continuation into Case 3 will fix that.
+				 */
+				parent->rb_right = tmp = node->rb_left;
+				node->rb_left = parent;
+				if (tmp)
+					rb_set_parent_color(tmp, parent,
+							    RB_BLACK);
+				rb_set_parent_color(parent, node, RB_RED);
+				augment_rotate(parent, node);
+				parent = node;
+				tmp = node->rb_right;
+			}
+
+			/*
+			 * Case 3 - right rotate at gparent
+			 *
+			 *        G           P
+			 *       / \         / \
+			 *      p   U  -->  n   g
+			 *     /                 \
+			 *    n                   U
+			 */
+			gparent->rb_left = tmp;  /* == parent->rb_right */
+			parent->rb_right = gparent;
+			if (tmp)
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
+			augment_rotate(gparent, parent);
+			break;
+		} else {
+			tmp = gparent->rb_left;
+			if (tmp && rb_is_red(tmp)) {
+				/* Case 1 - color flips */
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+				rb_set_parent_color(parent, gparent, RB_BLACK);
+				node = gparent;
+				parent = rb_parent(node);
+				rb_set_parent_color(node, parent, RB_RED);
+				continue;
+			}
+
+			tmp = parent->rb_left;
+			if (node == tmp) {
+				/* Case 2 - right rotate at parent */
+				parent->rb_left = tmp = node->rb_right;
+				node->rb_right = parent;
+				if (tmp)
+					rb_set_parent_color(tmp, parent,
+							    RB_BLACK);
+				rb_set_parent_color(parent, node, RB_RED);
+				augment_rotate(parent, node);
+				parent = node;
+				tmp = node->rb_left;
+			}
+
+			/* Case 3 - left rotate at gparent */
+			gparent->rb_right = tmp;  /* == parent->rb_left */
+			parent->rb_left = gparent;
+			if (tmp)
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
+			augment_rotate(gparent, parent);
+			break;
+		}
+	}
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
+
+	while (true) {
+		/*
+		 * Loop invariants:
+		 * - node is black (or NULL on first iteration)
+		 * - node is not the root (parent is not NULL)
+		 * - All leaf paths going through parent and node have a
+		 *   black node count that is 1 lower than other leaf paths.
+		 */
+		sibling = parent->rb_right;
+		if (node != sibling) {	/* node == parent->rb_left */
+			if (rb_is_red(sibling)) {
+				/*
+				 * Case 1 - left rotate at parent
+				 *
+				 *     P               S
+				 *    / \             / \
+				 *   N   s    -->    p   Sr
+				 *      / \         / \
+				 *     Sl  Sr      N   Sl
+				 */
+				parent->rb_right = tmp1 = sibling->rb_left;
+				sibling->rb_left = parent;
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_right;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_left;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/*
+					 * Case 2 - sibling color flip
+					 * (p could be either color here)
+					 *
+					 *    (p)           (p)
+					 *    / \           / \
+					 *   N   S    -->  N   s
+					 *      / \           / \
+					 *     Sl  Sr        Sl  Sr
+					 *
+					 * This leaves us violating 5) which
+					 * can be fixed by flipping p to black
+					 * if it was red, or by recursing at p.
+					 * p is red when coming from Case 1.
+					 */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/*
+				 * Case 3 - right rotate at sibling
+				 * (p could be either color here)
+				 *
+				 *   (p)           (p)
+				 *   / \           / \
+				 *  N   S    -->  N   Sl
+				 *     / \             \
+				 *    sl  Sr            s
+				 *                       \
+				 *                        Sr
+				 */
+				sibling->rb_left = tmp1 = tmp2->rb_right;
+				tmp2->rb_right = sibling;
+				parent->rb_right = tmp2;
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/*
+			 * Case 4 - left rotate at parent + color flips
+			 * (p and sl could be either color here.
+			 *  After rotation, p becomes black, s acquires
+			 *  p's color, and sl keeps its color)
+			 *
+			 *      (p)             (s)
+			 *      / \             / \
+			 *     N   S     -->   P   Sr
+			 *        / \         / \
+			 *      (sl) sr      N  (sl)
+			 */
+			parent->rb_right = tmp2 = sibling->rb_left;
+			sibling->rb_left = parent;
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		} else {
+			sibling = parent->rb_left;
+			if (rb_is_red(sibling)) {
+				/* Case 1 - right rotate at parent */
+				parent->rb_left = tmp1 = sibling->rb_right;
+				sibling->rb_right = parent;
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_left;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_right;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/* Case 2 - sibling color flip */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/* Case 3 - right rotate at sibling */
+				sibling->rb_right = tmp1 = tmp2->rb_left;
+				tmp2->rb_left = sibling;
+				parent->rb_left = tmp2;
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/* Case 4 - left rotate at parent + color flips */
+			parent->rb_left = tmp2 = sibling->rb_right;
+			sibling->rb_right = parent;
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		}
+	}
+}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+	dummy_propagate, dummy_copy, dummy_rotate
+};
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	__rb_insert(node, root, dummy_rotate);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *rebalance;
+	rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+	if (rebalance)
+		____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */
+
+void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	__rb_insert(node, root, augment_rotate);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a right-hand child, go down and then left as far
+	 * as we can.
+	 */
+	if (node->rb_right) {
+		node = node->rb_right;
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No right-hand children. Everything down and left is smaller than us,
+	 * so any 'next' node must be in the general direction of our parent.
+	 * Go up the tree; any time the ancestor is a right-hand child of its
+	 * parent, keep going up. First time it's a left-hand child of its
+	 * parent, said parent is our 'next' node.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a left-hand child, go down and then right as far
+	 * as we can.
+	 */
+	if (node->rb_left) {
+		node = node->rb_left;
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No left-hand children. Go up till we find an ancestor which
+	 * is a right-hand child of its parent.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	__rb_change_child(victim, new, parent, root);
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+	for (;;) {
+		if (node->rb_left)
+			node = node->rb_left;
+		else if (node->rb_right)
+			node = node->rb_right;
+		else
+			return (struct rb_node *)node;
+	}
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+	const struct rb_node *parent;
+	if (!node)
+		return NULL;
+	parent = rb_parent(node);
+
+	/* If we're sitting on node, we've already seen our children */
+	if (parent && node == parent->rb_left && parent->rb_right) {
+		/* If we are the parent's left node, go to the parent's right
+		 * node then all the way down to the left */
+		return rb_left_deepest_node(parent->rb_right);
+	} else
+		/* Otherwise we are the parent's right node, and the parent
+		 * should be next */
+		return (struct rb_node *)parent;
+}
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+	if (!root->rb_node)
+		return NULL;
+
+	return rb_left_deepest_node(root->rb_node);
+}
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index fe50a1b..09dc0aa 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -18,6 +18,7 @@
 tools/arch/x86/include/asm/rmwcc.h
 tools/lib/traceevent
 tools/lib/api
+tools/lib/rbtree.c
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
 tools/lib/util/find_next_bit.c
@@ -44,6 +45,8 @@
 tools/include/linux/list.h
 tools/include/linux/log2.h
 tools/include/linux/poison.h
+tools/include/linux/rbtree.h
+tools/include/linux/rbtree_augmented.h
 tools/include/linux/types.h
 include/asm-generic/bitops/arch_hweight.h
 include/asm-generic/bitops/const_hweight.h
@@ -51,12 +54,10 @@
 include/asm-generic/bitops/__fls.h
 include/asm-generic/bitops/fls.h
 include/linux/perf_event.h
-include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
 lib/hweight.c
-lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
 arch/*/include/uapi/asm/unistd*.h
@@ -65,7 +66,6 @@
 arch/*/lib/memset*.S
 include/linux/poison.h
 include/linux/hw_breakpoint.h
-include/linux/rbtree_augmented.h
 include/uapi/linux/perf_event.h
 include/uapi/linux/const.h
 include/uapi/linux/swab.h
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 586a59d..601d114 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -139,7 +139,7 @@
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
 
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
 
diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h
deleted file mode 100644
index f06d89f..0000000
--- a/tools/perf/util/include/linux/rbtree.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __TOOLS_LINUX_PERF_RBTREE_H
-#define __TOOLS_LINUX_PERF_RBTREE_H
-#include <stdbool.h>
-#include "../../../../include/linux/rbtree.h"
-
-/*
- * Handy for checking that we are not deleting an entry that is
- * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
- * probably should be moved to lib/rbtree.c...
- */
-static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
-{
-	rb_erase(n, root);
-	RB_CLEAR_NODE(n);
-}
-#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h
deleted file mode 100644
index 9d6fcdf..0000000
--- a/tools/perf/util/include/linux/rbtree_augmented.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <stdbool.h>
-#include "../../../../include/linux/rbtree_augmented.h"