Merge branch 'for-linus' of git://git.linaro.org/people/rmk/linux-arm

Pull second set of ARM updates from Russell King:
 "This is the second set of ARM updates for this merge window.

  Contained within are changes to allow the kernel to boot in hypervisor
  mode on CPUs supporting virtualization, and cache flushing support to
  the point of inner shareable unification, which is used by the
  suspend/resume code to avoid having to do a full cache flush.

  Also included is one fix for VFP code identified by Michael Olbrich."

* 'for-linus' of git://git.linaro.org/people/rmk/linux-arm:
  ARM: vfp: fix saving d16-d31 vfp registers on v6+ kernels
  ARM: 7549/1: HYP: fix boot on some ARM1136 cores
  ARM: 7542/1: mm: fix cache LoUIS API for xscale and feroceon
  ARM: mm: update __v7_setup() to the new LoUIS cache maintenance API
  ARM: kernel: update __cpu_disable to use cache LoUIS maintenance API
  ARM: kernel: update cpu_suspend code to use cache LoUIS operations
  ARM: mm: rename jump labels in v7_flush_dcache_all function
  ARM: mm: implement LoUIS API for cache maintenance ops
  ARM: virt: arch_timers: enable access to physical timers
  ARM: virt: Add CONFIG_ARM_VIRT_EXT option
  ARM: virt: Add boot-time diagnostics
  ARM: virt: Update documentation for hyp mode entry support
  ARM: zImage/virt: hyp mode entry support for the zImage loader
  ARM: virt: allow the kernel to be entered in HYP mode
  ARM: opcodes: add __ERET/__MSR_ELR_HYP instruction encoding
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
index a341d87..0c1f475 100644
--- a/Documentation/arm/Booting
+++ b/Documentation/arm/Booting
@@ -154,13 +154,33 @@
 
 - CPU mode
   All forms of interrupts must be disabled (IRQs and FIQs)
-  The CPU must be in SVC mode.  (A special exception exists for Angel)
+
+  For CPUs which do not include the ARM virtualization extensions, the
+  CPU must be in SVC mode.  (A special exception exists for Angel)
+
+  CPUs which include support for the virtualization extensions can be
+  entered in HYP mode in order to enable the kernel to make full use of
+  these extensions.  This is the recommended boot method for such CPUs,
+  unless the virtualization extensions are already in use by a pre-installed
+  hypervisor.
+
+  If the kernel is not entered in HYP mode for any reason, it must be
+  entered in SVC mode.
 
 - Caches, MMUs
   The MMU must be off.
   Instruction cache may be on or off.
   Data cache must be off.
 
+  If the kernel is entered in HYP mode, the above requirements apply to
+  the HYP mode configuration in addition to the ordinary PL1 (privileged
+  kernel modes) configuration.  In addition, all traps into the
+  hypervisor must be disabled, and PL1 access must be granted for all
+  peripherals and CPU resources for which this is architecturally
+  possible.  Apart from the HYP mode entry itself, the system configuration
+  should be such that a kernel which does not include support for the
+  virtualization extensions can boot correctly without extra help.
+
 - The boot loader is expected to call the kernel image by jumping
   directly to the first instruction of the kernel image.
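As a concrete illustration of the mode requirements above, a bootloader could sanity-check the mode it is about to hand the kernel over in with something like the following sketch (hypothetical helper; the mode encodings 0x13 for SVC and 0x1a for HYP are the architectural ones, matching the MODE_MASK field):

	/* Sketch only: check that the kernel will be entered in SVC or HYP. */
	static inline unsigned int read_cpsr(void)
	{
		unsigned int cpsr;

		asm volatile("mrs %0, cpsr" : "=r" (cpsr));
		return cpsr;
	}

	static int boot_mode_ok(void)
	{
		unsigned int mode = read_cpsr() & 0x1f;	/* mode field */

		return mode == 0x13 || mode == 0x1a;	/* SVC or HYP */
	}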
 
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index d0d441c..f79a08e 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -1,6 +1,7 @@
 ashldi3.S
 font.c
 lib1funcs.S
+hyp-stub.S
 piggy.gzip
 piggy.lzo
 piggy.lzma
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index bb26756..a517153 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -30,6 +30,10 @@
 OBJS		+= string.o
 CFLAGS_string.o	:= -Os
 
+ifeq ($(CONFIG_ARM_VIRT_EXT),y)
+OBJS		+= hyp-stub.o
+endif
+
 #
 # Architecture dependencies
 #
@@ -126,7 +130,7 @@
 endif
 
 ccflags-y := -fpic -fno-builtin -I$(obj)
-asflags-y := -Wa,-march=all
+asflags-y := -Wa,-march=all -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
 KBSS_SZ = $(shell $(CROSS_COMPILE)size $(obj)/../../../../vmlinux | \
@@ -198,3 +202,6 @@
 
 $(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
 	@sed "$(SEDFLAGS)" < $< > $@
+
+$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
+	$(call cmd,shipped)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index bc67cbf..90275f0 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -9,6 +9,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 /*
  * Debugging stuff
@@ -132,7 +133,12 @@
 		.word	start			@ absolute load/run zImage address
 		.word	_edata			@ zImage end address
  THUMB(		.thumb			)
-1:		mov	r7, r1			@ save architecture ID
+1:
+		mrs	r9, cpsr
+#ifdef CONFIG_ARM_VIRT_EXT
+		bl	__hyp_stub_install	@ get into SVC mode, reversibly
+#endif
+		mov	r7, r1			@ save architecture ID
 		mov	r8, r2			@ save atags pointer
 
 #ifndef __ARM_ARCH_2__
@@ -148,9 +154,9 @@
  ARM(		swi	0x123456	)	@ angel_SWI_ARM
  THUMB(		svc	0xab		)	@ angel_SWI_THUMB
 not_angel:
-		mrs	r2, cpsr		@ turn off interrupts to
-		orr	r2, r2, #0xc0		@ prevent angel from running
-		msr	cpsr_c, r2
+		safe_svcmode_maskall r0
+		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
+						@ SPSR
 #else
 		teqp	pc, #0x0c000003		@ turn off interrupts
 #endif
@@ -350,6 +356,20 @@
 		adr	r5, restart
 		bic	r5, r5, #31
 
+/* Relocate the hyp vector base if necessary */
+#ifdef CONFIG_ARM_VIRT_EXT
+		mrs	r0, spsr
+		and	r0, r0, #MODE_MASK
+		cmp	r0, #HYP_MODE
+		bne	1f
+
+		bl	__hyp_get_vectors
+		sub	r0, r0, r5
+		add	r0, r0, r10
+		bl	__hyp_set_vectors
+1:
+#endif
+
 		sub	r9, r6, r5		@ size to copy
 		add	r9, r9, #31		@ rounded up to a multiple
 		bic	r9, r9, #31		@ ... of 32 bytes
@@ -458,11 +478,29 @@
 		bl	decompress_kernel
 		bl	cache_clean_flush
 		bl	cache_off
-		mov	r0, #0			@ must be zero
 		mov	r1, r7			@ restore architecture number
 		mov	r2, r8			@ restore atags pointer
- ARM(		mov	pc, r4	)		@ call kernel
- THUMB(		bx	r4	)		@ entry point is always ARM
+
+#ifdef CONFIG_ARM_VIRT_EXT
+		mrs	r0, spsr		@ Get saved CPU boot mode
+		and	r0, r0, #MODE_MASK
+		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
+		bne	__enter_kernel		@ boot kernel directly
+
+		adr	r12, .L__hyp_reentry_vectors_offset
+		ldr	r0, [r12]
+		add	r0, r0, r12
+
+		bl	__hyp_set_vectors
+		__HVC(0)			@ otherwise bounce to hyp mode
+
+		b	.			@ should never be reached
+
+		.align	2
+.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
+#else
+		b	__enter_kernel
+#endif
 
 		.align	2
 		.type	LC0, #object
@@ -1196,6 +1234,25 @@
 #endif
 
 		.ltorg
+
+#ifdef CONFIG_ARM_VIRT_EXT
+.align 5
+__hyp_reentry_vectors:
+		W(b)	.			@ reset
+		W(b)	.			@ undef
+		W(b)	.			@ svc
+		W(b)	.			@ pabort
+		W(b)	.			@ dabort
+		W(b)	__enter_kernel		@ hyp
+		W(b)	.			@ irq
+		W(b)	.			@ fiq
+#endif /* CONFIG_ARM_VIRT_EXT */
+
+__enter_kernel:
+		mov	r0, #0			@ must be 0
+ ARM(		mov	pc, r4	)		@ call kernel
+ THUMB(		bx	r4	)		@ entry point is always ARM
+
 reloc_code_end:
 
 		.align
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 5c8b3bf4..2ef9581 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -22,6 +22,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/domain.h>
+#include <asm/opcodes-virt.h>
 
 #define IOMEM(x)	(x)
 
@@ -240,6 +241,34 @@
 #endif
 
 /*
+ * Helper macro to enter SVC mode cleanly and mask interrupts. reg is
+ * a scratch register for the macro to overwrite.
+ *
+ * This macro is intended for forcing the CPU into SVC mode at boot time.
+ * You cannot return to the original mode.
+ *
+ * Beware: it also clobbers LR.
+ */
+.macro safe_svcmode_maskall reg:req
+	mrs	\reg, cpsr
+	mov	lr, \reg
+	and	lr, lr, #MODE_MASK
+	cmp	lr, #HYP_MODE
+	orr	\reg, \reg, #PSR_I_BIT | PSR_F_BIT
+	bic	\reg, \reg, #MODE_MASK
+	orr	\reg, \reg, #SVC_MODE
+THUMB(	orr	\reg, \reg, #PSR_T_BIT	)
+	bne	1f
+	orr	\reg, \reg, #PSR_A_BIT
+	adr	lr, BSYM(2f)
+	msr	spsr_cxsf, \reg
+	__MSR_ELR_HYP(14)
+	__ERET
+1:	msr	cpsr_c, \reg
+2:
+.endm
+
+/*
  * STRT/LDRT access macros with ARM and Thumb-2 variants
  */
 #ifdef CONFIG_THUMB2_KERNEL
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e4448e1..e1489c5 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -49,6 +49,13 @@
  *
  *		Unconditionally clean and invalidate the entire cache.
  *
+ *	flush_kern_louis()
+ *
+ *		Flush data cache levels up to the level of unification
+ *		inner shareable and invalidate the I-cache.
+ *		Only needed from v7 onwards, falls back to flush_cache_all()
+ *		for all other processor versions.
+ *
  *	flush_user_all()
  *
  *		Clean and invalidate all user space cache entries
@@ -97,6 +104,7 @@
 struct cpu_cache_fns {
 	void (*flush_icache_all)(void);
 	void (*flush_kern_all)(void);
+	void (*flush_kern_louis)(void);
 	void (*flush_user_all)(void);
 	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
 
@@ -119,6 +127,7 @@
 
 #define __cpuc_flush_icache_all		cpu_cache.flush_icache_all
 #define __cpuc_flush_kern_all		cpu_cache.flush_kern_all
+#define __cpuc_flush_kern_louis		cpu_cache.flush_kern_louis
 #define __cpuc_flush_user_all		cpu_cache.flush_user_all
 #define __cpuc_flush_user_range		cpu_cache.flush_user_range
 #define __cpuc_coherent_kern_range	cpu_cache.coherent_kern_range
@@ -139,6 +148,7 @@
 
 extern void __cpuc_flush_icache_all(void);
 extern void __cpuc_flush_kern_all(void);
+extern void __cpuc_flush_kern_louis(void);
 extern void __cpuc_flush_user_all(void);
 extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
 extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
@@ -204,6 +214,11 @@
 	__flush_icache_preferred();
 }
 
+/*
+ * Flush caches up to Level of Unification Inner Shareable
+ */
+#define flush_cache_louis()		__cpuc_flush_kern_louis()
+
 #define flush_cache_all()		__cpuc_flush_kern_all()
 
 static inline void vivt_flush_cache_mm(struct mm_struct *mm)
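To make the intended use of the new hook concrete, here is a hedged sketch of the pattern the rest of this series adopts (the function name is illustrative; flush_cache_louis() and local_flush_tlb_all() are the real interfaces):

	/* Sketch: CPU teardown where dirty lines only need to reach the
	 * cache levels shared by all CPUs, not main memory. */
	static void example_cpu_teardown(void)		/* hypothetical */
	{
		flush_cache_louis();	/* clean+invalidate up to LoUIS */
		local_flush_tlb_all();
	}

Compared with flush_cache_all(), this avoids walking cache levels beyond the point that the remaining CPUs already snoop.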
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index 4f8d2c0..cca9f15 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -132,6 +132,7 @@
 #ifndef MULTI_CACHE
 #define __cpuc_flush_icache_all		__glue(_CACHE,_flush_icache_all)
 #define __cpuc_flush_kern_all		__glue(_CACHE,_flush_kern_cache_all)
+#define __cpuc_flush_kern_louis		__glue(_CACHE,_flush_kern_cache_louis)
 #define __cpuc_flush_user_all		__glue(_CACHE,_flush_user_cache_all)
 #define __cpuc_flush_user_range		__glue(_CACHE,_flush_user_cache_range)
 #define __cpuc_coherent_kern_range	__glue(_CACHE,_coherent_kern_range)
diff --git a/arch/arm/include/asm/opcodes-virt.h b/arch/arm/include/asm/opcodes-virt.h
index b85665a..efcfdf9 100644
--- a/arch/arm/include/asm/opcodes-virt.h
+++ b/arch/arm/include/asm/opcodes-virt.h
@@ -26,4 +26,14 @@
 	0xF7E08000 | (((imm16) & 0xF000) << 4) | ((imm16) & 0x0FFF)	\
 )
 
+#define __ERET	__inst_arm_thumb32(					\
+	0xE160006E,							\
+	0xF3DE8F00							\
+)
+
+#define __MSR_ELR_HYP(regnum)	__inst_arm_thumb32(			\
+	0xE12EF300 | regnum,						\
+	0xF3808E30 | (regnum << 16)					\
+)
+
 #endif /* ! __ASM_ARM_OPCODES_VIRT_H */
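The two encodings can be sanity-checked with a trivial host-side program (a standalone sketch; the expected values follow directly from the macro definitions above, using the ARM halves of __inst_arm_thumb32):

	#include <stdio.h>

	int main(void)
	{
		/* ARM (A1) encodings built by the macros above */
		printf("msr elr_hyp, lr -> 0x%08X\n", 0xE12EF300 | 14); /* 0xE12EF30E */
		printf("eret            -> 0x%08X\n", 0xE160006E);
		return 0;
	}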
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 44fe998..142d6ae 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -44,6 +44,7 @@
 #define IRQ_MODE	0x00000012
 #define SVC_MODE	0x00000013
 #define ABT_MODE	0x00000017
+#define HYP_MODE	0x0000001a
 #define UND_MODE	0x0000001b
 #define SYSTEM_MODE	0x0000001f
 #define MODE32_BIT	0x00000010
diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h
index a7aadbd..6a6f1e4 100644
--- a/arch/arm/include/asm/vfpmacros.h
+++ b/arch/arm/include/asm/vfpmacros.h
@@ -28,7 +28,7 @@
 	ldr	\tmp, =elf_hwcap		    @ may not have MVFR regs
 	ldr	\tmp, [\tmp, #0]
 	tst	\tmp, #HWCAP_VFPv3D16
-	ldceq	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
+	ldceql	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
 	addne	\base, \base, #32*4		    @ step over unused register space
 #else
 	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
@@ -52,7 +52,7 @@
 	ldr	\tmp, =elf_hwcap		    @ may not have MVFR regs
 	ldr	\tmp, [\tmp, #0]
 	tst	\tmp, #HWCAP_VFPv3D16
-	stceq	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
+	stceql	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
 	addne	\base, \base, #32*4		    @ step over unused register space
 #else
 	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
new file mode 100644
index 0000000..86164df
--- /dev/null
+++ b/arch/arm/include/asm/virt.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2012 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef VIRT_H
+#define VIRT_H
+
+#include <asm/ptrace.h>
+
+/*
+ * Flag indicating that the kernel was not entered in the same mode on every
+ * CPU.  The zImage loader stashes this value in an SPSR, so we need an
+ * architecturally defined flag bit here (the N flag, as it happens).
+ */
+#define BOOT_CPU_MODE_MISMATCH	PSR_N_BIT
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_ARM_VIRT_EXT
+/*
+ * __boot_cpu_mode records what mode the primary CPU was booted in.
+ * A correctly-implemented bootloader must start all CPUs in the same mode:
+ * if it fails to do this, the flag BOOT_CPU_MODE_MISMATCH is set to indicate
+ * that some CPU(s) were booted in a different mode.
+ *
+ * This allows the kernel to flag an error when the secondaries have come up.
+ */
+extern int __boot_cpu_mode;
+
+void __hyp_set_vectors(unsigned long phys_vector_base);
+unsigned long __hyp_get_vectors(void);
+#else
+#define __boot_cpu_mode	(SVC_MODE)
+#endif
+
+#ifndef ZIMAGE
+void hyp_mode_check(void);
+
+/* Reports the availability of HYP mode */
+static inline bool is_hyp_mode_available(void)
+{
+	return ((__boot_cpu_mode & MODE_MASK) == HYP_MODE &&
+		!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH));
+}
+
+/* Check if the bootloader has booted CPUs in different modes */
+static inline bool is_hyp_mode_mismatched(void)
+{
+	return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
+}
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ! VIRT_H */
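A minimal sketch of a consumer of these helpers (the init function is hypothetical; only __boot_cpu_mode, is_hyp_mode_available() and __hyp_set_vectors() come from the header above):

	#include <linux/errno.h>
	#include <linux/init.h>
	#include <asm/virt.h>

	static int __init example_hyp_init(void)	/* hypothetical */
	{
		if (!is_hyp_mode_available())
			return -ENODEV;	/* SVC boot, or mixed boot modes */

		/* safe to install a hypervisor via __hyp_set_vectors() */
		return 0;
	}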
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5dfef9d..5bbec7b 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -81,4 +81,6 @@
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
+obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
+
 extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 9874d07..4eee351 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -83,8 +83,12 @@
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
 
-	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
-						@ and irqs disabled
+#ifdef CONFIG_ARM_VIRT_EXT
+	bl	__hyp_stub_install
+#endif
+	@ ensure svc mode and all interrupts masked
+	safe_svcmode_maskall r9
+
 	mrc	p15, 0, r9, c0, c0		@ get processor id
 	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
 	movs	r10, r5				@ invalid processor (r5=0)?
@@ -326,7 +330,11 @@
 	 * the processor type - there is no need to check the machine type
 	 * as it has already been validated by the primary processor.
 	 */
-	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
+#ifdef CONFIG_ARM_VIRT_EXT
+	bl	__hyp_stub_install
+#endif
+	safe_svcmode_maskall r9
+
 	mrc	p15, 0, r9, c0, c0		@ get processor id
 	bl	__lookup_processor_type
 	movs	r10, r5				@ invalid processor?
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
new file mode 100644
index 0000000..65b2417
--- /dev/null
+++ b/arch/arm/kernel/hyp-stub.S
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2012 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/virt.h>
+
+#ifndef ZIMAGE
+/*
+ * For the kernel proper, we need to find out the CPU boot mode long after
+ * boot, so we need to store it in a writable variable.
+ *
+ * This is not in .bss, because we set it sufficiently early that the boot-time
+ * zeroing of .bss would clobber it.
+ */
+.data
+ENTRY(__boot_cpu_mode)
+	.long	0
+.text
+
+	/*
+	 * Save the primary CPU boot mode. Requires 3 scratch registers.
+	 */
+	.macro	store_primary_cpu_mode	reg1, reg2, reg3
+	mrs	\reg1, cpsr
+	and	\reg1, \reg1, #MODE_MASK
+	adr	\reg2, .L__boot_cpu_mode_offset
+	ldr	\reg3, [\reg2]
+	str	\reg1, [\reg2, \reg3]
+	.endm
+
+	/*
+	 * Compare the current mode with the one saved on the primary CPU.
+	 * If they don't match, record that fact. The Z bit indicates
+	 * if there's a match or not.
+	 * Requires 3 additional scratch registers.
+	 */
+	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
+	adr	\reg2, .L__boot_cpu_mode_offset
+	ldr	\reg3, [\reg2]
+	ldr	\reg1, [\reg2, \reg3]
+	cmp	\mode, \reg1		@ matches primary CPU boot mode?
+	orrne	\reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
+	strne	\reg1, [\reg2, \reg3]	@ record what happened and give up
+	.endm
+
+#else	/* ZIMAGE */
+
+	.macro	store_primary_cpu_mode	reg1:req, reg2:req, reg3:req
+	.endm
+
+/*
+ * The zImage loader only runs on one CPU, so we don't bother with multi-CPU
+ * consistency checking:
+ */
+	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
+	cmp	\mode, \mode
+	.endm
+
+#endif /* ZIMAGE */
+
+/*
+ * Hypervisor stub installation functions.
+ *
+ * These must be called with the MMU and D-cache off.
+ * They are not ABI compliant and are only intended to be called from the kernel
+ * entry points in head.S.
+ */
+@ Call this from the primary CPU
+ENTRY(__hyp_stub_install)
+	store_primary_cpu_mode	r4, r5, r6
+ENDPROC(__hyp_stub_install)
+
+	@ fall through...
+
+@ Secondary CPUs should call here
+ENTRY(__hyp_stub_install_secondary)
+	mrs	r4, cpsr
+	and	r4, r4, #MODE_MASK
+
+	/*
+	 * If the secondary has booted with a different mode, give up
+	 * immediately.
+	 */
+	compare_cpu_mode_with_primary	r4, r5, r6, r7
+	bxne	lr
+
+	/*
+	 * Once we have given up on one CPU, we do not try to install the
+	 * stub hypervisor on the remaining ones: because the saved boot mode
+	 * is modified, it can't compare equal to the CPSR mode field any
+	 * more.
+	 *
+	 * Otherwise...
+	 */
+
+	cmp	r4, #HYP_MODE
+	bxne	lr			@ give up if the CPU is not in HYP mode
+
+/*
+ * Configure HSCTLR to set correct exception endianness/instruction set
+ * state etc.
+ * Turn off all traps
+ * Eventually, CPU-specific code might be needed -- assume not for now
+ *
+ * This code relies on the "eret" instruction to synchronize the
+ * various coprocessor accesses.
+ */
+	@ Now install the hypervisor stub:
+	adr	r7, __hyp_stub_vectors
+	mcr	p15, 4, r7, c12, c0, 0	@ set hypervisor vector base (HVBAR)
+
+	@ Disable all traps, so we don't get any nasty surprises
+	mov	r7, #0
+	mcr	p15, 4, r7, c1, c1, 0	@ HCR
+	mcr	p15, 4, r7, c1, c1, 2	@ HCPTR
+	mcr	p15, 4, r7, c1, c1, 3	@ HSTR
+
+THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	orr	r7, #(1 << 9)		@ HSCTLR.EE
+#endif
+	mcr	p15, 4, r7, c1, c0, 0	@ HSCTLR
+
+	mrc	p15, 4, r7, c1, c1, 1	@ HDCR
+	and	r7, #0x1f		@ Preserve HPMN
+	mcr	p15, 4, r7, c1, c1, 1	@ HDCR
+
+#if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER)
+	@ make CNTP_* and CNTPCT accessible from PL1
+	mrc	p15, 0, r7, c0, c1, 1	@ ID_PFR1
+	lsr	r7, #16
+	and	r7, #0xf
+	cmp	r7, #1
+	bne	1f
+	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
+	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+1:
+#endif
+
+	bic	r7, r4, #MODE_MASK
+	orr	r7, r7, #SVC_MODE
+THUMB(	orr	r7, r7, #PSR_T_BIT	)
+	msr	spsr_cxsf, r7		@ This is SPSR_hyp.
+
+	__MSR_ELR_HYP(14)		@ msr elr_hyp, lr
+	__ERET				@ return, switching to SVC mode
+					@ The boot CPU mode is left in r4.
+ENDPROC(__hyp_stub_install_secondary)
+
+__hyp_stub_do_trap:
+	cmp	r0, #-1
+	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
+	mcrne	p15, 4, r0, c12, c0, 0	@ set HVBAR
+	__ERET
+ENDPROC(__hyp_stub_do_trap)
+
+/*
+ * __hyp_set_vectors: Call this after boot to set the initial hypervisor
+ * vectors as part of hypervisor installation.  On an SMP system, this should
+ * be called on each CPU.
+ *
+ * r0 must be the physical address of the new vector table (which must lie in
+ * the bottom 4GB of physical address space).
+ *
+ * r0 must be 32-byte aligned.
+ *
+ * Before calling this, you must check that the stub hypervisor is installed
+ * everywhere, by waiting for any secondary CPUs to be brought up and then
+ * checking that is_hyp_mode_available() is true.
+ *
+ * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or
+ * something else went wrong... in such cases, trying to install a new
+ * hypervisor is unlikely to work as desired.
+ *
+ * When you call into your shiny new hypervisor, sp_hyp will contain junk,
+ * so you will need to set that to something sensible at the new hypervisor's
+ * initialisation entry point.
+ */
+ENTRY(__hyp_get_vectors)
+	mov	r0, #-1
+ENDPROC(__hyp_get_vectors)
+	@ fall through
+ENTRY(__hyp_set_vectors)
+	__HVC(0)
+	bx	lr
+ENDPROC(__hyp_set_vectors)
+
+#ifndef ZIMAGE
+.align 2
+.L__boot_cpu_mode_offset:
+	.long	__boot_cpu_mode - .
+#endif
+
+.align 5
+__hyp_stub_vectors:
+__hyp_stub_reset:	W(b)	.
+__hyp_stub_und:		W(b)	.
+__hyp_stub_svc:		W(b)	.
+__hyp_stub_pabort:	W(b)	.
+__hyp_stub_dabort:	W(b)	.
+__hyp_stub_trap:	W(b)	__hyp_stub_do_trap
+__hyp_stub_irq:		W(b)	.
+__hyp_stub_fiq:		W(b)	.
+ENDPROC(__hyp_stub_vectors)
+
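The stub's HVC protocol is deliberately tiny: r0 == -1 reads HVBAR back, any other value writes it. A hedged sketch of a PL1 caller, restating the constraints documented above (the function is illustrative):

	#include <linux/bug.h>
	#include <asm/virt.h>

	static void example_install_hyp_vectors(unsigned long vectors_phys)
	{
		BUG_ON(vectors_phys & 0x1f);		/* 32-byte aligned */
		BUG_ON(!is_hyp_mode_available());	/* stub present everywhere? */

		__hyp_set_vectors(vectors_phys);
		/*
		 * The next HVC from PL1 enters the new vectors.  sp_hyp
		 * still contains junk, so the hypervisor entry point must
		 * set up its own stack before doing anything else.
		 */
	}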
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index febafa0..da1d1aa 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -53,6 +53,7 @@
 #include <asm/traps.h>
 #include <asm/unwind.h>
 #include <asm/memblock.h>
+#include <asm/virt.h>
 
 #include "atags.h"
 #include "tcm.h"
@@ -703,6 +704,21 @@
 	return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }
 
+void __init hyp_mode_check(void)
+{
+#ifdef CONFIG_ARM_VIRT_EXT
+	if (is_hyp_mode_available()) {
+		pr_info("CPU: All CPU(s) started in HYP mode.\n");
+		pr_info("CPU: Virtualization extensions available.\n");
+	} else if (is_hyp_mode_mismatched()) {
+		pr_warn("CPU: WARNING: CPU(s) started in wrong/inconsistent modes (primary CPU mode 0x%x)\n",
+			__boot_cpu_mode & MODE_MASK);
+		pr_warn("CPU: This may indicate a broken bootloader or firmware.\n");
+	} else {
+		pr_info("CPU: All CPU(s) started in SVC mode.\n");
+	}
+#endif
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	struct machine_desc *mdesc;
@@ -748,6 +764,10 @@
 		smp_init_cpus();
 	}
 #endif
+
+	if (!is_smp())
+		hyp_mode_check();
+
 	reserve_crashkernel();
 
 	tcm_init();
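With this in place, a successful HYP-mode boot logs:

	CPU: All CPU(s) started in HYP mode.
	CPU: Virtualization extensions available.

a legacy bootloader produces "CPU: All CPU(s) started in SVC mode.", and mixed boot modes trigger the broken-bootloader warning above.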
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index d100eac..8e20754d 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -43,6 +43,7 @@
 #include <asm/ptrace.h>
 #include <asm/localtimer.h>
 #include <asm/smp_plat.h>
+#include <asm/virt.h>
 #include <asm/mach/arch.h>
 
 /*
@@ -202,8 +203,11 @@
 	/*
 	 * Flush user cache and TLB mappings, and then remove this CPU
 	 * from the vm mask set of all processes.
+	 *
+	 * Caches are flushed to the Level of Unification Inner Shareable
+	 * to write-back dirty lines to unified caches shared by all CPUs.
 	 */
-	flush_cache_all();
+	flush_cache_louis();
 	local_flush_tlb_all();
 
 	clear_tasks_mm_cpumask(cpu);
@@ -355,6 +359,8 @@
 	       num_online_cpus(),
 	       bogosum / (500000/HZ),
 	       (bogosum / (5000/HZ)) % 100);
+
+	hyp_mode_check();
 }
 
 void __init smp_prepare_boot_cpu(void)
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index 1794cc3..358bca3 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -17,6 +17,8 @@
  */
 void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
 {
+	u32 *ctx = ptr;
+
 	*save_ptr = virt_to_phys(ptr);
 
 	/* This must correspond to the LDM in cpu_resume() assembly */
@@ -26,7 +28,20 @@
 
 	cpu_do_suspend(ptr);
 
-	flush_cache_all();
+	flush_cache_louis();
+
+	/*
+	 * flush_cache_louis does not guarantee that
+	 * save_ptr and ptr are cleaned to main memory,
+	 * just up to the Level of Unification Inner Shareable.
+	 * Since the context pointer and context itself
+	 * are to be retrieved with the MMU off, that
+	 * data must be cleaned from all cache levels
+	 * to main memory using "area" cache primitives.
+	 */
+	__cpuc_flush_dcache_area(ctx, ptrsz);
+	__cpuc_flush_dcache_area(save_ptr, sizeof(*save_ptr));
+
 	outer_clean_range(*save_ptr, *save_ptr + ptrsz);
 	outer_clean_range(virt_to_phys(save_ptr),
 			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 101b968..c9a4963 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -624,6 +624,23 @@
 	  Say Y here if you have a CPU with the ThumbEE extension and code to
 	  make use of it. Say N for code that can run on CPUs without ThumbEE.
 
+config ARM_VIRT_EXT
+	bool "Native support for the ARM Virtualization Extensions"
+	depends on MMU && CPU_V7
+	help
+	  Enable the kernel to make use of the ARM Virtualization
+	  Extensions to install hypervisors without run-time firmware
+	  assistance.
+
+	  A compliant bootloader is required in order to make maximum
+	  use of this feature.  Refer to Documentation/arm/Booting for
+	  details.
+
+	  It is safe to enable this option even if the kernel is not
+	  booted in HYP mode, the CPU does not support the
+	  virtualization extensions, or the bootloader does not comply
+	  with these requirements.
+
 config SWP_EMULATE
 	bool "Emulate SWP/SWPB instructions"
 	depends on !CPU_USE_DOMAINS && CPU_V7
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index 0720163..e505bef 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -240,6 +240,9 @@
 	mov	pc, lr
 ENDPROC(fa_dma_unmap_area)
 
+	.globl	fa_flush_kern_cache_louis
+	.equ	fa_flush_kern_cache_louis, fa_flush_kern_cache_all
+
 	__INITDATA
 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S
index 52e35f3..8a3fade 100644
--- a/arch/arm/mm/cache-v3.S
+++ b/arch/arm/mm/cache-v3.S
@@ -128,6 +128,9 @@
 ENDPROC(v3_dma_unmap_area)
 ENDPROC(v3_dma_map_area)
 
+	.globl	v3_flush_kern_cache_louis
+	.equ	v3_flush_kern_cache_louis, v3_flush_kern_cache_all
+
 	__INITDATA
 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index 022135d..43e5d77 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -140,6 +140,9 @@
 ENDPROC(v4_dma_unmap_area)
 ENDPROC(v4_dma_map_area)
 
+	.globl	v4_flush_kern_cache_louis
+	.equ	v4_flush_kern_cache_louis, v4_flush_kern_cache_all
+
 	__INITDATA
 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index 8f1eeae..cd49453 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -251,6 +251,9 @@
 	mov	pc, lr
 ENDPROC(v4wb_dma_unmap_area)
 
+	.globl	v4wb_flush_kern_cache_louis
+	.equ	v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all
+
 	__INITDATA
 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index b34a5f9..11e5e58 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -196,6 +196,9 @@
 ENDPROC(v4wt_dma_unmap_area)
 ENDPROC(v4wt_dma_map_area)
 
+	.globl	v4wt_flush_kern_cache_louis
+	.equ	v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all
+
 	__INITDATA
 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 4b10760..d8fd4d4 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -326,6 +326,9 @@
 	mov	pc, lr
 ENDPROC(v6_dma_unmap_area)
 
+	.globl	v6_flush_kern_cache_louis
+	.equ	v6_flush_kern_cache_louis, v6_flush_kern_cache_all
+
 	__INITDATA
 
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 3b17227..cd95664 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,6 +33,24 @@
 	mov	pc, lr
 ENDPROC(v7_flush_icache_all)
 
+/*
+ *	v7_flush_dcache_louis()
+ *
+ *	Flush the D-cache up to the Level of Unification Inner Shareable
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ */
+
+ENTRY(v7_flush_dcache_louis)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
+	ands	r3, r0, #0xe00000		@ extract LoUIS from clidr
+	mov	r3, r3, lsr #20			@ r3 = LoUIS * 2
+	moveq	pc, lr				@ return if level == 0
+	mov	r10, #0				@ r10 (starting level) = 0
+	b	flush_levels			@ start flushing cache levels
+ENDPROC(v7_flush_dcache_louis)
+
 /*
  *	v7_flush_dcache_all()
  *
@@ -49,7 +67,7 @@
 	mov	r3, r3, lsr #23			@ left align loc bit field
 	beq	finished			@ if loc is 0, then no need to clean
 	mov	r10, #0				@ start clean at cache level 0
-loop1:
+flush_levels:
 	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
 	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
 	and	r1, r1, #7			@ mask of the bits for current cache only
@@ -71,9 +89,9 @@
 	clz	r5, r4				@ find bit position of way size increment
 	ldr	r7, =0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
-loop2:
+loop1:
 	mov	r9, r4				@ create working copy of max way size
-loop3:
+loop2:
  ARM(	orr	r11, r10, r9, lsl r5	)	@ factor way and cache number into r11
  THUMB(	lsl	r6, r9, r5		)
  THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
@@ -82,13 +100,13 @@
  THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
 	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
 	subs	r9, r9, #1			@ decrement the way
-	bge	loop3
-	subs	r7, r7, #1			@ decrement the index
 	bge	loop2
+	subs	r7, r7, #1			@ decrement the index
+	bge	loop1
 skip:
 	add	r10, r10, #2			@ increment cache number
 	cmp	r3, r10
-	bgt	loop1
+	bgt	flush_levels
 finished:
 	mov	r10, #0				@ switch back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
@@ -120,6 +138,24 @@
 	mov	pc, lr
 ENDPROC(v7_flush_kern_cache_all)
 
+/*
+ *	v7_flush_kern_cache_louis(void)
+ *
+ *	Flush the data cache up to Level of Unification Inner Shareable.
+ *	Invalidate the I-cache to the point of unification.
+ */
+ENTRY(v7_flush_kern_cache_louis)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bl	v7_flush_dcache_louis
+	mov	r0, #0
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	mov	pc, lr
+ENDPROC(v7_flush_kern_cache_louis)
+
 /*
  *	v7_flush_cache_all()
  *
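For readers less fluent in the set/way dance, the walk performed by v7_flush_dcache_louis can be rendered in C roughly as follows (a sketch assuming privileged execution; only the CLIDR/CCSIDR field layouts and the DCCISW operation come from the architecture, the function name is illustrative):

	static void flush_dcache_to_louis(void)
	{
		unsigned int clidr, level;

		asm volatile("mrc p15, 1, %0, c0, c0, 1" : "=r" (clidr));

		/* CLIDR.LoUIS lives in bits [23:21]; flush the levels below it */
		for (level = 0; level < ((clidr >> 21) & 0x7); level++) {
			unsigned int ccsidr, sets, ways, set, way;
			unsigned int line_shift, way_shift;

			if (((clidr >> (level * 3)) & 0x7) < 2)
				continue;	/* no cache or I-cache only */

			/* select the level in CSSELR, read its CCSIDR */
			asm volatile("mcr p15, 2, %0, c0, c0, 0\n\tisb"
				     : : "r" (level << 1));
			asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr));

			line_shift = (ccsidr & 0x7) + 4;
			ways = ((ccsidr >> 3) & 0x3ff) + 1;
			sets = ((ccsidr >> 13) & 0x7fff) + 1;
			way_shift = ways > 1 ? __builtin_clz(ways - 1) : 0;

			for (way = 0; way < ways; way++)
				for (set = 0; set < sets; set++) {
					unsigned int sw = (way << way_shift) |
							  (set << line_shift) |
							  (level << 1);

					/* DCCISW: clean+invalidate by set/way */
					asm volatile("mcr p15, 0, %0, c7, c14, 2"
						     : : "r" (sw) : "memory");
				}
		}
		asm volatile("dsb" ::: "memory");
	}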
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 0650bb8..2bb61e7 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -368,6 +368,9 @@
 	mov	pc, lr
 ENDPROC(arm1020_dma_unmap_area)
 
+	.globl	arm1020_flush_kern_cache_louis
+	.equ	arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm1020
 
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 4188478..8f96aa4 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -354,6 +354,9 @@
 	mov	pc, lr
 ENDPROC(arm1020e_dma_unmap_area)
 
+	.globl	arm1020e_flush_kern_cache_louis
+	.equ	arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm1020e
 
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 33c6882..8ebe4a4 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -343,6 +343,9 @@
 	mov	pc, lr
 ENDPROC(arm1022_dma_unmap_area)
 
+	.globl	arm1022_flush_kern_cache_louis
+	.equ	arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm1022
 
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index fbc1d5f..093fc7e 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -337,6 +337,9 @@
 	mov	pc, lr
 ENDPROC(arm1026_dma_unmap_area)
 
+	.globl	arm1026_flush_kern_cache_louis
+	.equ	arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm1026
 
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 1a8c138..2c3b942 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -319,6 +319,9 @@
 	mov	pc, lr
 ENDPROC(arm920_dma_unmap_area)
 
+	.globl	arm920_flush_kern_cache_louis
+	.equ	arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm920
 #endif
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 4c44d7e..4464c49 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -321,6 +321,9 @@
 	mov	pc, lr
 ENDPROC(arm922_dma_unmap_area)
 
+	.globl	arm922_flush_kern_cache_louis
+	.equ	arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm922
 #endif
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ec5b118..281eb9b 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -376,6 +376,9 @@
 	mov	pc, lr
 ENDPROC(arm925_dma_unmap_area)
 
+	.globl	arm925_flush_kern_cache_louis
+	.equ	arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm925
 
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index c31e62c..f1803f7 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -339,6 +339,9 @@
 	mov	pc, lr
 ENDPROC(arm926_dma_unmap_area)
 
+	.globl	arm926_flush_kern_cache_louis
+	.equ	arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm926
 
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index a613a7d..8da189d 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -267,6 +267,9 @@
 	mov	pc, lr
 ENDPROC(arm940_dma_unmap_area)
 
+	.globl	arm940_flush_kern_cache_louis
+	.equ	arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm940
 
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 9f4f299..f666cf3 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -310,6 +310,9 @@
 	mov	pc, lr
 ENDPROC(arm946_dma_unmap_area)
 
+	.globl	arm946_flush_kern_cache_louis
+	.equ	arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions arm946
 
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 23a8e4c..4106b09 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -415,6 +415,9 @@
 	mov	pc, lr
 ENDPROC(feroceon_dma_unmap_area)
 
+	.globl	feroceon_flush_kern_cache_louis
+	.equ	feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions feroceon
 
@@ -431,6 +434,7 @@
 	range_alias flush_icache_all
 	range_alias flush_user_cache_all
 	range_alias flush_kern_cache_all
+	range_alias flush_kern_cache_louis
 	range_alias flush_user_cache_range
 	range_alias coherent_kern_range
 	range_alias coherent_user_range
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 2d8ff3a..b29a226 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -299,6 +299,7 @@
 ENTRY(\name\()_cache_fns)
 	.long	\name\()_flush_icache_all
 	.long	\name\()_flush_kern_cache_all
+	.long	\name\()_flush_kern_cache_louis
 	.long	\name\()_flush_user_cache_all
 	.long	\name\()_flush_user_cache_range
 	.long	\name\()_coherent_kern_range
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index fbb2124..82f9cdc 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -303,6 +303,9 @@
 	mov	pc, lr
 ENDPROC(mohawk_dma_unmap_area)
 
+	.globl	mohawk_flush_kern_cache_louis
+	.equ	mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions mohawk
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index c2e2b66..846d279 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -172,7 +172,7 @@
 __v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
-	bl	v7_flush_dcache_all
+	bl	v7_flush_dcache_louis
 	ldmia	r12, {r0-r5, r7, r9, r11, lr}
 
 	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index b0d5786..eb93d64 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -337,6 +337,9 @@
 	mov	pc, lr
 ENDPROC(xsc3_dma_unmap_area)
 
+	.globl	xsc3_flush_kern_cache_louis
+	.equ	xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions xsc3
 
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 4ffebaa..2551036 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -410,6 +410,9 @@
 	mov	pc, lr
 ENDPROC(xscale_dma_unmap_area)
 
+	.globl	xscale_flush_kern_cache_louis
+	.equ	xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all
+
 	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
 	define_cache_functions xscale
 
@@ -439,6 +442,7 @@
 	a0_alias flush_icache_all
 	a0_alias flush_user_cache_all
 	a0_alias flush_kern_cache_all
+	a0_alias flush_kern_cache_louis
 	a0_alias flush_user_cache_range
 	a0_alias coherent_kern_range
 	a0_alias coherent_user_range