Merge tag 'xtensa-for-next-20140815' into for_next

Xtensa improvements for 3.17:
- support highmem on cores with aliasing data cache. Enable highmem on kc705
  by default;
- simplify addition of new core variants (no need to modify Kconfig /
  Makefiles);
- improve robustness of unaligned access handler and its interaction with
  window overflow/underflow exception handlers;
- deprecate atomic and spill registers syscalls;
- clean up Kconfig: remove orphan MATH_EMULATION, sort 'select' statements;
- wire up renameat2 syscall.

Various fixes:
- fix address checks in dma_{alloc,free}_coherent (runtime BUG);
- fix access to THREAD_RA/THREAD_SP/THREAD_DS (debug build breakage);
- fix TLBTEMP_BASE_2 region handling in fast_second_level_miss (runtime
  unrecoverable exception);
- fix a6 and a7 handling in fast_syscall_xtensa (runtime userspace register
  clobbering);
- fix kernel/user jump out of fast_unaligned (potential runtime unrecoverable
  exception);
- replace termios IOCTL code definitions with constants (userspace build
  breakage).

Signed-off-by: Chris Zankel <chris@zankel.net>
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 3a617af..49c6c3d 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -4,24 +4,23 @@
 config XTENSA
 	def_bool y
 	select ARCH_WANT_FRAME_POINTERS
-	select HAVE_IDE
-	select GENERIC_ATOMIC64
-	select GENERIC_CLOCKEVENTS
-	select VIRT_TO_BUS
-	select GENERIC_IRQ_SHOW
-	select GENERIC_SCHED_CLOCK
-	select MODULES_USE_ELF_RELA
-	select GENERIC_PCI_IOMAP
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
-	select IRQ_DOMAIN
-	select HAVE_OPROFILE
+	select COMMON_CLK
+	select GENERIC_ATOMIC64
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_IRQ_SHOW
+	select GENERIC_PCI_IOMAP
+	select GENERIC_SCHED_CLOCK
 	select HAVE_FUNCTION_TRACER
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
-	select COMMON_CLK
+	select IRQ_DOMAIN
+	select MODULES_USE_ELF_RELA
+	select VIRT_TO_BUS
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
@@ -62,7 +61,9 @@
 	def_bool y
 
 config MMU
-	def_bool n
+	bool
+	default n if !XTENSA_VARIANT_CUSTOM
+	default XTENSA_VARIANT_MMU if XTENSA_VARIANT_CUSTOM
 
 config VARIANT_IRQ_SWITCH
 	def_bool n
@@ -102,8 +103,40 @@
 	select VARIANT_IRQ_SWITCH
 	select ARCH_REQUIRE_GPIOLIB
 	select XTENSA_CALIBRATE_CCOUNT
+
+config XTENSA_VARIANT_CUSTOM
+	bool "Custom Xtensa processor configuration"
+	select MAY_HAVE_SMP
+	select HAVE_XTENSA_GPIO32
+	help
+	  Select this variant to use a custom Xtensa processor configuration.
+	  You will be prompted for a processor variant CORENAME.
 endchoice
 
+config XTENSA_VARIANT_CUSTOM_NAME
+	string "Xtensa Processor Custom Core Variant Name"
+	depends on XTENSA_VARIANT_CUSTOM
+	help
+	  Provide the name of a custom Xtensa processor variant.
+	  This CORENAME selects arch/xtensa/variants/CORENAME.
+	  Don't forget you have to select MMU if you have one.
+
+config XTENSA_VARIANT_NAME
+	string
+	default "dc232b"			if XTENSA_VARIANT_DC232B
+	default "dc233c"			if XTENSA_VARIANT_DC233C
+	default "fsf"				if XTENSA_VARIANT_FSF
+	default "s6000"				if XTENSA_VARIANT_S6000
+	default XTENSA_VARIANT_CUSTOM_NAME	if XTENSA_VARIANT_CUSTOM
+
+config XTENSA_VARIANT_MMU
+	bool "Core variant has a Full MMU (TLB, Pages, Protection, etc)"
+	depends on XTENSA_VARIANT_CUSTOM
+	default y
+	help
+	  Build a conventional kernel with full MMU support,
+	  i.e. one that supports a TLB with auto-loading and page protection.
+
 config XTENSA_UNALIGNED_USER
 	bool "Unaligned memory access in use space"
 	help
@@ -156,13 +189,9 @@
 
 	  Say N if you want to disable CPU hotplug.
 
-config MATH_EMULATION
-	bool "Math emulation"
-	help
-	Can we use information of configuration file?
-
 config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
 	bool "Initialize Xtensa MMU inside the Linux kernel code"
+	depends on MMU
 	default y
 	help
 	  Earlier version initialized the MMU in the exception vector
@@ -192,6 +221,7 @@
 
 config HIGHMEM
 	bool "High Memory Support"
+	depends on MMU
 	help
 	  Linux can use the full amount of RAM in the system by
 	  default. However, the default MMUv2 setup only maps the
@@ -208,6 +238,32 @@
 
 	  If unsure, say Y.
 
+config FAST_SYSCALL_XTENSA
+	bool "Enable fast atomic syscalls"
+	default n
+	help
+	  fast_syscall_xtensa is a syscall that can perform atomic operations
+	  on UP kernels when the processor has no s32c1i support.
+
+	  This syscall is deprecated. It may have issues when called with
+	  invalid arguments. It is provided only for backwards compatibility.
+	  Only enable it if your userspace software requires it.
+
+	  If unsure, say N.
+
+config FAST_SYSCALL_SPILL_REGISTERS
+	bool "Enable spill registers syscall"
+	default n
+	help
+	  fast_syscall_spill_registers is a syscall that spills all active
+	  register windows of a calling userspace task onto its stack.
+
+	  This syscall is deprecated. It may have issues when called with
+	  invalid arguments. It is provided only for backwards compatibility.
+	  Only enable it if your userspace software requires it.
+
+	  If unsure, say N.
+
 endmenu
 
 config XTENSA_CALIBRATE_CCOUNT
@@ -250,12 +306,14 @@
 
 config XTENSA_PLATFORM_XT2000
 	bool "XT2000"
+	select HAVE_IDE
 	help
 	  XT2000 is the name of Tensilica's feature-rich emulation platform.
 	  This hardware is capable of running a full Linux distribution.
 
 config XTENSA_PLATFORM_S6105
 	bool "S6105"
+	select HAVE_IDE
 	select SERIAL_CONSOLE
 	select NO_IOPORT_MAP
 
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index 81250ec..4725330 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -4,6 +4,7 @@
 # for more details.
 #
 # Copyright (C) 2001 - 2005  Tensilica Inc.
+# Copyright (C) 2014 Cadence Design Systems Inc.
 #
 # This file is included by the global makefile so that you can add your own
 # architecture-specific flags and dependencies. Remember to do have actions
@@ -13,11 +14,7 @@
 # Core configuration.
 # (Use VAR=<xtensa_config> to use another default compiler.)
 
-variant-$(CONFIG_XTENSA_VARIANT_FSF)		:= fsf
-variant-$(CONFIG_XTENSA_VARIANT_DC232B)		:= dc232b
-variant-$(CONFIG_XTENSA_VARIANT_DC233C)		:= dc233c
-variant-$(CONFIG_XTENSA_VARIANT_S6000)		:= s6000
-variant-$(CONFIG_XTENSA_VARIANT_LINUX_CUSTOM)	:= custom
+variant-y := $(patsubst "%",%,$(CONFIG_XTENSA_VARIANT_NAME))
 
 VARIANT = $(variant-y)
 export VARIANT
diff --git a/arch/xtensa/boot/dts/kc705.dts b/arch/xtensa/boot/dts/kc705.dts
index 742a347..c4d17a3 100644
--- a/arch/xtensa/boot/dts/kc705.dts
+++ b/arch/xtensa/boot/dts/kc705.dts
@@ -4,8 +4,11 @@
 
 / {
 	compatible = "cdns,xtensa-kc705";
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000";
+	};
 	memory@0 {
 		device_type = "memory";
-		reg = <0x00000000 0x08000000>;
+		reg = <0x00000000 0x38000000>;
 	};
 };
diff --git a/arch/xtensa/configs/common_defconfig b/arch/xtensa/configs/common_defconfig
index f6000fe..721df12 100644
--- a/arch/xtensa/configs/common_defconfig
+++ b/arch/xtensa/configs/common_defconfig
@@ -66,7 +66,6 @@
 CONFIG_MMU=y
 # CONFIG_XTENSA_UNALIGNED_USER is not set
 # CONFIG_PREEMPT is not set
-# CONFIG_MATH_EMULATION is not set
 # CONFIG_HIGHMEM is not set
 
 #
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index 1493c68..b966baf 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -146,7 +146,6 @@
 # CONFIG_XTENSA_VARIANT_S6000 is not set
 # CONFIG_XTENSA_UNALIGNED_USER is not set
 # CONFIG_PREEMPT is not set
-# CONFIG_MATH_EMULATION is not set
 CONFIG_XTENSA_CALIBRATE_CCOUNT=y
 CONFIG_SERIAL_CONSOLE=y
 CONFIG_XTENSA_ISS_NETWORK=y
@@ -308,7 +307,7 @@
 # EEPROM support
 #
 # CONFIG_EEPROM_93CX6 is not set
-CONFIG_HAVE_IDE=y
+# CONFIG_HAVE_IDE is not set
 # CONFIG_IDE is not set
 
 #
diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig
index 12a492a..9471265 100644
--- a/arch/xtensa/configs/s6105_defconfig
+++ b/arch/xtensa/configs/s6105_defconfig
@@ -109,7 +109,6 @@
 CONFIG_XTENSA_VARIANT_S6000=y
 # CONFIG_XTENSA_UNALIGNED_USER is not set
 CONFIG_PREEMPT=y
-# CONFIG_MATH_EMULATION is not set
 # CONFIG_HIGHMEM is not set
 CONFIG_XTENSA_CALIBRATE_CCOUNT=y
 CONFIG_SERIAL_CONSOLE=y
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index 555a98a..e72aaca 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -37,6 +37,7 @@
  * specials for cache aliasing:
  *
  * __flush_invalidate_dcache_page_alias(vaddr,paddr)
+ * __invalidate_dcache_page_alias(vaddr,paddr)
  * __invalidate_icache_page_alias(vaddr,paddr)
  */
 
@@ -62,6 +63,7 @@
 
 #if defined(CONFIG_MMU) && (DCACHE_WAY_SIZE > PAGE_SIZE)
 extern void __flush_invalidate_dcache_page_alias(unsigned long, unsigned long);
+extern void __invalidate_dcache_page_alias(unsigned long, unsigned long);
 #else
 static inline void __flush_invalidate_dcache_page_alias(unsigned long virt,
 							unsigned long phys) { }
diff --git a/arch/xtensa/include/asm/fixmap.h b/arch/xtensa/include/asm/fixmap.h
index 9f6c33d0..62b507d 100644
--- a/arch/xtensa/include/asm/fixmap.h
+++ b/arch/xtensa/include/asm/fixmap.h
@@ -23,8 +23,8 @@
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
  * compile time, but to set the physical address only
- * in the boot process. We allocate these special  addresses
- * from the end of the consistent memory region backwards.
+ * in the boot process. We allocate these special addresses
+ * from the start of the consistent memory region upwards.
  * Also this lets us do fail-safe vmalloc(), we
  * can guarantee that these special addresses and
  * vmalloc()-ed addresses never overlap.
@@ -38,7 +38,8 @@
 #ifdef CONFIG_HIGHMEM
 	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_BEGIN,
-	FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
+	FIX_KMAP_END = FIX_KMAP_BEGIN +
+		(KM_TYPE_NR * NR_CPUS * DCACHE_N_COLORS) - 1,
 #endif
 	__end_of_fixed_addresses
 };
@@ -47,7 +48,28 @@
 #define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK)
 
-#include <asm-generic/fixmap.h>
+#define __fix_to_virt(x)	(FIXADDR_START + ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	(((x) - FIXADDR_START) >> PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#endif
 
 #define kmap_get_fixmap_pte(vaddr) \
 	pte_offset_kernel( \
diff --git a/arch/xtensa/include/asm/highmem.h b/arch/xtensa/include/asm/highmem.h
index 2653ef5..2c7901e 100644
--- a/arch/xtensa/include/asm/highmem.h
+++ b/arch/xtensa/include/asm/highmem.h
@@ -12,19 +12,55 @@
 #ifndef _XTENSA_HIGHMEM_H
 #define _XTENSA_HIGHMEM_H
 
+#include <linux/wait.h>
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 #include <asm/kmap_types.h>
 #include <asm/pgtable.h>
 
-#define PKMAP_BASE		(FIXADDR_START - PMD_SIZE)
-#define LAST_PKMAP		PTRS_PER_PTE
+#define PKMAP_BASE		((FIXADDR_START - \
+				  (LAST_PKMAP + 1) * PAGE_SIZE) & PMD_MASK)
+#define LAST_PKMAP		(PTRS_PER_PTE * DCACHE_N_COLORS)
 #define LAST_PKMAP_MASK		(LAST_PKMAP - 1)
 #define PKMAP_NR(virt)		(((virt) - PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)		(PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
 #define kmap_prot		PAGE_KERNEL
 
+#if DCACHE_WAY_SIZE > PAGE_SIZE
+#define get_pkmap_color get_pkmap_color
+static inline int get_pkmap_color(struct page *page)
+{
+	return DCACHE_ALIAS(page_to_phys(page));
+}
+
+extern unsigned int last_pkmap_nr_arr[];
+
+static inline unsigned int get_next_pkmap_nr(unsigned int color)
+{
+	last_pkmap_nr_arr[color] =
+		(last_pkmap_nr_arr[color] + DCACHE_N_COLORS) & LAST_PKMAP_MASK;
+	return last_pkmap_nr_arr[color] + color;
+}
+
+static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
+{
+	return pkmap_nr < DCACHE_N_COLORS;
+}
+
+static inline int get_pkmap_entries_count(unsigned int color)
+{
+	return LAST_PKMAP / DCACHE_N_COLORS;
+}
+
+extern wait_queue_head_t pkmap_map_wait_arr[];
+
+static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
+{
+	return pkmap_map_wait_arr + color;
+}
+#endif
+
 extern pte_t *pkmap_page_table;
 
 void *kmap_high(struct page *page);
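
Note: the per-color arrays above keep the generic PKMAP allocator color-correct:
slots for a given color are handed out DCACHE_N_COLORS apart, so every index it
returns stays congruent to that color and the pkmap virtual address aliases the
page's physical address in the D-cache. A minimal user-space model of that
invariant (the 8-color / 1024-PTE figures below are illustrative assumptions,
not taken from any particular core):

	#include <assert.h>
	#include <stdio.h>

	#define DCACHE_N_COLORS	8			/* assumed for illustration */
	#define PTRS_PER_PTE	1024			/* assumed for illustration */
	#define LAST_PKMAP	(PTRS_PER_PTE * DCACHE_N_COLORS)
	#define LAST_PKMAP_MASK	(LAST_PKMAP - 1)

	static unsigned int last_pkmap_nr_arr[DCACHE_N_COLORS];

	/* same round robin as get_next_pkmap_nr() above */
	static unsigned int get_next_pkmap_nr(unsigned int color)
	{
		last_pkmap_nr_arr[color] =
			(last_pkmap_nr_arr[color] + DCACHE_N_COLORS) & LAST_PKMAP_MASK;
		return last_pkmap_nr_arr[color] + color;
	}

	int main(void)
	{
		for (unsigned int color = 0; color < DCACHE_N_COLORS; ++color)
			for (int i = 0; i < 3 * PTRS_PER_PTE; ++i)
				assert(get_next_pkmap_nr(color) % DCACHE_N_COLORS == color);
		printf("every pkmap slot keeps its color\n");
		return 0;
	}
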
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 47f5823..abe24c6 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -78,7 +78,9 @@
 # define DCACHE_ALIAS_EQ(a,b)	((((a) ^ (b)) & DCACHE_ALIAS_MASK) == 0)
 #else
 # define DCACHE_ALIAS_ORDER	0
+# define DCACHE_ALIAS(a)	((void)(a), 0)
 #endif
+#define DCACHE_N_COLORS		(1 << DCACHE_ALIAS_ORDER)
 
 #if ICACHE_WAY_SIZE > PAGE_SIZE
 # define ICACHE_ALIAS_ORDER	(ICACHE_WAY_SHIFT - PAGE_SHIFT)
@@ -134,6 +136,7 @@
 #endif
 
 struct page;
+struct vm_area_struct;
 extern void clear_page(void *page);
 extern void copy_page(void *to, void *from);
 
@@ -143,8 +146,15 @@
  */
 
 #if DCACHE_WAY_SIZE > PAGE_SIZE
-extern void clear_user_page(void*, unsigned long, struct page*);
-extern void copy_user_page(void*, void*, unsigned long, struct page*);
+extern void clear_page_alias(void *vaddr, unsigned long paddr);
+extern void copy_page_alias(void *to, void *from,
+			    unsigned long to_paddr, unsigned long from_paddr);
+
+#define clear_user_highpage clear_user_highpage
+void clear_user_highpage(struct page *page, unsigned long vaddr);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma);
 #else
 # define clear_user_page(page, vaddr, pg)	clear_page(page)
 # define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
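
Note: the "color" of an address is simply the page-index bits that fall inside
one D-cache way, and DCACHE_N_COLORS is how many distinct colors exist. A
standalone sketch of that arithmetic (the 32 KiB way / 4 KiB page figures are
illustrative assumptions, and the DCACHE_ALIAS() body is re-derived here rather
than copied from the header):

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define PAGE_SIZE		(1UL << PAGE_SHIFT)
	#define PAGE_MASK		(~(PAGE_SIZE - 1))
	#define DCACHE_WAY_SIZE		(32UL * 1024)	/* assumed */
	#define DCACHE_ALIAS_ORDER	3	/* log2(DCACHE_WAY_SIZE / PAGE_SIZE) */
	#define DCACHE_ALIAS_MASK	(PAGE_MASK & (DCACHE_WAY_SIZE - 1))
	#define DCACHE_ALIAS(a)		(((a) & DCACHE_ALIAS_MASK) >> PAGE_SHIFT)
	#define DCACHE_N_COLORS		(1 << DCACHE_ALIAS_ORDER)

	int main(void)
	{
		unsigned long phys = 0x0123f000;	/* arbitrary physical address */

		/* bits [14:12] select one of the 8 colors: 7 for this address */
		printf("colors=%d color=%lu\n", DCACHE_N_COLORS, DCACHE_ALIAS(phys));
		return 0;
	}
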
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 4b0ca35..b2173e5 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -67,7 +67,12 @@
 #define VMALLOC_START		0xC0000000
 #define VMALLOC_END		0xC7FEFFFF
 #define TLBTEMP_BASE_1		0xC7FF0000
-#define TLBTEMP_BASE_2		0xC7FF8000
+#define TLBTEMP_BASE_2		(TLBTEMP_BASE_1 + DCACHE_WAY_SIZE)
+#if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE
+#define TLBTEMP_SIZE		(2 * DCACHE_WAY_SIZE)
+#else
+#define TLBTEMP_SIZE		ICACHE_WAY_SIZE
+#endif
 
 /*
  * For the Xtensa architecture, the PTE layout is as follows:
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index fd686dc..c7211e7 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -52,7 +52,12 @@
  */
 	.macro	get_fs	ad, sp
 	GET_CURRENT(\ad,\sp)
+#if THREAD_CURRENT_DS > 1020
+	addi	\ad, \ad, TASK_THREAD
+	l32i	\ad, \ad, THREAD_CURRENT_DS - TASK_THREAD
+#else
 	l32i	\ad, \ad, THREAD_CURRENT_DS
+#endif
 	.endm
 
 /*
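
Note: the 1020 cut-off in get_fs (and in the THREAD_RA/THREAD_SP handling in
entry.S later in this series) comes from the l32i encoding: its offset field is
an unsigned 8-bit immediate scaled by 4, so the largest reachable offset is
255 * 4 = 1020 and anything beyond that needs a separate addi. Trivial
arithmetic check:

	#include <stdio.h>

	int main(void)
	{
		/* l32i offset field: 8 unsigned bits, scaled by 4 bytes */
		unsigned int max_l32i_offset = ((1u << 8) - 1) * 4;

		printf("max l32i offset = %u\n", max_l32i_offset);	/* 1020 */
		return 0;
	}
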
diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h
index b4cb110..a47909f 100644
--- a/arch/xtensa/include/uapi/asm/ioctls.h
+++ b/arch/xtensa/include/uapi/asm/ioctls.h
@@ -28,17 +28,17 @@
 #define TCSETSW		0x5403
 #define TCSETSF		0x5404
 
-#define TCGETA		_IOR('t', 23, struct termio)
-#define TCSETA		_IOW('t', 24, struct termio)
-#define TCSETAW		_IOW('t', 25, struct termio)
-#define TCSETAF		_IOW('t', 28, struct termio)
+#define TCGETA		0x80127417	/* _IOR('t', 23, struct termio) */
+#define TCSETA		0x40127418	/* _IOW('t', 24, struct termio) */
+#define TCSETAW		0x40127419	/* _IOW('t', 25, struct termio) */
+#define TCSETAF		0x4012741C	/* _IOW('t', 28, struct termio) */
 
 #define TCSBRK		_IO('t', 29)
 #define TCXONC		_IO('t', 30)
 #define TCFLSH		_IO('t', 31)
 
-#define TIOCSWINSZ	_IOW('t', 103, struct winsize)
-#define TIOCGWINSZ	_IOR('t', 104, struct winsize)
+#define TIOCSWINSZ	0x40087467	/* _IOW('t', 103, struct winsize) */
+#define TIOCGWINSZ	0x80087468	/* _IOR('t', 104, struct winsize) */
 #define	TIOCSTART	_IO('t', 110)		/* start output, like ^Q */
 #define	TIOCSTOP	_IO('t', 111)		/* stop output, like ^S */
 #define TIOCOUTQ        _IOR('t', 115, int)     /* output queue size */
@@ -88,7 +88,6 @@
 #define TIOCSETD	_IOW('T', 35, int)
 #define TIOCGETD	_IOR('T', 36, int)
 #define TCSBRKP		_IOW('T', 37, int)   /* Needed for POSIX tcsendbreak()*/
-#define TIOCTTYGSTRUCT	_IOR('T', 38, struct tty_struct) /* For debugging only*/
 #define TIOCSBRK	_IO('T', 39) 	     /* BSD compatibility */
 #define TIOCCBRK	_IO('T', 40)	     /* BSD compatibility */
 #define TIOCGSID	_IOR('T', 41, pid_t) /* Return the session ID of FD*/
@@ -114,8 +113,10 @@
 #define TIOCSERGETLSR   _IOR('T', 89, unsigned int) /* Get line status reg. */
   /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
 # define TIOCSER_TEMT    0x01		     /* Transmitter physically empty */
-#define TIOCSERGETMULTI _IOR('T', 90, struct serial_multiport_struct) /* Get multiport config  */
-#define TIOCSERSETMULTI _IOW('T', 91, struct serial_multiport_struct) /* Set multiport config */
+#define TIOCSERGETMULTI 0x80a8545a /* Get multiport config  */
+			/* _IOR('T', 90, struct serial_multiport_struct) */
+#define TIOCSERSETMULTI 0x40a8545b /* Set multiport config */
+			/* _IOW('T', 91, struct serial_multiport_struct) */
 
 #define TIOCMIWAIT	_IO('T', 92) /* wait for a change on serial input line(s) */
 #define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
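
Note: the hard-coded values can be cross-checked against the asm-generic _IOC
layout (dir in bits 31:30, size in 29:16, type in 15:8, nr in 7:0). A throwaway
host-side check, assuming the build host itself uses that generic layout (x86
does):

	#include <stdio.h>
	#include <sys/ioctl.h>		/* _IOR/_IOW and struct winsize on glibc */

	int main(void)
	{
		printf("TIOCSWINSZ = %#lx (patch uses 0x40087467)\n",
		       (unsigned long)_IOW('t', 103, struct winsize));
		printf("TIOCGWINSZ = %#lx (patch uses 0x80087468)\n",
		       (unsigned long)_IOR('t', 104, struct winsize));
		return 0;
	}
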
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index b939552..8883fc8 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -739,7 +739,10 @@
 #define __NR_sched_getattr			335
 __SYSCALL(335, sys_sched_getattr, 3)
 
-#define __NR_syscall_count			336
+#define __NR_renameat2				336
+__SYSCALL(336, sys_renameat2, 5)
+
+#define __NR_syscall_count			337
 
 /*
  * sysxtensa syscall handler
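
Note: until libc grows a wrapper, the newly wired renameat2 is reachable through
syscall(2). A hedged usage sketch (needs kernel headers recent enough to define
__NR_renameat2 and RENAME_NOREPLACE; error handling omitted):

	#include <fcntl.h>		/* AT_FDCWD */
	#include <linux/fs.h>		/* RENAME_NOREPLACE */
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		/* fails with EEXIST instead of clobbering "new" if it already exists */
		return syscall(__NR_renameat2, AT_FDCWD, "old", AT_FDCWD, "new",
			       RENAME_NOREPLACE);
	}
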
diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S
index d4cef60..890004a 100644
--- a/arch/xtensa/kernel/align.S
+++ b/arch/xtensa/kernel/align.S
@@ -8,6 +8,7 @@
  * this archive for more details.
  *
  * Copyright (C) 2001 - 2005 Tensilica, Inc.
+ * Copyright (C) 2014 Cadence Design Systems Inc.
  *
  * Rewritten by Chris Zankel <chris@zankel.net>
  *
@@ -174,6 +175,10 @@
 	s32i	a0, a2, PT_AREG2
 	s32i	a3, a2, PT_AREG3
 
+	rsr	a3, excsave1
+	movi	a4, fast_unaligned_fixup
+	s32i	a4, a3, EXC_TABLE_FIXUP
+
 	/* Keep value of SAR in a0 */
 
 	rsr	a0, sar
@@ -225,10 +230,6 @@
 	addx8	a5, a6, a5
 	jx	a5			# jump into table
 
-	/* Invalid instruction, CRITICAL! */
-.Linvalid_instruction_load:
-	j	.Linvalid_instruction
-
 	/* Load: Load memory address. */
 
 .Lload: movi	a3, ~3
@@ -272,18 +273,6 @@
 	/* Set target register. */
 
 1:
-
-#if XCHAL_HAVE_LOOPS
-	rsr	a5, lend		# check if we reached LEND
-	bne	a7, a5, 1f
-	rsr	a5, lcount		# and LCOUNT != 0
-	beqz	a5, 1f
-	addi	a5, a5, -1		# decrement LCOUNT and set
-	rsr	a7, lbeg		# set PC to LBEGIN
-	wsr	a5, lcount
-#endif
-
-1:	wsr	a7, epc1		# skip load instruction
 	extui	a4, a4, INSN_T, 4	# extract target register
 	movi	a5, .Lload_table
 	addx8	a4, a4, a5
@@ -326,6 +315,35 @@
 	mov	a3, a14		;	_j 1f;	.align 8
 	mov	a3, a15		;	_j 1f;	.align 8
 
+	/* We cannot handle this exception. */
+
+	.extern _kernel_exception
+.Linvalid_instruction_load:
+.Linvalid_instruction_store:
+
+	movi	a4, 0
+	rsr	a3, excsave1
+	s32i	a4, a3, EXC_TABLE_FIXUP
+
+	/* Restore a4...a8 and SAR, set SP, and jump to default exception. */
+
+	l32i	a8, a2, PT_AREG8
+	l32i	a7, a2, PT_AREG7
+	l32i	a6, a2, PT_AREG6
+	l32i	a5, a2, PT_AREG5
+	l32i	a4, a2, PT_AREG4
+	wsr	a0, sar
+	mov	a1, a2
+
+	rsr	a0, ps
+	bbsi.l  a0, PS_UM_BIT, 2f     # jump if user mode
+
+	movi	a0, _kernel_exception
+	jx	a0
+
+2:	movi	a0, _user_exception
+	jx	a0
+
 1: 	# a7: instruction pointer, a4: instruction, a3: value
 
 	movi	a6, 0			# mask: ffffffff:00000000
@@ -353,17 +371,6 @@
 	/* Get memory address */
 
 1:
-#if XCHAL_HAVE_LOOPS
-	rsr	a4, lend		# check if we reached LEND
-	bne	a7, a4, 1f
-	rsr	a4, lcount		# and LCOUNT != 0
-	beqz	a4, 1f
-	addi	a4, a4, -1		# decrement LCOUNT and set
-	rsr	a7, lbeg		# set PC to LBEGIN
-	wsr	a4, lcount
-#endif
-
-1:	wsr	a7, epc1		# skip store instruction
 	movi	a4, ~3
 	and	a4, a4, a8		# align memory address
 
@@ -375,25 +382,25 @@
 #endif
 
 	__ssa8r a8
-	__src_b	a7, a5, a6		# lo-mask  F..F0..0 (BE) 0..0F..F (LE)
+	__src_b	a8, a5, a6		# lo-mask  F..F0..0 (BE) 0..0F..F (LE)
 	__src_b	a6, a6, a5		# hi-mask  0..0F..F (BE) F..F0..0 (LE)
 #ifdef UNALIGNED_USER_EXCEPTION
 	l32e	a5, a4, -8
 #else
 	l32i	a5, a4, 0		# load lower address word
 #endif
-	and	a5, a5, a7		# mask
-	__sh	a7, a3 			# shift value
-	or	a5, a5, a7		# or with original value
+	and	a5, a5, a8		# mask
+	__sh	a8, a3 			# shift value
+	or	a5, a5, a8		# or with original value
 #ifdef UNALIGNED_USER_EXCEPTION
 	s32e	a5, a4, -8
-	l32e	a7, a4, -4
+	l32e	a8, a4, -4
 #else
 	s32i	a5, a4, 0		# store
-	l32i	a7, a4, 4		# same for upper address word
+	l32i	a8, a4, 4		# same for upper address word
 #endif
 	__sl	a5, a3
-	and	a6, a7, a6
+	and	a6, a8, a6
 	or	a6, a6, a5
 #ifdef UNALIGNED_USER_EXCEPTION
 	s32e	a6, a4, -4
@@ -401,9 +408,27 @@
 	s32i	a6, a4, 4
 #endif
 
-	/* Done. restore stack and return */
-
 .Lexit:
+#if XCHAL_HAVE_LOOPS
+	rsr	a4, lend		# check if we reached LEND
+	bne	a7, a4, 1f
+	rsr	a4, lcount		# and LCOUNT != 0
+	beqz	a4, 1f
+	addi	a4, a4, -1		# decrement LCOUNT and set
+	rsr	a7, lbeg		# set PC to LBEGIN
+	wsr	a4, lcount
+#endif
+
+1:	wsr	a7, epc1		# skip emulated instruction
+
+	/* Update icount if we're single-stepping in userspace. */
+	rsr	a4, icountlevel
+	beqz	a4, 1f
+	bgeui	a4, LOCKLEVEL + 1, 1f
+	rsr	a4, icount
+	addi	a4, a4, 1
+	wsr	a4, icount
+1:
 	movi	a4, 0
 	rsr	a3, excsave1
 	s32i	a4, a3, EXC_TABLE_FIXUP
@@ -424,31 +449,40 @@
 	l32i	a2, a2, PT_AREG2
 	rfe
 
-	/* We cannot handle this exception. */
+ENDPROC(fast_unaligned)
 
-	.extern _kernel_exception
-.Linvalid_instruction_store:
-.Linvalid_instruction:
+ENTRY(fast_unaligned_fixup)
 
-	/* Restore a4...a8 and SAR, set SP, and jump to default exception. */
+	l32i	a2, a3, EXC_TABLE_DOUBLE_SAVE
+	wsr	a3, excsave1
 
 	l32i	a8, a2, PT_AREG8
 	l32i	a7, a2, PT_AREG7
 	l32i	a6, a2, PT_AREG6
 	l32i	a5, a2, PT_AREG5
 	l32i	a4, a2, PT_AREG4
+	l32i	a0, a2, PT_AREG2
+	xsr	a0, depc			# restore depc and a0
 	wsr	a0, sar
-	mov	a1, a2
+
+	rsr	a0, exccause
+	s32i	a0, a2, PT_DEPC			# mark as a regular exception
 
 	rsr	a0, ps
-	bbsi.l  a2, PS_UM_BIT, 1f     # jump if user mode
+	bbsi.l  a0, PS_UM_BIT, 1f		# jump if user mode
 
-	movi	a0, _kernel_exception
+	rsr	a0, exccause
+	addx4	a0, a0, a3              	# find entry in table
+	l32i	a0, a0, EXC_TABLE_FAST_KERNEL   # load handler
+	l32i	a3, a2, PT_AREG3
+	jx	a0
+1:
+	rsr	a0, exccause
+	addx4	a0, a0, a3              	# find entry in table
+	l32i	a0, a0, EXC_TABLE_FAST_USER     # load handler
+	l32i	a3, a2, PT_AREG3
 	jx	a0
 
-1:	movi	a0, _user_exception
-	jx	a0
-
-ENDPROC(fast_unaligned)
+ENDPROC(fast_unaligned_fixup)
 
 #endif /* XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index ef7f499..82bbfa5 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -986,6 +986,8 @@
  *		j done
  */
 
+#ifdef CONFIG_FAST_SYSCALL_XTENSA
+
 #define TRY								\
 	.section __ex_table, "a";					\
 	.word	66f, 67f;						\
@@ -1001,9 +1003,8 @@
 	movi	a7, 4			# sizeof(unsigned int)
 	access_ok a3, a7, a0, a2, .Leac	# a0: scratch reg, a2: sp
 
-	addi	a6, a6, -1		# assuming SYS_XTENSA_ATOMIC_SET = 1
-	_bgeui	a6, SYS_XTENSA_COUNT - 1, .Lill
-	_bnei	a6, SYS_XTENSA_ATOMIC_CMP_SWP - 1, .Lnswp
+	_bgeui	a6, SYS_XTENSA_COUNT, .Lill
+	_bnei	a6, SYS_XTENSA_ATOMIC_CMP_SWP, .Lnswp
 
 	/* Fall through for ATOMIC_CMP_SWP. */
 
@@ -1015,27 +1016,26 @@
 	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, 1			# and return 1
-	addi	a6, a6, 1		# restore a6 (really necessary?)
 	rfe
 
 1:	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, 0			# return 0 (note that we cannot set
-	addi	a6, a6, 1		# restore a6 (really necessary?)
 	rfe
 
 .Lnswp:	/* Atomic set, add, and exg_add. */
 
 TRY	l32i	a7, a3, 0		# orig
+	addi	a6, a6, -SYS_XTENSA_ATOMIC_SET
 	add	a0, a4, a7		# + arg
 	moveqz	a0, a4, a6		# set
+	addi	a6, a6, SYS_XTENSA_ATOMIC_SET
 TRY	s32i	a0, a3, 0		# write new value
 
 	mov	a0, a2
 	mov	a2, a7
 	l32i	a7, a0, PT_AREG7	# restore a7
 	l32i	a0, a0, PT_AREG0	# restore a0
-	addi	a6, a6, 1		# restore a6 (really necessary?)
 	rfe
 
 CATCH
@@ -1044,13 +1044,25 @@
 	movi	a2, -EFAULT
 	rfe
 
-.Lill:	l32i	a7, a2, PT_AREG0	# restore a7
+.Lill:	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, -EINVAL
 	rfe
 
 ENDPROC(fast_syscall_xtensa)
 
+#else /* CONFIG_FAST_SYSCALL_XTENSA */
+
+ENTRY(fast_syscall_xtensa)
+
+	l32i    a0, a2, PT_AREG0        # restore a0
+	movi	a2, -ENOSYS
+	rfe
+
+ENDPROC(fast_syscall_xtensa)
+
+#endif /* CONFIG_FAST_SYSCALL_XTENSA */
+
 
 /* fast_syscall_spill_registers.
  *
@@ -1066,6 +1078,8 @@
  * Note: We assume the stack pointer is EXC_TABLE_KSTK in the fixup handler.
  */
 
+#ifdef CONFIG_FAST_SYSCALL_SPILL_REGISTERS
+
 ENTRY(fast_syscall_spill_registers)
 
 	/* Register a FIXUP handler (pass current wb as a parameter) */
@@ -1400,6 +1414,18 @@
 
 ENDPROC(fast_syscall_spill_registers_fixup_return)
 
+#else /* CONFIG_FAST_SYSCALL_SPILL_REGISTERS */
+
+ENTRY(fast_syscall_spill_registers)
+
+	l32i    a0, a2, PT_AREG0        # restore a0
+	movi	a2, -ENOSYS
+	rfe
+
+ENDPROC(fast_syscall_spill_registers)
+
+#endif /* CONFIG_FAST_SYSCALL_SPILL_REGISTERS */
+
 #ifdef CONFIG_MMU
 /*
  * We should never get here. Bail out!
@@ -1565,7 +1591,7 @@
 	rsr	a0, excvaddr
 	bltu	a0, a3, 2f
 
-	addi	a1, a0, -(2 << (DCACHE_ALIAS_ORDER + PAGE_SHIFT))
+	addi	a1, a0, -TLBTEMP_SIZE
 	bgeu	a1, a3, 2f
 
 	/* Check if we have to restore an ITLB mapping. */
@@ -1820,7 +1846,6 @@
 
 	entry	a1, 16
 
-	mov	a10, a2			# preserve 'prev' (a2)
 	mov	a11, a3			# and 'next' (a3)
 
 	l32i	a4, a2, TASK_THREAD_INFO
@@ -1828,8 +1853,14 @@
 
 	save_xtregs_user a4 a6 a8 a9 a12 a13 THREAD_XTREGS_USER
 
-	s32i	a0, a10, THREAD_RA	# save return address
-	s32i	a1, a10, THREAD_SP	# save stack pointer
+#if THREAD_RA > 1020 || THREAD_SP > 1020
+	addi	a10, a2, TASK_THREAD
+	s32i	a0, a10, THREAD_RA - TASK_THREAD	# save return address
+	s32i	a1, a10, THREAD_SP - TASK_THREAD	# save stack pointer
+#else
+	s32i	a0, a2, THREAD_RA	# save return address
+	s32i	a1, a2, THREAD_SP	# save stack pointer
+#endif
 
 	/* Disable ints while we manipulate the stack pointer. */
 
@@ -1870,7 +1901,6 @@
 	load_xtregs_user a5 a6 a8 a9 a12 a13 THREAD_XTREGS_USER
 
 	wsr	a14, ps
-	mov	a2, a10			# return 'prev'
 	rsync
 
 	retw
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 2d9cc6d..e8b76b8 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -49,9 +49,8 @@
 
 	/* We currently don't support coherent memory outside KSEG */
 
-	if (ret < XCHAL_KSEG_CACHED_VADDR
-	    || ret >= XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE)
-		BUG();
+	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
+	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
 
 	if (ret != 0) {
@@ -68,10 +67,11 @@
 void dma_free_coherent(struct device *hwdev, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
-	long addr=(long)vaddr+XCHAL_KSEG_CACHED_VADDR-XCHAL_KSEG_BYPASS_VADDR;
+	unsigned long addr = (unsigned long)vaddr +
+		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
 
-	if (addr < 0 || addr >= XCHAL_KSEG_SIZE)
-		BUG();
+	BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR ||
+	       addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
 	free_pages(addr, get_order(size));
 }
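
Note: for context, a typical caller of the coherent API whose bounds checks are
tightened above; my_probe, my_dev and MY_BUF_SIZE are placeholder names, not
part of this patch:

	#include <linux/dma-mapping.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>

	#define MY_BUF_SIZE	4096			/* placeholder size */

	static int my_probe(struct device *my_dev)	/* placeholder device */
	{
		dma_addr_t buf_dma;
		void *buf = dma_alloc_coherent(my_dev, MY_BUF_SIZE, &buf_dma,
					       GFP_KERNEL);

		if (!buf)
			return -ENOMEM;
		/* ... hand buf_dma to the hardware, touch buf from the CPU ... */
		dma_free_coherent(my_dev, MY_BUF_SIZE, buf, buf_dma);
		return 0;
	}
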
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index eebbfd8..9d2f45f 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -101,9 +101,8 @@
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
 #ifdef CONFIG_XTENSA_UNALIGNED_USER
 { EXCCAUSE_UNALIGNED,		USER,	   fast_unaligned },
-#else
-{ EXCCAUSE_UNALIGNED,		0,	   do_unaligned_user },
 #endif
+{ EXCCAUSE_UNALIGNED,		0,	   do_unaligned_user },
 { EXCCAUSE_UNALIGNED,		KRNL,	   fast_unaligned },
 #endif
 #ifdef CONFIG_MMU
@@ -264,7 +263,6 @@
  */
 
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-#ifndef CONFIG_XTENSA_UNALIGNED_USER
 void
 do_unaligned_user (struct pt_regs *regs)
 {
@@ -286,7 +284,6 @@
 
 }
 #endif
-#endif
 
 void
 do_debug(struct pt_regs *regs)
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 8453e6e..1b397a9 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -454,8 +454,14 @@
 	s32i	a0, a2, PT_DEPC
 
 _DoubleExceptionVector_handle_exception:
+	addi	a0, a0, -EXCCAUSE_UNALIGNED
+	beqz	a0, 2f
 	addx4	a0, a0, a3
-	l32i	a0, a0, EXC_TABLE_FAST_USER
+	l32i	a0, a0, EXC_TABLE_FAST_USER + 4 * EXCCAUSE_UNALIGNED
+	xsr	a3, excsave1
+	jx	a0
+2:
+	movi	a0, user_exception
 	xsr	a3, excsave1
 	jx	a0
 
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index d16db6d..fc1bc2b 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -269,13 +269,13 @@
 		  .UserExceptionVector.literal)
   SECTION_VECTOR (_DoubleExceptionVector_literal,
 		  .DoubleExceptionVector.literal,
-		  DOUBLEEXC_VECTOR_VADDR - 40,
+		  DOUBLEEXC_VECTOR_VADDR - 48,
 		  SIZEOF(.UserExceptionVector.text),
 		  .UserExceptionVector.text)
   SECTION_VECTOR (_DoubleExceptionVector_text,
 		  .DoubleExceptionVector.text,
 		  DOUBLEEXC_VECTOR_VADDR,
-		  40,
+		  48,
 		  .DoubleExceptionVector.literal)
 
   . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 63cbb86..d75aa14 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -59,9 +59,68 @@
  *
  */
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && defined(CONFIG_HIGHMEM)
-#error "HIGHMEM is not supported on cores with aliasing cache."
-#endif
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
+static inline void kmap_invalidate_coherent(struct page *page,
+					    unsigned long vaddr)
+{
+	if (!DCACHE_ALIAS_EQ(page_to_phys(page), vaddr)) {
+		unsigned long kvaddr;
+
+		if (!PageHighMem(page)) {
+			kvaddr = (unsigned long)page_to_virt(page);
+
+			__invalidate_dcache_page(kvaddr);
+		} else {
+			kvaddr = TLBTEMP_BASE_1 +
+				(page_to_phys(page) & DCACHE_ALIAS_MASK);
+
+			__invalidate_dcache_page_alias(kvaddr,
+						       page_to_phys(page));
+		}
+	}
+}
+
+static inline void *coherent_kvaddr(struct page *page, unsigned long base,
+				    unsigned long vaddr, unsigned long *paddr)
+{
+	if (PageHighMem(page) || !DCACHE_ALIAS_EQ(page_to_phys(page), vaddr)) {
+		*paddr = page_to_phys(page);
+		return (void *)(base + (vaddr & DCACHE_ALIAS_MASK));
+	} else {
+		*paddr = 0;
+		return page_to_virt(page);
+	}
+}
+
+void clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	unsigned long paddr;
+	void *kvaddr = coherent_kvaddr(page, TLBTEMP_BASE_1, vaddr, &paddr);
+
+	pagefault_disable();
+	kmap_invalidate_coherent(page, vaddr);
+	set_bit(PG_arch_1, &page->flags);
+	clear_page_alias(kvaddr, paddr);
+	pagefault_enable();
+}
+
+void copy_user_highpage(struct page *dst, struct page *src,
+			unsigned long vaddr, struct vm_area_struct *vma)
+{
+	unsigned long dst_paddr, src_paddr;
+	void *dst_vaddr = coherent_kvaddr(dst, TLBTEMP_BASE_1, vaddr,
+					  &dst_paddr);
+	void *src_vaddr = coherent_kvaddr(src, TLBTEMP_BASE_2, vaddr,
+					  &src_paddr);
+
+	pagefault_disable();
+	kmap_invalidate_coherent(dst, vaddr);
+	set_bit(PG_arch_1, &dst->flags);
+	copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr);
+	pagefault_enable();
+}
+
+#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
 
@@ -103,7 +162,8 @@
 		if (!alias && !mapping)
 			return;
 
-		__flush_invalidate_dcache_page((long)page_address(page));
+		virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
+		__flush_invalidate_dcache_page_alias(virt, phys);
 
 		virt = TLBTEMP_BASE_1 + (temp & DCACHE_ALIAS_MASK);
 
@@ -168,13 +228,12 @@
 #if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
 
 	if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) {
-
-		unsigned long paddr = (unsigned long) page_address(page);
 		unsigned long phys = page_to_phys(page);
-		unsigned long tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
+		unsigned long tmp;
 
-		__flush_invalidate_dcache_page(paddr);
-
+		tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
+		__flush_invalidate_dcache_page_alias(tmp, phys);
+		tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
 		__flush_invalidate_dcache_page_alias(tmp, phys);
 		__invalidate_icache_page_alias(tmp, phys);
 
diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c
index 17a8c0d..8cfb71e 100644
--- a/arch/xtensa/mm/highmem.c
+++ b/arch/xtensa/mm/highmem.c
@@ -14,23 +14,45 @@
 
 static pte_t *kmap_pte;
 
+#if DCACHE_WAY_SIZE > PAGE_SIZE
+unsigned int last_pkmap_nr_arr[DCACHE_N_COLORS];
+wait_queue_head_t pkmap_map_wait_arr[DCACHE_N_COLORS];
+
+static void __init kmap_waitqueues_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pkmap_map_wait_arr); ++i)
+		init_waitqueue_head(pkmap_map_wait_arr + i);
+}
+#else
+static inline void kmap_waitqueues_init(void)
+{
+}
+#endif
+
+static inline enum fixed_addresses kmap_idx(int type, unsigned long color)
+{
+	return (type + KM_TYPE_NR * smp_processor_id()) * DCACHE_N_COLORS +
+		color;
+}
+
 void *kmap_atomic(struct page *page)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr;
-	int type;
 
 	pagefault_disable();
 	if (!PageHighMem(page))
 		return page_address(page);
 
-	type = kmap_atomic_idx_push();
-	idx = type + KM_TYPE_NR * smp_processor_id();
+	idx = kmap_idx(kmap_atomic_idx_push(),
+		       DCACHE_ALIAS(page_to_phys(page)));
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
-	BUG_ON(!pte_none(*(kmap_pte - idx)));
+	BUG_ON(!pte_none(*(kmap_pte + idx)));
 #endif
-	set_pte(kmap_pte - idx, mk_pte(page, PAGE_KERNEL_EXEC));
+	set_pte(kmap_pte + idx, mk_pte(page, PAGE_KERNEL_EXEC));
 
 	return (void *)vaddr;
 }
@@ -38,12 +60,10 @@
 
 void __kunmap_atomic(void *kvaddr)
 {
-	int idx, type;
-
 	if (kvaddr >= (void *)FIXADDR_START &&
 	    kvaddr < (void *)FIXADDR_TOP) {
-		type = kmap_atomic_idx();
-		idx = type + KM_TYPE_NR * smp_processor_id();
+		int idx = kmap_idx(kmap_atomic_idx(),
+				   DCACHE_ALIAS((unsigned long)kvaddr));
 
 		/*
 		 * Force other mappings to Oops if they'll try to access this
@@ -51,7 +71,7 @@
 		 * is a bad idea also, in case the page changes cacheability
 		 * attributes or becomes a protected page in a hypervisor.
 		 */
-		pte_clear(&init_mm, kvaddr, kmap_pte - idx);
+		pte_clear(&init_mm, kvaddr, kmap_pte + idx);
 		local_flush_tlb_kernel_range((unsigned long)kvaddr,
 					     (unsigned long)kvaddr + PAGE_SIZE);
 
@@ -69,4 +89,5 @@
 	/* cache the first kmap pte */
 	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+	kmap_waitqueues_init();
 }
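
Note: the color-aware fixmap indexing is invisible to callers; a highmem page is
still mapped the usual way (sketch only, no error paths):

	#include <linux/highmem.h>
	#include <linux/mm.h>
	#include <linux/string.h>

	static void zero_highmem_page(struct page *page)
	{
		/* the slot picked now shares the page's D-cache color */
		void *vaddr = kmap_atomic(page);

		memset(vaddr, 0, PAGE_SIZE);
		kunmap_atomic(vaddr);
	}
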
diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S
index 1f68558..11a01c3 100644
--- a/arch/xtensa/mm/misc.S
+++ b/arch/xtensa/mm/misc.S
@@ -110,41 +110,24 @@
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
 /*
- * clear_user_page (void *addr, unsigned long vaddr, struct page *page)
- *                     a2              a3                 a4
+ * clear_page_alias(void *addr, unsigned long paddr)
+ *                     a2              a3
  */
 
-ENTRY(clear_user_page)
+ENTRY(clear_page_alias)
 
 	entry	a1, 32
 
-	/* Mark page dirty and determine alias. */
+	/* Skip setting up a temporary DTLB if not aliased low page. */
 
-	movi	a7, (1 << PG_ARCH_1)
-	l32i	a5, a4, PAGE_FLAGS
-	xor	a6, a2, a3
-	extui	a3, a3, PAGE_SHIFT, DCACHE_ALIAS_ORDER
-	extui	a6, a6, PAGE_SHIFT, DCACHE_ALIAS_ORDER
-	or	a5, a5, a7
-	slli	a3, a3, PAGE_SHIFT
-	s32i	a5, a4, PAGE_FLAGS
+	movi	a5, PAGE_OFFSET
+	movi	a6, 0
+	beqz	a3, 1f
 
-	/* Skip setting up a temporary DTLB if not aliased. */
+	/* Setup a temporary DTLB for the addr. */
 
-	beqz	a6, 1f
-
-	/* Invalidate kernel page. */
-
-	mov	a10, a2
-	call8	__invalidate_dcache_page
-
-	/* Setup a temporary DTLB with the color of the VPN */
-
-	movi	a4, ((PAGE_KERNEL | _PAGE_HW_WRITE) - PAGE_OFFSET) & 0xffffffff
-	movi	a5, TLBTEMP_BASE_1			# virt
-	add	a6, a2, a4				# ppn
-	add	a2, a5, a3				# add 'color'
-
+	addi	a6, a3, (PAGE_KERNEL | _PAGE_HW_WRITE)
+	mov	a4, a2
 	wdtlb	a6, a2
 	dsync
 
@@ -165,62 +148,43 @@
 
 	/* We need to invalidate the temporary idtlb entry, if any. */
 
-1:	addi	a2, a2, -PAGE_SIZE
-	idtlb	a2
+1:	idtlb	a4
 	dsync
 
 	retw
 
-ENDPROC(clear_user_page)
+ENDPROC(clear_page_alias)
 
 /*
- * copy_page_user (void *to, void *from, unsigned long vaddr, struct page *page)
- *                    a2          a3	        a4		    a5
+ * copy_page_alias(void *to, void *from,
+ *			a2	  a3
+ *                 unsigned long to_paddr, unsigned long from_paddr)
+ *	        		 a4			 a5
  */
 
-ENTRY(copy_user_page)
+ENTRY(copy_page_alias)
 
 	entry	a1, 32
 
-	/* Mark page dirty and determine alias for destination. */
+	/* Skip setting up a temporary DTLB for destination if not aliased. */
 
-	movi	a8, (1 << PG_ARCH_1)
-	l32i	a9, a5, PAGE_FLAGS
-	xor	a6, a2, a4
-	xor	a7, a3, a4
-	extui	a4, a4, PAGE_SHIFT, DCACHE_ALIAS_ORDER
-	extui	a6, a6, PAGE_SHIFT, DCACHE_ALIAS_ORDER
-	extui	a7, a7, PAGE_SHIFT, DCACHE_ALIAS_ORDER
-	or	a9, a9, a8
-	slli	a4, a4, PAGE_SHIFT
-	s32i	a9, a5, PAGE_FLAGS
-	movi	a5, ((PAGE_KERNEL | _PAGE_HW_WRITE) - PAGE_OFFSET) & 0xffffffff
+	movi	a6, 0
+	movi	a7, 0
+	beqz	a4, 1f
 
-	beqz	a6, 1f
+	/* Setup a temporary DTLB for destination. */
 
-	/* Invalidate dcache */
-
-	mov	a10, a2
-	call8	__invalidate_dcache_page
-
-	/* Setup a temporary DTLB with a matching color. */
-
-	movi	a8, TLBTEMP_BASE_1			# base
-	add	a6, a2, a5				# ppn
-	add	a2, a8, a4				# add 'color'
-
+	addi	a6, a4, (PAGE_KERNEL | _PAGE_HW_WRITE)
 	wdtlb	a6, a2
 	dsync
 
-	/* Skip setting up a temporary DTLB for destination if not aliased. */
+	/* Skip setting up a temporary DTLB for source if not aliased. */
 
-1:	beqz	a7, 1f
+1:	beqz	a5, 1f
 
-	/* Setup a temporary DTLB with a matching color. */
+	/* Setup a temporary DTLB for source. */
 
-	movi	a8, TLBTEMP_BASE_2			# base
-	add	a7, a3, a5				# ppn
-	add	a3, a8, a4
+	addi	a7, a5, PAGE_KERNEL
 	addi	a8, a3, 1				# way1
 
 	wdtlb	a7, a8
@@ -271,7 +235,7 @@
 
 	retw
 
-ENDPROC(copy_user_page)
+ENDPROC(copy_page_alias)
 
 #endif
 
@@ -300,6 +264,30 @@
 	retw
 
 ENDPROC(__flush_invalidate_dcache_page_alias)
+
+/*
+ * void __invalidate_dcache_page_alias (addr, phys)
+ *                                       a2    a3
+ */
+
+ENTRY(__invalidate_dcache_page_alias)
+
+	entry	sp, 16
+
+	movi	a7, 0			# required for exception handler
+	addi	a6, a3, (PAGE_KERNEL | _PAGE_HW_WRITE)
+	mov	a4, a2
+	wdtlb	a6, a2
+	dsync
+
+	___invalidate_dcache_page a2 a3
+
+	idtlb	a4
+	dsync
+
+	retw
+
+ENDPROC(__invalidate_dcache_page_alias)
 #endif
 
 ENTRY(__tlbtemp_mapping_itlb)
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 3429b48..abe4513 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -18,32 +18,38 @@
 #include <asm/io.h>
 
 #if defined(CONFIG_HIGHMEM)
-static void * __init init_pmd(unsigned long vaddr)
+static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
 {
 	pgd_t *pgd = pgd_offset_k(vaddr);
 	pmd_t *pmd = pmd_offset(pgd, vaddr);
+	pte_t *pte;
+	unsigned long i;
 
-	if (pmd_none(*pmd)) {
-		unsigned i;
-		pte_t *pte = alloc_bootmem_low_pages(PAGE_SIZE);
+	n_pages = ALIGN(n_pages, PTRS_PER_PTE);
 
-		for (i = 0; i < 1024; i++)
-			pte_clear(NULL, 0, pte + i);
+	pr_debug("%s: vaddr: 0x%08lx, n_pages: %ld\n",
+		 __func__, vaddr, n_pages);
 
-		set_pmd(pmd, __pmd(((unsigned long)pte) & PAGE_MASK));
-		BUG_ON(pte != pte_offset_kernel(pmd, 0));
-		pr_debug("%s: vaddr: 0x%08lx, pmd: 0x%p, pte: 0x%p\n",
-			 __func__, vaddr, pmd, pte);
-		return pte;
-	} else {
-		return pte_offset_kernel(pmd, 0);
+	pte = alloc_bootmem_low_pages(n_pages * sizeof(pte_t));
+
+	for (i = 0; i < n_pages; ++i)
+		pte_clear(NULL, 0, pte + i);
+
+	for (i = 0; i < n_pages; i += PTRS_PER_PTE, ++pmd) {
+		pte_t *cur_pte = pte + i;
+
+		BUG_ON(!pmd_none(*pmd));
+		set_pmd(pmd, __pmd(((unsigned long)cur_pte) & PAGE_MASK));
+		BUG_ON(cur_pte != pte_offset_kernel(pmd, 0));
+		pr_debug("%s: pmd: 0x%p, pte: 0x%p\n",
+			 __func__, pmd, cur_pte);
 	}
+	return pte;
 }
 
 static void __init fixedrange_init(void)
 {
-	BUILD_BUG_ON(FIXADDR_SIZE > PMD_SIZE);
-	init_pmd(__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK);
+	init_pmd(__fix_to_virt(0), __end_of_fixed_addresses);
 }
 #endif
 
@@ -52,7 +58,7 @@
 	memset(swapper_pg_dir, 0, PAGE_SIZE);
 #ifdef CONFIG_HIGHMEM
 	fixedrange_init();
-	pkmap_page_table = init_pmd(PKMAP_BASE);
+	pkmap_page_table = init_pmd(PKMAP_BASE, LAST_PKMAP);
 	kmap_init();
 #endif
 }