Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:
 "Several build/bug fixes for sparc, including:

  1) Configuring a mix of static vs.  modular sparc64 crypto modules
     didn't work, remove an ill-conceived attempt to only have to build
     the device match table for these drivers once to fix the problem.

     Reported by Meelis Roos.

  2) Make the montgomery multiple/square and mpmul instructions actually
     usable in 32-bit tasks.  Essentially this involves providing 32-bit
     userspace with a way to use a 64-bit stack when it needs to.

  3) Our sparc64 atomic backoffs don't yield cpu strands properly on
     Niagara chips.  Use pause instruction when available to achieve
     this, otherwise use a benign instruction we know blocks the strand
     for some time.

  4) Wire up kcmp

  5) Fix the build of various drivers by removing the unnecessary
     blocking of OF_GPIO when SPARC.

  6) Fix unintended regression wherein of_address_to_resource stopped
     being provided.  Fix from Andreas Larsson.

  7) Fix NULL dereference in leon_handle_ext_irq(), also from Andreas
     Larsson."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix build with mix of modular vs. non-modular crypto drivers.
  sparc: Support atomic64_dec_if_positive properly.
  of/address: sparc: Declare of_address_to_resource() as an extern function for sparc again
  sparc32, leon: Check for existent irq_map entry in leon_handle_ext_irq
  sparc: Add sparc support for platform_get_irq()
  sparc: Allow OF_GPIO on sparc.
  qlogicpti: Fix build warning.
  sparc: Wire up sys_kcmp.
  sparc64: Improvde documentation and readability of atomic backoff code.
  sparc64: Use pause instruction when available.
  sparc64: Fix cpu strand yielding.
  sparc64: Make montmul/montsqr/mpmul usable in 32-bit threads.
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index b6b442b..9f2edb5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -20,6 +20,7 @@
 	select HAVE_ARCH_TRACEHOOK
 	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select RTC_CLASS
 	select RTC_DRV_M48T59
 	select HAVE_IRQ_WORK
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
index 6ae1ad5..5d469d8 100644
--- a/arch/sparc/crypto/Makefile
+++ b/arch/sparc/crypto/Makefile
@@ -13,13 +13,13 @@
 
 obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
 
-sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
-sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
-sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
-md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o
+sha1-sparc64-y := sha1_asm.o sha1_glue.o
+sha256-sparc64-y := sha256_asm.o sha256_glue.o
+sha512-sparc64-y := sha512_asm.o sha512_glue.o
+md5-sparc64-y := md5_asm.o md5_glue.o
 
-aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
-des-sparc64-y := des_asm.o des_glue.o crop_devid.o
-camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o
+aes-sparc64-y := aes_asm.o aes_glue.o
+des-sparc64-y := des_asm.o des_glue.o
+camellia-sparc64-y := camellia_asm.o camellia_glue.o
 
-crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o
+crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 8f1c998..3965d1d 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -475,3 +475,5 @@
 MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated");
 
 MODULE_ALIAS("aes");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 42905c0..62c89af 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -320,3 +320,5 @@
 MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
 
 MODULE_ALIAS("aes");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
index 0bd89ce..5162fad 100644
--- a/arch/sparc/crypto/crc32c_glue.c
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -177,3 +177,5 @@
 MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
 
 MODULE_ALIAS("crc32c");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index c4940c2..41524ce 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -527,3 +527,5 @@
 MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
 
 MODULE_ALIAS("des");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
index 603d723..09a9ea1 100644
--- a/arch/sparc/crypto/md5_glue.c
+++ b/arch/sparc/crypto/md5_glue.c
@@ -186,3 +186,5 @@
 MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");
 
 MODULE_ALIAS("md5");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
index 2bbb20b..6cd5f29 100644
--- a/arch/sparc/crypto/sha1_glue.c
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -181,3 +181,5 @@
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");
 
 MODULE_ALIAS("sha1");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
index 591e656..04f555a 100644
--- a/arch/sparc/crypto/sha256_glue.c
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -239,3 +239,5 @@
 
 MODULE_ALIAS("sha224");
 MODULE_ALIAS("sha256");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
index 486f0a2..f04d199 100644
--- a/arch/sparc/crypto/sha512_glue.c
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -224,3 +224,5 @@
 
 MODULE_ALIAS("sha384");
 MODULE_ALIAS("sha512");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index ce35a1c..be56a24 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -1,7 +1,7 @@
 /* atomic.h: Thankfully the V9 is at least reasonable for this
  *           stuff.
  *
- * Copyright (C) 1996, 1997, 2000 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1996, 1997, 2000, 2012 David S. Miller (davem@redhat.com)
  */
 
 #ifndef __ARCH_SPARC64_ATOMIC__
@@ -106,6 +106,8 @@
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
+extern long atomic64_dec_if_positive(atomic64_t *v);
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/arch/sparc/include/asm/backoff.h b/arch/sparc/include/asm/backoff.h
index db3af0d..4e02086 100644
--- a/arch/sparc/include/asm/backoff.h
+++ b/arch/sparc/include/asm/backoff.h
@@ -1,6 +1,46 @@
 #ifndef _SPARC64_BACKOFF_H
 #define _SPARC64_BACKOFF_H
 
+/* The macros in this file implement an exponential backoff facility
+ * for atomic operations.
+ *
+ * When multiple threads compete on an atomic operation, it is
+ * possible for one thread to be continually denied a successful
+ * completion of the compare-and-swap instruction.  Heavily
+ * threaded cpu implementations like Niagara can compound this
+ * problem even further.
+ *
+ * When an atomic operation fails and needs to be retried, we spin a
+ * certain number of times.  At each subsequent failure of the same
+ * operation we double the spin count, realizing an exponential
+ * backoff.
+ *
+ * When we spin, we try to use an operation that will cause the
+ * current cpu strand to block, and therefore make the core fully
+ * available to any other other runnable strands.  There are two
+ * options, based upon cpu capabilities.
+ *
+ * On all cpus prior to SPARC-T4 we do three dummy reads of the
+ * condition code register.  Each read blocks the strand for something
+ * between 40 and 50 cpu cycles.
+ *
+ * For SPARC-T4 and later we have a special "pause" instruction
+ * available.  This is implemented using writes to register %asr27.
+ * The cpu will block the number of cycles written into the register,
+ * unless a disrupting trap happens first.  SPARC-T4 specifically
+ * implements pause with a granularity of 8 cycles.  Each strand has
+ * an internal pause counter which decrements every 8 cycles.  So the
+ * chip shifts the %asr27 value down by 3 bits, and writes the result
+ * into the pause counter.  If a value smaller than 8 is written, the
+ * chip blocks for 1 cycle.
+ *
+ * To achieve the same amount of backoff as the three %ccr reads give
+ * on earlier chips, we shift the backoff value up by 7 bits.  (Three
+ * %ccr reads block for about 128 cycles, 1 << 7 == 128) We write the
+ * whole amount we want to block into the pause register, rather than
+ * loop writing 128 each time.
+ */
+
 #define BACKOFF_LIMIT	(4 * 1024)
 
 #ifdef CONFIG_SMP
@@ -11,16 +51,25 @@
 #define BACKOFF_LABEL(spin_label, continue_label) \
 	spin_label
 
-#define BACKOFF_SPIN(reg, tmp, label)	\
-	mov	reg, tmp; \
-88:	brnz,pt	tmp, 88b; \
-	 sub	tmp, 1, tmp; \
-	set	BACKOFF_LIMIT, tmp; \
-	cmp	reg, tmp; \
-	bg,pn	%xcc, label; \
-	 nop; \
-	ba,pt	%xcc, label; \
-	 sllx	reg, 1, reg;
+#define BACKOFF_SPIN(reg, tmp, label)		\
+	mov		reg, tmp;		\
+88:	rd		%ccr, %g0;		\
+	rd		%ccr, %g0;		\
+	rd		%ccr, %g0;		\
+	.section	.pause_3insn_patch,"ax";\
+	.word		88b;			\
+	sllx		tmp, 7, tmp;		\
+	wr		tmp, 0, %asr27;		\
+	clr		tmp;			\
+	.previous;				\
+	brnz,pt		tmp, 88b;		\
+	 sub		tmp, 1, tmp;		\
+	set		BACKOFF_LIMIT, tmp;	\
+	cmp		reg, tmp;		\
+	bg,pn		%xcc, label;		\
+	 nop;					\
+	ba,pt		%xcc, label;		\
+	 sllx		reg, 1, reg;
 
 #else
 
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index cef99fb..830502fe 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -232,9 +232,10 @@
 	struct pt_regs *regs = current_thread_info()->kregs;
 	unsigned long usp = regs->u_regs[UREG_I6];
 
-	if (!(test_thread_flag(TIF_32BIT)))
+	if (test_thread_64bit_stack(usp))
 		usp += STACK_BIAS;
-	else
+
+	if (test_thread_flag(TIF_32BIT))
 		usp &= 0xffffffffUL;
 
 	usp -= len;
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 4e5a483..721e25f 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -196,7 +196,22 @@
 #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->tpc)
 #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->u_regs[UREG_FP])
 
-#define cpu_relax()	barrier()
+/* Please see the commentary in asm/backoff.h for a description of
+ * what these instructions are doing and how they have been choosen.
+ * To make a long story short, we are trying to yield the current cpu
+ * strand during busy loops.
+ */
+#define cpu_relax()	asm volatile("\n99:\n\t"			\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     ".section	.pause_3insn_patch,\"ax\"\n\t"\
+				     ".word	99b\n\t"		\
+				     "wr	%%g0, 128, %%asr27\n\t"	\
+				     "nop\n\t"				\
+				     "nop\n\t"				\
+				     ".previous"			\
+				     ::: "memory")
 
 /* Prefetch support.  This is tuned for UltraSPARC-III and later.
  * UltraSPARC-I will treat these as nops, and UltraSPARC-II has
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index c287651..f930031 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -63,5 +63,10 @@
 extern void irq_trans_init(struct device_node *dp);
 extern char *build_path_component(struct device_node *dp);
 
+/* SPARC has a local implementation */
+extern int of_address_to_resource(struct device_node *dev, int index,
+				  struct resource *r);
+#define of_address_to_resource of_address_to_resource
+
 #endif /* __KERNEL__ */
 #endif /* _SPARC_PROM_H */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 4e227663..a3fe4dc 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -259,6 +259,11 @@
 
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 
+#define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0)
+#define test_thread_64bit_stack(__SP) \
+	((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? \
+	 false : true)
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h
index 48f2807..71b5a67 100644
--- a/arch/sparc/include/asm/ttable.h
+++ b/arch/sparc/include/asm/ttable.h
@@ -372,7 +372,9 @@
 
 /* Normal 32bit spill */
 #define SPILL_2_GENERIC(ASI)				\
-	srl	%sp, 0, %sp;				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, (. - (128 + 4));			\
+	 srl	%sp, 0, %sp;				\
 	stwa	%l0, [%sp + %g0] ASI;			\
 	mov	0x04, %g3;				\
 	stwa	%l1, [%sp + %g3] ASI;			\
@@ -398,14 +400,16 @@
 	stwa	%i6, [%g1 + %g0] ASI;			\
 	stwa	%i7, [%g1 + %g3] ASI;			\
 	saved;						\
-        retry; nop; nop;				\
+        retry;						\
 	b,a,pt	%xcc, spill_fixup_dax;			\
 	b,a,pt	%xcc, spill_fixup_mna;			\
 	b,a,pt	%xcc, spill_fixup;
 
 #define SPILL_2_GENERIC_ETRAP		\
 etrap_user_spill_32bit:			\
-	srl	%sp, 0, %sp;		\
+	and	%sp, 1, %g3;		\
+	brnz,pn	%g3, etrap_user_spill_64bit;	\
+	 srl	%sp, 0, %sp;		\
 	stwa	%l0, [%sp + 0x00] %asi;	\
 	stwa	%l1, [%sp + 0x04] %asi;	\
 	stwa	%l2, [%sp + 0x08] %asi;	\
@@ -427,7 +431,7 @@
 	ba,pt	%xcc, etrap_save;	\
 	 wrpr	%g1, %cwp;		\
 	nop; nop; nop; nop;		\
-	nop; nop; nop; nop;		\
+	nop; nop;			\
 	ba,a,pt	%xcc, etrap_spill_fixup_32bit; \
 	ba,a,pt	%xcc, etrap_spill_fixup_32bit; \
 	ba,a,pt	%xcc, etrap_spill_fixup_32bit;
@@ -592,7 +596,9 @@
 
 /* Normal 32bit fill */
 #define FILL_2_GENERIC(ASI)				\
-	srl	%sp, 0, %sp;				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, (. - (128 + 4));			\
+	 srl	%sp, 0, %sp;				\
 	lduwa	[%sp + %g0] ASI, %l0;			\
 	mov	0x04, %g2;				\
 	mov	0x08, %g3;				\
@@ -616,14 +622,16 @@
 	lduwa	[%g1 + %g3] ASI, %i6;			\
 	lduwa	[%g1 + %g5] ASI, %i7;			\
 	restored;					\
-	retry; nop; nop; nop; nop;			\
+	retry; nop; nop;				\
 	b,a,pt	%xcc, fill_fixup_dax;			\
 	b,a,pt	%xcc, fill_fixup_mna;			\
 	b,a,pt	%xcc, fill_fixup;
 
 #define FILL_2_GENERIC_RTRAP				\
 user_rtt_fill_32bit:					\
-	srl	%sp, 0, %sp;				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, user_rtt_fill_64bit;		\
+	 srl	%sp, 0, %sp;				\
 	lduwa	[%sp + 0x00] %asi, %l0;			\
 	lduwa	[%sp + 0x04] %asi, %l1;			\
 	lduwa	[%sp + 0x08] %asi, %l2;			\
@@ -643,7 +651,7 @@
 	ba,pt	%xcc, user_rtt_pre_restore;		\
 	 restored;					\
 	nop; nop; nop; nop; nop;			\
-	nop; nop; nop; nop; nop;			\
+	nop; nop; nop;					\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 8974ef7..cac719d 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -405,8 +405,13 @@
 #define __NR_setns		337
 #define __NR_process_vm_readv	338
 #define __NR_process_vm_writev	339
+#define __NR_kern_features	340
+#define __NR_kcmp		341
 
-#define NR_syscalls		340
+#define NR_syscalls		342
+
+/* Bitmask values returned from kern_features system call.  */
+#define KERN_FEATURE_MIXED_MODE_STACK	0x00000001
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 0c218e4..cc3c5cb 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -59,6 +59,13 @@
 extern struct popc_6insn_patch_entry __popc_6insn_patch,
 	__popc_6insn_patch_end;
 
+struct pause_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[3];
+};
+extern struct pause_patch_entry __pause_3insn_patch,
+	__pause_3insn_patch_end;
+
 extern void __init per_cpu_patch(void);
 extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
 				    struct sun4v_1insn_patch_entry *);
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index f8b6eee..87f60ee 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -56,11 +56,13 @@
 static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
 {
 	unsigned int eirq;
+	struct irq_bucket *p;
 	int cpu = sparc_leon3_cpuid();
 
 	eirq = leon_eirq_get(cpu);
-	if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
-		generic_handle_irq(irq_map[eirq]->irq);
+	p = irq_map[eirq];
+	if ((eirq & 0x10) && p && p->irq) /* bit4 tells if IRQ happened */
+		generic_handle_irq(p->irq);
 }
 
 /* The extended IRQ controller has been found, this function registers it */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 885a8af..b5c38fa 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1762,15 +1762,25 @@
 
 	ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
 	do {
-		struct sparc_stackf32 *usf, sf;
 		unsigned long pc;
 
-		usf = (struct sparc_stackf32 *) ufp;
-		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
-			break;
+		if (thread32_stack_is_64bit(ufp)) {
+			struct sparc_stackf *usf, sf;
 
-		pc = sf.callers_pc;
-		ufp = (unsigned long)sf.fp;
+			ufp += STACK_BIAS;
+			usf = (struct sparc_stackf *) ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc & 0xffffffff;
+			ufp = ((unsigned long) sf.fp) & 0xffffffff;
+		} else {
+			struct sparc_stackf32 *usf, sf;
+			usf = (struct sparc_stackf32 *) ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc;
+			ufp = (unsigned long)sf.fp;
+		}
 		perf_callchain_store(entry, pc);
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index d778248..c6e0c29 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -452,13 +452,16 @@
 /* It's a bit more tricky when 64-bit tasks are involved... */
 static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 {
+	bool stack_64bit = test_thread_64bit_stack(psp);
 	unsigned long fp, distance, rval;
 
-	if (!(test_thread_flag(TIF_32BIT))) {
+	if (stack_64bit) {
 		csp += STACK_BIAS;
 		psp += STACK_BIAS;
 		__get_user(fp, &(((struct reg_window __user *)psp)->ins[6]));
 		fp += STACK_BIAS;
+		if (test_thread_flag(TIF_32BIT))
+			fp &= 0xffffffff;
 	} else
 		__get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6]));
 
@@ -472,7 +475,7 @@
 	rval = (csp - distance);
 	if (copy_in_user((void __user *) rval, (void __user *) psp, distance))
 		rval = 0;
-	else if (test_thread_flag(TIF_32BIT)) {
+	else if (!stack_64bit) {
 		if (put_user(((u32)csp),
 			     &(((struct reg_window32 __user *)rval)->ins[6])))
 			rval = 0;
@@ -507,18 +510,18 @@
 
 	flush_user_windows();
 	if ((window = get_thread_wsaved()) != 0) {
-		int winsize = sizeof(struct reg_window);
-		int bias = 0;
-
-		if (test_thread_flag(TIF_32BIT))
-			winsize = sizeof(struct reg_window32);
-		else
-			bias = STACK_BIAS;
-
 		window -= 1;
 		do {
-			unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
 			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
 
 			if (!copy_to_user((char __user *)sp, rwin, winsize)) {
 				shift_window_buffer(window, get_thread_wsaved() - 1, t);
@@ -544,13 +547,6 @@
 {
 	struct thread_info *t = current_thread_info();
 	unsigned long window;
-	int winsize = sizeof(struct reg_window);
-	int bias = 0;
-
-	if (test_thread_flag(TIF_32BIT))
-		winsize = sizeof(struct reg_window32);
-	else
-		bias = STACK_BIAS;
 
 	flush_user_windows();
 	window = get_thread_wsaved();
@@ -558,8 +554,16 @@
 	if (likely(window != 0)) {
 		window -= 1;
 		do {
-			unsigned long sp = (t->rwbuf_stkptrs[window] + bias);
 			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
 
 			if (unlikely(sp & 0x7UL))
 				stack_unaligned(sp);
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 484daba..7ff45e4 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -151,7 +151,7 @@
 {
 	unsigned long rw_addr = regs->u_regs[UREG_I6];
 
-	if (test_tsk_thread_flag(current, TIF_32BIT)) {
+	if (!test_thread_64bit_stack(rw_addr)) {
 		struct reg_window32 win32;
 		int i;
 
@@ -176,7 +176,7 @@
 {
 	unsigned long rw_addr = regs->u_regs[UREG_I6];
 
-	if (test_tsk_thread_flag(current, TIF_32BIT)) {
+	if (!test_thread_64bit_stack(rw_addr)) {
 		struct reg_window32 win32;
 		int i;
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 0800e71..0eaf005 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -316,6 +316,25 @@
 	}
 }
 
+static void __init pause_patch(void)
+{
+	struct pause_patch_entry *p;
+
+	p = &__pause_3insn_patch;
+	while (p < &__pause_3insn_patch_end) {
+		unsigned long i, addr = p->addr;
+
+		for (i = 0; i < 3; i++) {
+			*(unsigned int *) (addr +  (i * 4)) = p->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr +  (i * 4)));
+		}
+
+		p++;
+	}
+}
+
 #ifdef CONFIG_SMP
 void __init boot_cpu_id_too_large(int cpu)
 {
@@ -528,6 +547,8 @@
 
 	if (sparc64_elf_hwcap & AV_SPARC_POPC)
 		popc_patch();
+	if (sparc64_elf_hwcap & AV_SPARC_PAUSE)
+		pause_patch();
 }
 
 void __init setup_arch(char **cmdline_p)
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 11c6c96..878ef3d 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -751,3 +751,8 @@
 		      : "cc");
 	return __res;
 }
+
+asmlinkage long sys_kern_features(void)
+{
+	return KERN_FEATURE_MIXED_MODE_STACK;
+}
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 63402f9..5147f57 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -85,3 +85,4 @@
 /*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
 /*335*/	.long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.long sys_ni_syscall, sys_kcmp
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 3a58e0d..1c9af9f 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -86,6 +86,7 @@
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
 	.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
+/*340*/	.word sys_kern_features, sys_kcmp
 
 #endif /* CONFIG_COMPAT */
 
@@ -163,3 +164,4 @@
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
 	.word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.word sys_kern_features, sys_kcmp
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index f81d038..8201c25e 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -113,21 +113,24 @@
 
 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	unsigned long value;
+	unsigned long value, fp;
 	
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		value = win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		get_user(value, &win32->locals[reg - 16]);
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		get_user(value, &win->locals[reg - 16]);
 	}
 	return value;
@@ -135,19 +138,24 @@
 
 static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
 {
+	unsigned long fp;
+
 	if (reg < 16)
 		return &regs->u_regs[reg];
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 *win32;
-		win32 = (struct reg_window32 *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 *)((unsigned long)((u32)fp));
 		return (unsigned long *)&win32->locals[reg - 16];
 	} else {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
 	}
 }
@@ -392,13 +400,15 @@
 		if (rd)
 			regs->u_regs[rd] = ret;
 	} else {
-		if (test_thread_flag(TIF_32BIT)) {
+		unsigned long fp = regs->u_regs[UREG_FP];
+
+		if (!test_thread_64bit_stack(fp)) {
 			struct reg_window32 __user *win32;
-			win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+			win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 			put_user(ret, &win32->locals[rd - 16]);
 		} else {
 			struct reg_window __user *win;
-			win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+			win = (struct reg_window __user *)(fp + STACK_BIAS);
 			put_user(ret, &win->locals[rd - 16]);
 		}
 	}
@@ -554,7 +564,7 @@
 		reg[0] = 0;
 		if ((insn & 0x780000) == 0x180000)
 			reg[1] = 0;
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
 		put_user(0, (int __user *) reg);
 		if ((insn & 0x780000) == 0x180000)
 			put_user(0, ((int __user *) reg) + 1);
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 08e074b7..c096c62 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -149,21 +149,24 @@
 
 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
 {
-	unsigned long value;
+	unsigned long value, fp;
 	
 	if (reg < 16)
 		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
 	if (regs->tstate & TSTATE_PRIV) {
 		struct reg_window *win;
-		win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window *)(fp + STACK_BIAS);
 		value = win->locals[reg - 16];
-	} else if (test_thread_flag(TIF_32BIT)) {
+	} else if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		get_user(value, &win32->locals[reg - 16]);
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		get_user(value, &win->locals[reg - 16]);
 	}
 	return value;
@@ -172,16 +175,18 @@
 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
 							  struct pt_regs *regs)
 {
+	unsigned long fp = regs->u_regs[UREG_FP];
+
 	BUG_ON(reg < 16);
 	BUG_ON(regs->tstate & TSTATE_PRIV);
 
-	if (test_thread_flag(TIF_32BIT)) {
+	if (!test_thread_64bit_stack(fp)) {
 		struct reg_window32 __user *win32;
-		win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
 		return (unsigned long __user *)&win32->locals[reg - 16];
 	} else {
 		struct reg_window __user *win;
-		win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
 		return &win->locals[reg - 16];
 	}
 }
@@ -204,7 +209,7 @@
 	} else {
 		unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
 
-		if (test_thread_flag(TIF_32BIT))
+		if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
 			__put_user((u32)val, (u32 __user *)rd_user);
 		else
 			__put_user(val, rd_user);
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 89c2c29..0bacceb 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -132,6 +132,11 @@
 		*(.popc_6insn_patch)
 		__popc_6insn_patch_end = .;
 	}
+	.pause_3insn_patch : {
+		__pause_3insn_patch = .;
+		*(.pause_3insn_patch)
+		__pause_3insn_patch_end = .;
+	}
 	PERCPU_SECTION(SMP_CACHE_BYTES)
 
 	. = ALIGN(PAGE_SIZE);
diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S
index a6b0863..1e67ce9 100644
--- a/arch/sparc/kernel/winfixup.S
+++ b/arch/sparc/kernel/winfixup.S
@@ -43,6 +43,8 @@
 spill_fixup_dax:
 	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx	[%g6 + TI_FLAGS], %g1
+	andcc	%sp, 0x1, %g0
+	movne	%icc, 0, %g1
 	andcc	%g1, _TIF_32BIT, %g0
 	ldub	[%g6 + TI_WSAVED], %g1
 	sll	%g1, 3, %g3
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 4d502da..85c233d 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -1,6 +1,6 @@
 /* atomic.S: These things are too big to do inline.
  *
- * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)
  */
 
 #include <linux/linkage.h>
@@ -117,3 +117,17 @@
 	 sub	%g1, %o0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 ENDPROC(atomic64_sub_ret)
+
+ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
+	BACKOFF_SETUP(%o2)
+1:	ldx	[%o0], %g1
+	brlez,pn %g1, 3f
+	 sub	%g1, 1, %g7
+	casx	[%o0], %g1, %g7
+	cmp	%g1, %g7
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 nop
+3:	retl
+	 sub	%g1, 1, %o0
+2:	BACKOFF_SPIN(%o2, %o3, 1b)
+ENDPROC(atomic64_dec_if_positive)
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index ee31b88..0c4e35e 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -116,6 +116,7 @@
 EXPORT_SYMBOL(atomic64_add_ret);
 EXPORT_SYMBOL(atomic64_sub);
 EXPORT_SYMBOL(atomic64_sub_ret);
+EXPORT_SYMBOL(atomic64_dec_if_positive);
 
 /* Atomic bit operations. */
 EXPORT_SYMBOL(test_and_set_bit);
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
index 1704068..034aadb 100644
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -320,7 +320,7 @@
 					XR = 0;
 				else if (freg < 16)
 					XR = regs->u_regs[freg];
-				else if (test_thread_flag(TIF_32BIT)) {
+				else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
 					struct reg_window32 __user *win32;
 					flushw_user ();
 					win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8727e9c..72c776f 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -83,9 +83,16 @@
  */
 int platform_get_irq(struct platform_device *dev, unsigned int num)
 {
+#ifdef CONFIG_SPARC
+	/* sparc does not have irqs represented as IORESOURCE_IRQ resources */
+	if (!dev || num >= dev->archdata.num_irqs)
+		return -ENXIO;
+	return dev->archdata.irqs[num];
+#else
 	struct resource *r = platform_get_resource(dev, IORESOURCE_IRQ, num);
 
 	return r ? r->start : -ENXIO;
+#endif
 }
 EXPORT_SYMBOL_GPL(platform_get_irq);
 
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index d055cee..f11d8e3 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -47,7 +47,7 @@
 
 config OF_GPIO
 	def_bool y
-	depends on OF && !SPARC
+	depends on OF
 
 config DEBUG_GPIO
 	bool "Debug GPIO calls"
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index b191dd5..71fddbc 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1294,26 +1294,19 @@
 static const struct of_device_id qpti_match[];
 static int __devinit qpti_sbus_probe(struct platform_device *op)
 {
-	const struct of_device_id *match;
-	struct scsi_host_template *tpnt;
 	struct device_node *dp = op->dev.of_node;
 	struct Scsi_Host *host;
 	struct qlogicpti *qpti;
 	static int nqptis;
 	const char *fcode;
 
-	match = of_match_device(qpti_match, &op->dev);
-	if (!match)
-		return -EINVAL;
-	tpnt = match->data;
-
 	/* Sometimes Antares cards come up not completely
 	 * setup, and we get a report of a zero IRQ.
 	 */
 	if (op->archdata.irqs[0] == 0)
 		return -ENODEV;
 
-	host = scsi_host_alloc(tpnt, sizeof(struct qlogicpti));
+	host = scsi_host_alloc(&qpti_template, sizeof(struct qlogicpti));
 	if (!host)
 		return -ENOMEM;
 
@@ -1445,19 +1438,15 @@
 static const struct of_device_id qpti_match[] = {
 	{
 		.name = "ptisp",
-		.data = &qpti_template,
 	},
 	{
 		.name = "PTI,ptisp",
-		.data = &qpti_template,
 	},
 	{
 		.name = "QLGC,isp",
-		.data = &qpti_template,
 	},
 	{
 		.name = "SUNW,isp",
-		.data = &qpti_template,
 	},
 	{},
 };
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index a1984dd..e20e3af 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -28,11 +28,13 @@
 #endif
 
 #else /* CONFIG_OF_ADDRESS */
+#ifndef of_address_to_resource
 static inline int of_address_to_resource(struct device_node *dev, int index,
 					 struct resource *r)
 {
 	return -EINVAL;
 }
+#endif
 static inline struct device_node *of_find_matching_node_by_address(
 					struct device_node *from,
 					const struct of_device_id *matches,