bgp: save/restore Double Hummer secondary FPRs; enable FUSE in bgp_defconfig

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
diff --git a/arch/powerpc/configs/bgp_defconfig b/arch/powerpc/configs/bgp_defconfig
index 3d317ff..86ef959 100644
--- a/arch/powerpc/configs/bgp_defconfig
+++ b/arch/powerpc/configs/bgp_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.22
-# Mon Nov  3 16:20:50 2008
+# Wed Feb 11 21:13:17 2009
 #
 # CONFIG_PPC64 is not set
 CONFIG_PPC32=y
@@ -830,7 +830,7 @@
 CONFIG_DNOTIFY=y
 # CONFIG_AUTOFS_FS is not set
 # CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
+CONFIG_FUSE_FS=y
 
 #
 # CD-ROM/DVD Filesystems
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 2cb1d94..744442c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -88,7 +88,9 @@
 	DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
-
+#ifdef CONFIG_DOUBLE_HUMMER	/* offset used by SAVE_SFPR/REST_SFPR */
+	DEFINE(THREAD_SFPR0, offsetof(struct thread_struct, sfpr[0]));
+#endif /* CONFIG_DOUBLE_HUMMER */
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
 	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 821e152..3829223 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -51,6 +51,9 @@
 	toreal(r4)
 	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
 	SAVE_32FPRS(0, r4)
+#ifdef CONFIG_DOUBLE_HUMMER
+	SAVE_32SFPRS(0, r10, r4)	/* base r4, same as SAVE_32FPRS */
+#endif /* CONFIG_DOUBLE_HUMMER */
 	mffs	fr0
 	stfd	fr0,THREAD_FPSCR(r4)
 	PPC_LL	r5,PT_REGS(r4)
@@ -78,6 +81,9 @@
 	lfd	fr0,THREAD_FPSCR(r5)
 	MTFSF_L(fr0)
 	REST_32FPRS(0, r5)
+#ifdef CONFIG_DOUBLE_HUMMER
+	REST_32SFPRS(0, r10, r5)	/* r10: scratch offset, r5: base */
+#endif /* CONFIG_DOUBLE_HUMMER */
 #ifndef CONFIG_SMP
 	subi	r4,r5,THREAD
 	fromreal(r4)
@@ -107,6 +113,9 @@
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
 	SAVE_32FPRS(0, r3)
+#ifdef CONFIG_DOUBLE_HUMMER
+	SAVE_32SFPRS(0, r10, r3)	/* r10: scratch offset, r3: base */
+#endif /* CONFIG_DOUBLE_HUMMER */
 	mffs	fr0
 	stfd	fr0,THREAD_FPSCR(r3)
 	beq	1f
diff --git a/include/asm-powerpc/ppc_asm.h b/include/asm-powerpc/ppc_asm.h
index 8aca5e2..1352c2e 100644
--- a/include/asm-powerpc/ppc_asm.h
+++ b/include/asm-powerpc/ppc_asm.h
@@ -83,23 +83,6 @@
 #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
 #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
 
-#if 0
-/* Only a patched toolchain will support the stfpdx/lfpdx instructions */
-#define LFPDX(frt,ra,rb)   .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(462<<1)
-#define STFPDX(frt,ra,rb)  .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(974<<1)
-#define SAVE_DFPR(n,b,base)       li b,THREAD_FPR0+(16*(n)); STFPDX(n,base,b)
-#define SAVE_2DFPRS(n,b,base)     SAVE_DFPR(n,b,base);   SAVE_DFPR(n+1,b,base)
-#define SAVE_4DFPRS(n,b,base)     SAVE_2DFPRS(n,b,base); SAVE_2DFPRS(n+2,b,base)
-#define SAVE_8DFPRS(n,b,base)     SAVE_4DFPRS(n,b,base); SAVE_4DFPRS(n+4,b,base)
-#define SAVE_16DFPRS(n,b,base)    SAVE_8DFPRS(n,b,base); SAVE_8DFPRS(n+8,b,base)
-#define SAVE_32DFPRS(n,b,base)    SAVE_16DFPRS(n,b,base);SAVE_16DFPRS(n+16,b,base)
-#define REST_DFPR(n,b,base)       li b,THREAD_FPR0+(16*(n)); LFPDX(n,base,b)
-#define REST_2DFPRS(n,b,base)     REST_DFPR(n,b,base);   REST_DFPR(n+1,b,base)
-#define REST_4DFPRS(n,b,base)     REST_2DFPRS(n,b,base); REST_2DFPRS(n+2,b,base)
-#define REST_8DFPRS(n,b,base)     REST_4DFPRS(n,b,base); REST_4DFPRS(n+4,b,base)
-#define REST_16DFPRS(n,b,base)    REST_8DFPRS(n,b,base); REST_8DFPRS(n+8,b,base)
-#define REST_32DFPRS(n,b,base)    REST_16DFPRS(n,b,base);REST_16DFPRS(n+16,b,base)
-#endif
 
 #define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*(n)(base)
 #define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
@@ -114,6 +97,30 @@
 #define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
 #define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
 
+/*
+ * Double Hummer (Blue Gene): the secondary floating point registers are
+ * kept in thread_struct.sfpr[], a save area parallel to the fpr array.
+ * Only a BG-patched toolchain assembles lfsdx/stfsdx, so the opcodes are
+ * hand-encoded here; they load/store only the secondary registers.
+ * n: register number, b: scratch GPR (set to the offset), base: thread.
+ */
+#define LFSDX(frt,ra,rb)   .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(206<<1)
+#define STFSDX(frt,ra,rb)  .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(718<<1)
+
+#define SAVE_SFPR(n,b,base)       li b,THREAD_SFPR0+(8*(n)); STFSDX(n,b,base)
+#define SAVE_2SFPRS(n,b,base)     SAVE_SFPR(n,b,base);   SAVE_SFPR(n+1,b,base)
+#define SAVE_4SFPRS(n,b,base)     SAVE_2SFPRS(n,b,base); SAVE_2SFPRS(n+2,b,base)
+#define SAVE_8SFPRS(n,b,base)     SAVE_4SFPRS(n,b,base); SAVE_4SFPRS(n+4,b,base)
+#define SAVE_16SFPRS(n,b,base)    SAVE_8SFPRS(n,b,base); SAVE_8SFPRS(n+8,b,base)
+#define SAVE_32SFPRS(n,b,base)    SAVE_16SFPRS(n,b,base);SAVE_16SFPRS(n+16,b,base)
+#define REST_SFPR(n,b,base)       li b,THREAD_SFPR0+(8*(n)); LFSDX(n,b,base)
+#define REST_2SFPRS(n,b,base)     REST_SFPR(n,b,base);   REST_SFPR(n+1,b,base)
+#define REST_4SFPRS(n,b,base)     REST_2SFPRS(n,b,base); REST_2SFPRS(n+2,b,base)
+#define REST_8SFPRS(n,b,base)     REST_4SFPRS(n,b,base); REST_4SFPRS(n+4,b,base)
+#define REST_16SFPRS(n,b,base)    REST_8SFPRS(n,b,base); REST_8SFPRS(n+8,b,base)
+#define REST_32SFPRS(n,b,base)    REST_16SFPRS(n,b,base);REST_16SFPRS(n+16,b,base)
+
+
 #define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,b,base
 #define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
 #define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
@@ -140,6 +147,7 @@
 #define REST_16EVRS(n,s,base)	REST_8EVRS(n,s,base); REST_8EVRS(n+8,s,base)
 #define REST_32EVRS(n,s,base)	REST_16EVRS(n,s,base); REST_16EVRS(n+16,s,base)
 
+
 /* Macros to adjust thread priority for hardware multithreading */
 #define HMT_VERY_LOW	or	31,31,31	# very low priority
 #define HMT_LOW		or	1,1,1
diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h
index 0b3ce14..2f39ae6 100644
--- a/include/asm-powerpc/processor.h
+++ b/include/asm-powerpc/processor.h
@@ -166,7 +166,7 @@
 	int		used_spe;	/* set if process has used spe */
 #endif /* CONFIG_SPE */
 #ifdef CONFIG_DOUBLE_HUMMER
-	double		sfpr[32];
+        double          sfpr[32];
 #endif /* CONFIG_DOUBLE_HUMMER */
 };