bluegene: Import unified patch from Argonne National Laboratory repository

I had trouble merging the Argonne repository patch changesets with
an actual kernel tree, so I generated a diff between 2.6.29.1 and
their full tree and applied it as a single commit.

This patch stream includes commits up to:
commit 69dc6c8f107ad8bf2e40fb84aaf9f2b5b386a939
Author: T. J. C. Ward <tjcw@tjcwt61.hursley.ibm.com>
Date:   Fri Jan 14 11:27:24 2011 +0000

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
diff --git a/.gitignore b/.gitignore
index 869e1a3..305fa03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,3 +64,5 @@
 *.orig
 *~
 \#*#
+
+arch/powerpc/include/zspi
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 74cc312..dd2f3af 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -510,6 +510,13 @@
 	  some command-line options at build time by entering them here.  In
 	  most cases you will need to specify the root device here.
 
+config WRAP_COPY_TOFROM_USER
+	bool "C-language wrapper for copy to/from user"
+	default n
+	help
+	  Set this if you want to instrument the low-level function which
+	  block-copies data between user-space and kernel-space.
+
 config EXTRA_TARGETS
 	string "Additional default image types"
 	help
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 08f7cc0..7bb0db5 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -208,9 +208,16 @@
 	help
 	  Select this to enable early debugging for Celleb with Beat.
 
+config PPC_EARLY_DEBUG_BGP
+	bool "Early debugging for BGP"
+	depends on BLUEGENE
+	help
+	  Select this and BLUEGENE_NOISY_BOOT to enable early debugging for
+	  the BGP node. See arch/powerpc/boot/bgp.c.
+
 config PPC_EARLY_DEBUG_44x
 	bool "Early serial debugging for IBM/AMCC 44x CPUs"
-	depends on 44x
+	depends on 44x && !BLUEGENE
 	help
 	  Select this to enable early debugging for IBM 44x chips via the
 	  inbuilt serial port.  If you enable this, ensure you set
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 72d17f5..fc68dca 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -125,7 +125,9 @@
 KBUILD_CFLAGS		+= -mno-sched-epilog
 endif
 
-cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
+ifndef CONFIG_BGP
+cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
+endif
 cpu-as-$(CONFIG_6xx)		+= -Wa,-maltivec
 cpu-as-$(CONFIG_POWER4)		+= -Wa,-maltivec
 cpu-as-$(CONFIG_E500)		+= -Wa,-me500
@@ -151,6 +153,7 @@
 core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
 core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
 core-$(CONFIG_KVM) 		+= arch/powerpc/kvm/
+core-$(CONFIG_BGP)		+= arch/powerpc/syslib/bgdd/
 
 drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/
 
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e84df33..c2dc236 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -39,6 +39,11 @@
 
 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405
 $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
+ifdef CONFIG_PPC_EARLY_DEBUG_BGP
+$(obj)/bgp.o: BOOTCFLAGS += -mcpu=405 -DCONFIG_PPC_EARLY_DEBUG_BGP=y
+else
+$(obj)/bgp.o: BOOTCFLAGS += -mcpu=405
+endif
 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405
@@ -60,7 +65,7 @@
 		gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \
 		4xx.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c bamboo.c \
 		cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
-		fsl-soc.c mpc8xx.c pq2.c
+		fsl-soc.c mpc8xx.c pq2.c bgp.c
 src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
 		cuboot-ebony.c treeboot-ebony.c prpmc2800.c \
 		ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
@@ -193,6 +198,7 @@
 image-$(CONFIG_PPC_MAPLE)		+= zImage.pseries
 image-$(CONFIG_PPC_IBM_CELL_BLADE)	+= zImage.pseries
 image-$(CONFIG_PPC_PS3)			+= dtbImage.ps3
+image-$(CONFIG_BGP)			+= dtbImage.bgp
 image-$(CONFIG_PPC_CELLEB)		+= zImage.pseries
 image-$(CONFIG_PPC_CELL_QPACE)		+= zImage.pseries
 image-$(CONFIG_PPC_CHRP)		+= zImage.chrp
diff --git a/arch/powerpc/boot/bgcns.h b/arch/powerpc/boot/bgcns.h
new file mode 100644
index 0000000..238ad40
--- /dev/null
+++ b/arch/powerpc/boot/bgcns.h
@@ -0,0 +1,1060 @@
+/*
+ * (C) Copyright IBM Corp. 2007, 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Tom Gooding, IBM
+ */
+
+
+#ifndef _BGCNS_H
+#define _BGCNS_H
+
+
+#ifndef __ASSEMBLY__
+
+/*! @page CNS Common Node Services
+ *
+ *  @section CNS_S10 Overview
+ *
+ *  As the name implies, the <b>Common Node Services (CNS)</b> layer provides @b services
+ *  to the kernel.  These services may be simple queries abstracting various node
+ *  specific data (such as DDR size) or they may be more sophisticated software services,
+ *  such as common machine check handling.  Additionally, some services may be implicit,
+ *  such as the initialization of various hardware devices unique to Blue Gene, such as
+ *  Netbus and SerDes.
+ *
+ *  Services are not directly linked into the kernel, but rather are invoked from kernel
+ *  code via a <b>service directory</b> which is itself part of an overall <b>service
+ *  descriptor</b>.  This service descriptor is constructed during initialization and
+ *  is passed to the kernel when the kernel is booted.  The service directory is a
+ *  collection of <b>service references</b>.
+ *
+ *  During partition (block) booting, ELF images are loaded onto the compute and I/O nodes.
+ *  The bootloader (aka the microloader) boots first and then transfers control to the Common
+ *  Node Services layer so that it, in turn, may boot.
+ *
+ *  Once the CNS layer has booted, control is transferred to the kernel so that it may also
+ *  boot.  All services provided by the CNS layer are immediately available at this time.
+ *
+ *  @section CNS_S20 Programming Model
+ *
+ *  A kernel running on top of the CNS layer is not statically linked to the common services.
+ *  Instead, the services are called via function pointers provided by the services descriptor,
+ *  which is described here:  @ref _BGCNS_ServiceDirectory.
+ *
+ *  The kernel must operate under the following rules and restrictions:
+ *  @li The kernel must not alter the services descriptor.  The descriptor must be treated as a read-only
+ *      data structure even though the kernel may have the ability to alter it.  Because CNS trusts the
+ *      kernel, this also implies that the kernel must not expose the descriptor to any untrusted
+ *      software (such as application code).
+ *  @li The kernel must ensure that the CNS virtual memory region is mapped prior to invoking any
+ *      service.
+ *  @li The kernel must ensure that any data passed to services via parameters is mapped.
+ *      Specifically, TLB entries must be mapped as shared (TID = 0) and must be either readable
+ *      (input parameters) or readable and writeable (output parameters).
+ *  @li The kernel must treat the virtual address range (@ref _BGCNS_Descriptor::baseVirtualAddress,
+ *      _BGCNS_Descriptor::baseVirtualAddress + @ref _BGCNS_Descriptor::size - 1) as reserved.
+ *      That is, the kernel must not use this region of virtual memory for anything besides accessing
+ *      the services descriptor.
+ *  @li The kernel must treat the physical address range (@ref _BGCNS_Descriptor::basePhysicalAddress,
+ *      _BGCNS_Descriptor::basePhysicalAddress + _BGCNS_Descriptor::size - 1) as reserved.  The
+ *      kernel must not map this memory for any other use.
+ *  @li The kernel must not access any of the reserved virtual address regions with TLB settings that
+ *      are different from those used by CNS.  The kernel is allowed to unmap any of the reserved
+ *      memory TLBs for its own use.  However, in such a case and per the rule above, the kernel must
+ *      ensure that the region is mapped prior to using any CNS facilities (such as invoking a service).
+ *  @li CNS may need to map one or more TLB entries in order to access Blue Gene devices.  In such a case,
+ *      CNS may borrow TLB entries; the TLB will be returned to its original state before the service returns
+ *      control to the invoking kernel.  Kernels may avoid this behavior for specific devices by using
+ *      the mapDevice service.
+ *  @li The kernel's ELF image must avoid the 256K region of memory between 0x07000000 and 0x0703FFFF.  This
+ *      region is used for the pre-relocated CNS image and will be available for general use once CNS boot
+ *      is complete.
+ *  @li The kernel must not alter any reserved SPRs, DCRs or memory-mapped device registers.
+ *
+ *  The CNS software may behave unpredictably if any of these rules and restrictions is violated.
+ *
+ *  Kernels may make the following assumptions about CNS:
+ *
+ *  @li The data passed in the firmware descriptor (see below) is static.  Specifically, the base addresses,
+ *      size and service directory will not change once CNS boot is complete.
+ *
+ *  @subsection CNS_21 Programming Examples
+ *
+ *  @subsubsection CNS_211 Obtaining the Personality
+ *
+ *  The following example shows how to fetch a copy of the Blue Gene personality structure and also
+ *  serves as a simple example of invoking a service:
+ *
+ *  @code
+ *
+ *      BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ *     _BGP_Personality_t* pers = (_BGP_Personality_t*)(*descr->services->getPersonalityData)();
+ *     ...
+ *  @endcode
+ *
+ *  The programming model guarantees that the descriptor is static.  Thus, one can provide a
+ *  convenience method to make service invocation a little more readable:
+ *
+ *  @code
+ *
+ *
+ *  static BGCNS_Descriptor* _cns_descriptor = ...; // obtained from CNS at boot time
+ *
+ *  inline BGCNS_ServiceDirectory* cns() { return _cns_descriptor->services; }
+ *
+ *  void foo() {
+ *     _BGP_Personality_t* pers = (_BGP_Personality_t*)cns()->getPersonalityData();
+ *     ...
+ *  }
+ *
+ *  @endcode
+ *
+ *  This style will be used in all of the subsequent examples.
+ *
+ *  @subsubsection CNS_212 SMP Initialization
+ *
+ *  Common Node Services will launch the kernel on a single core (typically core 0) and will
+ *  leave the remaining cores parked.  The kernel can activate additional cores via the @c takeCPU
+ *  service.  Here is a very simple example of such kernel code:
+ *
+ *  @code
+ *
+ *    void anEntryPoint(unsigned core, void* arg_not_used) {
+ *        // Do whatever your kernel needs to do here.  Typically,
+ *        // this function never returns.  You will arrive here
+ *        // when takeCPU is invoked (below).
+ *    }
+ *
+ *    void someCodeOnTheMainThread() {
+ *
+ *        // ...
+ *
+ *        unsigned core, N = cns()->getNumberOfCores();
+ *
+ *        for (core = 1; core < N; core++) {
+ *            if ( cns()->takeCPU(core, NULL, &anEntryPoint) != 0 ) {
+ *                // error handling goes here
+ *            }
+ *        }
+ *
+ *        // ...
+ *    }
+ *
+ *  @endcode
+ *
+ *  @subsubsection CNS_213 Version Compatibility
+ *
+ *  Common Node Services structures and APIs should remain compatible within maintenance
+ *  releases and e-fixes.  Kernels may add a runtime check to ensure that the version
+ *  of CNS running is compatible with the version assumed at compile time.  This is done as follows:
+ *
+ *  @code
+ *
+ *      BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ *
+ *      if ( ! BGCNS_IS_COMPATIBLE(descr) ) {
+ *           // incompatible CNS (panic?)
+ *      }
+ *
+ *  @endcode
+ *
+ *  @subsubsection CNS_23 Interrupts
+ *
+ *  A kernel wanting to use the CNS interrupt services would first have to enable interrupts
+ *  for the appropriate Blue Gene BIC group and IRQ within that group.  This would likely be
+ *  done at boot time.  So, for example, such a kernel could enable interrupts for the Universal
+ *  Performance Counter (group 5, IRQ 2) to be handled by the non-critical handler of core 0 as
+ *  follows:
+ *
+ *  @code
+ *      cns()->enableInterrupt(5, 2, BGCNS_NonCritical, 0);
+ *  @endcode
+ *
+ *  Such a kernel might also maintain a collection of routines that act as subhandlers of the
+ *  non-critical interrupt handler.  In this example, we'll assume it is simply a two
+ *  dimensional array indexed by group and IRQ:
+ *
+ *  @code
+ *      subhandlers[5][2] = &theUpcSubHandler;
+ *  @endcode
+ *
+ *  That kernel's non-critical interrupt handler would then typically handle all interrupts by
+ *  successively invoking the getInterrupt() service to determine the group and IRQ, and then
+ *  dispatching the appropriate subhandler.  Additionally, each interrupt is acknowledged
+ *  so as to avoid continuous interruption:
+ *
+ *  @code
+ *      unsigned grp, irq;
+ *
+ *      while ( cns()->getInterrupt(BGCNS_NonCritical, &grp, &irq) == 0 ) {
+ *          (*subhandlers[grp][irq])(); // dispatch the subhandler
+ *          cns()->acknowledgeInterrupt(grp, irq); // ack the interrupt
+ *      }
+ *  @endcode
+ *
+ *  @subsubsection CNS_24 Global Barriers and Interrupts
+ *
+ *  The Blue Gene/P Global Interrupt Controller (aka GLINT) provides 4 independent channels
+ *  that may be configured as either a global barrier or a global interrupt.
+ *
+ *  Barriers are constructed by invoking the barrier service:
+ *
+ *  @code
+ *      unsigned channel = 0;
+ *
+ *      // synchronize:
+ *      int reset = 1;
+ *      int rc;
+ *      while ( (rc = cns()->globalBarrier_nonBlocking(channel, reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ *        reset = 0;
+ *      }
+ *
+ *      if ( rc == BGCNS_RC_COMPLETE ) {
+ *        // good path
+ *      }
+ *      else {
+ *        // error
+ *      }
+ *  @endcode
+ *
+ *  Similarly, a barrier with a timeout can also be constructed:
+ *
+ *  @code
+ *      unsigned channel = 0;
+ *      int reset = 1;
+ *      unsigned long long startTime = ...; // obtain current time
+ *      unsigned long long timeout = ...;   // timeout threshold, in the same units
+ *      int rc;
+ *
+ *      while ( (rc = cns()->globalBarrier_nonBlocking(channel,reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ *         reset = 0;
+ *         unsigned long long currentTime = ...; // obtain current time
+ *         if ( currentTime - startTime > timeout )
+ *           break;
+ *      }
+ *
+ *      if ( rc == BGCNS_RC_COMPLETE )  {
+ *        // good path
+ *      }
+ *      else {
+ *        // timeout or error
+ *      }
+ *  @endcode
+ *
+ *  A node may opt out of a barrier channel via the disableBarrier service:
+ *
+ *  @code
+ *
+ *    // some other synchronization mechanism needs to go here
+ *
+ *    cns()->disableBarrier(channel);
+ *
+ *  @endcode
+ *
+ *  Conversely, it may opt back in:
+ *
+ *  @code
+ *    cns()->enableBarrier(channel, user_mode);
+ *  @endcode
+ *
+ *  By default, CNS reserves the use of channel 2 as a global interrupt for environmental
+ *  monitoring.  It also reserves channel 3 for use as a supervisory mode, compute-node
+ *  only barrier.  Compute node kernels are free to share this channel for the same
+ *  purpose (compute node, supervisory barrier).  The enable/disable barrier services
+ *  may return errors if operating on a reserved channel.
+ *
+ *  NOTE: The standard BG/P software stack, which includes I/O node Linux and Compute Node
+ *  Kernel (CNK), uses channel 0 as an I/O node barrier during boot and transforms it into a
+ *  compute-node-only barrier when jobs execute.
+ *
+ *
+ *  @section CNS_3 DMA Services
+ *
+ *  The DMA services provided in CNS are low-level services.  Readers interested in this area
+ *  should also look at the documentation for the DMA SPIs, which sit at a slightly higher level.
+ *
+ *
+ *
+ *  @section CNS_4 Reserved and Preferred Addresses
+ *
+ *
+ *  The following virtual memory regions are reserved and must be avoided by
+ *  kernels:
+ *
+ *  @code
+ *
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | Lower      | Upper      | Size | Usage                | Attributes            |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | CNSlow[1]  | CNShigh[2] | 256K | CNS                  | I, Rs, Ws, Xs         |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *
+ *    [1] CNSlow  = descr->baseVirtualAddress , usually 0xFFF40000
+ *    [2] CNShigh = descr->baseVirtualAddress + descr->size - 1;  usually 0xFFF7FFFF
+ *
+ *  @endcode
+ *
+ *  The following virtual memory regions are used by default in CNS.  Kernels that wish to have
+ *  a different memory map may do so via the mapDevice service.
+ *
+ *  @code
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | Lower      | Upper      | Size | Usage                | Attributes            |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFB0000 | 0xFFFCFFFF |  64K | Torus                | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFD0000 | 0xFFFD3FFF |  16K | DMA                  | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFD9000 | 0xFFFD9FFF |   4K | DevBus               | I, G, Rs, Ws          |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFDA000 | 0xFFFDAFFF |   4K | UPC                  | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFDC000 | 0xFFFDD3FF |   4K | Collective           | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFDE000 | 0xFFFDEFFF |   4K | BIC                  | I, G, Rs, Ws, Xs      |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFF0000 | 0xFFFF3FFF |  16K | Lockbox (supervisor) | I, G, Rs, Ws          |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFF4000 | 0xFFFF7FFF |  16K | Lockbox (user)       | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFF8000 | 0xFFFFFFFF |  32K | SRAM                 | SWOA, WL1, Rs, Ws, Xs |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  @endcode
+ *
+ */
+
+
+#define BGCNS_VERSION 0x01030000 /* V1R3M0 efix 0 */
+#define BGCNS_IS_COMPATIBLE(descr) ( ((descr)->version & 0xFFFF0000) == (BGCNS_VERSION & 0xFFFF0000) ) //!< True iff the given descriptor is compatible with this version of CNS
+
+/* ! @enum  BGCNS_InterruptType */
+/* ! @brief Defines the different types of interrupts known to */
+/* !        Common Node Services. */
+typedef enum  {
+    BGCNS_NonCritical,     //!< Non-critical interrupt
+    BGCNS_Critical,        //!< Critical interrupt
+    BGCNS_MachineCheck,    //!< Machine check
+} BGCNS_InterruptType;
+
+/* ! @enum   BGCNS_FifoOperation */
+/* ! @brief  Defines the types of FIFO operations */
+/* ! @see    _BGCNS_ServiceDirectory::setDmaFifoControls */
+/* ! @see    _BGCNS_ServiceDirectory::setDmaLocalCopies */
+/* ! @see    _BGCNS_ServiceDirectory::setDmaPriority */
+typedef enum {
+    BGCNS_Disable = 0,
+    BGCNS_Enable = 1,
+    BGCNS_Reenable = 2
+} BGCNS_FifoOperation;
+
+/* ! @enum BGCNS_FifoFacility */
+/* ! @brief Defines the various types of FIFO facilities */
+typedef enum {
+    BGCNS_InjectionFifo,                 //!< Normal Injection FIFO
+    BGCNS_ReceptionFifo,                 //!< Normal Reception FIFO
+    BGCNS_ReceptionHeaderFifo,           //!< Reception Header FIFO (typically used only for debugging)
+    BGCNS_InjectionFifoInterrupt,
+    BGCNS_ReceptionFifoInterrupt,
+    BGCNS_ReceptionHeaderFifoInterrupt,
+    BGCNS_InjectionCounterInterrupt,
+    BGCNS_ReceptionCounterInterrupt
+} BGCNS_FifoFacility;
+
+/* ! @enum  BGCNS_LinkType */
+/* ! @brief Defines the types of MAC links. */
+/* ! @see   _BGCNS_ServiceDirectory::macTestLink */
+typedef enum {
+    BGCNS_Transmitter,  //!< A transmitter link.
+    BGCNS_Receiver      //!< A receiver link.
+} BGCNS_LinkType;
+
+/* ! @enum  BGCNS_EnvmonParameter */
+/* ! @brief Enumerates the various environmental monitor parameters. */
+/* ! @see   _BGCNS_ServiceDirectory::getEnvmonParm */
+/* ! @see   _BGCNS_ServiceDirectory::setEnvmonParm */
+typedef enum {
+    BGCNS_envmon_period  = 0,
+    BGCNS_envmon_policy,
+    BGCNS_envmon_globintwire,
+
+     /*  temporary */
+    BGCNS_envmon_duration,
+    BGCNS_envmon_ddrratio,
+    BGCNS_envmon_numparms
+} BGCNS_EnvmonParameter;
+
+
+#define BGCNS_RC_COMPLETE  0         //!< Indicates that the operation completed normally.
+#define BGCNS_RC_CONTINUE  1         //!< Indicates that the operation is still in progress.
+#define BGCNS_RC_TIMEOUT  -1         //!< Indicates that the operation timed out.
+#define BGCNS_RC_ERROR    -2         //!< Indicates that the operation failed.
+
+#define BGCNS_NUM_DMA_RECEPTION_GROUPS           4
+#define BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP  8
+
+/* ! @brief Describes the mapping of physical torus reception FIFOs to DMA reception FIFOs (rmFIFOs). */
+/* !     The first dimension indexes DMA reception groups, which are a combination of PID0 and PID1 bits */
+/* !     from the DMA packet. */
+/* ! */
+/* !     The second dimension indexes through the different dimensions: X+, X-, Y+, Y-, Z+, Z-, high priority */
+/* !     and local copy. */
+typedef unsigned char BGCNS_ReceptionMap[BGCNS_NUM_DMA_RECEPTION_GROUPS][BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP];
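+
+/* ! As an illustrative sketch only (the FIFO id assignments below are made up, */
+/* ! not a recommended configuration), a map can be filled in as follows: */
+/* ! @code */
+/* !     BGCNS_ReceptionMap rmap; */
+/* !     unsigned g, f; */
+/* !     for (g = 0; g < BGCNS_NUM_DMA_RECEPTION_GROUPS; g++) */
+/* !         for (f = 0; f < BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP; f++) */
+/* !             rmap[g][f] = (unsigned char)(g * BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP + f); */
+/* ! @endcode */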
+
+/* ! @brief Indicates that an interrupt is to be broadcast on all cores. */
+/* ! @see   _BGCNS_ServiceDirectory::enableInterrupt */
+#define BGCNS_ALL_CORE_BROADCAST 0xFFFFFFFFu
+
+
+/* ! @enum   BGCNS_DeviceMasks */
+/* ! @brief  Provides a list of masks for various Blue Gene devices */
+
+typedef enum {
+    BGCNS_SRAM       = 0x80000000u,
+    BGCNS_BIC        = 0x40000000u,
+    BGCNS_Torus      = 0x20000000u,
+    BGCNS_DevBus     = 0x10000000u,
+    BGCNS_XEMAC      = 0x08000000u,
+    BGCNS_LockBox    = 0x04000000u,
+    BGCNS_Collective = 0x02000000u,
+    BGCNS_SRAM_Err   = 0x01000000u,
+    BGCNS_DMA        = 0x00800000u,
+    BGCNS_UPC        = 0x00400000u
+} BGCNS_DeviceMasks;
+
+/* ! @typedef BGCNS_ServiceDirectory */
+/* ! @struct  _BGCNS_ServiceDirectory */
+/* ! @brief   The service directory is a collection of function pointers to services */
+/* !          provided by the Common Node Services. */
+typedef struct _BGCNS_ServiceDirectory {
+
+    /*------------------------------------------*/
+    /*--- Informational services for the node --*/
+    /*------------------------------------------*/
+
+
+    int (*isIONode)(void);                             //!< Returns 1 if this is an I/O node; 0 if not.
+
+
+    /*-----------------------------------------------------------------*/
+    /*--- Informational services for obtaining Raw personality data ---*/
+    /*-----------------------------------------------------------------*/
+
+    unsigned int (*getPersonalitySize)(void);           //!< Returns the size (in bytes) of the Blue Gene personality.
+    void* (*getPersonalityData)(void);		      //!< Returns a pointer to the raw personality data.
+
+
+    /*-----------------------------------------------*/
+    /*--- Services for Symmetric Multi-Processing ---*/
+    /*-----------------------------------------------*/
+
+
+    unsigned (*getNumberOfCores)(void);                  //!< Returns the number of CPUs on this node.
+
+     /* ! @brief Called by the kernel to activate a CPU. */
+     /* ! @param[in] cpu The index of the cpu (core) to be activated. */
+     /* ! @param[in] entry The (kernel) entry point function.  This function will be invoked when */
+     /* !            the CPU is actually activated. */
+     /* ! @param[in] arg A pointer to the lone argument to be passed to the entry point. */
+     /* ! @return Zero (0) if the CPU was successfully activated.  Non-zero if the CPU was not */
+     /* !            activated (e.g. invalid cpu argument, or the cpu has already been */
+     /* !            activated). */
+     /* ! @remarks   See Section x of the Common Node Services overview for details. */
+    int (*takeCPU)(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg));
+
+
+    /*--------------------------------------*/
+    /*--- Services for Blue Gene devices ---*/
+    /*--------------------------------------*/
+
+     /* ! @brief  Checks active devices for a clean termination state and returns 0 */
+     /* !         if everything is nominal.  Returns non-zero if any anomaly is */
+     /* !         detected and logs violations. */
+     /* ! @param[in] job_rc specifies the return code of the job that is terminating. */
+    int (*terminationCheck)(int job_rc);
+
+    /*-------------------------------*/
+    /*--- Services for interrupts ---*/
+    /*-------------------------------*/
+
+
+     /* ! @brief Enables the specified interrupt.  For all interrupts except inter-processor */
+     /* !        interrupts, the interrupt will be handled by the specified core. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+     /* ! @param[in] itype Specifies the type of interrupt that hardware will present */
+     /* !            for this group/irq. */
+     /* ! @param[in] core Specifies which core will handle the interrupt.  If specified as */
+     /* !            BGCNS_ALL_CORE_BROADCAST, then all cores will handle the interrupt. */
+     /* ! @return    Returns zero (0) if the interrupt is enabled and returns non-zero if it was not */
+     /* !            (including the case of bad arguments). */
+    int (*enableInterrupt)(unsigned group, unsigned irq, BGCNS_InterruptType itype, unsigned core);
+
+     /* ! @brief Disables the specified interrupt. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+     /* ! @return    Returns zero (0) if the interrupt is disabled and returns non-zero if it was not */
+     /* !            (including the case of bad arguments). */
+    int (*disableInterrupt)(unsigned group, unsigned irq);
+
+     /* ! @brief Queries the Blue Gene interrupt hardware for interrupts of the given */
+     /* !        type and returns the group/IRQ.  This service is typically used in the */
+     /* !        context of an interrupt handler.  Since multiple interrupt conditions */
+     /* !        may be present, the service is typically invoked from the handler */
+     /* !        (along with corresponding acknowledgement) until the return code */
+     /* !        indicates that no more interrupts are present. */
+     /* ! @param[out] group Specifies the Blue Gene interrupt group.  The value is valid */
+     /* !        only when the return code is 0. */
+     /* ! @param[out] irq  Specifies the interrupt index within the group.  The value is */
+     /* !        valid only when the return code is zero. */
+     /* ! @param[in] itype Specifies the type of interrupt being queried. */
+     /* ! @return Returns zero (0) if an interrupt condition of the specified type exists.  Returns -1 */
+     /* !        if no such condition exists. */
+    int (*getInterrupt)(BGCNS_InterruptType itype, unsigned* group, unsigned* irq);
+
+     /* ! @brief Acknowledges the specified interrupt, thus clearing the interrupt */
+     /* !       condition in the interrupt controller hardware. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+     /* ! @return    Returns zero (0) if the interrupt is acknowledged and returns non-zero if it was not */
+     /* !            (including the case of bad arguments). */
+     /* ! @remarks Note that for some interrupts, it is not sufficient to only acknowledge */
+     /* !       the interrupt; the hardware condition that triggered the interrupt may */
+     /* !       also need to be cleared. */
+    int (*acknowledgeInterrupt)(unsigned group, unsigned irq);
+
+     /* ! @brief Raises the specified interrupt. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+    int (*raiseInterrupt)(unsigned group, unsigned irq);
+
+
+    /*------------------------*/
+    /*--- Mailbox services ---*/
+    /*------------------------*/
+
+    unsigned (*getMailboxMaximumConsoleInputSize)(void);   //!< Returns the actual maximum console message input data size.
+    unsigned (*getMailboxMaximumConsoleOutputSize)(void);  //!< Returns the actual maximum console message output data size.
+
+     /* ! @brief Writes a text message to the output mailbox. */
+     /* ! @param[in] msg a pointer to the message to be written. */
+     /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+     /* ! @remarks As with all common services, the message data area must be mapped via */
+     /* !          the TLB when the service is called.  The behavior is not defined if this */
+     /* !          is not the case. */
+     /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+     /* !            wrong (including a message that is too large). */
+    int (*writeToMailboxConsole)(char *msg, unsigned msglen);
+
+     /* ! @brief Writes a text message to the output mailbox but does not wait for a */
+     /* !        response back from the control system.  When this service is used, */
+     /* !        the caller must poll for completion using the testForOutboxCompletion */
+     /* !        service. */
+     /* ! @param[in] msg a pointer to the message to be written. */
+     /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+     /* ! @remarks As with all common services, the message data area must be mapped via */
+     /* !          the TLB when the service is called.  The behavior is not defined if this */
+     /* !          is not the case. */
+     /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+     /* !            wrong (including a message that is too large). */
+    int (*writeToMailboxConsole_nonBlocking)(char* msg, unsigned msglen);
+
+     /* ! @brief Tests the outbox to see if the last message was picked up by the control */
+     /* !        system. */
+     /* ! @return Zero (0) if the last message has been picked up and non-zero if it has not. */
+     /* ! @remarks Typically the caller will invoke this service after having called */
+     /* !        writeToMailboxConsole_nonBlocking and will then invoke this service in a */
+     /* !        loop until zero is returned. */
+    int (*testForOutboxCompletion)(void);
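+
+     /* ! A minimal usage sketch of the non-blocking path, assuming the cns() */
+     /* !   accessor from the overview examples (the busy-wait is illustrative; */
+     /* !   a real kernel would do other work between polls): */
+     /* ! @code */
+     /* !     char msg[] = "hello from the kernel";  // illustrative text */
+     /* !     if ( cns()->writeToMailboxConsole_nonBlocking(msg, sizeof(msg) - 1) == 0 ) { */
+     /* !         while ( cns()->testForOutboxCompletion() != 0 ) */
+     /* !             ;  // poll until the control system picks the message up */
+     /* !     } */
+     /* ! @endcode */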
+
+     /* ! @brief Reads a message from the input mailbox. */
+     /* ! @param buf a pointer to a data area into which the message will be placed. */
+     /* ! @param bufsize gives the size of the data area, i.e. the largest message */
+     /* !        that may be safely received into the buffer. */
+     /* ! @return The actual length of the message (0 if no message was received). */
+     /* ! @remarks As with all common services, the message data area must be mapped */
+     /* !          via the TLB when this service is called.  The results are not defined if */
+     /* !          this is not the case. */
+    unsigned (*readFromMailboxConsole)(char *buf, unsigned bufsize);
+
+    int (*testInboxAttention)(void);                      //!< Returns 1 if something is available in the input mailbox.
+
+    int (*_no_longer_in_use_1_)(void); //!< Obsolete ... do not use.
+
+    int (*writeToMailbox)(void* message, unsigned length, unsigned cmd);
+
+    /*------------------------------------*/
+    /*---  RAS and diagnostic services ---*/
+    /*------------------------------------*/
+
+     /* ! @brief TBD */
+    void (*machineCheck)(void *regs);
+
+     /* ! @brief Writes a RAS event to the log. */
+     /* ! @param[in] facility The facility (aka component). */
+     /* ! @param[in] unit The unit (aka subcomponent). */
+     /* ! @param[in] err_code The error code. */
+     /* ! @param[in] numDetails The number of additional details. */
+     /* ! @param[in] details The list of additional details. */
+     /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+     /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+    int (*writeRASEvent)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
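+
+     /* ! A sketch only; facility, unit and err_code values come from bgp_ras.h, and */
+     /* !   the names and detail words below are hypothetical placeholders: */
+     /* ! @code */
+     /* !     unsigned details[2] = { addr_hi, addr_lo };  // illustrative detail words */
+     /* !     cns()->writeRASEvent(facility, unit, err_code, 2, details); */
+     /* ! @endcode */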
+
+     /* ! @brief Writes a RAS string to the log. */
+     /* ! @param[in] facility The facility (aka component). */
+     /* ! @param[in] unit The unit (aka subcomponent). */
+     /* ! @param[in] err_code The error code. */
+     /* ! @param[in] str The message string being written (ASCII encoded, null-terminated).  Note that the length of this string is */
+     /* !     limited to _BGP_RAS_ASCII_MAX_LEN characters.  The implementation may choose to truncate the string if it exceeds this */
+     /* !     length. */
+     /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+     /* !      string was truncated). */
+     /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+    int (*writeRASString)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+
+    /*---------------------------------*/
+    /*--- Global Interrupt services ---*/
+    /*---------------------------------*/
+
+     /* ! @brief A global (compute node) barrier.  This call will block until all other compute nodes */
+     /* !        in the partition also arrive at the barrier. */
+    int (*globalBarrier)(void);
+
+     /* ! @brief  A global (compute node) barrier.  This call will block until all other compute nodes */
+     /* !         in the partition also arrive at the barrier or until the timeout is reached. */
+     /* ! @param  timeoutInMillis specifies the timeout duration.  Units are milliseconds. */
+     /* ! @return BGCNS_RC_COMPLETE if the barrier completed.  BGCNS_RC_TIMEOUT if the barrier timed */
+     /* !         out.  BGCNS_RC_ERROR if some other error occurred. */
+    int (*globalBarrierWithTimeout)(unsigned timeoutInMillis);
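+
+     /* ! For example, assuming the cns() accessor from the overview examples (the */
+     /* !   5000 ms timeout is illustrative): */
+     /* ! @code */
+     /* !     int rc = cns()->globalBarrierWithTimeout(5000); */
+     /* !     if ( rc != BGCNS_RC_COMPLETE ) { */
+     /* !         // BGCNS_RC_TIMEOUT or BGCNS_RC_ERROR; recovery goes here */
+     /* !     } */
+     /* ! @endcode */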
+
+
+
+    /*-------------------------*/
+    /*---  Network services ---*/
+    /*-------------------------*/
+
+
+    void (*initializeNetworks)(void);  //!< @todo Is this going away?  Talk to Andy
+
+    void (*_no_longer_in_use_381)(void); //!< @warning Do not use
+
+    void (*_no_longer_in_use_384)(void);//!< @warning Do not use
+
+
+    /*--------------------------*/
+    /*---  DMA unit services ---*/
+    /*--------------------------*/
+
+#define BGCNS_DMA_CAPTURE_X_PLUS         0   //!< watch the X+ receiver
+#define BGCNS_DMA_CAPTURE_X_MINUS        1   //!< watch the X- receiver
+#define BGCNS_DMA_CAPTURE_Y_PLUS         2   //!< watch the Y+ receiver
+#define BGCNS_DMA_CAPTURE_Y_MINUS        3   //!< watch the Y- receiver
+#define BGCNS_DMA_CAPTURE_Z_PLUS         4   //!< watch the Z+ receiver
+#define BGCNS_DMA_CAPTURE_Z_MINUS        5   //!< watch the Z- receiver
+#define BGCNS_DMA_CAPTURE_DISABLE        7   //!< disable link capturing
+
+     /* ! @brief Sets the link capture facility of the DMA unit to watch the specified */
+     /* !        receiver (or disable). */
+     /* ! @param[in] link Specifies the link being monitored.  Use the BGCNS_DMA_CAPTURE_* */
+     /* !        mnemonics defined above. */
+     /* ! @return Zero if the operation succeeded, non-zero if it did not (e.g. an invalid */
+     /* !        link was specified). */
+    int (*setDmaLinkCapture)(int link);
+
+     /* ! @brief Clears the link capture unit so that another packet can be captured. */
+    void (*clearDmaLinkCapture)(void);
+
+#define BGCNS_RC_DMA_NO_PACKET_CAPTURED      0
+#define BGCNS_RC_DMA_CAPTURE_UNIT_ERROR     -1
+#define BGCNS_RC_DMA_DATA_CONFLICT          -2 //!< if initial read indicates a bad packet is captured but subsequent read shows bad packet not captured
+#define BGCNS_RC_DMA_DATA_CONFLICT2         -3 //!< if bad packet is captured, but all the bytes are the same
+     /* ! @brief Reads the DMA link capture packets. */
+    int (*readDmaLinkCapturePackets)(unsigned char* good_packet, int* good_packet_size, unsigned char* bad_packet, int* bad_packet_size);
+
+
+#define BGCNS_DMA_ALL_GROUPS 0xFFFFFFFF
+
+     /* ! @brief Sets FIFO controls for the DMA unit. */
+     /* ! */
+     /* ! An operation on facility BGCNS_InjectionFifo enables or disables a subset of the 128 DMA injection FIFOs. */
+     /* ! The FIFOs are organized into four groups of 32.  The mask argument is a bit mask (bit i controls the i-th imFIFO */
+     /* ! within that group, that is, imFIFO (group*32)+i). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionFifo enables or disables a subset of the 32 DMA reception FIFOs. */
+     /* ! The group argument is ignored and the mask argument is a bit mask (bit i controls the i-th reception FIFO). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionHeaderFifo enables or disables the header FIFO for the specified */
+     /* ! group.  The mask argument is ignored.  Note that the header FIFO is typically used for debugging. */
+     /* ! */
+     /* ! An operation on facility BGCNS_InjectionFifoInterrupt enables or disables threshold interrupts for the */
+     /* ! specified injection FIFO.  Threshold interrupts occur if available space is less than the configured */
+     /* ! threshold when the FIFO is used for a remote get operation.  The group and mask arguments are as */
+     /* ! described in the BGCNS_InjectionFifo operation (above). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionFifoInterrupt enables or disables interrupts for the specified */
+     /* ! reception FIFO(s).  If enabled, an interrupt will occur when the reception FIFO's available space drops */
+     /* ! below the configured threshold.  The group argument selects the interrupt type (type 0, 1, 2 or 3). */
+     /* ! The mask argument is a bit mask selecting one or more of the 32 normal reception FIFOs. */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionHeaderFifoInterrupt enables or disables interrupts for the specified */
+     /* ! reception header FIFO.  Reception header FIFOs are used for debug purposes only. */
+     /* ! */
+     /* ! An operation on facility BGCNS_InjectionCounterInterrupt enables or disables "Counter Hit Zero" interrupts. */
+     /* ! The group argument does not specify counter group, but rather specifies interrupt 0, 1, 2 or 3.  The mask */
+     /* ! argument is a bit mask that selects one or more counter subgroups to operate on (the 256 injection counters */
+     /* ! are partitioned into 32 subgroups of 8 counters). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionCounterInterrupt enables or disables "Counter Hit Zero" interrupts */
+     /* ! for reception counters.  The group and mask arguments are the same as described in the */
+     /* ! BGCNS_InjectionCounterInterrupt operation (above). */
+     /* ! */
+     /* ! The buffer argument is used as a means to save/restore in an opaque manner.  This is achieved by passing */
+     /* ! a non-NULL buffer to a disable operation and subsequently passing that buffer during a reenable */
+     /* ! operation (the buffer is used to snapshot state). */
+     /* ! */
+     /* ! */
+     /* ! @code */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | Facility                        | group     | mask    | Notes | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_InjectionFifo             | 0..3      | 32 bits | [1]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionFifo             | n/a       | 32 bits | [2]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionHeaderFifo       | 0..3, ALL | N/A     |       | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_InjectionFifoInterrupt    | 0..3      | 32 bits | [1]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionFifoInterrupt    | 0..3      | 32 bits | [3]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_InjectionCounterInterrupt | 0..3      | 32 bits | [3][4]| */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionCounterInterrupt | 0..3      | 32 bits | [3][4]| */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* ! */
+     /* !     [1] There are 128 injection FIFOs partitioned into 4 groups of 32. */
+     /* !     [2] There are 32 normal reception FIFOs in BG/P. */
+     /* !     [3] There are 4 interrupt lines.  The group argument selects one of these 4. */
+     /* !     [4] There are 256 counters of each type (injection and reception).  The */
+     /* !         32-bit mask partitions them into groups of 8. */
+     /* ! */
+     /* ! @endcode */
+     /* ! */
+     /* ! @param[in] operation defines the type of operation being performed (enable, disable, or re-enable). */
+     /* ! @param[in] facility defines the type of FIFO being configured. */
+     /* ! @param[in] group is interpreted differently based on the facility. */
+     /* ! @param[in] mask is interpreted differently based on the facility. */
+     /* ! @param[out] buffer is interpreted differently based on the operation and facility.  It is generally used to capture */
+     /* !   a copy of the facility's current state in a disable operation (and may be null, in which case it is ignored).  It is */
+     /* !   generally used as the value to be loaded in a re-enable operation.  In this manner, a state value captured by a disable */
+     /* !   operation may be easily restored by a subsequent re-enable operation.  The buffer argument is generally ignored by */
+     /* !   enable operations. */
+    int (*setDmaFifoControls)(BGCNS_FifoOperation operation, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer);
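+
+     /* ! A hedged sketch of the save/restore idiom described above, assuming the */
+     /* !   cns() accessor from the overview examples; the mask value is illustrative */
+     /* !   and a single word is assumed to be enough snapshot space for this facility: */
+     /* ! @code */
+     /* !     unsigned saved = 0; */
+     /* !     cns()->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifo, 0, 0xFFFFFFFFu, &saved); */
+     /* !     // ... the reception FIFOs are quiesced here ... */
+     /* !     cns()->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionFifo, 0, 0xFFFFFFFFu, &saved); */
+     /* ! @endcode */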
+
+     /* ! @brief Maps injection FIFOs onto physical (torus hardware) FIFOs. */
+     /* ! @param[in] group specifies the injection FIFO group. */
+     /* ! @param[in] fifoIds is an array of length numberOfFifos whose elements are the identifiers of the imFIFO (within that */
+     /* !   given group). */
+     /* ! @param[in] injection_map is an array of length numberOfFifos whose elements are 8-bit masks identifying which of the */
+     /* !   physical torus injection FIFOs are mapped.  Bits 0-3 correspond to torus group 0, and bits 4-7 correspond to torus */
+     /* !   group 1.  Bits 3 and 7 are the high priority FIFOs. */
+     /* ! @param[in] numberOfFifos describes the number of elements contained in the fifoIds and injection_map arguments. */
+     /* ! @return Zero if the map was properly set.  Non-zero if it was not, including the case of illegal arguments. */
+     /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32.  So the legal range of the group */
+     /* !   argument is 0..3 and the legal range for the fifoIds[] elements is 0..31. */
+
+    int (*setDmaInjectionMap)(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos);
+
+     /* ! @brief Enables or disables "local copy" behavior for the specified injection FIFOs.  A local copy injection FIFO */
+     /* !   can be used to perform memory copies within a node via the DMA engine. */
+     /* ! @param[in] operation specifies whether local copying is being enabled or disabled on the specified FIFOs.  The BGCNS_Reenable */
+     /* !   operation is not supported. */
+     /* ! @param[in] group specifies the injection FIFO group. */
+     /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+     /* ! @return Zero if the operation succeeded; non-zero if it did not. */
+     /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32.  So the legal range of the group */
+     /* !   argument is 0..3. */
+    int (*setDmaLocalCopies)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+     /* ! @brief Enables or disables the priority bit for the specified injection FIFOs.  The priority bit */
+     /* !   is used by the hardware arbitration (details are not further documented here). */
+     /* ! @param[in] operation specifies whether priority bits are being set or cleared. */
+     /* ! @param[in] group specifies the injection FIFO group. */
+     /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+     /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32.  So the legal range of the group */
+     /* !   argument is 0..3. */
+    int (*setDmaPriority)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+     /* ! @brief Sets the mapping from physical (torus hardware) reception FIFOs to reception FIFOs.  The hardware supports */
+     /* !   8 torus FIFOs (six torus dimensions plus high priority plus local copy).  Furthermore, the hardware supports */
+     /* !   4 groups as derived from the PID0 and PID1 bits of the DMA packet.  Thus the mapping is a 4 x 8 matrix of */
+     /* !   reception FIFO ids. */
+     /* ! @param[in] torus_reception_map maps {group} X {torus-hardware-FIFOs} --> reception FIFOs. */
+     /* ! @param[in] fifo_types is an array of N values specifying the type of each normal reception FIFO (see also threshold).  For BGP, */
+     /* !   N=32 (there are 32 normal reception FIFOs). */
+     /* ! @param[in] header_types is an array of N values specifying the type of each reception header FIFO (see also threshold).  For */
+     /* !   BGP, N=4 (there are 4 reception header FIFOs).  Note that reception header FIFOs are typically only used for debugging purposes. */
+     /* ! @param[in] threshold is an array of N threshold values.  The value threshold[i] specifies the threshold value for reception */
+     /* !   FIFO type i.  If reception FIFO interrupts are enabled (see setDmaFifoControls) and a reception FIFO's available space drops */
+     /* !   below its threshold, an interrupt is driven.  For BGP, N=2 (there are type 0 and type 1 reception FIFOs). */
+    int (*setDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[]);
+
+     /* ! @brief Gets the reception map. */
+     /* ! @see setDmaReceptionMap for descriptions of the map and arguments. */
+    int (*getDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned short* store_headers, unsigned header_types[], unsigned threshold[]);
+
+
+     /* ! @deprecated */
+    int (*_used_to_be_clearDmaFullReceptionFifo__removed)(void);
+
+
+     /* ! @brief Resets the MAC unit's PHY. */
+     /* ! @return Zero if the unit was properly reset.  Returns non-zero if some error occurred. */
+     /* ! @deprecated See macResetPHY_nonBlocking. */
+    int (*macResetPHY)(void);
+
+     /* ! @brief Tests the MAC unit's link. */
+     /* ! @param[in] link_type specifies the type of link to be tested. */
+     /* ! @return One (1) if the link is active; zero (0) if it is not. */
+     /* ! @deprecated See macTestLink_nonBlocking */
+    int (*macTestLink)(BGCNS_LinkType link_type);
+
+     /* ! @brief Reads one of the MAC's XGMII registers. */
+     /* ! @param[in] device_address */
+     /* ! @param[in] port_address */
+     /* ! @param[in] register_address */
+     /* ! @return The register's value or a negative number if some error occurred. */
+     /* ! @deprecated Low level MAC register access is being eliminated. */
+    int (*macXgmiiRead)(unsigned device_address, unsigned port_address, unsigned register_address);
+
+     /* ! @brief Writes one of the MAC's XGMII registers. */
+     /* ! @param[in] device_address */
+     /* ! @param[in] port_address */
+     /* ! @param[in] register_address */
+     /* ! @param[in] value */
+     /* ! @return Zero (0) if the register was successfully written; non-zero if some error occurred. */
+     /* ! @deprecated Low level MAC register access is being eliminated. */
+    int (*macXgmiiWrite)(unsigned device_address, unsigned port_address, unsigned register_address, unsigned value);
+
+
+     /* ! @brief Trains SerDes in a non-blocking manner.  The standard usage is to initiate */
+     /* !      training with trainSerDes(1), check the return code, and then continue to invoke */
+     /* !      trainSerDes(0) as long as the return code is BGCNS_RC_CONTINUE. */
+     /* ! @param[in] reset Should be 1 when initiating a retraining sequence and 0 for any */
+     /* !      continuations. */
+     /* ! @return BGCNS_RC_CONTINUE if training is still ongoing (the caller should re-invoke */
+     /* !      the service with reset=0).  BGCNS_RC_COMPLETE if training is complete. */
+     /* !      BGCNS_RC_ERROR if some error has occurred. */
+    int (*trainSerDes)(int reset);
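+
+     /* ! The standard usage described above, as a sketch (assuming the cns() */
+     /* !   accessor from the overview examples): */
+     /* ! @code */
+     /* !     int rc = cns()->trainSerDes(1);  // initiate retraining */
+     /* !     while ( rc == BGCNS_RC_CONTINUE ) */
+     /* !         rc = cns()->trainSerDes(0);  // continue until complete or error */
+     /* ! @endcode */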
+
+     /* ! @brief Fetches the value of the specified control parameter of the environmental monitor. */
+     /* ! @param[in] parameter Parameter to retrieve.  Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */
+     /* ! @param[in] value Pointer to the storage location that will contain the parameter's value when the function successfully returns. */
+     /* ! @return Zero if the parameter was successfully fetched; non-zero if some error occurred. */
+    int (*getEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int* value);
+
+     /* ! @brief Stores a value to the specified control parameter of the environmental monitor */
+     /* ! @param[in] parameter Parameter to store.  Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */
+     /* ! @param[in] value New value for the parameter */
+     /* ! @return Zero if the parameter was successfully stored; non-zero if some error occurred. */
+    int (*setEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int value);
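+
+     /* ! A sketch of reading and updating a parameter (doubling the period is */
+     /* !   purely illustrative, not a recommended setting): */
+     /* ! @code */
+     /* !     unsigned int period; */
+     /* !     if ( cns()->getEnvmonParm(BGCNS_envmon_period, &period) == 0 ) */
+     /* !         cns()->setEnvmonParm(BGCNS_envmon_period, period * 2); */
+     /* ! @endcode */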
+
+     /* ! @brief Performs checks and ensures that the node will continue to operate within tolerances. */
+     /* ! @note MUST be called regularly, as indicated by the nextCallbackTime parameter. */
+     /* ! @param[out] nextCallbackTime Upon returning, this will contain the PPC Timebase register value indicating the next */
+     /* !            time the operating system needs to call performEnvMgmt.  Failure to do so may result in poorly performing */
+     /* !            nodes or shutdown of the block / rack. */
+    int (*performEnvMgmt)(unsigned long long* nextCallbackTime);
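+
+     /* ! A polling sketch; get_tb() stands in for whatever timebase read the */
+     /* !   kernel provides and is an assumption here: */
+     /* ! @code */
+     /* !     unsigned long long next; */
+     /* !     cns()->performEnvMgmt(&next); */
+     /* !     // ... later, in the kernel's periodic path: */
+     /* !     if ( get_tb() >= next ) */
+     /* !         cns()->performEnvMgmt(&next); */
+     /* ! @endcode */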
+
+
+     /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+     /* !        response back from the control system.  When this service is used, */
+     /* !        the caller must poll for completion using the testForOutboxCompletion */
+     /* !        service. */
+     /* ! @param[in] facility The facility (aka component).  See bgp_ras.h for a list of facilities. */
+     /* ! @param[in] unit The unit (aka subcomponent).  See bgp_ras.h for a list of units. */
+     /* ! @param[in] err_code The error code.  See bgp_ras.h for a list of error codes. */
+     /* ! @param[in] numDetails The number of additional details. */
+     /* ! @param[in] details The list of additional details. */
+     /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+    int (*writeRASEvent_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+     /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+     /* !        response back from the control system.  When this service is used, */
+     /* !        the caller must poll for completion using the testForOutboxCompletion */
+     /* !        service. */
+     /* ! @param[in] facility The facility (aka component).  See bgp_ras.h for a list of facilities. */
+     /* ! @param[in] unit The unit (aka subcomponent).  See bgp_ras.h for a list of units. */
+     /* ! @param[in] err_code The error code.  See bgp_ras.h for a list of error codes. */
+     /* ! @param[in] str The message string being written (ASCII encoded, null-terminated).  Note that the length of this string is */
+     /* !     limited to _BGP_RAS_ASCII_MAX_LEN characters.  The implementation may choose to truncate the string if it exceeds this */
+     /* !     length. */
+     /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+     /* !      string was truncated). */
+    int (*writeRASString_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+     /* ! @brief Sets the core's timebase registers to the specified value. */
+     /* ! @param[in] newtime The new 64-bit timebase */
+     /* ! @return Zero if the timebase was successfully set, non-zero if some error condition occurred. */
+     /* ! @deprecated */
+    int (*synchronizeTimebase)(unsigned long long newtime);
+
+     /* ! @brief Sets the node's DMA physical protection settings. */
+     /* ! @note on BGP, there are a maximum of 8 read ranges and 8 write ranges */
+     /* ! @return Zero if the DMA ranges were set, non-zero if some error condition occurred. */
+    int (*dmaSetRange)(unsigned numreadranges,  unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr,
+			 unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr);
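+
+     /* ! A sketch with a single read range and a single write range (the 1GB */
+     /* !   window is illustrative only): */
+     /* ! @code */
+     /* !     unsigned long long lo = 0, hi = 0x40000000ull - 1; */
+     /* !     cns()->dmaSetRange(1, &lo, &hi, 1, &lo, &hi); */
+     /* ! @endcode */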
+
+     /* ! @brief Checks the status of the devices and reports correctable RAS (if any) */
+     /* ! @param[in] clear_error_counts If non-zero, function will also reset the hardware error counters after posting any RAS. */
+     /* ! @return Zero if successful, non-zero if some error condition occurred. */
+    int (*statusCheck)(unsigned clear_error_counts);
+
+     /* ! @brief Stops the DMA and clears any reception unit failure */
+    int (*stopDma)(void);
+
+     /* ! @brief Starts the DMA */
+    int (*startDma)(void);
+
+     /* ! @brief Performs a hard exit.  The status code is provided to the control system. */
+     /* ! @return This service never returns. */
+    void (*exit)(int rc);
+
+     /* ! @brief Resets the MAC unit's PHY but does not block. */
+     /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+     /* !     reset sequence.  That is, callers should initiate a reset sequence with reset=1 and then */
+     /* !     if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+     /* !     reset=0. */
+     /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+     /* !     before returning.  If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+     /* !     to indicate that it needs additional time. */
+     /* ! @return BGCNS_RC_COMPLETE if the unit was properly reset.  BGCNS_RC_CONTINUE if the reset operation is */
+     /* !     not yet complete.  BGCNS_RC_ERROR if the reset operation failed. */
+    int (*macResetPHY_nonBlocking)(int reset, unsigned timeoutInMillis);
+
+     /* ! @brief Tests the MAC unit's link but does not block. */
+     /* ! @param[in] link_type specifies the type of link to be tested. */
+     /* ! @param[out] result points to the link status, which is valid only when the return code is */
+     /* !     BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */
+     /* !     indicates that it is inactive. */
+     /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+     /* !     test link sequence.  That is, callers should initiate a sequence with reset=1 and then */
+     /* !     if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+     /* !     reset=0. */
+     /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+     /* !     before returning.  If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+     /* !     to indicate that it needs additional time. */
+     /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */
+     /* !     if the test operation is not yet complete.  BGCNS_RC_ERROR if the test failed. */
+    int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis);
+
+    void * _not_in_use_1068;
+    void * _not_in_use_1069;
+
+
+     /* ! @brief Indicates that a new job is about to start. */
+     /* ! @return Zero (0) if CNS is ready for a new job to start.  Returns non-zero otherwise. */
+    int (*startNextJob)(void);
+
+     /* ! @brief Indicates that the CNS should use the specified virtual address when accessing the */
+     /* !     given device.  When a device is remapped, CNS will no longer make any attempt to map */
+     /* !     a TLB to access that device -- it is the responsibility of the kernel to handle the */
+     /* !     TLB either proactively or reactively (via a fault). */
+     /* ! @param[in] device specifies the device being mapped. */
+     /* ! @param[in] base_address is the root virtual address of the device.  The address should be */
+     /* !     naturally aligned (relative to the size of the device).  See the section Reserved and */
+     /* !     Preferred Addresses for more information. */
+     /* ! @return Zero (0) if the device was successfully remapped.  Returns non-zero if it was not. */
+     /* ! @remarks The lock box is in active use by CNS during early boot and thus it is not */
+     /* !    possible to remap the BGCNS_LockBox device until all cores are activated by the kernel */
+     /* !    (that is, takeCPU has been called for all cores). */
+    int (*mapDevice)(BGCNS_DeviceMasks device, void* base_address);
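+     /* ! @par Example: an illustrative sketch (dir as above; the virtual address is */
+     /* !     arbitrary, and BGCNS_LockBox is the device named in the remark above): */
+     /* !         dir->mapDevice(BGCNS_LockBox, (void *)0xC0000000); */
+     /* !     Per the remark, this particular device may only be remapped once takeCPU */
+     /* !     has been called for all cores. */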
+
+     /* ! @brief Enables barriers on the specified channel. */
+     /* ! @param channel specifies the channel being enabled. */
+     /* ! @param user_mode indicates whether the barrier is to be used in user-mode code. */
+     /* ! @return Zero if global barriers were enabled.  Returns non-zero if the request could not be */
+     /* !        completed, including the case of attempting to enable a reserved channel. */
+    int (*enableBarrier)(unsigned int channel, int user_mode);
+
+     /* ! @brief Disables barriers on the specified channel. */
+     /* ! @return Zero if global barriers were disabled.  Returns non-zero if the request could not be */
+     /* !        completed, including the case of attempting to disable a reserved channel. */
+    int (*disableBarrier)(unsigned int channel);
+
+     /* ! @brief A global barrier that does not block indefinitely. */
+     /* ! @param channel indicates the GLINT hardware channel to use. */
+     /* ! @param reset indicates whether this is the beginning (1) or a continuation (0) of a barrier */
+     /* !   sequence.  That is, callers should initiate a barrier operation by passing reset=1 and then, */
+     /* !   if receiving a return code of BGCNS_RC_CONTINUE, should invoke the service again with */
+     /* !   reset=0. */
+     /* ! @param timeoutInMillis is the (approximate) number of milliseconds that this service is allowed */
+     /* !   to wait for barrier participants before returning to the caller. */
+     /* ! @return BGCNS_RC_COMPLETE indicates that all participants have arrived at the barrier.  BGCNS_RC_CONTINUE */
+     /* !   indicates that not all participants arrived within the allotted timeout period.  BGCNS_RC_ERROR */
+     /* !   indicates that some other problem has been detected. */
+     /* ! @remarks This service is not thread safe.  It is considered a programming error to invoke it */
+     /* !   from multiple threads concurrently and the behavior is not defined. */
+    int (*globalBarrier_nonBlocking)(unsigned channel, int reset, unsigned timeoutInMillis);
+
+     /* ! @brief Restarts the kernel in cycle-reproducibility mode. */
+     /* ! @return Zero if no restart was required for reproducibility. */
+     /* ! @remarks This service must be called from each core and only after all I/O operations have been completed. */
+     /* !   Processors will be reset and kernels will start again. */
+    int (*setupReproducibility)(void);
+
+} BGCNS_ServiceDirectory;
+
+/* ! @deprecated */
+/* ! @typedef BGCNS_DeprecatedServicesDirectory */
+/* ! @struct  _BGCNS_DeprecatedServices */
+/* ! @brief   These services exist for historical reasons and are not further documented here. */
+/* !          They may not be available in future releases of CNS. */
+typedef struct _BGCNS_DeprecatedServices {
+    int (*torusTermCheck)(int* nonFatalRc);
+    int (*torusLinkErrCheck)(int* nonFatalRc);
+    int (*torusCRCExchange)(void);
+    int (*collectiveConfigureClassInternal)(unsigned virtualTree, unsigned short specifier);
+    int (*collectiveConfigureClass)(unsigned virtualTree, unsigned short specifier);
+    unsigned (*collectiveGetClass)(unsigned virtualTree);
+    int (*collectiveInit)(void);
+    int (*collectiveRelease)(void);
+    int (*collectiveHardReset)(void);
+    int (*netbusTermCheck)(void);
+    unsigned (*getSerDesLinkStatus)(void);
+    int  (*dmaTermCheck)(void);
+} BGCNS_DeprecatedServicesDirectory;
+
+/* ! @typedef BGCNS_Descriptor */
+/* ! @struct  _BGCNS_Descriptor */
+/* ! @brief  The Common Node Services descriptor.  This descriptor provides information to the kernel regarding */
+/* !         the CNS memory region as well as a service directory.  The descriptor is passed to the kernel */
+/* !         upon boot and must not be altered by the kernel. */
+typedef struct _BGCNS_Descriptor {
+    BGCNS_ServiceDirectory* services;         //!< A pointer to the services directory.
+    unsigned baseVirtualAddress;	      //!< The virtual address of the beginning of the CNS memory region.
+    unsigned size;			      //!< The size (in bytes) of the CNS memory region.
+    unsigned basePhysicalAddress;             //!< The physical address of the CNS memory region.
+    unsigned basePhysicalAddressERPN;         //!< The extended real page number of the CNS memory region.
+    unsigned bgcns_private_in_use;            //!< Undefined.  This field is for internal use only and may disappear at any time.
+    BGCNS_DeprecatedServicesDirectory* deprecatedServices; //!< @deprecated undocumented
+    unsigned version;                         //!< The CNS version
+} BGCNS_Descriptor;
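+/* ! @par Example: an illustrative consumer sketch.  On BGP the descriptor arrives in */
+/* !     register r3 (see platform_init in arch/powerpc/boot/bgp.c below), after which */
+/* !     the kernel calls through the directory: */
+/* !         BGCNS_Descriptor *d = (BGCNS_Descriptor *)r3; */
+/* !         void *pers = d->services->getPersonalityData(); */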
+
+
+
+#endif /* !__ASSEMBLY */
+#endif /* _BGCNS_H */
diff --git a/arch/powerpc/boot/bgp.c b/arch/powerpc/boot/bgp.c
new file mode 100644
index 0000000..ff88da2
--- /dev/null
+++ b/arch/powerpc/boot/bgp.c
@@ -0,0 +1,179 @@
+/*
+ * (C) Copyright IBM Corp. 2007, 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Based on earlier code:
+ *   Copyright (C) Paul Mackerras 1997.
+ *
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ *   David Gibson, IBM Corporation, 2007
+ *
+ */
+#include "types.h"
+#include "ops.h"
+#include "stdio.h"
+#include "4xx.h"
+#include "44x.h"
+#include "bgcns.h"
+/* Types needed for the personality */
+typedef u8  uint8_t;
+typedef u16 uint16_t;
+typedef u32 uint32_t;
+#include "bgp_personality.h"
+
+/* Blue Gene firmware jumps to 0x10.
+ * Simply branch to _zimage_start which is typically 0x800000.
+ * Must also link with --section-start bgstart=0
+ */
+asm (
+"       .section bgstart, \"ax\";		"
+"       .=0x10;					"
+"       lis     %r9, _zimage_start@h;		"
+"       ori	%r9, %r9, _zimage_start@l;	"
+"       mtlr    %r9;				"
+"       blr;					"
+"	.previous				"
+);
+
+/* This will point directly to CNS, which remains mapped on entry. */
+BGCNS_Descriptor* cns;
+
+//static void bgp_console_write(const char *msg, int len) __attribute__((unused)) ;
+
+static void bgp_console_write(const char *msg, int len)
+{
+	if (cns)
+		cns->services->writeToMailboxConsole((char *)msg, len);
+}
+
+static void bgp_fixup_bluegene_cns(BGCNS_Descriptor *cns)
+{
+	void *node = finddevice("/ibm,bluegene/cns");
+	if (node) {
+		setprop_val(node, "base-va", cns->baseVirtualAddress);
+		setprop_val(node, "base-pa", cns->basePhysicalAddress);
+		setprop_val(node, "size", cns->size);
+		setprop_val(node, "services", cns->services);
+		setprop_val(node, "version", cns->version);
+	} else {
+		fatal("could not find /ibm,bluegene/cns node in device tree");
+	}
+}
+
+static void bgp_fixup_bluegene_personality(BGP_Personality_t *bgpers)
+{
+	void *node = finddevice("/ibm,bluegene/personality");
+	if (node) {
+		/* We could include individual fields of the personality as needed
+		 * so that Linux doesn't need to decode the struct directly.  We
+		 * provide raw-data for external tools and daemons.
+		 * This can replace /proc/personality
+		 */
+		unsigned frequency = bgpers->Kernel_Config.FreqMHz * 1000000;
+		setprop(node, "raw-data", bgpers, sizeof(*bgpers));
+		setprop_val(node, "frequency", frequency);
+	} else {
+		fatal("could not find /ibm,bluegene/personality node in device tree");
+	}
+}
+
+static void bgp_fixup_bluegene_initrd(void)
+{
+	void *node = finddevice("/chosen");
+	if (node) {
+		/* On Blue Gene we may have a gzipped ramdisk loaded at a fixed
+		 * address (0x1000000).  It is preceded by a 4-byte magic value and a
+		 * 4-byte big-endian length.
+		 */
+		unsigned *rd = (unsigned *)0x1000000;	/* 16M */
+
+		if (rd[0] == 0xf0e1d2c3 && rd[1] != 0) {
+			unsigned initrd_start = (unsigned)(rd+2);
+			unsigned initrd_len = rd[1];
+			unsigned initrd_end = initrd_start + initrd_len;
+			setprop_val(node, "linux,initrd-start", initrd_start);
+			setprop_val(node, "linux,initrd-end", initrd_end);
+		}
+	} else {
+		fatal("could not find chosen node in device tree");
+	}
+}
+
+static void bgp_fixups(void)
+{
+	BGP_Personality_t *bgpers = cns->services->getPersonalityData();
+	unsigned int DDRSize = (bgpers->DDR_Config.DDRSizeMB << 20) - cns->size;
+	unsigned int freq = bgpers->Kernel_Config.FreqMHz * 1000000;
+
+	/* For vRNIC configurations, turn down the memory that Linux thinks is
+	 * on the node so the vRNIC can map it all.
+	 */
+	if ((DDRSize & 0xf0000000) == 0xd0000000)
+		DDRSize = 0xb0000000;
+
+	dt_fixup_memory(0, DDRSize);
+	dt_fixup_cpu_clocks(freq, freq, freq);
+
+	bgp_fixup_bluegene_cns(cns);
+	bgp_fixup_bluegene_personality(bgpers);
+	bgp_fixup_bluegene_initrd();
+
+#if 0
+	 /*  FIXME: sysclk should be derived by reading the FPGA registers */
+	unsigned long sysclk = 33000000;
+
+	ibm440gp_fixup_clocks(sysclk, 6 * 1843200);
+	ibm4xx_sdram_fixup_memsize();
+	dt_fixup_mac_address_by_alias("ethernet0", ebony_mac0);
+	dt_fixup_mac_address_by_alias("ethernet1", ebony_mac1);
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+	ebony_flashsel_fixup();
+#endif
+}
+
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	cns = (BGCNS_Descriptor*) r3;
+#if defined(CONFIG_PPC_EARLY_DEBUG_BGP)
+	{
+		BGP_Personality_t bgpers;
+
+		/* Only rank 0 gets a console; other ranks boot silently. */
+		if (cns) {
+			memcpy(&bgpers, cns->services->getPersonalityData(),
+			       sizeof(bgpers));
+
+			if (bgpers.Network_Config.Rank == 0)
+				console_ops.write = bgp_console_write;
+			else
+				console_ops.write = 0;
+		}
+	}
+#endif
+
+	simple_alloc_init(_end, 256 << 20, 32, 64);
+
+	platform_ops.fixups = bgp_fixups;
+	platform_ops.exit = ibm44x_dbcr_reset;
+	fdt_init(_dtb_start);
+
+/*	serial_console_init(); */
+}
diff --git a/arch/powerpc/boot/bgp_personality.h b/arch/powerpc/boot/bgp_personality.h
new file mode 100644
index 0000000..37c9161
--- /dev/null
+++ b/arch/powerpc/boot/bgp_personality.h
@@ -0,0 +1,1086 @@
+/*
+ * Andrew Tauferner
+ *
+ * Copyright 2006, 2007 International Business Machines
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef	BGP_PERSONALITY_H_ // Prevent multiple inclusion
+#define	BGP_PERSONALITY_H_
+
+
+
+
+/* #include <linux/types.h> */
+
+// These defines allow use of IBM's bit numbering (MSb=0, LSb=31) for multi-bit fields
+//  b = IBM bit number of the least significant bit (highest number)
+//  x = value to set in the field
+//  s = size of the field in bits
+#define _BS(b,x,s)   (((x) & (0x7FFFFFFF >> (31-(s)))) << (31-(b)))
+#define _BG(b,x,s)   ((_BS(b,0x7FFFFFFF,s) & (x)) >> (31-(b)))
+#define _BS64(b,x,s) (((x) & (0x7FFFFFFFFFFFFFFFLL >> (63-(s)))) << (63-(b)))
+#define _BG64(b,x,s) ((_BS64(b,0x7FFFFFFFFFFFFFFFLL,s) & (x)) >> (63-(b)))
+#define _BN(b)    ((1<<(31-(b))))
+#define _B1(b,x)  (((x)&0x1)<<(31-(b)))
+#define _B2(b,x)  (((x)&0x3)<<(31-(b)))
+#define _B3(b,x)  (((x)&0x7)<<(31-(b)))
+#define _B4(b,x)  (((x)&0xF)<<(31-(b)))
+#define _B5(b,x)  (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x)  (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x)  (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x)  (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x)  (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
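+
+// Worked example (illustrative): under IBM numbering (MSb=0), _B4(12,0xA) places the
+// 4-bit value 0xA so that its least significant bit lands at IBM bit 12:
+//   _B4(12,0xA) == (0xA & 0xF) << (31-12) == 0xA << 19 == 0x00500000
+// and _BN(0) == 1 << 31 == 0x80000000, i.e. the IBM most-significant bit.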
+
+#define BGP_UCI_Component_Rack              ( 0)
+#define BGP_UCI_Component_Midplane          ( 1)
+#define BGP_UCI_Component_BulkPowerSupply   ( 2)
+#define BGP_UCI_Component_PowerCable        ( 3)
+#define BGP_UCI_Component_PowerModule       ( 4)
+#define BGP_UCI_Component_ClockCard         ( 5)
+#define BGP_UCI_Component_FanAssembly       ( 6)
+#define BGP_UCI_Component_Fan               ( 7)
+#define BGP_UCI_Component_ServiceCard       ( 8)
+#define BGP_UCI_Component_LinkCard          ( 9)
+#define BGP_UCI_Component_LinkChip          (10)
+#define BGP_UCI_Component_LinkPort          (11)  // Identifies 1 end of a LinkCable
+#define BGP_UCI_Component_NodeCard          (12)
+#define BGP_UCI_Component_ComputeCard       (13)
+#define BGP_UCI_Component_IOCard            (14)
+#define BGP_UCI_Component_DDRChip           (15)
+#define BGP_UCI_Component_ENetConnector     (16)
+
+typedef struct BGP_UCI_Rack_t
+                {                           // "Rxy": R<RackRow><RackColumn>
+                unsigned Component   :  5;  // when BGP_UCI_Component_Rack
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_Rack_t;
+
+#define BGP_UCI_RACK_COMPONENT(x)              _B5( 4,x)  // when BGP_UCI_Component_Rack
+#define BGP_UCI_RACK_RACKROW(x)                _B4( 8,x)  // 0..F
+#define BGP_UCI_RACK_RACKCOLUMN(x)             _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_Midplane_t
+                {                           // "Rxy-Mm": R<RackRow><RackColumn>-M<Midplane>
+                unsigned Component   :  5;  // when BGP_UCI_Component_Midplane
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned _zero       : 18;  // zeros
+                }
+                BGP_UCI_Midplane_t;
+
+#define BGP_UCI_MIDPLANE_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_Midplane
+#define BGP_UCI_MIDPLANE_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_MIDPLANE_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_MIDPLANE_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+
+
+typedef struct BGP_UCI_BulkPowerSupply_t
+                {                           // "Rxy-B": R<RackRow><RackColumn>-B
+                unsigned Component   :  5;  // when BGP_UCI_Component_BulkPowerSupply
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_BulkPowerSupply_t;
+
+#define BGP_UCI_BULKPOWERSUPPLY_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_BulkPowerSupply
+#define BGP_UCI_BULKPOWERSUPPLY_RACKROW(x)     _B4( 8,x)  // 0..F
+#define BGP_UCI_BULKPOWERSUPPLY_RACKCOLUMN(x)  _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_PowerCable_t
+                {                           // "Rxy-B-C": R<RackRow><RackColumn>-B-C
+                unsigned Component   :  5;  // when BGP_UCI_Component_PowerCable
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_PowerCable_t;
+
+#define BGP_UCI_POWERCABLE_COMPONENT(x)        _B5( 4,x)  // when BGP_UCI_Component_PowerCable
+#define BGP_UCI_POWERCABLE_RACKROW(x)          _B4( 8,x)  // 0..F
+#define BGP_UCI_POWERCABLE_RACKCOLUMN(x)       _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_PowerModule_t
+                {                           // "Rxy-B-Pp": R<RackRow><RackColumn>-B-P<PowerModule>
+                unsigned Component   :  5;  // when BGP_UCI_Component_PowerModule
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned PowerModule :  3;  // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+                unsigned _zero       : 16;  // zeros
+                }
+                BGP_UCI_PowerModule_t;
+
+#define BGP_UCI_POWERMODULE_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_PowerModule
+#define BGP_UCI_POWERMODULE_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_POWERMODULE_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_POWERMODULE_POWERMODULE(x)     _B3(15,x)  // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+
+
+typedef struct BGP_UCI_ClockCard_t
+                {                           // "Rxy-K": R<RackRow><RackColumn>-K
+                unsigned Component   :  5;  // when BGP_UCI_Component_ClockCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_ClockCard_t;
+
+#define BGP_UCI_CLOCKCARD_COMPONENT(x)         _B5( 4,x)  // when BGP_UCI_Component_ClockCard
+#define BGP_UCI_CLOCKCARD_RACKROW(x)           _B4( 8,x)  // 0..F
+#define BGP_UCI_CLOCKCARD_RACKCOLUMN(x)        _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_FanAssembly_t
+                {                           // "Rxy-Mm-Aa": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>
+                unsigned Component   :  5;  // when BGP_UCI_Component_FanAssembly
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned FanAssembly :  4;  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+                unsigned _zero       : 14;  // zeros
+                }
+                BGP_UCI_FanAssembly_t;
+
+#define BGP_UCI_FANASSEMBLY_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_FanAssembly
+#define BGP_UCI_FANASSEMBLY_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_FANASSEMBLY_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_FANASSEMBLY_MIDPLANE(x)        _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_FANASSEMBLY_FANASSEMBLY(x)     _B4(17,x)  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+
+
+
+typedef struct BGP_UCI_Fan_t
+                {                           // "Rxy-Mm-Aa-Ff": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>-F<Fan>
+                unsigned Component   :  5;  // when BGP_UCI_Component_Fan
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned FanAssembly :  4;  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+                unsigned Fan         :  2;  // 0..2 (0=Tailstock, 2=Midplane)
+                unsigned _zero       : 12;  // zeros
+                }
+                BGP_UCI_Fan_t;
+
+#define BGP_UCI_FAN_COMPONENT(x)               _B5( 4,x)  // when BGP_UCI_Component_Fan
+#define BGP_UCI_FAN_RACKROW(x)                 _B4( 8,x)  // 0..F
+#define BGP_UCI_FAN_RACKCOLUMN(x)              _B4(12,x)  // 0..F
+#define BGP_UCI_FAN_MIDPLANE(x)                _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_FAN_FANASSEMBLY(x)             _B4(17,x)  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+#define BGP_UCI_FAN_FAN(x)                     _B2(19,x)  // 0..2 (0=Tailstock, 2=Midplane)
+
+typedef struct BGP_UCI_ServiceCard_t
+                {                           // "Rxy-Mm-S": R<RackRow><RackColumn>-M<Midplane>-S
+                unsigned Component   :  5;  // when BGP_UCI_Component_ServiceCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top (Master ServiceCard in M0)
+                unsigned _zero       : 18;  // zeros
+                }
+                BGP_UCI_ServiceCard_t;
+
+#define BGP_UCI_SERVICECARD_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_ServiceCard
+#define BGP_UCI_SERVICECARD_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_SERVICECARD_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_SERVICECARD_MIDPLANE(x)        _B1(13,x)  // 0=Bottom, 1=Top (Master ServiceCard in M0)
+
+
+
+typedef struct BGP_UCI_LinkCard_t
+                {                           // "Rxy-Mm-Ll": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_LinkCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned LinkCard    :  2;  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+                unsigned _zero       : 16;  // zeros
+                }
+                BGP_UCI_LinkCard_t;
+
+#define BGP_UCI_LINKCARD_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_LinkCard
+#define BGP_UCI_LINKCARD_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_LINKCARD_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_LINKCARD_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_LINKCARD_LINKCARD(x)           _B2(15,x)  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+
+
+
+typedef struct BGP_UCI_LinkChip_t
+                {                           // "Rxy-Mm-Ll-Uu": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-U<LinkChip>
+                unsigned Component   :  5;  // when BGP_UCI_Component_LinkChip
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned LinkCard    :  2;  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+                unsigned LinkChip    :  3;  // 00..05: left to right from Front
+                unsigned _zero       : 13;  // zeros
+                }
+                BGP_UCI_LinkChip_t;
+
+#define BGP_UCI_LINKCHIP_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_LinkChip
+#define BGP_UCI_LINKCHIP_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_LINKCHIP_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_LINKCHIP_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_LINKCHIP_LINKCARD(x)           _B2(15,x)  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+#define BGP_UCI_LINKCHIP_LINKCHIP(x)           _B3(18,x)  // 00..05: left to right from Front
+
+typedef struct BGP_UCI_LinkPort_t
+                {                           // "Rxy-Mm-Ll-Jjj": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-J<LinkPort>
+                unsigned Component   :  5;  // when BGP_UCI_Component_LinkPort
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned LinkCard    :  2;  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+                unsigned LinkPort    :  4;  // 00..15: left to right from Front
+                unsigned _zero       : 12;  // zeros
+                }
+                BGP_UCI_LinkPort_t;
+
+#define BGP_UCI_LINKPORT_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_LinkPort
+#define BGP_UCI_LINKPORT_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_LINKPORT_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_LINKPORT_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_LINKPORT_LINKCARD(x)           _B2(15,x)  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+#define BGP_UCI_LINKPORT_LINKPORT(x)           _B4(19,x)  // 00..15: left to right from Front
+
+
+typedef struct BGP_UCI_NodeCard_t
+                {                           // "Rxy-Mm-Nnn": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_NodeCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned _zero       : 14;  // zeros
+                }
+                BGP_UCI_NodeCard_t;
+
+#define BGP_UCI_NODECARD_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_NodeCard
+#define BGP_UCI_NODECARD_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_NODECARD_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_NODECARD_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_NODECARD_NODECARD(x)           _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+
+
+
+typedef struct BGP_UCI_ComputeCard_t
+                {                           // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_ComputeCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned ComputeCard :  6;  // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+                unsigned _zero       :  8;  // zeros
+                }
+                BGP_UCI_ComputeCard_t;
+
+#define BGP_UCI_COMPUTECARD_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_ComputeCard
+#define BGP_UCI_COMPUTECARD_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_COMPUTECARD_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_COMPUTECARD_MIDPLANE(x)        _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_COMPUTECARD_NODECARD(x)        _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_COMPUTECARD_COMPUTECARD(x)     _B6(23,x)  // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+
+
+typedef struct BGP_UCI_IOCard_t
+                {                           // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_IOCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned ComputeCard :  6;  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+                unsigned _zero       :  8;  // zeros
+                }
+                BGP_UCI_IOCard_t;
+
+#define BGP_UCI_IOCARD_COMPONENT(x)            _B5( 4,x)  // when BGP_UCI_Component_IOCard
+#define BGP_UCI_IOCARD_RACKROW(x)              _B4( 8,x)  // 0..F
+#define BGP_UCI_IOCARD_RACKCOLUMN(x)           _B4(12,x)  // 0..F
+#define BGP_UCI_IOCARD_MIDPLANE(x)             _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_IOCARD_NODECARD(x)             _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_IOCARD_COMPUTECARD(x)          _B6(23,x)  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+
+
+
+typedef struct BGP_UCI_DDRChip_t
+                {                           // "Rxy-Mm-Nnn-Jxx-Uuu": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>-U<DDRChip>
+                unsigned Component   :  5;  // when BGP_UCI_Component_DDRChip
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned ComputeCard :  6;  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+                unsigned DDRChip     :  5;  // 00..20
+                unsigned _zero       :  3;  // zeros
+                }
+                BGP_UCI_DDRChip_t;
+
+#define BGP_UCI_DDRCHIP_COMPONENT(x)           _B5( 4,x)  // when BGP_UCI_Component_DDRChip
+#define BGP_UCI_DDRCHIP_RACKROW(x)             _B4( 8,x)  // 0..F
+#define BGP_UCI_DDRCHIP_RACKCOLUMN(x)          _B4(12,x)  // 0..F
+#define BGP_UCI_DDRCHIP_MIDPLANE(x)            _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_DDRCHIP_NODECARD(x)            _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_DDRCHIP_COMPUTECARD(x)         _B6(23,x)  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+#define BGP_UCI_DDRCHIP_DDRCHIP(x)             _B5(28,x)  // 00..20
+
+
+typedef struct BGP_UCI_ENetConnector_t
+                {                           // "Rxy-Mm-Nnn-ENe": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-EN<EN>
+                unsigned Component   :  5;  // when BGP_UCI_Component_ENetConnector
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned EN          :  1;  // 0..1 (Equal to IOCard number)
+                unsigned _zero       : 13;  // zeros
+                }
+                BGP_UCI_ENetConnector_t;
+
+#define BGP_UCI_ENETCONNECTOR_COMPONENT(x)     _B5( 4,x)  // when BGP_UCI_Component_ENetConnector
+#define BGP_UCI_ENETCONNECTOR_RACKROW(x)       _B4( 8,x)  // 0..F
+#define BGP_UCI_ENETCONNECTOR_RACKCOLUMN(x)    _B4(12,x)  // 0..F
+#define BGP_UCI_ENETCONNECTOR_MIDPLANE(x)      _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_ENETCONNECTOR_NODECARD(x)      _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_ENETCONNECTOR_ENETCONNECTOR(x) _B1(18,x)  // 0..1 (Equal to IOCard number)
+
+
+
+typedef union  TBGP_UniversalComponentIdentifier
+                {
+                uint32_t                   UCI;
+                BGP_UCI_Rack_t            Rack;
+                BGP_UCI_Midplane_t        Midplane;
+                BGP_UCI_BulkPowerSupply_t BulkPowerSupply;
+                BGP_UCI_PowerCable_t      PowerCable;
+                BGP_UCI_PowerModule_t     PowerModule;
+                BGP_UCI_ClockCard_t       ClockCard;
+                BGP_UCI_FanAssembly_t     FanAssembly;
+                BGP_UCI_Fan_t             Fan;
+                BGP_UCI_ServiceCard_t     ServiceCard;
+                BGP_UCI_LinkCard_t        LinkCard;
+                BGP_UCI_LinkChip_t        LinkChip;
+                BGP_UCI_LinkPort_t        LinkPort;
+                BGP_UCI_NodeCard_t        NodeCard;
+                BGP_UCI_ComputeCard_t     ComputeCard;
+                BGP_UCI_IOCard_t          IOCard;
+                BGP_UCI_DDRChip_t         DDRChip;
+                BGP_UCI_ENetConnector_t   ENetConnector;
+                }
+                BGP_UniversalComponentIdentifier;
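+
+// Decode sketch (illustrative; raw_uci_word stands for any raw 32-bit UCI value):
+// the 5-bit Component field occupies the same position in every variant, so any
+// view can be used to select the one that is valid:
+//   BGP_UniversalComponentIdentifier uci;
+//   uci.UCI = raw_uci_word;
+//   if (uci.Rack.Component == BGP_UCI_Component_ComputeCard) {
+//       // uci.ComputeCard.RackRow, .RackColumn, .Midplane, .NodeCard and
+//       // .ComputeCard now describe the card's physical location
+//   }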
+
+
+
+#define BGP_PERSONALITY_VERSION (0x0A)
+
+#define BGP_DEFAULT_FREQ (850) 
+
+#define BGP_PERS_PROCESSCONFIG_DIAGS      (0xFF000000) // Diagnostic Mode: All Cores Enabled and Privileged in Process 0
+#define BGP_PERS_PROCESSCONFIG_SMP        (0x0F000000) // All Cores Enabled User-Space in Process 0
+#define BGP_PERS_PROCESSCONFIG_VNM        (0x08040201) // 4 Single-Core Processes (a.k.a. Virtual Nodes)
+#define BGP_PERS_PROCESSCONFIG_2x2        (0x0C030000) // 2 Processes of 2 Cores each in same DP unit
+#define BGP_PERS_PROCESSCONFIG_2x2_CROSS1 (0x09060000) // 2 Processes of 2 Cores in different DP units
+#define BGP_PERS_PROCESSCONFIG_2x2_CROSS2 (0x0A050000) // 2 Processes of 2 Cores in different DP units
+#define BGP_PERS_PROCESSCONFIG_3PLUS1     (0x0E010000) // 3 Cores in one Process, 4th Core in a Separate Process
+#define BGP_PERS_PROCESSCONFIG_DEFAULT    (BGP_PERS_PROCESSCONFIG_DIAGS)
+
+
+// Personality.Kernel_Config.RASPolicy
+#define BGP_PERS_RASPOLICY_VERBOSITY(x)   _B2( 1,x)  // Verbosity as shown below
+#define BGP_PERS_RASPOLICY_MINIMAL          BGP_PERS_RASPOLICY_VERBOSITY(0) // Benchmarking Level of Capture and Reporting
+#define BGP_PERS_RASPOLICY_NORMAL           BGP_PERS_RASPOLICY_VERBOSITY(1) // Normal Production Level of Capture and Reporting
+#define BGP_PERS_RASPOLICY_VERBOSE          BGP_PERS_RASPOLICY_VERBOSITY(2) // Manufacturing Test and Diagnostics
+#define BGP_PERS_RASPOLICY_EXTREME          BGP_PERS_RASPOLICY_VERBOSITY(3) // Report Every Event Immediately - Thresholds set to 1
+#define BGP_PERS_RASPOLICY_FATALEXIT      _BN( 2)   // Fatal is Fatal, so exit.
+
+#define BGP_PERS_RASPOLICY_DEFAULT        (BGP_PERS_RASPOLICY_VERBOSE | BGP_PERS_RASPOLICY_FATALEXIT)
+
+
+#define BGP_PERSONALITY_LEN_NFSDIR (32) // 32 bytes
+
+#define BGP_PERSONALITY_LEN_SECKEY (32) // 32 bytes
+
+// Personality.NodeConfig Driver Enables and Configurations
+#define BGP_PERS_ENABLE_Simulation      _BN( 0)  // Running on VHDL Simulation
+#define BGP_PERS_ENABLE_LockBox         _BN( 1)
+#define BGP_PERS_ENABLE_BIC             _BN( 2)
+#define BGP_PERS_ENABLE_DDR             _BN( 3)  // DDR Controllers (not Fusion DDR model)
+#define BGP_PERS_ENABLE_LoopBack        _BN( 4)  // LoopBack: Internal TS/TR or SerDes Loopback
+#define BGP_PERS_ENABLE_GlobalInts      _BN( 5)
+#define BGP_PERS_ENABLE_Collective      _BN( 6)  // Enable Collective Network
+#define BGP_PERS_ENABLE_Torus           _BN( 7)
+#define BGP_PERS_ENABLE_TorusMeshX      _BN( 8)  // Torus is a Mesh in the X-dimension
+#define BGP_PERS_ENABLE_TorusMeshY      _BN( 9)  // Torus is a Mesh in the Y-dimension
+#define BGP_PERS_ENABLE_TorusMeshZ      _BN(10)  // Torus is a Mesh in the Z-dimension
+#define BGP_PERS_ENABLE_TreeA           _BN(11)  // Enable Collective Network A-link
+#define BGP_PERS_ENABLE_TreeB           _BN(12)  // Enable Collective Network B-link
+#define BGP_PERS_ENABLE_TreeC           _BN(13)  // Enable Collective Network C-link
+#define BGP_PERS_ENABLE_DMA             _BN(14)
+#define BGP_PERS_ENABLE_SerDes          _BN(15)
+#define BGP_PERS_ENABLE_UPC             _BN(16)
+#define BGP_PERS_ENABLE_EnvMon          _BN(17)
+#define BGP_PERS_ENABLE_Ethernet        _BN(18)
+#define BGP_PERS_ENABLE_JTagLoader      _BN(19)  // Converse with JTag Host to load kernel
+#define BGP_PERS_ENABLE_MailBoxReceive  BGP_PERS_ENABLE_JTagLoader
+#define BGP_PERS_ENABLE_PowerSave       _BN(20)  // Turn off unused devices (Eth on CN, TS on ION)
+#define BGP_PERS_ENABLE_FPU             _BN(21)  // Enable Double-Hummers (not supported in EventSim)
+#define BGP_PERS_ENABLE_StandAlone      _BN(22)  // Disable "CIOD" interface, Requires Collective!
+#define BGP_PERS_ENABLE_TLBMisses       _BN(23)  // TLB Misses vs Wasting Memory (see bgp_AppSetup.c)
+#define BGP_PERS_ENABLE_Mambo           _BN(24)  // Running under Mambo? Used by Linux
+#define BGP_PERS_ENABLE_TreeBlast       _BN(25)  // Enable Tree "Blast" mode
+#define BGP_PERS_ENABLE_BlindStacks     _BN(26)  // For "XB" Tests, Lock 16K Stacks in Blind Device
+#define BGP_PERS_ENABLE_CNK_Malloc      _BN(27)  // Enable Malloc Support in CNK.
+#define BGP_PERS_ENABLE_Reproducibility _BN(28)  // Enable Cycle Reproducibility
+#define BGP_PERS_ENABLE_HighThroughput  _BN(29)  // Enable high throughput computing mode
+#define BGP_PERS_ENABLE_DiagnosticsMode _BN(30)  // Enable diagnostics mode
+
+// Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back).
+//  This overrides most L1, L2, and Snoop settings. Careful!
+#define BGP_PERS_ENABLE_BGLMODE      _BN(31)  // (not yet fully implemented)
+
+// Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave
+#define BGP_PERS_NODECONFIG_DEFAULT (BGP_PERS_ENABLE_Simulation  |\
+                                      BGP_PERS_ENABLE_LockBox     |\
+                                      BGP_PERS_ENABLE_BIC         |\
+                                      BGP_PERS_ENABLE_DDR         |\
+                                      BGP_PERS_ENABLE_LoopBack    |\
+                                      BGP_PERS_ENABLE_GlobalInts  |\
+                                      BGP_PERS_ENABLE_Collective  |\
+                                      BGP_PERS_ENABLE_Torus       |\
+                                      BGP_PERS_ENABLE_UPC         |\
+                                      BGP_PERS_ENABLE_EnvMon      |\
+                                      BGP_PERS_ENABLE_FPU         |\
+                                      BGP_PERS_ENABLE_StandAlone)
+
+// Default Setup for Hardware:
+//     Supports Stand-Alone CNA Applications.
+//     Bootloader-Extensions and XBs must turn off JTagLoader
+#define BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (BGP_PERS_ENABLE_JTagLoader  |\
+                                                   BGP_PERS_ENABLE_LockBox     |\
+                                                   BGP_PERS_ENABLE_BIC         |\
+                                                   BGP_PERS_ENABLE_DDR         |\
+                                                   BGP_PERS_ENABLE_GlobalInts  |\
+                                                   BGP_PERS_ENABLE_Collective  |\
+                                                   BGP_PERS_ENABLE_SerDes      |\
+                                                   BGP_PERS_ENABLE_UPC         |\
+                                                   BGP_PERS_ENABLE_EnvMon      |\
+                                                   BGP_PERS_ENABLE_FPU         |\
+                                                   BGP_PERS_ENABLE_StandAlone)
+
+// These fields are set by the control system depending on whether the node is a
+// compute node or an I/O node:
+//                                                   BGP_PERS_ENABLE_Torus       |
+//                                                   BGP_PERS_ENABLE_TorusMeshX  |
+//                                                   BGP_PERS_ENABLE_TorusMeshY  |
+//                                                   BGP_PERS_ENABLE_TorusMeshZ  |
+
+
+
+// Personality.L1Config: Controls and Settings for L1 Cache
+#define BGP_PERS_L1CONFIG_L1I          _BN( 0)    // L1 Enabled for Instructions
+#define BGP_PERS_L1CONFIG_L1D          _BN( 1)    // L1 Enabled for Data
+#define BGP_PERS_L1CONFIG_L1SWOA       _BN( 2)    // L1 Store WithOut Allocate
+#define BGP_PERS_L1CONFIG_L1Recovery   _BN( 3)    // L1 Full Recovery Mode
+#define BGP_PERS_L1CONFIG_L1WriteThru  _BN( 4)    // L1 Write-Thru (not svc_host changeable (yet?))
+#define BGP_PERS_L1CONFIG_DO_L1ITrans  _BN( 5)    // Enable L1 Instructions Transient?
+#define BGP_PERS_L1CONFIG_DO_L1DTrans  _BN( 6)    // Enable L1 Data         Transient?
+                                                   // unused 9bits: 7..15
+#define BGP_PERS_L1CONFIG_L1ITrans(x)  _B8(23,x)  // L1 Transient for Instructions in Groups of 16 Lines
+#define BGP_PERS_L1CONFIG_L1DTrans(x)  _B8(31,x)  // L1 Transient for Data         in Groups of 16 Lines
+
+#define BGP_PERS_L1CONFIG_DEFAULT (BGP_PERS_L1CONFIG_L1I         |\
+                                    BGP_PERS_L1CONFIG_L1D         |\
+                                    BGP_PERS_L1CONFIG_L1SWOA      |\
+				    BGP_PERS_L1CONFIG_L1Recovery  |\
+                                    BGP_PERS_L1CONFIG_L1WriteThru)
+
+typedef union TBGP_Pers_L1Cfg
+               {
+               uint32_t l1cfg;
+               struct {
+                      unsigned l1i         :  1;
+                      unsigned l1d         :  1;
+                      unsigned l1swoa      :  1;
+                      unsigned l1recovery  :  1;
+                      unsigned l1writethru :  1;
+                      unsigned do_l1itrans :  1;
+                      unsigned do_l1dtrans :  1;
+                      unsigned l1rsvd      :  9;
+                      unsigned l1itrans    :  8;
+                      unsigned l1dtrans    :  8;
+                      };
+               }
+               BGP_Pers_L1Cfg;
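+
+// The bitfield view above mirrors the BGP_PERS_L1CONFIG_* masks, assuming the
+// big-endian PowerPC bitfield layout (first member in the MSbs).  Illustrative:
+//   BGP_Pers_L1Cfg cfg;
+//   cfg.l1cfg = BGP_PERS_L1CONFIG_DEFAULT;   // then cfg.l1i == 1, cfg.l1d == 1, ...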
+
+// Personality.L2Config: Controls and Settings for L2 and Snoop
+#define BGP_PERS_L2CONFIG_L2I                _BN( 0)  // L2 Instruction Caching Enabled
+#define BGP_PERS_L2CONFIG_L2D                _BN( 1)  // L2 Data        Caching Enabled
+#define BGP_PERS_L2CONFIG_L2PF               _BN( 2)  // L2 Automatic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFO              _BN( 3)  // L2 Optimistic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFA              _BN( 4)  // L2 Aggressive Prefetching Enabled (fewer deeper streams)
+#define BGP_PERS_L2CONFIG_L2PFS              _BN( 5)  // L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers)
+#define BGP_PERS_L2CONFIG_Snoop              _BN( 6)  // Just NULL Snoop Filter
+#define BGP_PERS_L2CONFIG_SnoopCache         _BN( 7)  // Snoop Caches
+#define BGP_PERS_L2CONFIG_SnoopStream        _BN( 8)  // Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata)
+#define BGP_PERS_L2CONFIG_SnoopRange         _BN( 9)  // Snoop Range Filter when possible
+#define BGP_PERS_L2CONFIG_BUG824LUMPY        _BN(10)  // BPC_BUGS 824: Fix with Lumpy Performance
+#define BGP_PERS_L2CONFIG_BUG824SMOOTH       _BN(11)  // BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory
+#define BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12)  // Special for Snoop diagnostics. See bgp_vmm.c
+                                              // additional bits may be used for Snoop setting tweaks
+
+// Default L2 Configuration:
+//   L2 Enabled with Multi-Stream Aggressive Prefetching
+//   Snoop Enabled with all filters except Range
+#define BGP_PERS_L2CONFIG_DEFAULT   (BGP_PERS_L2CONFIG_L2I        |\
+                                      BGP_PERS_L2CONFIG_L2D        |\
+                                      BGP_PERS_L2CONFIG_L2PF       |\
+                                      BGP_PERS_L2CONFIG_L2PFO      |\
+                                      BGP_PERS_L2CONFIG_L2PFS      |\
+                                      BGP_PERS_L2CONFIG_Snoop      |\
+                                      BGP_PERS_L2CONFIG_SnoopCache |\
+                                      BGP_PERS_L2CONFIG_SnoopStream|\
+                                      BGP_PERS_L2CONFIG_BUG824LUMPY)
+
+
+// Personality.L3Config: Controls and Settings for L3
+//   Note: Most bits match BGP_L3x_CTRL DCRs.
+//         See arch/include/bpcore/bgl_l3_dcr.h
+#define BGP_PERS_L3CONFIG_L3I        _BN( 0)    // L3 Enabled for Instructions
+#define BGP_PERS_L3CONFIG_L3D        _BN( 1)    // L3 Enabled for Data
+#define BGP_PERS_L3CONFIG_L3PFI      _BN( 2)    // Inhibit L3 Prefetch from DDR
+#define BGP_PERS_L3CONFIG_DO_Scratch _BN( 3)    // Set up Scratch?
+#define BGP_PERS_L3CONFIG_DO_PFD0    _BN( 4)    // Adjust PFD0?
+#define BGP_PERS_L3CONFIG_DO_PFD1    _BN( 5)    // Adjust PFD1?
+#define BGP_PERS_L3CONFIG_DO_PFDMA   _BN( 6)    // Adjust PFDMA?
+#define BGP_PERS_L3CONFIG_DO_PFQD    _BN( 7)    // Adjust PFQD?
+                                      // 8..15 unused/available
+#define BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x)  // Scratch 8ths: 0..8
+#define BGP_PERS_L3CONFIG_PFD0(x)    _B3(22,x)  // Prefetch Depth for DP0
+#define BGP_PERS_L3CONFIG_PFD1(x)    _B3(25,x)  // Prefetch Depth for DP1
+#define BGP_PERS_L3CONFIG_PFDMA(x)   _B3(28,x)  // Prefetch Depth for DMA
+#define BGP_PERS_L3CONFIG_PFQD(x)    _B3(31,x)  // Prefetch Queue Depth
+
+// General L3 Configuration
+typedef union TBGP_Pers_L3Cfg
+               {
+               uint32_t l3cfg;
+               struct {
+                      unsigned l3i        :  1;
+                      unsigned l3d        :  1;
+                      unsigned l3pfi      :  1;
+                      unsigned do_scratch :  1;
+                      unsigned do_pfd0    :  1;
+                      unsigned do_pfd1    :  1;
+                      unsigned do_pfdma   :  1;
+                      unsigned do_pfqd    :  1;
+                      unsigned rsvd       :  8;
+                      unsigned scratch    :  4;
+                      unsigned pfd0       :  3;
+                      unsigned pfd1       :  3;
+                      unsigned pfdma      :  3;
+                      unsigned pfqd       :  3;
+                      };
+               }
+               BGP_Pers_L3Cfg;
+
+// Default L3 Configuration:
+//   L3 Enabled for Instructions and Data
+//   No Prefetch Depth overrides, No Scratch, No Scrambling.
+#define BGP_PERS_L3CONFIG_DEFAULT    (BGP_PERS_L3CONFIG_L3I |\
+                                       BGP_PERS_L3CONFIG_L3D |\
+				       BGP_PERS_L3CONFIG_DO_PFDMA |\
+                                       BGP_PERS_L3CONFIG_PFDMA(4))
+
+
+// L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users)
+#define BGP_PERS_L3SELECT_DO_CacheSel _BN( 0)   // Adjust Cache Select setting?
+#define BGP_PERS_L3SELECT_DO_BankSel  _BN( 1)   // Adjust Bank  Select setting?
+#define BGP_PERS_L3SELECT_Scramble    _BN( 2)   // L3 Scramble
+#define BGP_PERS_L3SELECT_PFby2       _BN( 3)   // Prefetch by 2 if set, else by 1 (default) if clear.
+#define BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) // PhysAddr Bit for L3 Selection (0..26)
+#define BGP_PERS_L3SELECT_BankSel(x)  _B5(13,x) // PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel.
+
+typedef union TBGP_Pers_L3Select
+               {
+               uint32_t l3select;
+               struct {
+                      unsigned do_CacheSel :  1;
+                      unsigned do_BankSel  :  1;
+                      unsigned l3Scramble  :  1;
+                      unsigned l3_PF_by2   :  1; // default is PreFetch by 1.
+                      unsigned CacheSel    :  5; // Physical Address Bit for L3 Selection (0..26)
+                      unsigned BankSel     :  5; // 0..26 Must be strictly greater than CacheSel.
+                      unsigned rsvd        : 18;
+                      };
+               }
+               BGP_Pers_L3Select;
+
+// Default L3 Selection Configuration: Disable overrides, but set h/w default values.
+#define BGP_PERS_L3SELECT_DEFAULT  (BGP_PERS_L3SELECT_CacheSel(21) |\
+                                     BGP_PERS_L3SELECT_BankSel(26))
+
+// Tracing Masks and default trace configuration
+#define BGP_TRACE_CONFIG    _BN( 0)   // Display Encoded personality config on startup
+#define BGP_TRACE_ENTRY     _BN( 1)   // Function enter and exit
+#define BGP_TRACE_INTS      _BN( 2)   // Standard Interrupt Dispatch
+#define BGP_TRACE_CINTS     _BN( 3)   // Critical Interrupt Dispatch
+#define BGP_TRACE_MCHK      _BN( 4)   // Machine Check Dispatch
+#define BGP_TRACE_SYSCALL   _BN( 5)   // System Calls
+#define BGP_TRACE_VMM       _BN( 6)   // Virtual Memory Manager
+#define BGP_TRACE_DEBUG     _BN( 7)   // Debug Events (app crashes etc)
+#define BGP_TRACE_TORUS     _BN( 8)   // Torus Init
+#define BGP_TRACE_TREE      _BN( 9)   // Tree  Init
+#define BGP_TRACE_GLOBINT   _BN(10)   // Global Interrupts
+#define BGP_TRACE_DMA       _BN(11)   // DMA Setup
+#define BGP_TRACE_SERDES    _BN(12)   // SerDes Init
+#define BGP_TRACE_TESTINT   _BN(13)   // Test Interface, ECID, Config
+#define BGP_TRACE_ETHTX     _BN(14)   // Ethernet Transmit
+#define BGP_TRACE_ETHRX     _BN(15)   // Ethernet Receive
+#define BGP_TRACE_POWER     _BN(16)   // Power Control
+#define BGP_TRACE_PROCESS   _BN(17)   // Process/Thread Mapping
+#define BGP_TRACE_EXIT_SUM  _BN(18)   // Report Per-Core Interrupt and Error Summary on exit()
+#define BGP_TRACE_SCHED     _BN(19)   // Report Scheduler Information
+#define BGP_TRACE_RAS       _BN(20)   // Report RAS Events (in addition to sending to Host)
+#define BGP_TRACE_ECID      _BN(21)   // Report UCI and ECID on boot
+#define BGP_TRACE_FUTEX     _BN(22)   // Trace Futex operations
+#define BGP_TRACE_MemAlloc  _BN(23)   // Trace MMAP and Shared Memory operations
+#define BGP_TRACE_WARNINGS  _BN(30)   // Trace Warnings
+#define BGP_TRACE_VERBOSE   _BN(31)   // Verbose Tracing Modifier
+
+// Default trace config: display the encoded personality config and report UCI+ECID on boot
+#define BGP_PERS_TRACE_DEFAULT (BGP_TRACE_CONFIG | BGP_TRACE_ECID)
+
+
+typedef struct BGP_Personality_Kernel_t
+                {
+                uint32_t  UniversalComponentIdentifier; // see include/common/bgp_ras.h
+
+                uint32_t  FreqMHz;                      // Clock_X1 Frequency in MegaHertz (eg 1000)
+
+                uint32_t  RASPolicy;                    // Verbosity level, and other RAS Reporting Controls
+
+                // Process Config:
+                //   Each byte represents a process (1 to 4 processes supported)
+                //     No core can be assigned to more than 1 process.
+                //     Cores assigned to no process are disabled.
+                //     Cores within a process share the same address space.
+                //     Separate processes have distinct address spaces.
+                //   Within each process (0 to 4 cores assigned to a process):
+                //     Lower nibble is a bitmask of which cores belong to that process.
+                //     Upper nibble is a bitmask of which of those cores run privileged (vs. user).
+                //     Processes with zero cores do not exist.
+                //   E.g., for Diagnostics, we use 0xFF000000, which means
+                //     that all 4 cores run privileged in process 0.
+                uint32_t  ProcessConfig;
+
+                uint32_t  TraceConfig;        // Kernel Tracing Enables
+                uint32_t  NodeConfig;         // Kernel Driver Enables
+                uint32_t  L1Config;           // L1 Config and setup controls
+                uint32_t  L2Config;           // L2 and Snoop Config and setup controls
+                uint32_t  L3Config;           // L3 Config and setup controls
+                uint32_t  L3Select;           // L3 Cache and Bank Selection controls
+
+                uint32_t  SharedMemMB;        // Memory to Reserve for Sharing among Processes
+
+                uint32_t  ClockStop0;        // Upper 11 bits of ClockStop, enabled if non-zero
+                uint32_t  ClockStop1;        // Lower 32 bits of ClockStop, enabled if non-zero
+                }
+                BGP_Personality_Kernel_t;
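+
+// ProcessConfig decode sketch (illustrative; pc stands for any ProcessConfig word),
+// following the byte layout described above:
+//   int p;
+//   for (p = 0; p < 4; p++) {
+//       uint8_t byte  = (pc >> (24 - 8*p)) & 0xFF;
+//       uint8_t cores = byte & 0x0F;         // cores belonging to process p
+//       uint8_t priv  = (byte >> 4) & 0x0F;  // which of those run privileged
+//   }
+// e.g. BGP_PERS_PROCESSCONFIG_SMP (0x0F000000) yields all four cores, user-space,
+// in process 0.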
+
+
+// Defaults for DDR Config
+#define BGP_PERS_DDR_PBX0_DEFAULT             (0x411D1512)    // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_PBX1_DEFAULT             (0x40000000)    // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_MemConfig0_DEFAULT       (0x81fc4080)    // MemConfig
+#define BGP_PERS_DDR_MemConfig1_DEFAULT       (0x0C0ff800)    // MemConfig
+#define BGP_PERS_DDR_ParmCtl0_DEFAULT         (0x3216c008)    // Parm Control
+#define BGP_PERS_DDR_ParmCtl1_DEFAULT         (0x4168c323)    // Parm Control
+#define BGP_PERS_DDR_MiscCtl0_DEFAULT         (0)    // Misc. Control
+#define BGP_PERS_DDR_MiscCtl1_DEFAULT         (0)    // Misc. Control
+#define BGP_PERS_DDR_CmdBufMode0_DEFAULT      (0x00400fdf)    // Command Buffer Mode
+#define BGP_PERS_DDR_CmdBufMode1_DEFAULT      (0xffc80600)    // Command Buffer Mode
+#define BGP_PERS_DDR_RefrInterval0_DEFAULT    (0xD1000002)    // Refresh Interval
+#define BGP_PERS_DDR_RefrInterval1_DEFAULT    (0x04000000)    // Refresh Interval
+#define BGP_PERS_DDR_ODTCtl0_DEFAULT          (0)    // ODT Control
+#define BGP_PERS_DDR_ODTCtl1_DEFAULT          (0)    // ODT Control
+#define BGP_PERS_DDR_DataStrobeCalib0_DEFAULT (0x08028a64)    // Data Strobe Calibration
+#define BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805)    // Data Strobe Calibration
+#define BGP_PERS_DDR_DQSCtl_DEFAULT           (0x00000168)    // DQS Control
+#define BGP_PERS_DDR_Throttle_DEFAULT         (0)    // DDR Throttle
+//1#define BGP_PERS_DDR_DDRSizeMB_DEFAULT        (4096) // Total DDR size in MegaBytes (512MB - 16384MB).
+#define BGP_PERS_DDR_DDRSizeMB_DEFAULT        (1024) // Total DDR size in MegaBytes (512MB - 16384MB).
+//1#define BGP_PERS_DDR_Chips_DEFAULT            (0x0B) // Type of DDR chips
+#define BGP_PERS_DDR_Chips_DEFAULT            (0x09) // Type of DDR chips
+#define BGP_PERS_DDR_CAS_DEFAULT              (4)    // CAS Latency (3, 4, or 5)
+
+
+#define BGP_PERS_DDRFLAGS_ENABLE_Scrub        _BN(0) // Enable DDR Slow Scrub when 1
+
+// DDRFLAGS default: Enable Slow Scrub.
+#define BGP_PERS_DDRFLAGS_DEFAULT             (BGP_PERS_DDRFLAGS_ENABLE_Scrub)
+
+#define BGP_PERS_SRBS0_DEFAULT                (0)
+#define BGP_PERS_SRBS1_DEFAULT                (0)
+
+typedef struct BGP_Personality_DDR_t
+                {
+                uint32_t  DDRFlags;         // Misc. Flags and Settings
+                uint32_t  SRBS0;            // Controller 0 SRBS/CK Settings
+                uint32_t  SRBS1;            // Controller 1 SRBS/CK Settings
+                uint32_t  PBX0;             // PBX DCRs setting (in IBM bit numbering)
+                uint32_t  PBX1;             // PBX DCRs setting (in IBM bit numbering)
+                uint32_t  MemConfig0;       // MemConfig
+                uint32_t  MemConfig1;       // MemConfig
+                uint32_t  ParmCtl0;         // Parm Control
+                uint32_t  ParmCtl1;         // Parm Control
+                uint32_t  MiscCtl0;         // Misc. Control
+                uint32_t  MiscCtl1;         // Misc. Control
+                uint32_t  CmdBufMode0;      // Command Buffer Mode
+                uint32_t  CmdBufMode1;      // Command Buffer Mode
+                uint32_t  RefrInterval0;    // Refresh Interval
+                uint32_t  RefrInterval1;    // Refresh Interval
+                uint32_t  ODTCtl0;          // ODT Control
+                uint32_t  ODTCtl1;          // ODT Control
+                uint32_t  DataStrobeCalib0; // Data Strobe Calibration
+                uint32_t  DataStrobeCalib1; // Data Strobe Calibration
+                uint32_t  DQSCtl;           // DQS Control
+                uint32_t  Throttle;         // DDR Throttle
+                uint16_t  DDRSizeMB;        // Total DDR size in MegaBytes (512MB - 16384MB).
+                uint8_t   Chips;            // Type of DDR chips
+                uint8_t   CAS;              // CAS Latency (3, 4, or 5)
+                }
+                BGP_Personality_DDR_t;
+
+
+typedef struct BGP_Personality_Networks_t
+                {
+                uint32_t  BlockID;         // a.k.a. PartitionID
+
+                uint8_t   Xnodes,
+                          Ynodes,
+                          Znodes,
+                          Xcoord,
+                          Ycoord,
+                          Zcoord;
+
+                // PSet Support
+                uint16_t  PSetNum;
+                uint32_t  PSetSize;
+                uint32_t  RankInPSet;
+
+                uint32_t  IOnodes;
+                uint32_t  Rank;               // Rank in Block (or Partition)
+                uint32_t  IOnodeRank;         // Rank (and therefore P2P Addr) of my I/O Node
+                uint16_t  TreeRoutes[ 16 ];
+                }
+                BGP_Personality_Networks_t;
+
+
+typedef struct BGP_IP_Addr_t
+                {
+                // IPv6 addresses are 16 bytes, where the
+                //  lower 4 bytes (indices 12-15) can be used for an IPv4 address.
+                uint8_t octet[ 16 ];
+                }
+                BGP_IP_Addr_t;
+
+
+typedef struct BGP_Personality_Ethernet_t
+                {
+                uint16_t       MTU;            // Initial emac MTU size
+                uint8_t        EmacID[6];      // MAC address for emac
+                BGP_IP_Addr_t IPAddress;      // IPv6/IPv4 address of this node
+                BGP_IP_Addr_t IPNetmask;      // IPv6/IPv4 netmask
+                BGP_IP_Addr_t IPBroadcast;    // IPv6/IPv4 broadcast address
+                BGP_IP_Addr_t IPGateway;      // IPv6/IPv4 initial gateway (zero if none)
+                BGP_IP_Addr_t NFSServer;      // IPv6/IPv4 NFS system software server address
+                BGP_IP_Addr_t serviceNode;    // IPv6/IPv4 address of service node
+
+                // NFS mount info
+                char      NFSExportDir[BGP_PERSONALITY_LEN_NFSDIR];
+                char      NFSMountDir[BGP_PERSONALITY_LEN_NFSDIR];
+
+                // Security Key for Service Node authentication
+                uint8_t   SecurityKey[BGP_PERSONALITY_LEN_SECKEY ];
+                }
+                BGP_Personality_Ethernet_t;
+
+
+
+#define BGP_PERS_BLKCFG_IPOverCollective	_BN(31)
+#define BGP_PERS_BLKCFG_IPOverTorus		_BN(30)
+#define BGP_PERS_BLKCFG_IPOverCollectiveVC	_BN(29)
+#define BGP_PERS_BLKCFG_CIOModeSel(x)		_B2(28,x)
+#define BGP_PERS_BLKCFG_bgsysFSSel(x)		_B3(26,x)
+#define BGP_PERS_BLKCFG_CIOMode_Full		0
+#define BGP_PERS_BLKCFG_CIOMode_MuxOnly		1
+#define BGP_PERS_BLKCFG_CIOMode_None		2
+#define BGP_PERS_BLKCFG_bgsys_NFSv3		0
+#define BGP_PERS_BLKCFG_bgsys_NFSv4		1
+#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \
+				 BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+
+typedef struct TBGP_Personality_t
+                {
+                uint16_t  CRC;
+                uint8_t   Version;
+                uint8_t   PersonalitySizeWords;
+
+                BGP_Personality_Kernel_t   Kernel_Config;
+
+                BGP_Personality_DDR_t      DDR_Config;
+
+                BGP_Personality_Networks_t Network_Config;
+
+                BGP_Personality_Ethernet_t Ethernet_Config;
+
+		uint8_t  Block_Config;
+		uint8_t  padd[7]; // Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr)
+                                  // to simplify jtag operations. See issue #140.
+                }
+                BGP_Personality_t;
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION)
+//  This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \
+           0,                                              /* CRC                  */ \
+           BGP_PERSONALITY_VERSION,                       /* Version              */ \
+           (sizeof(BGP_Personality_t)/sizeof(uint32_t)),  /* PersonalitySizeWords */ \
+           {  /* BGP_Personality_Kernel_t: */ \
+              0,                                   /* MachineLocation        */ \
+              BGP_DEFAULT_FREQ,                   /* FreqMHz       */ \
+              BGP_PERS_RASPOLICY_DEFAULT,         /* RASPolicy     */ \
+              BGP_PERS_PROCESSCONFIG_DEFAULT,     /* ProcessConfig */ \
+              BGP_PERS_TRACE_DEFAULT,             /* TraceConfig   */ \
+              BGP_PERS_NODECONFIG_DEFAULT,        /* NodeConfig    */ \
+              BGP_PERS_L1CONFIG_DEFAULT,          /* L1Config      */ \
+              BGP_PERS_L2CONFIG_DEFAULT,          /* L2Config      */ \
+              BGP_PERS_L3CONFIG_DEFAULT,          /* L3Config      */ \
+              BGP_PERS_L3SELECT_DEFAULT,          /* L3Select      */ \
+              0,                                   /* SharedMemMB   */ \
+              0,                                   /* ClockStop0    */ \
+              0                                    /* ClockStop1    */ \
+              }, \
+           {  /* BGP_Personality_DDR_t: */ \
+              BGP_PERS_DDRFLAGS_DEFAULT,             /* DDRFlags         */ \
+              BGP_PERS_SRBS0_DEFAULT,                /* SRBS0            */ \
+              BGP_PERS_SRBS1_DEFAULT,                /* SRBS1            */ \
+              BGP_PERS_DDR_PBX0_DEFAULT,             /* PBX0             */ \
+              BGP_PERS_DDR_PBX1_DEFAULT,             /* PBX1             */ \
+              BGP_PERS_DDR_MemConfig0_DEFAULT,       /* MemConfig0       */ \
+              BGP_PERS_DDR_MemConfig1_DEFAULT,       /* MemConfig1       */ \
+              BGP_PERS_DDR_ParmCtl0_DEFAULT,         /* ParmCtl0         */ \
+              BGP_PERS_DDR_ParmCtl1_DEFAULT,         /* ParmCtl1         */ \
+              BGP_PERS_DDR_MiscCtl0_DEFAULT,         /* MiscCtl0         */ \
+              BGP_PERS_DDR_MiscCtl1_DEFAULT,         /* MiscCtl1         */ \
+              BGP_PERS_DDR_CmdBufMode0_DEFAULT,      /* CmdBufMode0      */ \
+              BGP_PERS_DDR_CmdBufMode1_DEFAULT,      /* CmdBufMode1      */ \
+              BGP_PERS_DDR_RefrInterval0_DEFAULT,    /* RefrInterval0    */ \
+              BGP_PERS_DDR_RefrInterval1_DEFAULT,    /* RefrInterval1    */ \
+              BGP_PERS_DDR_ODTCtl0_DEFAULT,          /* ODTCtl0          */ \
+              BGP_PERS_DDR_ODTCtl1_DEFAULT,          /* ODTCtl1          */ \
+              BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+              BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+              BGP_PERS_DDR_DQSCtl_DEFAULT,           /* DQSCtl           */ \
+              BGP_PERS_DDR_Throttle_DEFAULT,         /* Throttle         */ \
+              BGP_PERS_DDR_DDRSizeMB_DEFAULT,        /* DDRSizeMB        */ \
+              BGP_PERS_DDR_Chips_DEFAULT,            /* Chips            */ \
+              BGP_PERS_DDR_CAS_DEFAULT               /* CAS              */ \
+              }, \
+           {  /* BGP_Personality_Networks_t: */ \
+              0,                                   /* BlockID                */ \
+              1, 1, 1,                             /* Xnodes, Ynodes, Znodes */ \
+              0, 0, 0,                             /* Xcoord, Ycoord, Zcoord */ \
+              0,                                   /* PSetNum                */ \
+              0,                                   /* PSetSize               */ \
+              0,                                   /* RankInPSet             */ \
+              0,                                   /* IOnodes                */ \
+              0,                                   /* Rank                   */ \
+              0,                                   /* IOnodeRank             */ \
+              { 0, }                               /* TreeRoutes[ 16 ]       */ \
+              }, \
+           {  /* BGP_Personality_Ethernet_t: */ \
+              1536,                                /* MTU              */ \
+              { 0, },                              /* EmacID[6]        */ \
+              { { 0x00,0x00,0x00,0x00,             /* IPAddress        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPNetmask        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0xFF,0xFF,0xFF,0x70  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPBroadcast      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPGateway        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* NFSServer        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* serviceNode      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              "",                                  /* NFSExportDir[32] */ \
+              "",                                  /* NFSMountDir[32]  */ \
+              { 0x00, }                            /* SecurityKey[32]  */ \
+              }, \
+           0,                                      /* Block_Config     */ \
+           { 0, }                                  /* padd[7]          */ \
+           }
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE)
+//  This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \
+           0,                                             /* CRC                  */ \
+           BGP_PERSONALITY_VERSION,                      /* Version              */ \
+           (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+           {  /* BGP_Personality_Kernel_t: */ \
+              0,                                          /* MachineLocation      */ \
+              BGP_DEFAULT_FREQ,                          /* FreqMHz       */ \
+              BGP_PERS_RASPOLICY_DEFAULT,                /* RASPolicy     */ \
+              BGP_PERS_PROCESSCONFIG_SMP,                /* ProcessConfig */ \
+              BGP_PERS_TRACE_DEFAULT,                    /* TraceConfig   */ \
+              BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE,  /* NodeConfig    */ \
+              BGP_PERS_L1CONFIG_DEFAULT,                 /* L1Config      */ \
+              BGP_PERS_L2CONFIG_DEFAULT,                 /* L2Config      */ \
+              BGP_PERS_L3CONFIG_DEFAULT,                 /* L3Config      */ \
+              BGP_PERS_L3SELECT_DEFAULT,                 /* L3Select      */ \
+              0,                                          /* SharedMemMB   */ \
+              0,                                          /* ClockStop0    */ \
+              0                                           /* ClockStop1    */ \
+              }, \
+           {  /* BGP_Personality_DDR_t: */ \
+              BGP_PERS_DDRFLAGS_DEFAULT,             /* DDRFlags         */ \
+              BGP_PERS_SRBS0_DEFAULT,                /* SRBS0            */ \
+              BGP_PERS_SRBS1_DEFAULT,                /* SRBS1            */ \
+              BGP_PERS_DDR_PBX0_DEFAULT,             /* PBX0             */ \
+              BGP_PERS_DDR_PBX1_DEFAULT,             /* PBX1             */ \
+              BGP_PERS_DDR_MemConfig0_DEFAULT,       /* MemConfig0       */ \
+              BGP_PERS_DDR_MemConfig1_DEFAULT,       /* MemConfig1       */ \
+              BGP_PERS_DDR_ParmCtl0_DEFAULT,         /* ParmCtl0         */ \
+              BGP_PERS_DDR_ParmCtl1_DEFAULT,         /* ParmCtl1         */ \
+              BGP_PERS_DDR_MiscCtl0_DEFAULT,         /* MiscCtl0         */ \
+              BGP_PERS_DDR_MiscCtl1_DEFAULT,         /* MiscCtl1         */ \
+              BGP_PERS_DDR_CmdBufMode0_DEFAULT,      /* CmdBufMode0      */ \
+              BGP_PERS_DDR_CmdBufMode1_DEFAULT,      /* CmdBufMode1      */ \
+              BGP_PERS_DDR_RefrInterval0_DEFAULT,    /* RefrInterval0    */ \
+              BGP_PERS_DDR_RefrInterval1_DEFAULT,    /* RefrInterval1    */ \
+              BGP_PERS_DDR_ODTCtl0_DEFAULT,          /* ODTCtl0          */ \
+              BGP_PERS_DDR_ODTCtl1_DEFAULT,          /* ODTCtl1          */ \
+              BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+              BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+              BGP_PERS_DDR_DQSCtl_DEFAULT,           /* DQSCtl           */ \
+              BGP_PERS_DDR_Throttle_DEFAULT,         /* Throttle         */ \
+              BGP_PERS_DDR_DDRSizeMB_DEFAULT,        /* DDRSizeMB        */ \
+              BGP_PERS_DDR_Chips_DEFAULT,            /* Chips            */ \
+              BGP_PERS_DDR_CAS_DEFAULT               /* CAS              */ \
+              }, \
+           {  /* BGP_Personality_Networks_t: */ \
+              0,                                   /* BlockID                */ \
+              1, 1, 1,                             /* Xnodes, Ynodes, Znodes */ \
+              0, 0, 0,                             /* Xcoord, Ycoord, Zcoord */ \
+              0,                                   /* PSetNum                */ \
+              0,                                   /* PSetSize               */ \
+              0,                                   /* RankInPSet             */ \
+              0,                                   /* IOnodes                */ \
+              0,                                   /* Rank                   */ \
+              0,                                   /* IOnodeRank             */ \
+              { 0, }                               /* TreeRoutes[ 16 ]       */ \
+              }, \
+           {  /* BGP_Personality_Ethernet_t: */ \
+              1536,                                /* MTU              */ \
+              { 0, },                              /* EmacID[6]        */ \
+              { { 0x00,0x00,0x00,0x00,             /* IPAddress        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPNetmask        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0xFF,0xFF,0xFF,0x70  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPBroadcast      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPGateway        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* NFSServer        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* serviceNode      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              "",                                  /* NFSExportDir[32] */ \
+              "",                                  /* NFSMountDir[32]  */ \
+              { 0x00, }                            /* SecurityKey[32]  */ \
+              }, \
+           0,                                      /* Block_Config     */ \
+           { 0, }                                  /* padd[7]          */ \
+           }
+
+
+
+
+#endif // Add nothing below this line.
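
For orientation, here is a minimal sketch of how a consumer of the header above (the bootloader or a host-side tool) might instantiate the personality from the simulation defaults and then override a few fields. This is illustrative only: it assumes the personality header added by this patch is on the include path, and every value written below is an arbitrary example rather than a value taken from this patch.

    #include <stdint.h>
    #include <stdio.h>
    /* ... plus the personality header added above */

    /* Start from the simulation defaults defined in the header. */
    static BGP_Personality_t pers = BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER;

    int main(void)
    {
            /* Override defaults with example values a control system
             * would normally supply. */
            pers.Network_Config.Xnodes = 8;
            pers.Network_Config.Ynodes = 8;
            pers.Network_Config.Znodes = 8;
            pers.DDR_Config.DDRSizeMB  = 2048;

            printf("personality: %u words, %u MB DDR, CAS %u\n",
                   (unsigned)pers.PersonalitySizeWords,
                   (unsigned)pers.DDR_Config.DDRSizeMB,
                   (unsigned)pers.DDR_Config.CAS);
            return 0;
    }
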
diff --git a/arch/powerpc/boot/dts/bgp.dts b/arch/powerpc/boot/dts/bgp.dts
new file mode 100644
index 0000000..684edef
--- /dev/null
+++ b/arch/powerpc/boot/dts/bgp.dts
@@ -0,0 +1,127 @@
+/*
+ * Device Tree Source for IBM BlueGene/P 
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com>
+ *
+ * Cloned from 'Ebony' and revised.
+ *
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "ibm,bluegenep";
+	compatible = "ibm,bluegenep";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+/*	aliases {
+		ethernet0 = &EMAC0;
+	};
+*/
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,450";
+			reg = <0x00000000>;
+			clock-frequency = <850000000>;
+			timebase-frequency = <850000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			model = "PowerPC,450";
+			reg = <0x00000001>;
+			clock-frequency = <850000000>;
+			timebase-frequency = <850000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+
+		cpu@2 {
+			device_type = "cpu";
+			model = "PowerPC,450";
+			reg = <0x00000002>;
+			clock-frequency = <850000000>;
+			timebase-frequency = <850000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+
+		cpu@3 {
+			device_type = "cpu";
+			model = "PowerPC,450";
+			reg = <0x00000003>;
+			clock-frequency = <850000000>;
+			timebase-frequency = <850000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; // from wrapper
+	};
+
+	ibm,bluegene {
+		cns {	// from wrapper
+			version	 = <0>;
+			size	 = <0>;
+			base-va  = <0>;
+			base-pa  = <0>; // assume <= 4G
+			services = <0>;
+		};
+		personality {	// from wrapper
+			version		= <0>;
+			frequency	= <850000000>;
+		};
+	};
+
+	chosen {
+		bootargs = "console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608 rdinit=/sbin/init";
+
+		// the bgp wrapper locates a ramdisk and updates initrd-start/end
+		linux,initrd-start = <0>;
+		linux,initrd-end = <0>;
+	};
+};
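
The boot wrapper is expected to rewrite the zeroed "from wrapper" properties above before the kernel sees them. As a hedged sketch of the consuming side — the helper name here is hypothetical, though of_find_node_by_path()/of_get_property() are the standard OF accessors — kernel code could read the frequency back like this:

    #include <linux/of.h>

    /* Hypothetical helper: fetch the clock frequency the boot wrapper
     * stored in /ibm,bluegene/personality; returns 0 if absent. */
    static u32 bgp_personality_frequency(void)
    {
            struct device_node *np;
            const u32 *prop;
            u32 freq = 0;

            np = of_find_node_by_path("/ibm,bluegene/personality");
            if (!np)
                    return 0;
            prop = of_get_property(np, "frequency", NULL);
            if (prop)
                    freq = *prop;
            of_node_put(np);
            return freq;
    }
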
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 965c237..23c6ccf 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -222,6 +222,11 @@
     platformo="$object/fixed-head.o $object/redboot-83xx.o"
     binary=y
     ;;
+bgp)
+    platformo="--section-start bgstart=0 $object/fixed-head.o $object/bgp.o"
+    link_address='0x00800000'
+    ;;
+
 esac
 
 vmz="$tmpdir/`basename \"$kernel\"`.$ext"
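
The new platform entry hands ld "--section-start bgstart=0", so anything bgp.c places in a section named "bgstart" is linked at address zero while the rest of the wrapper sits at the 0x00800000 link address. A hedged sketch of that pattern follows; bgp.c itself is not shown in this hunk, _zimage_start is the usual crt0 entry symbol, and the stub name is hypothetical:

    /* Link-time layout sketch: this stub lands at address 0 because of
     * --section-start bgstart=0; everything else links at 0x00800000. */
    extern void _zimage_start(void);

    static void __attribute__((section("bgstart"), used))
    bgp_start_stub(void)
    {
            /* Entered low; transfer control to the relocated wrapper. */
            _zimage_start();
    }
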
diff --git a/arch/powerpc/configs/44x/bgp_defconfig b/arch/powerpc/configs/44x/bgp_defconfig
new file mode 100644
index 0000000..897d02b
--- /dev/null
+++ b/arch/powerpc/configs/44x/bgp_defconfig
@@ -0,0 +1,930 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.29.1
+# Wed May  6 13:09:35 2009
+#
+# CONFIG_PPC64 is not set
+
+#
+# Processor support
+#
+# CONFIG_6xx is not set
+# CONFIG_PPC_85xx is not set
+# CONFIG_PPC_8xx is not set
+# CONFIG_40x is not set
+CONFIG_44x=y
+# CONFIG_E200 is not set
+CONFIG_PPC_FPU=y
+CONFIG_4xx=y
+CONFIG_BOOKE=y
+CONFIG_PTE_64BIT=y
+CONFIG_PHYS_64BIT=y
+CONFIG_PPC_MMU_NOHASH=y
+# CONFIG_PPC_MM_SLICES is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+# CONFIG_NOT_COHERENT_CACHE is not set
+CONFIG_L1_WRITETHROUGH=y
+CONFIG_PPC32=y
+CONFIG_WORD_SIZE=32
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_MMU=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_HARDIRQS=y
+# CONFIG_HAVE_SETUP_PER_CPU_AREA is not set
+CONFIG_IRQ_PER_CPU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_ARCH_HAS_ILOG2_U32=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+# CONFIG_ARCH_NO_VIRT_TO_BUS is not set
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_OF=y
+CONFIG_PPC_UDBG_16550=y
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_GENERIC_BUG=y
+# CONFIG_DEFAULT_UIMAGE is not set
+CONFIG_PPC_DCR_NATIVE=y
+# CONFIG_PPC_DCR_MMIO is not set
+CONFIG_PPC_DCR=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_STOP_MACHINE=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+# CONFIG_FREEZER is not set
+
+#
+# Platform support
+#
+# CONFIG_PPC_CELL is not set
+# CONFIG_PPC_CELL_NATIVE is not set
+# CONFIG_PQ2ADS is not set
+# CONFIG_BAMBOO is not set
+# CONFIG_EBONY is not set
+# CONFIG_SAM440EP is not set
+# CONFIG_SEQUOIA is not set
+# CONFIG_TAISHAN is not set
+# CONFIG_KATMAI is not set
+# CONFIG_RAINIER is not set
+# CONFIG_WARP is not set
+# CONFIG_CANYONLANDS is not set
+# CONFIG_YOSEMITE is not set
+CONFIG_BGP=y
+# CONFIG_XILINX_VIRTEX440_GENERIC_BOARD is not set
+CONFIG_BLUEGENE=y
+# CONFIG_BLUEGENE_MAMBO is not set
+# CONFIG_BGP_DD1 is not set
+CONFIG_BLUEGENE_TCP=y
+# CONFIG_BLUEGENE_DMA_MEMCPY is not set
+CONFIG_BLUEGENE_COLLECTIVE_TRACE=y
+CONFIG_BLUEGENE_TORUS_TRACE=y
+
+CONFIG_BGP_STATISTICS=y
+# CONFIG_BLUEGENE_SHARE_WITH_VRNIC is not set
+CONFIG_BGP_NFS_FIX=y
+# CONFIG_BLUEGENE_TCP_WITHOUT_NAPI is not set
+# CONFIG_BLUEGENE_UNIPROCESSOR is not set
+# CONFIG_BLUEGENE_SOCKETS is not set
+CONFIG_HUGE_KMALLOC=y
+CONFIG_DEBUG_ALIGNMENT_HISTOGRAM=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_IBM_OCP=y
+CONFIG_IBM_EMAC4=y
+# CONFIG_PPC4xx_DMA is not set
+CONFIG_PPC_GEN550=y
+# CONFIG_IPIC is not set
+# CONFIG_MPIC is not set
+# CONFIG_MPIC_WEIRD is not set
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+# CONFIG_PPC_970_NAP is not set
+# CONFIG_PPC_INDIRECT_IO is not set
+# CONFIG_GENERIC_IOMAP is not set
+# CONFIG_CPU_FREQ is not set
+# CONFIG_FSL_ULI1575 is not set
+# CONFIG_SIMPLE_GPIO is not set
+
+#
+# Kernel options
+#
+CONFIG_HIGHMEM=y
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_MATH_EMULATION=y
+# CONFIG_IOMMU_HELPER is not set
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_HAS_WALK_MEMORY=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+# CONFIG_IRQ_ALL_CPUS is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+# CONFIG_PPC_4K_PAGES is not set
+# CONFIG_PPC_16K_PAGES is not set
+CONFIG_PPC_64K_PAGES=y
+CONFIG_FORCE_MAX_ZONEORDER=11
+CONFIG_PROC_DEVICETREE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608"
+CONFIG_WRAP_COPY_TOFROM_USER=y
+CONFIG_EXTRA_TARGETS=""
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_ZONE_DMA=y
+CONFIG_4xx_SOC=y
+CONFIG_PPC_PCI_CHOICE=y
+# CONFIG_PCI is not set
+# CONFIG_PCI_DOMAINS is not set
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+# CONFIG_HAS_RAPIDIO is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_PAGE_OFFSET=0xc0000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_PHYSICAL_START=0x00000000
+CONFIG_TASK_SIZE=0xc0000000
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_COMPAT_NET_DEV_OPS=y
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+CONFIG_INET_TUNNEL=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_BEET is not set
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+# CONFIG_MTD is not set
+CONFIG_OF_DEVICE=y
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_XILINX_SYSACE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_MACINTOSH_DRIVERS is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_NET_ETHERNET is not set
+# CONFIG_NETDEV_1000 is not set
+CONFIG_NETDEV_10000=y
+CONFIG_BGP_COLLECTIVE=y
+CONFIG_BGP_COLLECTIVE_IP_CHECKSUM=y
+# CONFIG_BGP_COLLECTIVE_NAPI is not set
+CONFIG_BGP_DMA=y
+CONFIG_BGP_TORUS=y
+CONFIG_BGP_TORUS_DIAGNOSTICS=y
+# CONFIG_BGP_FRANKENTORUS is not set
+CONFIG_BGP_TORUS_IP_CHECKSUM=y
+CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT=20
+CONFIG_BGP_TORUS_ADAPTIVE_ROUTING=y
+
+# CONFIG_BGP_VRNIC is not set
+CONFIG_BGP_E10000=y
+CONFIG_BGP_E10000_RXB=1048576
+CONFIG_BGP_E10000_TXB=4096
+CONFIG_BGP_E10000_IP_CHECKSUM=y
+CONFIG_BGP_E10000_NAPI=y
+# CONFIG_BGP_E10000_EMAC_LOOPBACK is not set
+# CONFIG_BGP_E10000_PHY_LOOPBACK is not set
+# CONFIG_BGP_E10000_DBG is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+CONFIG_TCP_HIATUS_COUNTS=y
+CONFIG_TCP_CONGESTION_OVERRIDES=y
+
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_UARTLITE is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_HVC_UDBG is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+
+#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=y
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_USER_MEM=y
+CONFIG_INFINIBAND_ADDR_TRANS=y
+CONFIG_INFINIBAND_SOFTRDMA=m
+CONFIG_INFINIBAND_SOFTIWARP=m
+# CONFIG_INFINIBAND_BGVRNIC is not set
+# CONFIG_INFINIBAND_BGVRNIC_ETH is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_CM is not set
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
+# CONFIG_EDAC is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+CONFIG_AUTOFS4_FS=y
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_CODE_PATCHING_SELFTEST is not set
+# CONFIG_FTR_FIXUP_SELFTEST is not set
+# CONFIG_MSI_BITMAP_SELFTEST is not set
+# CONFIG_XMON is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_VIRQ_DEBUG is not set
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_PPC_EARLY_DEBUG is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=y
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=y
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_PPC_CLOCK is not set
+# CONFIG_VIRTUALIZATION is not set
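
Note that bgp_defconfig above sets CONFIG_WRAP_COPY_TOFROM_USER=y. As a hedged sketch only — the patch's actual wrapper is not shown in this section, and both the renamed assembler symbol and the counter names below are hypothetical — a C-language instrumentation wrapper of that kind might look like:

    #include <linux/compiler.h>
    #include <linux/types.h>

    /* Assume the optimized assembler copy loop has been renamed so this
     * C shim can own the usual entry point. */
    extern unsigned long __real_copy_tofrom_user(void __user *to,
                    const void __user *from, unsigned long size);

    unsigned long wrap_ctfu_calls;  /* crude global counters */
    unsigned long wrap_ctfu_bytes;

    unsigned long __copy_tofrom_user(void __user *to,
                    const void __user *from, unsigned long size)
    {
            wrap_ctfu_calls++;
            wrap_ctfu_bytes += size;
            return __real_copy_tofrom_user(to, from, size);
    }
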
diff --git a/arch/powerpc/configs/44x/bgpion_defconfig b/arch/powerpc/configs/44x/bgpion_defconfig
new file mode 100644
index 0000000..f28adfb
--- /dev/null
+++ b/arch/powerpc/configs/44x/bgpion_defconfig
@@ -0,0 +1,929 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.29.1
+# Wed Sep  8 16:59:33 2010
+#
+# CONFIG_PPC64 is not set
+
+#
+# Processor support
+#
+# CONFIG_6xx is not set
+# CONFIG_PPC_85xx is not set
+# CONFIG_PPC_8xx is not set
+# CONFIG_40x is not set
+CONFIG_44x=y
+# CONFIG_E200 is not set
+CONFIG_PPC_FPU=y
+CONFIG_4xx=y
+CONFIG_BOOKE=y
+CONFIG_PTE_64BIT=y
+CONFIG_PHYS_64BIT=y
+CONFIG_PPC_MMU_NOHASH=y
+# CONFIG_PPC_MM_SLICES is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+# CONFIG_NOT_COHERENT_CACHE is not set
+CONFIG_L1_WRITETHROUGH=y
+CONFIG_PPC32=y
+CONFIG_WORD_SIZE=32
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_MMU=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_HARDIRQS=y
+# CONFIG_HAVE_SETUP_PER_CPU_AREA is not set
+CONFIG_IRQ_PER_CPU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_ARCH_HAS_ILOG2_U32=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+# CONFIG_ARCH_NO_VIRT_TO_BUS is not set
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_OF=y
+CONFIG_PPC_UDBG_16550=y
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_GENERIC_BUG=y
+# CONFIG_DEFAULT_UIMAGE is not set
+CONFIG_PPC_DCR_NATIVE=y
+# CONFIG_PPC_DCR_MMIO is not set
+CONFIG_PPC_DCR=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_STOP_MACHINE=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+# CONFIG_FREEZER is not set
+
+#
+# Platform support
+#
+# CONFIG_PPC_CELL is not set
+# CONFIG_PPC_CELL_NATIVE is not set
+# CONFIG_PQ2ADS is not set
+# CONFIG_BAMBOO is not set
+# CONFIG_EBONY is not set
+# CONFIG_SAM440EP is not set
+# CONFIG_SEQUOIA is not set
+# CONFIG_TAISHAN is not set
+# CONFIG_KATMAI is not set
+# CONFIG_RAINIER is not set
+# CONFIG_WARP is not set
+# CONFIG_CANYONLANDS is not set
+# CONFIG_YOSEMITE is not set
+CONFIG_BGP=y
+# CONFIG_XILINX_VIRTEX440_GENERIC_BOARD is not set
+CONFIG_BLUEGENE=y
+# CONFIG_BLUEGENE_NOISY_BOOT is not set
+# CONFIG_BLUEGENE_MAMBO is not set
+# CONFIG_BGP_DD1 is not set
+# CONFIG_BLUEGENE_TCP is not set
+# CONFIG_BLUEGENE_DMA_MEMCPY is not set
+CONFIG_BLUEGENE_COLLECTIVE_TRACE=y
+CONFIG_BLUEGENE_TORUS_TRACE=y
+# CONFIG_BLUEGENE_TCP_WITHOUT_NAPI is not set
+# CONFIG_BLUEGENE_UNIPROCESSOR is not set
+CONFIG_BLUEGENE_STATISTICS=y
+# CONFIG_BLUEGENE_SHARE_WITH_VRNIC is not set
+# CONFIG_BGP_NFS_FIX is not set
+CONFIG_HUGE_KMALLOC=y
+CONFIG_TASK_UNMAPPED_BASE=0x20000000
+CONFIG_DEBUG_ALIGNMENT_HISTOGRAM=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_IBM_OCP=y
+CONFIG_IBM_EMAC4=y
+# CONFIG_PPC4xx_DMA is not set
+CONFIG_PPC_GEN550=y
+
+#
+# Zepto setup
+#
+CONFIG_ZEPTO=y
+CONFIG_ZEPTO_DEBUG=y
+CONFIG_ZEPTO_MEMORY=y
+CONFIG_ZEPTO_CNS_RELOCATION=y
+# CONFIG_ZEPTO_LOCKBOX_UPC_TLB is not set
+# CONFIG_IPIC is not set
+# CONFIG_MPIC is not set
+# CONFIG_MPIC_WEIRD is not set
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+# CONFIG_PPC_970_NAP is not set
+# CONFIG_PPC_INDIRECT_IO is not set
+# CONFIG_GENERIC_IOMAP is not set
+# CONFIG_CPU_FREQ is not set
+# CONFIG_FSL_ULI1575 is not set
+# CONFIG_SIMPLE_GPIO is not set
+
+#
+# Kernel options
+#
+CONFIG_HIGHMEM=y
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_MATH_EMULATION=y
+# CONFIG_IOMMU_HELPER is not set
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_HAS_WALK_MEMORY=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+# CONFIG_IRQ_ALL_CPUS is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+CONFIG_PPC_4K_PAGES=y
+# CONFIG_PPC_16K_PAGES is not set
+# CONFIG_PPC_64K_PAGES is not set
+CONFIG_FORCE_MAX_ZONEORDER=11
+CONFIG_PROC_DEVICETREE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608"
+# CONFIG_WRAP_COPY_TOFROM_USER is not set
+CONFIG_EXTRA_TARGETS=""
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_ZONE_DMA=y
+CONFIG_4xx_SOC=y
+CONFIG_PPC_PCI_CHOICE=y
+# CONFIG_PCI is not set
+# CONFIG_PCI_DOMAINS is not set
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+# CONFIG_HAS_RAPIDIO is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_PAGE_OFFSET=0xc0000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_PHYSICAL_START=0x00000000
+CONFIG_TASK_SIZE=0xc0000000
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_COMPAT_NET_DEV_OPS=y
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+CONFIG_INET_TUNNEL=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_BEET is not set
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+# CONFIG_MTD is not set
+CONFIG_OF_DEVICE=y
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_XILINX_SYSACE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_MACINTOSH_DRIVERS is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=y
+# CONFIG_VETH is not set
+# CONFIG_NET_ETHERNET is not set
+# CONFIG_NETDEV_1000 is not set
+CONFIG_NETDEV_10000=y
+CONFIG_BGP_COLLECTIVE=y
+CONFIG_BGP_COLLECTIVE_IP_CHECKSUM=y
+# CONFIG_BGP_COLLECTIVE_NAPI is not set
+# CONFIG_BGP_DMA is not set
+# CONFIG_BGP_TORUS is not set
+# CONFIG_BGP_TORUS_DIAGNOSTICS is not set
+# CONFIG_BGP_FRANKENTORUS is not set
+CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT=20
+# CONFIG_BGP_TORUS_ADAPTIVE_ROUTING is not set
+# CONFIG_BGP_VRNIC is not set
+# CONFIG_BGP_STATISTICS is not set
+CONFIG_BGP_E10000=y
+CONFIG_BGP_E10000_RXB=1048576
+CONFIG_BGP_E10000_TXB=4096
+CONFIG_BGP_E10000_IP_CHECKSUM=y
+CONFIG_BGP_E10000_NAPI=y
+# CONFIG_BGP_E10000_EMAC_LOOPBACK is not set
+# CONFIG_BGP_E10000_PHY_LOOPBACK is not set
+# CONFIG_BGP_E10000_DBG is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+CONFIG_TCP_HIATUS_COUNTS=y
+CONFIG_TCP_CONGESTION_OVERRIDES=y
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_UARTLITE is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_UDBG is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_EDAC is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_CODE_PATCHING_SELFTEST is not set
+# CONFIG_FTR_FIXUP_SELFTEST is not set
+# CONFIG_MSI_BITMAP_SELFTEST is not set
+# CONFIG_XMON is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_VIRQ_DEBUG is not set
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_PPC_EARLY_DEBUG is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=y
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=y
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_PPC_CLOCK is not set
+# CONFIG_VIRTUALIZATION is not set
diff --git a/arch/powerpc/configs/44x/bgpzepto_defconfig b/arch/powerpc/configs/44x/bgpzepto_defconfig
new file mode 100644
index 0000000..bc97f94
--- /dev/null
+++ b/arch/powerpc/configs/44x/bgpzepto_defconfig
@@ -0,0 +1,941 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.29.1
+# Thu Dec  2 13:23:12 2010
+#
+# CONFIG_PPC64 is not set
+
+#
+# Processor support
+#
+# CONFIG_6xx is not set
+# CONFIG_PPC_85xx is not set
+# CONFIG_PPC_8xx is not set
+# CONFIG_40x is not set
+CONFIG_44x=y
+# CONFIG_E200 is not set
+CONFIG_PPC_FPU=y
+CONFIG_4xx=y
+CONFIG_BOOKE=y
+CONFIG_PTE_64BIT=y
+CONFIG_PHYS_64BIT=y
+CONFIG_PPC_MMU_NOHASH=y
+# CONFIG_PPC_MM_SLICES is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+# CONFIG_NOT_COHERENT_CACHE is not set
+CONFIG_L1_WRITETHROUGH=y
+CONFIG_PPC32=y
+CONFIG_WORD_SIZE=32
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_MMU=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_HARDIRQS=y
+# CONFIG_HAVE_SETUP_PER_CPU_AREA is not set
+CONFIG_IRQ_PER_CPU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_ARCH_HAS_ILOG2_U32=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+# CONFIG_ARCH_NO_VIRT_TO_BUS is not set
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_OF=y
+CONFIG_PPC_UDBG_16550=y
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_GENERIC_BUG=y
+# CONFIG_DEFAULT_UIMAGE is not set
+CONFIG_PPC_DCR_NATIVE=y
+# CONFIG_PPC_DCR_MMIO is not set
+CONFIG_PPC_DCR=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+CONFIG_IKCONFIG=y
+# CONFIG_IKCONFIG_PROC is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+# CONFIG_MARKERS is not set
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_KRETPROBES=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_STOP_MACHINE=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+# CONFIG_FREEZER is not set
+
+#
+# Platform support
+#
+# CONFIG_PPC_CELL is not set
+# CONFIG_PPC_CELL_NATIVE is not set
+# CONFIG_PQ2ADS is not set
+# CONFIG_BAMBOO is not set
+# CONFIG_EBONY is not set
+# CONFIG_SAM440EP is not set
+# CONFIG_SEQUOIA is not set
+# CONFIG_TAISHAN is not set
+# CONFIG_KATMAI is not set
+# CONFIG_RAINIER is not set
+# CONFIG_WARP is not set
+# CONFIG_CANYONLANDS is not set
+# CONFIG_YOSEMITE is not set
+CONFIG_BGP=y
+# CONFIG_XILINX_VIRTEX440_GENERIC_BOARD is not set
+CONFIG_BLUEGENE=y
+CONFIG_BLUEGENE_NOISY_BOOT=y
+# CONFIG_BLUEGENE_MAMBO is not set
+# CONFIG_BGP_DD1 is not set
+CONFIG_BLUEGENE_TCP=y
+# CONFIG_BLUEGENE_DMA_MEMCPY is not set
+# CONFIG_BLUEGENE_COLLECTIVE_TRACE is not set
+# CONFIG_BLUEGENE_TORUS_TRACE is not set
+# CONFIG_BLUEGENE_TCP_WITHOUT_NAPI is not set
+# CONFIG_BLUEGENE_UNIPROCESSOR is not set
+# CONFIG_BLUEGENE_STATISTICS is not set
+# CONFIG_BLUEGENE_SHARE_WITH_VRNIC is not set
+# CONFIG_BGP_NFS_FIX is not set
+CONFIG_HUGE_KMALLOC=y
+CONFIG_TASK_UNMAPPED_BASE=0x20000000
+# CONFIG_DEBUG_ALIGNMENT_HISTOGRAM is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_IBM_OCP=y
+CONFIG_IBM_EMAC4=y
+# CONFIG_PPC4xx_DMA is not set
+CONFIG_PPC_GEN550=y
+
+#
+# Zepto setup
+#
+CONFIG_ZEPTO=y
+CONFIG_ZEPTO_DEBUG=y
+CONFIG_ZEPTO_MEMORY=y
+CONFIG_ZEPTO_CNS_RELOCATION=y
+CONFIG_ZEPTO_LOCKBOX_UPC_TLB=y
+CONFIG_ZEPTO_TREE_TORUS_TLB=y
+CONFIG_ZEPTO_COMPUTENODE=y
+# CONFIG_ZEPTO_EXPERIMENTAL is not set
+# CONFIG_IPIC is not set
+# CONFIG_MPIC is not set
+# CONFIG_MPIC_WEIRD is not set
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+# CONFIG_PPC_970_NAP is not set
+# CONFIG_PPC_INDIRECT_IO is not set
+# CONFIG_GENERIC_IOMAP is not set
+# CONFIG_CPU_FREQ is not set
+# CONFIG_FSL_ULI1575 is not set
+# CONFIG_SIMPLE_GPIO is not set
+
+#
+# Kernel options
+#
+CONFIG_HIGHMEM=y
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_MATH_EMULATION is not set
+# CONFIG_IOMMU_HELPER is not set
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_HAS_WALK_MEMORY=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+# CONFIG_IRQ_ALL_CPUS is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_MIGRATION is not set
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+# CONFIG_PPC_4K_PAGES is not set
+# CONFIG_PPC_16K_PAGES is not set
+CONFIG_PPC_64K_PAGES=y
+CONFIG_FORCE_MAX_ZONEORDER=11
+CONFIG_PROC_DEVICETREE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608"
+# CONFIG_WRAP_COPY_TOFROM_USER is not set
+CONFIG_EXTRA_TARGETS=""
+# CONFIG_SECCOMP is not set
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_ZONE_DMA=y
+CONFIG_4xx_SOC=y
+CONFIG_PPC_PCI_CHOICE=y
+# CONFIG_PCI is not set
+# CONFIG_PCI_DOMAINS is not set
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+# CONFIG_HAS_RAPIDIO is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_PAGE_OFFSET=0xc0000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_PHYSICAL_START=0x00000000
+CONFIG_TASK_SIZE=0xc0000000
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_COMPAT_NET_DEV_OPS=y
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+CONFIG_INET_TUNNEL=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_BEET is not set
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+# CONFIG_MTD is not set
+CONFIG_OF_DEVICE=y
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_XILINX_SYSACE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_MACINTOSH_DRIVERS is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=y
+# CONFIG_VETH is not set
+# CONFIG_NET_ETHERNET is not set
+# CONFIG_NETDEV_1000 is not set
+CONFIG_NETDEV_10000=y
+CONFIG_BGP_COLLECTIVE=y
+# CONFIG_BGP_COLLECTIVE_IP_CHECKSUM is not set
+# CONFIG_BGP_COLLECTIVE_NAPI is not set
+CONFIG_BGP_DMA=y
+CONFIG_BGP_TORUS=y
+CONFIG_BGP_TORUS_DIAGNOSTICS=y
+# CONFIG_BGP_FRANKENTORUS is not set
+CONFIG_BGP_TORUS_IP_CHECKSUM=y
+CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT=20
+CONFIG_BGP_TORUS_ADAPTIVE_ROUTING=y
+# CONFIG_BGP_VRNIC is not set
+CONFIG_BGP_STATISTICS=y
+# CONFIG_BGP_E10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+CONFIG_TCP_HIATUS_COUNTS=y
+CONFIG_TCP_CONGESTION_OVERRIDES=y
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_UARTLITE is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_UDBG is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+# CONFIG_INFINIBAND_USER_ACCESS is not set
+CONFIG_INFINIBAND_ADDR_TRANS=y
+# CONFIG_INFINIBAND_SOFTIWARP is not set
+# CONFIG_INFINIBAND_IPOIB is not set
+# CONFIG_EDAC is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_KPROBES_SANITY_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_LKDTM is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_CODE_PATCHING_SELFTEST is not set
+# CONFIG_FTR_FIXUP_SELFTEST is not set
+# CONFIG_MSI_BITMAP_SELFTEST is not set
+# CONFIG_XMON is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_VIRQ_DEBUG is not set
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_PPC_EARLY_DEBUG is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=y
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=y
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_PPC_CLOCK is not set
+# CONFIG_VIRTUALIZATION is not set
diff --git a/arch/powerpc/include/asm/bgcns.h b/arch/powerpc/include/asm/bgcns.h
new file mode 100644
index 0000000..238ad40
--- /dev/null
+++ b/arch/powerpc/include/asm/bgcns.h
@@ -0,0 +1,1060 @@
+/*
+ * (C) Copyright IBM Corp. 2007, 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Tom Gooding, IBM
+ */
+
+
+#ifndef _BGCNS_H
+#define _BGCNS_H
+
+
+#ifndef __ASSEMBLY__
+
+/*! @page CNS Common Node Services
+ *
+ *  @section CNS_S10 Overview
+ *
+ *  As the name implies, the <b>Common Node Services (CNS)</b> layer provides @b services
+ *  to the kernel.  These services may be simple queries abstracting various node-specific
+ *  data (such as DDR size) or they may be more sophisticated software services,
+ *  such as common machine check handling.  Additionally, some services may be implicit,
+ *  such as the initialization of various hardware devices unique to Blue Gene, such as
+ *  Netbus and SerDes.
+ *
+ *  Services are not directly linked into the kernel, but rather are invoked from kernel
+ *  code via a <b>service directory</b> which is itself part of an overall <b>service
+ *  descriptor</b>.  This service descriptor is constructed during initialization and
+ *  is passed to the kernel when the kernel is booted.  The service directory is a
+ *  collection of <b>service references</b>.
+ *
+ *  During partition (block) booting, ELF images are loaded onto the compute and I/O nodes.
+ *  The bootloader (@i aka microloader) boots first and then transfers control to the Common
+ *  Node Services layer so that it, in turn, may boot.
+ *
+ *  Once the CNS layer has booted, control is transferred to the kernel so that it may also
+ *  boot.  All services provided by the CNS layer are immediately available at this time.
+ *
+ *  @section CNS_S20 Programming Model
+ *
+ *  A kernel running on top of the CNS layer is not statically linked to the common services.
+ *  Instead, the services are called via function pointers provided by the services descriptor,
+ *  which is described here:  @ref _BGCNS_ServiceDirectory.
+ *
+ *  The kernel must operate under the following rules and restrictions:
+ *  @li The kernel must not alter the services descriptor.  The descriptor must be treated as a read-only
+ *      data structure even though the kernel may have the ability to alter it.  Because CNS trusts the
+ *      kernel, this also implies that the kernel must not expose the descriptor to any untrusted
+ *      software (such as application code).
+ *  @li The kernel must ensure that the CNS virtual memory region is mapped prior to invoking any
+ *      service.
+ *  @li The kernel must ensure that any data passed to services via parameters is mapped.
+ *      Specifically, TLB entries must be mapped as shared (TID = 0) and must be either readable
+ *      (input parameters) or readable and writeable (output parameters).
+ *  @li The kernel must treat the virtual address range (@ref _BGCNS_Descriptor::baseVirtualAddress ,
+ *      _BGCNS_Descriptor::baseVirtualAddress + @ref _BGCNS_Descriptor::size - 1)  as reserved.
+ *      That is, the kernel must not use this region of virtual memory for anything besides accessing
+ *      the services descriptor.
+ *  @li The kernel must treat the physical address range (@ref _BGCNS_Descriptor::basePhysicalAddress,
+ *      _BGCNS_Descriptor::basePhysicalAddress + _BGCNS_Descriptor::size - 1) as reserved.  The
+ *      kernel must not map this memory for any other use.
+ *  @li The kernel must not access any of the reserved virtual address regions with TLB settings that
+ *      are different from those used by CNS.  The kernel is allowed to unmap any of the reserved
+ *      memory TLBs for its own use.  However, in such a case and per the rule above, the kernel must
+ *      ensure that the region is mapped prior to using any CNS facilities (such as invoking a service).
+ *  @li CNS may need to map one or more TLB entries in order to access Blue Gene devices.  In such a case,
+ *      CNS may borrow TLB entries; the TLB will be returned to its original state before the service returns
+ *      control to the invoking kernel.  Kernels may avoid this behavior for specific devices by using
+ *      the mapDevice service.
+ *  @li The kernel's ELF image must avoid the 256K region of memory between 0x07000000 and 0x0703FFFF.  This
+ *      region is used for the pre-relocated CNS image and will be available for general use once CNS boot
+ *      is complete.
+ *  @li The kernel must not alter any reserved SPRs, DCRs or memory-mapped device registers.
+ *
+ *  The CNS software may behave unpredictably if any of these rules and restrictions is violated.
+ *
+ *  Kernels may make the following assumptions about CNS:
+ *
+ *  @li The data passed in the firmware descriptor (see below) is static.  Specifically, the base addresses,
+ *      size and service directory will not change once CNS boot is complete.
+ *
+ *  @subsection CNS_21 Programming Examples
+ *
+ *  @subsubsection CNS_211 Obtaining the Personality
+ *
+ *  The following example shows how to fetch a copy of the Blue Gene personality structure and also
+ *  serves as a simple example of invoking a service:
+ *
+ *  @code
+ *
+ *      BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ *     _BGP_Personality_t* pers = (_BGP_Personality_t*)(*descr->services->getPersonalityData)();
+ *     ...
+ *  @endcode
+ *
+ *  The programming model guarantees that the descriptor is static.  Thus, one can provide a
+ *  convenience function to make service invocation a little more readable:
+ *
+ *  @code
+ *
+ *
+ *  static BGCNS_Descriptor* _cns_descriptor = ...; // obtained from CNS at boot time
+ *
+ *  inline BGCNS_ServiceDirectory* cns() { return _cns_descriptor->services; }
+ *
+ *  void foo() {
+ *     _BGP_Personality_t* pers = (_BGP_Personality_t*)cns()->getPersonalityData();
+ *     ...
+ *  }
+ *
+ *  @endcode
+ *
+ *  This style will be used in all of the subsequent examples.
+ *
+ *  @subsubsection CNS_212 SMP Initialization
+ *
+ *  Common Node Services will launch the kernel on a single core (typically core 0) and will
+ *  leave the remaining cores parked.  The kernel can activate additional cores via the @c takeCPU
+ *  service.  Here is a very simple example of such kernel code:
+ *
+ *  @code
+ *
+ *    void anEntryPoint(unsigned core, void* arg_not_used) {
+ *        // Do whatever your kernel needs to do here.  Typically,
+ *        // this function never returns.  You will arrive here
+ *        // when takeCPU is invoked (below).
+ *    }
+ *
+ *    void someCodeOnTheMainThread() {
+ *
+ *        // ...
+ *
+ *        unsigned N = cns()->getNumberOfCores();
+ *
+ *        for (core = 1; core < N; core++) {
+ *            if ( cns()->takeCPU(core, NULL, &anEntryPoint) != 0 ) {
+ *                // error handling goes here
+ *            }
+ *        }
+ *
+ *        // ...
+ *    }
+ *
+ *  @endcode
+ *
+ *  @subsubsection CNS_213 Version Compatibility
+ *
+ *  Common Node Services structures and APIs should remain compatible within maintenance
+ *  releases and e-fixes.  Kernels may add a runtime check to ensure that the version
+ *  of CNS at runtime is compatible with the version used at compile time.  This is done as follows:
+ *
+ *  @code
+ *
+ *      BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ *
+ *      if ( ! BGCNS_IS_COMPATIBLE(descr) ) {
+ *           // incompatible CNS (panic?)
+ *      }
+ *
+ *  @endcode
+ *
+ *  @subsubsection CNS_23 Interrupts
+ *
+ *  A kernel wanting to use the CNS interrupt services would first have to enable interrupts
+ *  for the appropriate Blue Gene BIC group and IRQ within that group.  This would likely be
+ *  done at boot time.  So, for example, such a kernel could enable interrupts for the Universal
+ *  Performance Counter (group 5, IRQ 2) to be handled by the non-critical handler of core 0 as
+ *  follows:
+ *
+ *  @code
+ *      cns()->enableInterrupt(5, 2, BGCNS_NonCritical, 0);
+ *  @endcode
+ *
+ *  Such a kernel might also maintain a collection of routines that act as subhandlers of the
+ *  non-critical interrupt handler.  In this example, we'll assume it is simply a
+ *  two-dimensional array indexed by group and IRQ:
+ *
+ *  @code
+ *      subhandlers[5][2] = &theUpcSubHandler;
+ *  @endcode
+ *
+ *  That kernel's non-critical interrupt handler would then typically handle all interrupts by
+ *  successively invoking the getInterrupt() service to determine the group and IRQ, and then
+ *  dispatching the appropriate subhandler.  Additionally, each interrupt is acknowledged
+ *  so as to avoid continuous re-interruption:
+ *
+ *  @code
+ *      unsigned grp, irq;
+ *
+ *      while ( cns()->getInterrupt(BGCNS_NonCritical, &grp, &irq) == 0 ) {
+ *          (*subhandlers[grp][irq])(); // dispatch the subhandler
+ *          cns()->acknowledgeInterrupt(grp, irq); // ack the interrupt
+ *      }
+ *  @endcode
+ *
+ *  @subsubsection CNS_24 Global Barriers and Interrupts
+ *
+ *  The Blue Gene/P Global Interrupt Controller (aka GLINT) provides 4 independent channels
+ *  that may be configured as either a global barrier or a global interrupt.
+ *
+ *  Barriers are constructed by invoking the barrier service:
+ *
+ *  @code
+ *      unsigned channel = 0;
+ *
+ *      // synchronize:
+ *      int reset = 1;
+ *      int rc;
+ *      while ( (rc = cns()->globalBarrier_nonBlocking(channel, reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ *        reset = 0;
+ *      }
+ *
+ *      if ( rc == BGCNS_RC_COMPLETE ) {
+ *        // good path
+ *      }
+ *      else {
+ *        // error
+ *      }
+ *  @endcode
+ *
+ *  Similarly, a barrier with a timeout can also be constructed:
+ *
+ *  @code
+ *      unsigned channel = 0;
+ *      int reset = 1;
+ *      unsigned long long startTime = ...; // obtain current time
+ *      int rc;
+ *
+ *      while ( (rc = cns()->globalBarrier_nonBlocking(channel,reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ *         reset = 0;
+ *         unsigned long long currentTime = ...; // obtain current time
+ *         if ( currentTime - startTime > timeout )
+ *           break;
+ *      }
+ *
+ *      if ( rc == BGCNS_RC_COMPLETE )  {
+ *        // good path
+ *      }
+ *      else {
+ *        // timeout or error
+ *      }
+ *  @endcode
+ *
+ *  A node may opt out of a barrier channel via the disableBarrier service:
+ *
+ *  @code
+ *
+ *    // some other synchronization mechanism needs to go here
+ *
+ *    cns()->disableBarrier(channel);
+ *
+ *  @endcode
+ *
+ *  Conversely, it may opt back in:
+ *
+ *  @code
+ *    cns()->enableBarrier(channel, user_mode);
+ *  @endcode
+ *
+ *  By default, CNS reserves the use of channel 2 as a global interrupt for environmental
+ *  monitoring.  It also reserves channel 3 for use as a supervisory-mode, compute-node-only
+ *  barrier.  Compute node kernels are free to share this channel for the same
+ *  purpose (a supervisory barrier among compute nodes).  The enable/disable barrier services
+ *  may return errors if operating on a reserved channel.
+ *
+ *  NOTE: The standard BG/P software stack, which includes I/O node Linux and the Compute Node
+ *  Kernel (CNK), uses channel 0 as an I/O node barrier during boot and transforms it into a
+ *  compute-node-only barrier when jobs execute.
+ *
+ *
+ *  @section CNS_3 DMA Services
+ *
+ *  The DMA services provided in CNS are low-level services.  Readers interested in this area should
+ *  also look at the documentation for the DMA SPIs, which sit at a slightly higher level.
+ *
+ *
+ *
+ *  @section CNS_4 Reserved and Preferred Addresses
+ *
+ *
+ *  The following virtual memory regions are reserved and must be avoided by
+ *  kernels:
+ *
+ *  @code
+ *
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | Lower      | Upper      | Size | Usage                | Attributes            |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | CNSlow[1]  | CNShigh[2] | 256K | CNS                  | I, Rs, Ws, Xs         |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *
+ *    [1] CNSlow  = descr->baseVirtualAddress , usually 0xFFF40000
+ *    [2] CNShigh = descr->baseVirtualAddress + descr->size - 1;  usually 0xFFF7FFFF
+ *
+ *  @endcode
+ *
+ *  The following virtual memory regions are used by default in CNS.  Kernels that wish to have
+ *  a different memory map may do so via the mapDevice service.
+ *
+ *  @code
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | Lower      | Upper      | Size | Usage                | Attributes            |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFB0000 | 0xFFFCFFFF |  64K | Torus                | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFD0000 | 0xFFFD3FFF |  16K | DMA                  | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFD9000 | 0xFFFD9FFF |   4K | DevBus               | I, G, Rs, Ws          |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFDA000 | 0xFFFDAFFF |   4K | UPC                  | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFDC000 | 0xFFFDD3FF |   4K | Collective           | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFDE000 | 0xFFFDEFFF |   4K | BIC                  | I, G, Rs, Ws, Xs      |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFF0000 | 0xFFFF3FFF |  16K | Lockbox (supervisor) | I, G, Rs, Ws          |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFF4000 | 0xFFFF7FFF |  16K | Lockbox (user)       | I, G, Rs, Ws, Ru, Wu  |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  | 0xFFFF8000 | 0xFFFFFFFF |  32K | SRAM                 | SWOA, WL1, Rs, Ws, Xs |
+ *  +------------+------------+------+----------------------+-----------------------+
+ *  @endcode
+ *
+ */
+
+
+#define BGCNS_VERSION 0x01030000 /* V1R3M0 efix 0 */
+#define BGCNS_IS_COMPATIBLE(descr) ( ((descr)->version & 0xFFFF0000) == (BGCNS_VERSION & 0xFFFF0000) ) //!< True iff the given descriptor is compatible with this version of CNS
+
+/* ! @enum  BGCNS_InterruptType */
+/* ! @brief Defines the different types of interrupts known to */
+/* !        Common Node Services. */
+typedef enum  {
+    BGCNS_NonCritical,     //!< Non-critical interrupt
+    BGCNS_Critical,        //!< Critical interrupt
+    BGCNS_MachineCheck,    //!< Machine check
+} BGCNS_InterruptType;
+
+/* ! @enum   BGCNS_FifoOperation */
+/* ! @brief  Defines the types of FIFO operations */
+/* ! @see    _BGCNS_ServiceDirectory::setDmaFifoControls */
+/* ! @see    _BGCNS_ServiceDirectory::setDmaLocalCopies */
+/* ! @see    _BGCNS_ServiceDirectory::setDmaPriority */
+typedef enum {
+    BGCNS_Disable = 0,
+    BGCNS_Enable = 1,
+    BGCNS_Reenable = 2
+} BGCNS_FifoOperation;
+
+/* ! @enum BGCNS_FifoFacility */
+/* ! @brief Defines the various types of FIFO facilities */
+typedef enum {
+    BGCNS_InjectionFifo,                 //!< Normal Injection FIFO
+    BGCNS_ReceptionFifo,                 //!< Normal Reception FIFO
+    BGCNS_ReceptionHeaderFifo,           //!< Reception Header FIFO (typically used only for debugging)
+    BGCNS_InjectionFifoInterrupt,
+    BGCNS_ReceptionFifoInterrupt,
+    BGCNS_ReceptionHeaderFifoInterrupt,
+    BGCNS_InjectionCounterInterrupt,
+    BGCNS_ReceptionCounterInterrupt
+} BGCNS_FifoFacility;
+
+/* ! @enum  BGCNS_LinkType */
+/* ! @brief Defines the types of MAC links. */
+/* ! @see   _BGCNS_ServiceDirectory::macTestLink */
+typedef enum {
+    BGCNS_Transmitter,  //!< A transmitter link.
+    BGCNS_Receiver      //!< A receiver link.
+} BGCNS_LinkType;
+
+/* ! @enum  BGCNS_EnvmonParameter */
+/* ! @brief Enumerates the various environmental monitor parameters. */
+/* ! @see   _BGCNS_ServiceDirectory::getEnvmonParm */
+/* ! @see   _BGCNS_ServiceDirectory::setEnvmonParm */
+typedef enum {
+    BGCNS_envmon_period  = 0,
+    BGCNS_envmon_policy,
+    BGCNS_envmon_globintwire,
+
+     /*  temporary */
+    BGCNS_envmon_duration,
+    BGCNS_envmon_ddrratio,
+    BGCNS_envmon_numparms
+} BGCNS_EnvmonParameter;
+
+
+#define BGCNS_RC_COMPLETE  0         //!< Indicates that the operation completed normally.
+#define BGCNS_RC_CONTINUE  1         //!< Indicates that the operation is still in progress.
+#define BGCNS_RC_TIMEOUT  -1         //!< Indicates that the operation timed out.
+#define BGCNS_RC_ERROR    -2         //!< Indicates that the operation failed.
+
+#define BGCNS_NUM_DMA_RECEPTION_GROUPS           4
+#define BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP  8
+
+/* ! @brief Describes the mapping of physical torus reception FIFOs to DMA reception FIFOs (rmFIFOs). */
+/* !     The first dimension indexes DMA reception groups, which are a combination of PID0 and PID1 bits */
+/* !     from the DMA packet. */
+/* ! */
+/* !     The second dimension indexes through the different dimensions: X+, X-, Y+, Y-, Z+, Z-, high priority */
+/* !     and local copy. */
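+/* ! */
+/* !     A minimal initialization sketch (illustrative only; the identity policy and */
+/* !     the variable names below are assumptions, not mandated by CNS): */
+/* ! @code */
+/* !     BGCNS_ReceptionMap rmap; */
+/* !     unsigned g, f; */
+/* !     for (g = 0; g < BGCNS_NUM_DMA_RECEPTION_GROUPS; g++) */
+/* !         for (f = 0; f < BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP; f++) */
+/* !             rmap[g][f] = (unsigned char) f; // slot f of group g receives FIFO index f */
+/* ! @endcode */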
+typedef unsigned char BGCNS_ReceptionMap[BGCNS_NUM_DMA_RECEPTION_GROUPS][BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP];
+
+/* ! @brief Indicates that an interrupt is to be broadcast on all cores. */
+/* ! @see   _BGCNS_ServiceDirectory::enableInterrupt */
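+/* ! @remarks A one-line sketch, reusing the group/IRQ values from the page example */
+/* !          above (illustrative only): */
+/* ! @code */
+/* !     cns()->enableInterrupt(5, 2, BGCNS_NonCritical, BGCNS_ALL_CORE_BROADCAST); */
+/* ! @endcode */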
+#define BGCNS_ALL_CORE_BROADCAST 0xFFFFFFFFu
+
+
+/* ! @enum   BGCNS_DeviceMasks */
+/* ! @brief  Provides a list of masks for various Blue Gene devices */
+
+typedef enum {
+    BGCNS_SRAM       = 0x80000000u,
+    BGCNS_BIC        = 0x40000000u,
+    BGCNS_Torus      = 0x20000000u,
+    BGCNS_DevBus     = 0x10000000u,
+    BGCNS_XEMAC      = 0x08000000u,
+    BGCNS_LockBox    = 0x04000000u,
+    BGCNS_Collective = 0x02000000u,
+    BGCNS_SRAM_Err   = 0x01000000u,
+    BGCNS_DMA        = 0x00800000u,
+    BGCNS_UPC        = 0x00400000u
+} BGCNS_DeviceMasks;
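+
+/*
+ * A minimal sketch (illustrative, not part of the CNS API): the masks above are
+ * disjoint single bits, so a set of devices can be expressed by OR-ing them
+ * together, e.g. for a hypothetical device-mapping request:
+ *
+ *     unsigned devices = BGCNS_Torus | BGCNS_DMA | BGCNS_LockBox;
+ */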
+
+/* ! @typedef BGCNS_ServiceDirectory */
+/* ! @struct  _BGCNS_ServiceDirectory */
+/* ! @brief   The service directory is a collection of function pointers to services */
+/* !          provided by the Common Node Services. */
+typedef struct _BGCNS_ServiceDirectory {
+
+    /*------------------------------------------*/
+    /*--- Informational services for the node --*/
+    /*------------------------------------------*/
+
+
+    int (*isIONode)(void);                             //!< Returns 1 if this is an I/O node; 0 if not.
+
+
+    /*-----------------------------------------------------------------*/
+    /*--- Informational services for obtaining Raw personality data ---*/
+    /*-----------------------------------------------------------------*/
+
+    unsigned int (*getPersonalitySize)(void);           //!< Returns the size (in bytes) of the Blue Gene personality.
+    void* (*getPersonalityData)(void);		      //!< Returns a pointer to the raw personality data.
+
+
+    /*-----------------------------------------------*/
+    /*--- Services for Symmetric Multi-Processing ---*/
+    /*-----------------------------------------------*/
+
+
+    unsigned (*getNumberOfCores)(void);                  //!< Returns the number of CPUs on this node.
+
+     /* ! @brief Called by the kernel to activate a CPU. */
+     /* ! @param[in] cpu The index of the cpu (core) to be activated. */
+     /* ! @param[in] entry The (kernel) entry point function.  This function will be invoked when */
+     /* !            the CPU is actually activated. */
+     /* ! @param[in] arg A pointer to the lone argument to be passed to the entry point. */
+     /* ! @return Zero (0) if the CPU was successfully activated.  Non-zero if the CPU was not */
+     /* !            activated (e.g. invalid cpu argument, or the cpu has already been */
+     /* !            activated). */
+     /* ! @remarks   See Section x of the Common Node Services overview for details. */
+    int (*takeCPU)(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg));
+
+
+    /*--------------------------------------*/
+    /*--- Services for Blue Gene devices ---*/
+    /*--------------------------------------*/
+
+     /* ! @brief  Checks active devices for a clean termination state and returns 0 */
+     /* !         if everything is nominal.  Returns non-zero if any anomaly is */
+     /* !         detected and logs violations. */
+     /* ! @param[in] job_rc specifies the return code of the job that is terminating. */
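+     /* ! @remarks A minimal usage sketch from a hypothetical kernel job-exit path */
+     /* !          (job_exit_code is an assumed local, not a CNS name): */
+     /* ! @code */
+     /* !     if ( cns()->terminationCheck(job_exit_code) != 0 ) { */
+     /* !         // a device was left in a bad state; CNS has logged the violations */
+     /* !     } */
+     /* ! @endcode */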
+    int (*terminationCheck)(int job_rc);
+
+    /*-------------------------------*/
+    /*--- Services for interrupts ---*/
+    /*-------------------------------*/
+
+
+     /* ! @brief Enables the specified interrupt.  For all interrupts except inter-processor */
+     /* !        interrupts, the interrupt will be handled by the specified core. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+     /* ! @param[in] itype Specifies the type of interrupt that hardware will present */
+     /* !            for this group/irq. */
+     /* ! @param[in] core Specifies which core will handle the interrupt.  If specified as */
+     /* !            BGCNS_ALL_CORE_BROADCAST, then all cores will handle the interrupt. */
+     /* ! @return    Returns zero (0) if the interrupt is enabled and returns non-zero if it was not */
+     /* !            (including the case of bad arguments). */
+    int (*enableInterrupt)(unsigned group, unsigned irq, BGCNS_InterruptType itype, unsigned core);
+
+     /* ! @brief Disables the specified interrupt. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+     /* ! @return    Returns zero (0) if the interrupt is disabled and returns non-zero if it was not */
+     /* !            (including the case of bad arguments). */
+    int (*disableInterrupt)(unsigned group, unsigned irq);
+
+     /* ! @brief Queries the Blue Gene interrupt hardware for interrupts of the given */
+     /* !        type and returns the group/IRQ.  This service is typically used in the */
+     /* !        context of an interrupt handler.  Since multiple interrupt conditions */
+     /* !        may be present, the service is typically invoked from the handler */
+     /* !        (along with corresponding acknowledgement) until the return code */
+     /* !        indicates that no more interrupts are present. */
+     /* ! @param[out] group Specifies the Blue Gene interrupt group.  The value is valid */
+     /* !        only when the return code is 0. */
+     /* ! @param[out] irq  Specifies the interrupt index within the group.  The value is */
+     /* !        valid only when the return code is zero. */
+     /* ! @param[in] itype Specifies the type of interrupt being queried. */
+     /* ! @return Returns zero (0) if an interrupt condition of the specified type exists.  Returns -1 */
+     /* !        if no such condition exists. */
+    int (*getInterrupt)(BGCNS_InterruptType itype, unsigned* group, unsigned* irq);
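+     /* ! @par Example */
+     /* ! A sketch of the drain loop described above; itype is whatever BGCNS_InterruptType the */
+     /* ! handler services, and dispatch_irq() is a hypothetical kernel routine: */
+     /* ! @code */
+     /* !   unsigned group, irq; */
+     /* !   while (cns->getInterrupt(itype, &group, &irq) == 0) { */
+     /* !       dispatch_irq(group, irq);                 // kernel-side handling (assumed) */
+     /* !       cns->acknowledgeInterrupt(group, irq);    // clear the condition (see below) */
+     /* !   } */
+     /* ! @endcode */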
+
+     /* ! @brief Acknowledges the specified interrupt, thus clearing the interrupt */
+     /* !       condition in the interrupt controller hardware. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+     /* ! @return    Returns zero (0) if the interrupt is acknowledged and returns non-zero if it was not */
+     /* !            (including the case of bad arguments). */
+     /* ! @remarks Note that for some interrupts, it is not sufficient to only acknowledge */
+     /* !       the interrupt; the hardware condition that triggered the interrupt may */
+     /* !       also need to be cleared. */
+    int (*acknowledgeInterrupt)(unsigned group, unsigned irq);
+
+     /* ! @brief Raises the specified interrupt. */
+     /* ! @param[in] group Specifies the Blue Gene interrupt group */
+     /* ! @param[in] irq  Specifies the interrupt index within the group */
+    int (*raiseInterrupt)(unsigned group, unsigned irq);
+
+
+    /*------------------------*/
+    /*--- Mailbox services ---*/
+    /*------------------------*/
+
+    unsigned (*getMailboxMaximumConsoleInputSize)(void);   //!< Returns the actual maximum console message input data size.
+    unsigned (*getMailboxMaximumConsoleOutputSize)(void);  //!< Returns the actual maximum console message output data size.
+
+     /* ! @brief Writes a text message to the output mailbox. */
+     /* ! @param[in] msg a pointer to the message to be written. */
+     /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+     /* ! @remarks As with all common services, the message data area must be mapped via */
+     /* !          the TLB when the service is called.  The behavior is not defined if this */
+     /* !          is not the case. */
+     /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+     /* !            wrong (including a message that is too large). */
+    int (*writeToMailboxConsole)(char *msg, unsigned msglen);
+
+     /* ! @brief Writes a text message to the output mailbox but does not wait for a */
+     /* !        response back from the control system.  When this service is used, */
+     /* !        the caller must poll for completion using the testForOutboxCompletion */
+     /* !        service. */
+     /* ! @param[in] msg a pointer to the message to be written. */
+     /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+     /* ! @remarks As with all common services, the message data area must be mapped via */
+     /* !          the TLB when the service is called.  The behavior is not defined if this */
+     /* !          is not the case. */
+     /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+     /* !            wrong (including a message that is too large). */
+    int (*writeToMailboxConsole_nonBlocking)(char* msg, unsigned msglen);
+
+     /* ! @brief Tests the outbox to see if the last message was picked up by the control */
+     /* !        system. */
+     /* ! @return Zero (0) if the last message has been picked up; non-zero if it has not. */
+     /* ! @remarks Typically the caller will invoke this service after having called */
+     /* !        writeToMailboxConsole_nonBlocking and will then invoke this service in a */
+     /* !        loop until zero is returned. */
+    int (*testForOutboxCompletion)(void);
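+     /* ! @par Example */
+     /* ! A sketch of the non-blocking write/poll protocol described above (error handling */
+     /* ! omitted; msg and msglen as documented): */
+     /* ! @code */
+     /* !   if (cns->writeToMailboxConsole_nonBlocking(msg, msglen) == 0) */
+     /* !       while (cns->testForOutboxCompletion() != 0) */
+     /* !           ;   // spin (or yield) until the control system picks up the message */
+     /* ! @endcode */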
+
+     /* ! @brief Reads a message from the input mailbox. */
+     /* ! @param buf a pointer to a data area into which the message will be placed. */
+     /* ! @param bufsize gives the size of the data area, i.e. the largest message */
+     /* !        that may be safely received into the buffer. */
+     /* ! @return The actual length of the message (0 if no message was received). */
+     /* ! @remarks As with all common services, the message data area must be mapped */
+     /* !          via the TLB when this service is called.  The results are not defined if */
+     /* !          this is not the case. */
+    unsigned (*readFromMailboxConsole)(char *buf, unsigned bufsize);
+
+    int (*testInboxAttention)(void);                      //!< Returns 1 if something is available in the input mailbox.
+
+    int (*_no_longer_in_use_1_)(void); //!< Obsolete ... do not use.
+
+    int (*writeToMailbox)(void* message, unsigned length, unsigned cmd);  //!< Writes a raw message with the given command code to the output mailbox.
+
+    /*------------------------------------*/
+    /*---  RAS and diagnostic services ---*/
+    /*------------------------------------*/
+
+     /* ! @brief TBD */
+    void (*machineCheck)(void *regs);
+
+     /* ! @brief Writes a RAS event to the log. */
+     /* ! @param[in] facility The facility (aka component). */
+     /* ! @param[in] unit The unit (aka subcomponent). */
+     /* ! @param[in] err_code The error code. */
+     /* ! @param[in] numDetails The number of additional details. */
+     /* ! @param[in] details The list of additional details. */
+     /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+     /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+    int (*writeRASEvent)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+     /* ! @brief Writes a RAS string to the log. */
+     /* ! @param[in] facility The facility (aka component). */
+     /* ! @param[in] unit The unit (aka subcomponent). */
+     /* ! @param[in] err_code The error code. */
+     /* ! @param[in] str The message string being written (ASCII encoded, null-terminated).  Note that the length of this string is */
+     /* !     limited to _BGP_RAS_ASCII_MAX_LEN characters.  The implementation may choose to truncate the string if it exceeds this */
+     /* !     length. */
+     /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+     /* !      string was truncated). */
+     /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+    int (*writeRASString)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+
+    /*---------------------------------*/
+    /*--- Global Interrupt services ---*/
+    /*---------------------------------*/
+
+     /* ! @brief A global (compute node) barrier.  This call will block until all other compute nodes */
+     /* !        in the partition also arrive at the barrier. */
+    int (*globalBarrier)(void);
+
+     /* ! @brief  A global (compute node) barrier.  This call will block until all other compute nodes */
+     /* !         in the partition also arrive at the barrier or until the timeout is reached. */
+     /* ! @param  timeoutInMillis specifies the timeout duration.  Units are milliseconds. */
+     /* ! @return BGCNS_RC_COMPLETE if the barrier completed.  BGCNS_RC_TIMEOUT if the barrier timed */
+     /* !         out.  BGCNS_RC_ERROR if some other error occurred. */
+    int (*globalBarrierWithTimeout)(unsigned timeoutInMillis);
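+     /* ! @par Example */
+     /* ! A sketch of handling the three documented return codes (the 5000 ms timeout and the */
+     /* ! handlers are illustrative only): */
+     /* ! @code */
+     /* !   switch (cns->globalBarrierWithTimeout(5000)) { */
+     /* !   case BGCNS_RC_COMPLETE: break;                       // all nodes arrived */
+     /* !   case BGCNS_RC_TIMEOUT:  handle_timeout(); break;     // hypothetical recovery path */
+     /* !   default:                handle_error();  break;      // BGCNS_RC_ERROR */
+     /* !   } */
+     /* ! @endcode */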
+
+
+
+    /*-------------------------*/
+    /*---  Network services ---*/
+    /*-------------------------*/
+
+
+    void (*initializeNetworks)(void);  //!< @todo Is this going away??? Talk to Andy
+
+    void (*_no_longer_in_use_381)(void); //!< @warning Do not use
+
+    void (*_no_longer_in_use_384)(void);//!< @warning Do not use
+
+
+    /*--------------------------*/
+    /*---  DMA unit services ---*/
+    /*--------------------------*/
+
+#define BGCNS_DMA_CAPTURE_X_PLUS         0   //!< watch the X+ receiver
+#define BGCNS_DMA_CAPTURE_X_MINUS        1   //!< watch the X- receiver
+#define BGCNS_DMA_CAPTURE_Y_PLUS         2   //!< watch the Y+ receiver
+#define BGCNS_DMA_CAPTURE_Y_MINUS        3   //!< watch the Y- receiver
+#define BGCNS_DMA_CAPTURE_Z_PLUS         4   //!< watch the Z+ receiver
+#define BGCNS_DMA_CAPTURE_Z_MINUS        5   //!< watch the Z- receiver
+#define BGCNS_DMA_CAPTURE_DISABLE        7   //!< disable link capturing
+
+     /* ! @brief Sets the link capture facility of the DMA unit to watch the specified */
+     /* !        receiver (or disable). */
+     /* ! @param[in] link Specifies the link being monitored.  Use the BGCNS_DMA_CAPTURE_* */
+     /* !        mnemonics defined above. */
+     /* ! @return Zero if the operation succeeded, non-zero if it did not (e.g. an invalid */
+     /* !        link was specified). */
+    int (*setDmaLinkCapture)(int link);
+
+     /* ! @brief Clears the link capture unit so that another packet can be captured. */
+    void (*clearDmaLinkCapture)(void);
+
+#define BGCNS_RC_DMA_NO_PACKET_CAPTURED      0
+#define BGCNS_RC_DMA_CAPTURE_UNIT_ERROR     -1
+#define BGCNS_RC_DMA_DATA_CONFLICT          -2 //!< if initial read indicates a bad packet is captured but subsequent read shows bad packet not captured
+#define BGCNS_RC_DMA_DATA_CONFLICT2         -3 //!< if bad packet is captured, but all the bytes are the same
+     /* ! @brief Reads the DMA link capture packets. */
+     /* ! @param[out] good_packet, good_packet_size Buffer and returned length for the captured good packet. */
+     /* ! @param[out] bad_packet, bad_packet_size Buffer and returned length for the captured bad packet. */
+    int (*readDmaLinkCapturePackets)(unsigned char* good_packet, int* good_packet_size, unsigned char* bad_packet, int* bad_packet_size);
+
+
+#define BGCNS_DMA_ALL_GROUPS 0xFFFFFFFF
+
+     /* ! @brief Sets FIFO controls for the DMA unit. */
+     /* ! */
+     /* ! An operation on facility BGCNS_InjectionFifo enables or disables a subset of the 128 DMA injection FIFOs. */
+     /* ! The FIFOs are organized into four groups of 32.  The mask argument is a bit mask (bit i controls the i-th imFIFO */
+     /* ! within that group, that is the (group*32)+i imFIFO). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionFifo enables or disables a subset of the 32 DMA reception FIFOs. */
+     /* ! The group argument is ignored and the mask argument is a bit mask (bit i controls the i-th reception FIFO). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionHeaderFifo enables or disables the header FIFO for the specified */
+     /* ! group.  The mask argument is ignored.  Note that the header FIFO is typically used for debugging. */
+     /* ! */
+     /* ! An operation on facility BGCNS_InjectionFifoInterrupt enables or disables threshold interrupts for the */
+     /* ! specified injection FIFO.  Threshold interrupts occur if available space is less than the configured */
+     /* ! threshold when the FIFO is used for a remote get operation.  The group and mask arguments are as */
+     /* ! described in the BGCNS_InjectionFifo operation (above). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionFifoInterrupt enables or disables interrupts for the specified */
+     /* ! reception FIFO(s).  If enabled, an interrupt will occur when the reception FIFO's available space drops */
+     /* ! below the configured threshold.  The group argument selects the interrupt type (type 0, 1, 2 or 3). */
+     /* ! The mask argument is a bit mask selecting one or more of the 32 normal reception FIFOs. */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionHeaderFifoInterrupt enables or disables interrupts for the specified */
+     /* ! reception header FIFO.  Reception header FIFOs are used for debug purposes only. */
+     /* ! */
+     /* ! An operation on facility BGCNS_InjectionCounterInterrupt enables or disables "Counter Hit Zero" interrupts. */
+     /* ! The group argument does not specify counter group, but rather specifies interrupt 0, 1, 2 or 3.  The mask */
+     /* ! argument is a bit mask that selects one or more counter subgroups to operate on (the 256 injection counters */
+     /* ! are partitioned into 32 subgroups of 8 counters). */
+     /* ! */
+     /* ! An operation on facility BGCNS_ReceptionCounterInterrupt enables or disables "Counter Hit Zero" interrupts */
+     /* ! for reception counters.  The group and mask arguments are as described in the */
+     /* ! BGCNS_InjectionCounterInterrupt operation (above). */
+     /* ! */
+     /* ! The buffer argument is used as a means to save/restore in an opaque manner.  This is achieved by passing */
+     /* ! a non-NULL buffer to a disable operation and subsequently passing that buffer during a reenable */
+     /* ! operation (the buffer is used to snapshot state). */
+     /* ! */
+     /* ! */
+     /* ! @code */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | Facility                        | group     | mask    | Notes | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_InjectionFifo             | 0..3      | 32 bits | [1]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionFifo             | n/a       | 32 bits | [2]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionHeaderFifo       | 0..3, ALL | N/A     |       | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_InjectionFifoInterrupt    | 0..3      | 32 bits | [1]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionFifoInterrupt    | 0..3      | 32 bits | [3]   | */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_InjectionCounterInterrupt | 0..3      | 32 bits | [3][4]| */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* !   | BGCNS_ReceptionCounterInterrupt | 0..3      | 32 bits | [3][4]| */
+     /* !   +---------------------------------+-----------+---------+-------+ */
+     /* ! */
+     /* !     [1] There are 128 injection FIFOs partitioned into 4 groups of 32. */
+     /* !     [2] There are 32 normal reception FIFOs in BG/P. */
+     /* !     [3] There are 4 interrupt lines.  The group argument selects one of these 4. */
+     /* !     [4] There are 256 counters of each type (injection and reception).  The */
+     /* !         32-bit mask partitions them into groups of 8. */
+     /* ! */
+     /* ! @endcode */
+     /* ! */
+     /* ! @param[in] operation defines the type of operation being performed (enable, disable, or re-enable). */
+     /* ! @param[in] facility defines the type of FIFO being configured. */
+     /* ! @param[in] group is interpreted differently based on the facility. */
+     /* ! @param[in] mask is interpreted differently based on the facility. */
+     /* ! @param[out] buffer is interpreted differently based on the operation and facility.  It is generally used to capture */
+     /* !   a copy of the facility's current state in an enable operation (and may be null, in which case it is ignored).  It is */
+     /* !   generally used as the value to be loaded in a re-enable operation.  In this manner, a state value captured by an enable */
+     /* !   operation may be easily restored by a subsequent re-enable operation.  The buffer argument is generally ignored by */
+     /* !   disable operations. */
+    int (*setDmaFifoControls)(BGCNS_FifoOperation operation, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer);
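+     /* ! @par Example */
+     /* ! An illustrative save/restore sketch following the buffer description above.  The */
+     /* ! BGCNS_Disable operation name and the buffer size are assumptions; only BGCNS_Reenable */
+     /* ! is confirmed elsewhere in this header: */
+     /* ! @code */
+     /* !   unsigned snapshot[8];                       // opaque state buffer (size assumed) */
+     /* !   cns->setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifo, 0, 0xFFFFFFFF, snapshot); */
+     /* !   ...                                         // work while group 0 is quiesced */
+     /* !   cns->setDmaFifoControls(BGCNS_Reenable, BGCNS_InjectionFifo, 0, 0xFFFFFFFF, snapshot); */
+     /* ! @endcode */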
+
+     /* ! @brief Maps injection FIFOs onto physical (torus hardware) FIFOs. */
+     /* ! @param[in] group specifies the injection FIFO group. */
+     /* ! @param[in] fifoIds is an array of length numberOfFifos whose elements are the identifiers of the imFIFO (within that */
+     /* !   given group). */
+     /* ! @param[in] injection_map is an array of length numberOfFifos whose elements are 8-bit masks identifying which of the */
+     /* !   physical torus injection FIFOs are mapped.  Bits 0-3 correspond to torus group 0, and bits 4-7 correspond to torus */
+     /* !   group 1.  Bits 3 and 7 are the high priority FIFOs. */
+     /* ! @param[in] numberOfFifos describes the number of elements contained in the fifoIds and injection_map arguments. */
+     /* ! @return Zero if the map was properly set.  Non-zero if it was not, including the case of illegal arguments. */
+     /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32.  So the legal range of the group */
+     /* !   argument is 0..3 and the legal range for the fifoIds[] elements is 0..31. */
+    int (*setDmaInjectionMap)(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos);
+
+     /* ! @brief Enables or disables "local copy" behavior for the specified injection FIFOs.  A local copy injection FIFO */
+     /* !   can be used to perform memory copies within a node via the DMA engine. */
+     /* ! @param[in] operation specifies whether local copying is being enabled or disabled on the specified FIFOs.  The BGCNS_Reenable */
+     /* !   operation is not supported. */
+     /* ! @param[in] group specifies the injection FIFO group. */
+     /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+     /* ! @return Zero if the operation succeeded; non-zero if it did not. */
+     /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32.  So the legal range of the group */
+     /* !   argument is 0..3. */
+    int (*setDmaLocalCopies)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+     /* ! @brief Enables or disables the priority bit for the specified injection FIFOs.  The priority bit */
+     /* !   is used by the hardware arbitration (details are not further documented here). */
+     /* ! @param[in] operation specifies whether priority bits are being set or cleared. */
+     /* ! @param[in] group specifies the injection FIFO group. */
+     /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+     /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32.  So the legal range of the group */
+     /* !   argument is 0..3. */
+    int (*setDmaPriority)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+     /* ! @brief Sets the mapping from physical (torus hardware) reception FIFOs to reception FIFOs.  The hardware supports */
+     /* !   8 torus FIFOs (six torus dimensions plus high priority plus local copy).  Furthermore, the hardware supports */
+     /* !   4 groups as derived from the PID0 and PID1 bits of the DMA packet.  Thus the mapping is a 4 x 8 matrix of */
+     /* !   reception FIFO ids. */
+     /* ! @param[in] torus_reception_map maps {group} X {torus-hardware-FIFOs} --> reception FIFOs. */
+     /* ! @param[in] fifo_types is an array of N values specifying the type of each normal reception FIFO (see also threshold).  For BGP, */
+     /* !   N=32 (there are 32 normal reception FIFOs). */
+     /* ! @param[in] header_types is an array of N values specifying the type of each reception header FIFO (see also threshold).  For */
+     /* !   BGP, N=4 (there are 4 reception header FIFOs).  Note that reception header FIFOs are typically only used for debugging purposes. */
+     /* ! @param[in] threshold is an array of N threshold values.  The value threshold[i] specifies the threshold value for reception */
+     /* !   FIFO type i.  If reception FIFO interrupts are enabled (see setDmaFifoControls) and a reception FIFO's available space drops */
+     /* !   below its threshold, an interrupt is driven.  For BGP, N=2 (there are type 0 and type 1 reception FIFOs). */
+    int (*setDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[]);
+
+     /* ! @brief Gets the reception map. */
+     /* ! @see setDmaReceptionMap for descriptions of the map and arguments. */
+    int (*getDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned short* store_headers, unsigned header_types[], unsigned threshold[]);
+
+
+     /* ! @deprecated */
+    int (*_used_to_be_clearDmaFullReceptionFifo__removed)(void);
+
+
+     /* ! @brief Resets the MAC unit's PHY. */
+     /* ! @return Zero if the unit was properly reset.  Returns non-zero if some error occurred. */
+     /* ! @deprecated See macResetPHY_nonBlocking. */
+    int (*macResetPHY)(void);
+
+     /* ! @brief Tests the MAC unit's link. */
+     /* ! @param[in] link_type specifies the type of link to be tested. */
+     /* ! @return One (1) if the link is active; zero (0) if it is not. */
+     /* ! @deprecated See macTestLink_nonBlocking */
+    int (*macTestLink)(BGCNS_LinkType link_type);
+
+     /* ! @brief Reads one of the MAC's XGMII registers. */
+     /* ! @param[in] device_address */
+     /* ! @param[in] port_address */
+     /* ! @param[in] register_address */
+     /* ! @return The register's value or a negative number if some error occurred. */
+     /* ! @deprecated Low level MAC register access is being eliminated. */
+    int (*macXgmiiRead)(unsigned device_address, unsigned port_address, unsigned register_address);
+
+     /* ! @brief Writes one of the MAC's XGMII registers. */
+     /* ! @param[in] device_address */
+     /* ! @param[in] port_address */
+     /* ! @param[in] register_address */
+     /* ! @param[in] value */
+     /* ! @return Zero (0) if the register was successfully written; non-zero if some error occurred. */
+     /* ! @deprecated Low level MAC register access is being eliminated. */
+    int (*macXgmiiWrite)(unsigned device_address, unsigned port_address, unsigned register_address, unsigned value);
+
+
+     /* ! @brief Trains SerDes in a non-blocking manner.  The standard usage is to initiate */
+     /* !      training with trainSerDes(1), check the return code, and then continue to invoke */
+     /* !      trainSerDes(0) as long as the return code is BGCNS_RC_CONTINUE. */
+     /* ! @param[in] reset Should be 1 when initiating a retraining sequence and 0 for any */
+     /* !      continuations. */
+     /* ! @return BGCNS_RC_CONTINUE if training is still ongoing (the caller should re-invoke */
+     /* !      the service with reset=0).  BGCNS_RC_COMPLETE if training is complete. */
+     /* !      BGCNS_RC_ERROR if some error has occurred. */
+    int (*trainSerDes)(int reset);
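+     /* ! @par Example */
+     /* ! The standard usage pattern described above, written as a loop: */
+     /* ! @code */
+     /* !   int rc = cns->trainSerDes(1);        // initiate retraining */
+     /* !   while (rc == BGCNS_RC_CONTINUE) */
+     /* !       rc = cns->trainSerDes(0);        // continue until COMPLETE or error */
+     /* ! @endcode */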
+
+     /* ! @brief Fetches the value of the specified control parameter of the environmental monitor. */
+     /* ! @param[in] parameter Parameter to retrieve.  Should be a valid parameter in the BGCNS_EnvmonParameter enumeration. */
+     /* ! @param[out] value Pointer to the storage location that will contain the parameter's value when the function successfully returns. */
+     /* ! @return Zero if the parameter was successfully fetched; non-zero if some error occurred. */
+    int (*getEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int* value);
+
+     /* ! @brief Stores a value to the specified control parameter of the environmental monitor. */
+     /* ! @param[in] parameter Parameter to store.  Should be a valid parameter in the BGCNS_EnvmonParameter enumeration. */
+     /* ! @param[in] value New value for the parameter. */
+     /* ! @return Zero if the parameter was successfully stored; non-zero if some error occurred. */
+    int (*setEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int value);
+
+     /* ! @brief Performs checks and ensures that the node will continue to operate within tolerances. */
+     /* ! @note MUST be called regularly, as indicated by the nextCallbackTime parameter. */
+     /* ! @param[out] nextCallbackTime Upon returning, this will contain the PPC Timebase register value indicating the next */
+     /* !            time the operating system needs to call performEnvMgmt.  Failure to do so may result in poorly performing */
+     /* !            nodes or shutdown of the block / rack. */
+    int (*performEnvMgmt)(unsigned long long* nextCallbackTime);
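+     /* ! @par Example */
+     /* ! A sketch of the required periodic call; schedule_callback_at() stands in for whatever */
+     /* ! timebase-driven scheduling mechanism the kernel provides (assumed for illustration): */
+     /* ! @code */
+     /* !   unsigned long long next; */
+     /* !   cns->performEnvMgmt(&next); */
+     /* !   schedule_callback_at(next);   // re-invoke once the PPC timebase reaches next */
+     /* ! @endcode */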
+
+
+     /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+     /* !        response back from the control system.  When this service is used, */
+     /* !        the caller must poll for completion using the testForOutboxCompletion */
+     /* !        service. */
+     /* ! @param[in] facility The facility (aka component).  See bgp_ras.h for a list of facilities. */
+     /* ! @param[in] unit The unit (aka subcomponent).  See bgp_ras.h for a list of units. */
+     /* ! @param[in] err_code The error code.  See bgp_ras.h for a list of error codes. */
+     /* ! @param[in] numDetails The number of additional details. */
+     /* ! @param[in] details The list of additional details. */
+     /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+    int (*writeRASEvent_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+     /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+     /* !        response back from the control system.  When this service is used, */
+     /* !        the caller must poll for completion using the testForOutboxCompletion */
+     /* !        service. */
+     /* ! @param[in] facility The facility (aka component).  See bgp_ras.h for a list of facilities. */
+     /* ! @param[in] unit The unit (aka subcomponent).  See bgp_ras.h for a list of units. */
+     /* ! @param[in] err_code The error code.  See bgp_ras.h for a list of error codes. */
+     /* ! @param[in] str The message string being written (ASCII encoded, null-terminated).  Note that the length of this string is */
+     /* !     limited to _BGP_RAS_ASCII_MAX_LEN characters.  The implementation may choose to truncate the string if it exceeds this */
+     /* !     length. */
+     /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+     /* !      string was truncated). */
+    int (*writeRASString_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+     /* ! @brief Sets the core's timebase registers to the specified value. */
+     /* ! @param[in] newtime The new 64-bit timebase */
+     /* ! @return Zero if the timebase was successfully set, non-zero if some error condition occurred. */
+     /* ! @deprecated */
+    int (*synchronizeTimebase)(unsigned long long newtime);
+
+     /* ! @brief Sets the node's DMA physical protection settings. */
+     /* ! @note on BGP, there are a maximum of 8 read ranges and 8 write ranges */
+     /* ! @return Zero if the DMA ranges were set, non-zero if some error condition occurred. */
+    int (*dmaSetRange)(unsigned numreadranges,  unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr,
+			 unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr);
+
+     /* ! @brief Checks the status of the devices and reports correctable RAS (if any) */
+     /* ! @param[in] clear_error_counts If non-zero, function will also reset the hardware error counters after posting any RAS. */
+     /* ! @return Zero if successful, non-zero if some error condition occurred. */
+    int (*statusCheck)(unsigned clear_error_counts);
+
+     /* ! @brief Stops the DMA and clears any reception unit failure */
+    int (*stopDma)(void);
+
+     /* ! @brief Starts the DMA */
+    int (*startDma)(void);
+
+     /* ! @brief Performs a hard exit.  The status code is provided to the control system. */
+     /* ! @return This service never returns. */
+    void (*exit)(int rc);
+
+     /* ! @brief Resets the MAC unit's PHY but does not block. */
+     /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+     /* !     reset sequence.  That is, callers should initiate a reset sequence with reset=1 and then */
+     /* !     if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+     /* !     reset=0. */
+     /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+     /* !     before returning.  If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+     /* !     to indicate that it needs additional time. */
+     /* ! @return BGCNS_RC_COMPLETE if the unit was properly reset.  BGCNS_RC_CONTINUE if the reset operation is */
+     /* !     not yet complete.  BGCNS_RC_ERROR if the reset operation failed. */
+    int (*macResetPHY_nonBlocking)(int reset, unsigned timeoutInMillis);
+
+     /* ! @brief Tests the MAC unit's link but does not block. */
+     /* ! @param[in] link_type specifies the type of link to be tested. */
+     /* ! @param[out] result points to the link status, which is valid only when the return code is */
+     /* !     BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */
+     /* !     indicates that it is inactive. */
+     /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+     /* !     test link sequence.  That is, callers should initiate a sequence with reset=1 and then */
+     /* !     if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+     /* !     reset=0. */
+     /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+     /* !     before returning.  If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+     /* !     to indicate that it needs additional time. */
+     /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */
+     /* !     if the test is not yet complete.  BGCNS_RC_ERROR if the test failed. */
+    int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis);
+
+    void * _not_in_use_1068;
+    void * _not_in_use_1069;
+
+
+     /* ! @brief Indicates that a new job is about to start. */
+     /* ! @return Zero (0) if CNS is ready for a new job to start.  Returns non-zero otherwise. */
+    int (*startNextJob)(void);
+
+     /* ! @brief Indicates that the CNS should use the specified virtual address when accessing the */
+     /* !     given device.  When a device is remapped, CNS will no longer make any attempt to map */
+     /* !     a TLB to access that device -- it is the responsibility of the kernel to handle the */
+     /* !     TLB either proactively or reactively (via a fault). */
+     /* ! @param[in] device specifies the device being mapped. */
+     /* ! @param[in] base_address is the root virtual address of the device.  The address should be */
+     /* !     naturally aligned (relative to the size of the device).  See the section Reserved and */
+     /* !     Preferred Addresses for more information. */
+     /* ! @return Zero (0) if the device was successfully remapped.  Returns non-zero if it was not. */
+     /* ! @remarks The lock box is in active use by CNS during early boot and thus it is not */
+     /* !    possible to remap the BGCNS_LockBox device until all cores are activated by the kernel */
+     /* !    (that is, takeCPU has been called for all cores). */
+    int (*mapDevice)(BGCNS_DeviceMasks device, void* base_address);
+
+     /* ! @brief Enables barriers on the specified channel. */
+     /* ! @param channel specifies the channel being enabled. */
+     /* ! @param user_mode indicates whether the barrier is to be used in user-mode code. */
+     /* ! @return Zero if global barriers were enabled.  Returns non-zero if the request could not be */
+     /* !        completed, including the case of attempting to enable a reserved channel. */
+    int (*enableBarrier)(unsigned int channel, int user_mode);
+
+     /* ! @brief Disables barriers on the specified channel. */
+     /* ! @return Zero if global barriers were disabled.  Returns non-zero if the request could not be */
+     /* !        completed, including the case of attempting to disable a reserved channel. */
+    int (*disableBarrier)(unsigned int channel);
+
+     /* ! @brief A global barrier that does not block indefinitely. */
+     /* ! @param channel indicates the GLINT hardware channel to use. */
+     /* ! @param reset indicates whether this is the beginning (1) or a continuation (0) of a barrier */
+     /* !   sequence.  That is, the caller should initiate a barrier operation by passing reset=1 and then, */
+     /* !   if receiving a return code of BGCNS_RC_CONTINUE, should invoke the service again with */
+     /* !   reset=0. */
+     /* ! @param timeoutInMillis is the (approximate) number of milliseconds that this service is allowed */
+     /* !   to wait for barrier participants before returning to the caller. */
+     /* ! @return BGCNS_RC_COMPLETE indicates that all participants have arrived at the barrier.  BGCNS_RC_CONTINUE */
+     /* !   indicates that not all participants arrived within the allotted timeout period.  BGCNS_RC_ERROR */
+     /* !   indicates that some other problem has been detected. */
+     /* ! @remarks This service is not thread safe.  It is considered a programming error to invoke it */
+     /* !   from multiple threads concurrently and the behavior is not defined. */
+    int (*globalBarrier_nonBlocking)(unsigned channel, int reset, unsigned timeoutInMillis);
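+     /* ! @par Example */
+     /* ! A sketch of the reset/continue protocol described above, polling in 10 ms slices on an */
+     /* ! assumed channel while doing other work between attempts: */
+     /* ! @code */
+     /* !   int rc = cns->globalBarrier_nonBlocking(channel, 1, 10);    // initiate */
+     /* !   while (rc == BGCNS_RC_CONTINUE) { */
+     /* !       do_other_work();                                        // hypothetical */
+     /* !       rc = cns->globalBarrier_nonBlocking(channel, 0, 10);    // continue */
+     /* !   } */
+     /* ! @endcode */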
+
+     /* ! @brief Restarts the kernel in cycle reproducibility mode. */
+     /* ! @return Zero if no restart was required for reproducibility. */
+     /* ! @remarks This service must be called from each core and only after all I/O operations have been completed. */
+     /* !   Processors will be reset and kernels will start again. */
+    int (*setupReproducibility)(void);
+
+} BGCNS_ServiceDirectory;
+
+/* ! @deprecated */
+/* ! @typedef BGCNS_DeprecatedServicesDirectory */
+/* ! @struct  _BGCNS_DeprecatedServices */
+/* ! @brief   These services exist for historical reasons and are not further documented here. */
+/* !          They may not be available in future releases of CNS. */
+typedef struct _BGCNS_DeprecatedServices {
+    int (*torusTermCheck)(int* nonFatalRc);
+    int (*torusLinkErrCheck)(int* nonFatalRc);
+    int (*torusCRCExchange)(void);
+    int (*collectiveConfigureClassInternal)(unsigned virtualTree, unsigned short specifier);
+    int (*collectiveConfigureClass)(unsigned virtualTree, unsigned short specifier);
+    unsigned (*collectiveGetClass)(unsigned virtualTree);
+    int (*collectiveInit)(void);
+    int (*collectiveRelease)(void);
+    int (*collectiveHardReset)(void);
+    int (*netbusTermCheck)(void);
+    unsigned (*getSerDesLinkStatus)(void);
+    int  (*dmaTermCheck)(void);
+} BGCNS_DeprecatedServicesDirectory;
+
+/* ! @typedef BGCNS_Descriptor */
+/* ! @struct  _BGCNS_Descriptor */
+/* ! @brief  The Common Node Services descriptor.  This descriptor provides information to the kernel regarding */
+/* !         the CNS memory region as well as a service directory.  The descriptor is passed to the kernel */
+/* !         upon boot and must not be altered by the kernel. */
+typedef struct _BGCNS_Descriptor {
+    BGCNS_ServiceDirectory* services;         //!< A pointer to the services directory.
+    unsigned baseVirtualAddress;	      //!< The virtual address of the beginning of the CNS memory region.
+    unsigned size;			      //!< The size (in bytes) of the CNS memory region.
+    unsigned basePhysicalAddress;             //!< The physical address of the CNS memory region.
+    unsigned basePhysicalAddressERPN;         //!< The extended real page number of the CNS memory region.
+    unsigned bgcns_private_in_use;            //!< Undefined.  This field is for internal use only and may disappear at any time.
+    BGCNS_DeprecatedServicesDirectory* deprecatedServices; //!< @deprecated undocumented
+    unsigned version;                         //!< The CNS version
+} BGCNS_Descriptor;
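+/* ! @par Example */
+/* ! A minimal sketch of how a kernel might consume the descriptor it receives at boot.  The */
+/* ! way the descriptor pointer reaches the kernel, and both bringup_*() helpers, are assumed */
+/* ! for illustration: */
+/* ! @code */
+/* !   void kernel_entry(BGCNS_Descriptor* desc) { */
+/* !       BGCNS_ServiceDirectory* cns = desc->services; */
+/* !       if (cns->isIONode()) */
+/* !           bringup_io_node(); */
+/* !       else */
+/* !           bringup_compute_node(); */
+/* !   } */
+/* ! @endcode */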
+
+
+
+#endif /* !__ASSEMBLY */
+#endif /* _BGCNS_H */
diff --git a/arch/powerpc/include/asm/bgp_personality.h b/arch/powerpc/include/asm/bgp_personality.h
new file mode 100644
index 0000000..37c9161
--- /dev/null
+++ b/arch/powerpc/include/asm/bgp_personality.h
@@ -0,0 +1,1086 @@
+/*
+ * Andrew Tauferner
+ *
+ * Copyright 2006, 2007 International Business Machines
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef	BGP_PERSONALITY_H_ // Prevent multiple inclusion
+#define	BGP_PERSONALITY_H_
+
+
+
+
+/* #include <linux/types.h> */
+
+// These defines allow use of IBM's bit numbering (MSb=0, LSb=31) for multi-bit fields
+//  b = IBM bit number of the least significant bit (highest number)
+//  x = value to set in field
+//  s = size of the field in bits
+#define _BS(b,x,s)   (((x) & (0x7FFFFFFF >> (31-(s)))) << (31-(b)))
+#define _BG(b,x,s)   ((_BS(b,0x7FFFFFFF,s) & (x)) >> (31-(b)))
+#define _BS64(b,x,s) (((x) & (0x7FFFFFFFFFFFFFFFLL >> (63-(s)))) << (63-(b)))
+#define _BG64(b,x,s) ((_BS64(b,0x7FFFFFFFFFFFFFFFLL,s) & (x)) >> (63-(b)))
+#define _BN(b)    ((1<<(31-(b))))
+#define _B1(b,x)  (((x)&0x1)<<(31-(b)))
+#define _B2(b,x)  (((x)&0x3)<<(31-(b)))
+#define _B3(b,x)  (((x)&0x7)<<(31-(b)))
+#define _B4(b,x)  (((x)&0xF)<<(31-(b)))
+#define _B5(b,x)  (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x)  (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x)  (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x)  (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x)  (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
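+// Worked example (illustrative): with IBM numbering, a 4-bit field whose least
+// significant bit is IBM bit 8 occupies conventional bits 26..23, so
+//     _B4(8, 0xA)            == (0xA & 0xF) << (31-8) == 0x05000000
+// and _BG(8, 0x05000000, 4)  == 0xA  (extracts the field again)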
+
+#define BGP_UCI_Component_Rack              ( 0)
+#define BGP_UCI_Component_Midplane          ( 1)
+#define BGP_UCI_Component_BulkPowerSupply   ( 2)
+#define BGP_UCI_Component_PowerCable        ( 3)
+#define BGP_UCI_Component_PowerModule       ( 4)
+#define BGP_UCI_Component_ClockCard         ( 5)
+#define BGP_UCI_Component_FanAssembly       ( 6)
+#define BGP_UCI_Component_Fan               ( 7)
+#define BGP_UCI_Component_ServiceCard       ( 8)
+#define BGP_UCI_Component_LinkCard          ( 9)
+#define BGP_UCI_Component_LinkChip          (10)
+#define BGP_UCI_Component_LinkPort          (11)  // Identifies 1 end of a LinkCable
+#define BGP_UCI_Component_NodeCard          (12)
+#define BGP_UCI_Component_ComputeCard       (13)
+#define BGP_UCI_Component_IOCard            (14)
+#define BGP_UCI_Component_DDRChip           (15)
+#define BGP_UCI_Component_ENetConnector     (16)
+
+typedef struct BGP_UCI_Rack_t
+                {                           // "Rxy": R<RackRow><RackColumn>
+                unsigned Component   :  5;  // when BGP_UCI_Component_Rack
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_Rack_t;
+
+#define BGP_UCI_RACK_COMPONENT(x)              _B5( 4,x)  // when BGP_UCI_Component_Rack
+#define BGP_UCI_RACK_RACKROW(x)                _B4( 8,x)  // 0..F
+#define BGP_UCI_RACK_RACKCOLUMN(x)             _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_Midplane_t
+                {                           // "Rxy-Mm": R<RackRow><RackColumn>-M<Midplane>
+                unsigned Component   :  5;  // when BGP_UCI_Component_Midplane
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned _zero       : 18;  // zeros
+                }
+                BGP_UCI_Midplane_t;
+
+#define BGP_UCI_MIDPLANE_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_Midplane
+#define BGP_UCI_MIDPLANE_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_MIDPLANE_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_MIDPLANE_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+
+
+typedef struct BGP_UCI_BulkPowerSupply_t
+                {                           // "Rxy-B": R<RackRow><RackColumn>-B
+                unsigned Component   :  5;  // when BGP_UCI_Component_BulkPowerSupply
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_BulkPowerSupply_t;
+
+#define BGP_UCI_BULKPOWERSUPPLY_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_BulkPowerSupply
+#define BGP_UCI_BULKPOWERSUPPLY_RACKROW(x)     _B4( 8,x)  // 0..F
+#define BGP_UCI_BULKPOWERSUPPLY_RACKCOLUMN(x)  _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_PowerCable_t
+                {                           // "Rxy-B-C": R<RackRow><RackColumn>-B-C
+                unsigned Component   :  5;  // when BGP_UCI_Component_PowerCable
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_PowerCable_t;
+
+#define BGP_UCI_POWERCABLE_COMPONENT(x)        _B5( 4,x)  // when BGP_UCI_Component_PowerCable
+#define BGP_UCI_POWERCABLE_RACKROW(x)          _B4( 8,x)  // 0..F
+#define BGP_UCI_POWERCABLE_RACKCOLUMN(x)       _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_PowerModule_t
+                {                           // "Rxy-B-Pp": R<RackRow><RackColumn>-B-P<PowerModule>
+                unsigned Component   :  5;  // when BGP_UCI_Component_PowerModule
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned PowerModule :  3;  // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+                unsigned _zero       : 16;  // zeros
+                }
+                BGP_UCI_PowerModule_t;
+
+#define BGP_UCI_POWERMODULE_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_PowerModule
+#define BGP_UCI_POWERMODULE_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_POWERMODULE_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_POWERMODULE_POWERMODULE(x)     _B3(15,x)  // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+
+
+typedef struct BGP_UCI_ClockCard_t
+                {                           // "Rxy-K": R<RackRow><RackColumn>-K
+                unsigned Component   :  5;  // when BGP_UCI_Component_ClockCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned _zero       : 19;  // zeros
+                }
+                BGP_UCI_ClockCard_t;
+
+#define BGP_UCI_CLOCKCARD_COMPONENT(x)         _B5( 4,x)  // when BGP_UCI_Component_ClockCard
+#define BGP_UCI_CLOCKCARD_RACKROW(x)           _B4( 8,x)  // 0..F
+#define BGP_UCI_CLOCKCARD_RACKCOLUMN(x)        _B4(12,x)  // 0..F
+
+
+
+typedef struct BGP_UCI_FanAssembly_t
+                {                           // "Rxy-Mm-Aa": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>
+                unsigned Component   :  5;  // when BGP_UCI_Component_FanAssembly
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned FanAssembly :  4;  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+                unsigned _zero       : 14;  // zeros
+                }
+                BGP_UCI_FanAssembly_t;
+
+#define BGP_UCI_FANASSEMBLY_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_FanAssembly
+#define BGP_UCI_FANASSEMBLY_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_FANASSEMBLY_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_FANASSEMBLY_MIDPLANE(x)        _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_FANASSEMBLY_FANASSEMBLY(x)     _B4(17,x)  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+
+
+
+typedef struct BGP_UCI_Fan_t
+                {                           // "Rxy-Mm-Aa-Ff": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>-F<Fan>
+                unsigned Component   :  5;  // when BGP_UCI_Component_Fan
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned FanAssembly :  4;  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+                unsigned Fan         :  2;  // 0..2 (0=Tailstock, 2=Midplane)
+                unsigned _zero       : 12;  // zeros
+                }
+                BGP_UCI_Fan_t;
+
+#define BGP_UCI_FAN_COMPONENT(x)               _B5( 4,x)  // when BGP_UCI_Component_Fan
+#define BGP_UCI_FAN_RACKROW(x)                 _B4( 8,x)  // 0..F
+#define BGP_UCI_FAN_RACKCOLUMN(x)              _B4(12,x)  // 0..F
+#define BGP_UCI_FAN_MIDPLANE(x)                _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_FAN_FANASSEMBLY(x)             _B4(17,x)  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+#define BGP_UCI_FAN_FAN(x)                     _B2(19,x)  // 0..2 (0=Tailstock, 2=Midplane)
+
+typedef struct BGP_UCI_ServiceCard_t
+                {                           // "Rxy-Mm-S": R<RackRow><RackColumn>-M<Midplane>-S
+                unsigned Component   :  5;  // when BGP_UCI_Component_ServiceCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top (Master ServiceCard in M0)
+                unsigned _zero       : 18;  // zeros
+                }
+                BGP_UCI_ServiceCard_t;
+
+#define BGP_UCI_SERVICECARD_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_ServiceCard
+#define BGP_UCI_SERVICECARD_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_SERVICECARD_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_SERVICECARD_MIDPLANE(x)        _B1(13,x)  // 0=Bottom, 1=Top (Master ServiceCard in M0)
+
+
+
+typedef struct BGP_UCI_LinkCard_t
+                {                           // "Rxy-Mm-Ll": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_LinkCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned LinkCard    :  2;  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+                unsigned _zero       : 16;  // zeros
+                }
+                BGP_UCI_LinkCard_t;
+
+#define BGP_UCI_LINKCARD_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_LinkCard
+#define BGP_UCI_LINKCARD_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_LINKCARD_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_LINKCARD_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_LINKCARD_LINKCARD(x)           _B2(15,x)  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+
+
+
+typedef struct BGP_UCI_LinkChip_t
+                {                           // "Rxy-Mm-Ll-Uu": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-U<LinkChip>
+                unsigned Component   :  5;  // when BGP_UCI_Component_LinkChip
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned LinkCard    :  2;  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+                unsigned LinkChip    :  3;  // 00..05: left to right from Front
+                unsigned _zero       : 13;  // zeros
+                }
+                BGP_UCI_LinkChip_t;
+
+#define BGP_UCI_LINKCHIP_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_LinkChip
+#define BGP_UCI_LINKCHIP_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_LINKCHIP_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_LINKCHIP_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_LINKCHIP_LINKCARD(x)           _B2(15,x)  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+#define BGP_UCI_LINKCHIP_LINKCHIP(x)           _B3(18,x)  // 00..05: left to right from Front
+
+typedef struct BGP_UCI_LinkPort_t
+                {                           // "Rxy-Mm-Ll-Jjj": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-J<LinkPort>
+                unsigned Component   :  5;  // when BGP_UCI_Component_LinkPort
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned LinkCard    :  2;  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+                unsigned LinkPort    :  4;  // 00..15: left to right from Front
+                unsigned _zero       : 12;  // zeros
+                }
+                BGP_UCI_LinkPort_t;
+
+#define BGP_UCI_LINKPORT_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_LinkPort
+#define BGP_UCI_LINKPORT_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_LINKPORT_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_LINKPORT_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_LINKPORT_LINKCARD(x)           _B2(15,x)  // 0..3 (0=BF, 1=TF, 2=BR, 3=TR)
+#define BGP_UCI_LINKPORT_LINKPORT(x)           _B4(19,x)  // 00..15: left to right from Front
+
+
+typedef struct BGP_UCI_NodeCard_t
+                {                           // "Rxy-Mm-Nnn": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_NodeCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned _zero       : 14;  // zeros
+                }
+                BGP_UCI_NodeCard_t;
+
+#define BGP_UCI_NODECARD_COMPONENT(x)          _B5( 4,x)  // when BGP_UCI_Component_NodeCard
+#define BGP_UCI_NODECARD_RACKROW(x)            _B4( 8,x)  // 0..F
+#define BGP_UCI_NODECARD_RACKCOLUMN(x)         _B4(12,x)  // 0..F
+#define BGP_UCI_NODECARD_MIDPLANE(x)           _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_NODECARD_NODECARD(x)           _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+
+
+
+typedef struct BGP_UCI_ComputeCard_t
+                {                           // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_ComputeCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned ComputeCard :  6;  // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+                unsigned _zero       :  8;  // zeros
+                }
+                BGP_UCI_ComputeCard_t;
+
+#define BGP_UCI_COMPUTECARD_COMPONENT(x)       _B5( 4,x)  // when BGP_UCI_Component_ComputeCard
+#define BGP_UCI_COMPUTECARD_RACKROW(x)         _B4( 8,x)  // 0..F
+#define BGP_UCI_COMPUTECARD_RACKCOLUMN(x)      _B4(12,x)  // 0..F
+#define BGP_UCI_COMPUTECARD_MIDPLANE(x)        _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_COMPUTECARD_NODECARD(x)        _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_COMPUTECARD_COMPUTECARD(x)     _B6(23,x)  // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+
+
+typedef struct BGP_UCI_IOCard_t
+                {                           // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
+                unsigned Component   :  5;  // when BGP_UCI_Component_IOCard
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned ComputeCard :  6;  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+                unsigned _zero       :  8;  // zeros
+                }
+                BGP_UCI_IOCard_t;
+
+#define BGP_UCI_IOCARD_COMPONENT(x)            _B5( 4,x)  // when BGP_UCI_Component_IOCard
+#define BGP_UCI_IOCARD_RACKROW(x)              _B4( 8,x)  // 0..F
+#define BGP_UCI_IOCARD_RACKCOLUMN(x)           _B4(12,x)  // 0..F
+#define BGP_UCI_IOCARD_MIDPLANE(x)             _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_IOCARD_NODECARD(x)             _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_IOCARD_COMPUTECARD(x)          _B6(23,x)  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+
+
+
+typedef struct BGP_UCI_DDRChip_t
+                {                           // "Rxy-Mm-Nnn-Jxx-Uuu": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>-U<DDRChip>
+                unsigned Component   :  5;  // when BGP_UCI_Component_DDRChip
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned ComputeCard :  6;  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+                unsigned DDRChip     :  5;  // 00..20
+                unsigned _zero       :  3;  // zeros
+                }
+                BGP_UCI_DDRChip_t;
+
+#define BGP_UCI_DDRCHIP_COMPONENT(x)           _B5( 4,x)  // when BGP_UCI_Component_DDRChip
+#define BGP_UCI_DDRCHIP_RACKROW(x)             _B4( 8,x)  // 0..F
+#define BGP_UCI_DDRCHIP_RACKCOLUMN(x)          _B4(12,x)  // 0..F
+#define BGP_UCI_DDRCHIP_MIDPLANE(x)            _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_DDRCHIP_NODECARD(x)            _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_DDRCHIP_COMPUTECARD(x)         _B6(23,x)  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+#define BGP_UCI_DDRCHIP_DDRCHIP(x)             _B5(28,x)  // 00..20
+
+
+typedef struct BGP_UCI_ENetConnector_t
+                {                           // "Rxy-Mm-Nnn-ENe": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-EN<EN>
+                unsigned Component   :  5;  // when BGP_UCI_Component_ENetConnector
+                unsigned RackRow     :  4;  // 0..F
+                unsigned RackColumn  :  4;  // 0..F
+                unsigned Midplane    :  1;  // 0=Bottom, 1=Top
+                unsigned NodeCard    :  4;  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+                unsigned EN          :  1;  // 0..1 (Equal to IOCard number)
+                unsigned _zero       : 13;  // zeros
+                }
+                BGP_UCI_ENetConnector_t;
+
+#define BGP_UCI_ENETCONNECTOR_COMPONENT(x)     _B5( 4,x)  // when BGP_UCI_Component_ENetConnector
+#define BGP_UCI_ENETCONNECTOR_RACKROW(x)       _B4( 8,x)  // 0..F
+#define BGP_UCI_ENETCONNECTOR_RACKCOLUMN(x)    _B4(12,x)  // 0..F
+#define BGP_UCI_ENETCONNECTOR_MIDPLANE(x)      _B1(13,x)  // 0=Bottom, 1=Top
+#define BGP_UCI_ENETCONNECTOR_NODECARD(x)      _B4(17,x)  // 00..15 (00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_ENETCONNECTOR_ENETCONNECTOR(x) _B1(18,x)  // 0..1 (Equal to IOCard number)
+
+
+
+typedef union  TBGP_UniversalComponentIdentifier
+                {
+                uint32_t                   UCI;
+                BGP_UCI_Rack_t            Rack;
+                BGP_UCI_Midplane_t        Midplane;
+                BGP_UCI_BulkPowerSupply_t BulkPowerSupply;
+                BGP_UCI_PowerCable_t      PowerCable;
+                BGP_UCI_PowerModule_t     PowerModule;
+                BGP_UCI_ClockCard_t       ClockCard;
+                BGP_UCI_FanAssembly_t     FanAssembly;
+                BGP_UCI_Fan_t             Fan;
+                BGP_UCI_ServiceCard_t     ServiceCard;
+                BGP_UCI_LinkCard_t        LinkCard;
+                BGP_UCI_LinkChip_t        LinkChip;
+                BGP_UCI_LinkPort_t        LinkPort;
+                BGP_UCI_NodeCard_t        NodeCard;
+                BGP_UCI_ComputeCard_t     ComputeCard;
+                BGP_UCI_IOCard_t          IOCard;
+                BGP_UCI_DDRChip_t         DDRChip;
+                BGP_UCI_ENetConnector_t   ENetConnector;
+                }
+                BGP_UniversalComponentIdentifier;
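+
+/* Illustrative sketch (not part of the original patch): decoding a raw UCI
+ * word through the union above. BGP_UCI_Component_ComputeCard is the
+ * component code the bitfield comments refer to; `pers` standing for a
+ * pointer to the node's BGP_Personality_t (defined later in this header)
+ * and the printk format are assumptions.
+ *
+ *   BGP_UniversalComponentIdentifier uci;
+ *   uci.UCI = pers->Kernel_Config.UniversalComponentIdentifier;
+ *   if (uci.ComputeCard.Component == BGP_UCI_Component_ComputeCard)
+ *       printk("R%x%x-M%u-N%02u-J%02u\n",
+ *              uci.ComputeCard.RackRow,  uci.ComputeCard.RackColumn,
+ *              uci.ComputeCard.Midplane, uci.ComputeCard.NodeCard,
+ *              uci.ComputeCard.ComputeCard);
+ */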
+
+
+
+#define BGP_PERSONALITY_VERSION (0x0A)
+
+#define BGP_DEFAULT_FREQ (850) 
+
+#define BGP_PERS_PROCESSCONFIG_DIAGS      (0xFF000000) // Diagnostic Mode: All Cores Enabled and Privileged in Process 0
+#define BGP_PERS_PROCESSCONFIG_SMP        (0x0F000000) // All Cores Enabled User-Space in Process 0
+#define BGP_PERS_PROCESSCONFIG_VNM        (0x08040201) // 4 Single-Core Processes (a.k.a. Virtual Nodes)
+#define BGP_PERS_PROCESSCONFIG_2x2        (0x0C030000) // 2 Processes of 2 Cores each in same DP unit
+#define BGP_PERS_PROCESSCONFIG_2x2_CROSS1 (0x09060000) // 2 Processes of 2 Cores in different DP units
+#define BGP_PERS_PROCESSCONFIG_2x2_CROSS2 (0x0A050000) // 2 Processes of 2 Cores in different DP units
+#define BGP_PERS_PROCESSCONFIG_3PLUS1     (0x0E010000) // 3 Cores in one Process, 4th Core in a Separate Process
+#define BGP_PERS_PROCESSCONFIG_DEFAULT    (BGP_PERS_PROCESSCONFIG_DIAGS)
+
+
+// Personality.Kernel_Config.RASPolicy
+#define BGP_PERS_RASPOLICY_VERBOSITY(x)   _B2( 1,x)  // Verbosity as shown below
+#define BGP_PERS_RASPOLICY_MINIMAL          BGP_PERS_RASPOLICY_VERBOSITY(0) // Benchmarking Level of Capture and Reporting
+#define BGP_PERS_RASPOLICY_NORMAL           BGP_PERS_RASPOLICY_VERBOSITY(1) // Normal Production Level of Capture and Reporting
+#define BGP_PERS_RASPOLICY_VERBOSE          BGP_PERS_RASPOLICY_VERBOSITY(2) // Manufacturing Test and Diagnostics
+#define BGP_PERS_RASPOLICY_EXTREME          BGP_PERS_RASPOLICY_VERBOSITY(3) // Report Every Event Immediately - Thresholds set to 1
+#define BGP_PERS_RASPOLICY_FATALEXIT      _BN( 2)   // Fatal is Fatal, so exit.
+
+#define BGP_PERS_RASPOLICY_DEFAULT        (BGP_PERS_RASPOLICY_VERBOSE | BGP_PERS_RASPOLICY_FATALEXIT)
+
+
+#define BGP_PERSONALITY_LEN_NFSDIR (32) // 32 bytes
+
+#define BGP_PERSONALITY_LEN_SECKEY (32) // 32 bytes
+
+// Personality.NodeConfig Driver Enables and Configurations
+#define BGP_PERS_ENABLE_Simulation      _BN( 0)  // Running on VHDL Simulation
+#define BGP_PERS_ENABLE_LockBox         _BN( 1)
+#define BGP_PERS_ENABLE_BIC             _BN( 2)
+#define BGP_PERS_ENABLE_DDR             _BN( 3)  // DDR Controllers (not Fusion DDR model)
+#define BGP_PERS_ENABLE_LoopBack        _BN( 4)  // LoopBack: Internal TS/TR or SerDes Loopback
+#define BGP_PERS_ENABLE_GlobalInts      _BN( 5)
+#define BGP_PERS_ENABLE_Collective      _BN( 6)  // Enable Collective Network
+#define BGP_PERS_ENABLE_Torus           _BN( 7)
+#define BGP_PERS_ENABLE_TorusMeshX      _BN( 8)  // Torus is a Mesh in the X-dimension
+#define BGP_PERS_ENABLE_TorusMeshY      _BN( 9)  // Torus is a Mesh in the Y-dimension
+#define BGP_PERS_ENABLE_TorusMeshZ      _BN(10)  // Torus is a Mesh in the Z-dimension
+#define BGP_PERS_ENABLE_TreeA           _BN(11)  // Enable Collective Network A-link
+#define BGP_PERS_ENABLE_TreeB           _BN(12)  // Enable Collective Network B-link
+#define BGP_PERS_ENABLE_TreeC           _BN(13)  // Enable Collective Network C-link
+#define BGP_PERS_ENABLE_DMA             _BN(14)
+#define BGP_PERS_ENABLE_SerDes          _BN(15)
+#define BGP_PERS_ENABLE_UPC             _BN(16)
+#define BGP_PERS_ENABLE_EnvMon          _BN(17)
+#define BGP_PERS_ENABLE_Ethernet        _BN(18)
+#define BGP_PERS_ENABLE_JTagLoader      _BN(19)  // Converse with JTag Host to load kernel
+#define BGP_PERS_ENABLE_MailBoxReceive  BGP_PERS_ENABLE_JTagLoader
+#define BGP_PERS_ENABLE_PowerSave       _BN(20)  // Turn off unused devices (Eth on CN, TS on ION)
+#define BGP_PERS_ENABLE_FPU             _BN(21)  // Enable Double-Hummers (not supported in EventSim)
+#define BGP_PERS_ENABLE_StandAlone      _BN(22)  // Disable "CIOD" interface, Requires Collective!
+#define BGP_PERS_ENABLE_TLBMisses       _BN(23)  // TLB Misses vs Wasting Memory (see bgp_AppSetup.c)
+#define BGP_PERS_ENABLE_Mambo           _BN(24)  // Running under Mambo? Used by Linux
+#define BGP_PERS_ENABLE_TreeBlast       _BN(25)  // Enable Tree "Blast" mode
+#define BGP_PERS_ENABLE_BlindStacks     _BN(26)  // For "XB" Tests, Lock 16K Stacks in Blind Device
+#define BGP_PERS_ENABLE_CNK_Malloc      _BN(27)  // Enable Malloc Support in CNK.
+#define BGP_PERS_ENABLE_Reproducibility _BN(28)  // Enable Cycle Reproducibility
+#define BGP_PERS_ENABLE_HighThroughput  _BN(29)  // Enable high throughput computing mode
+#define BGP_PERS_ENABLE_DiagnosticsMode _BN(30)  // Enable diagnostics mode
+
+// Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back)
+//  This overrides most L1, L2, and Snoop settings. Careful!
+#define BGP_PERS_ENABLE_BGLMODE      _BN(31)  // (not yet fully implemented)
+
+// Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave
+#define BGP_PERS_NODECONFIG_DEFAULT (BGP_PERS_ENABLE_Simulation  |\
+                                      BGP_PERS_ENABLE_LockBox     |\
+                                      BGP_PERS_ENABLE_BIC         |\
+                                      BGP_PERS_ENABLE_DDR         |\
+                                      BGP_PERS_ENABLE_LoopBack    |\
+                                      BGP_PERS_ENABLE_GlobalInts  |\
+                                      BGP_PERS_ENABLE_Collective  |\
+                                      BGP_PERS_ENABLE_Torus       |\
+                                      BGP_PERS_ENABLE_UPC         |\
+                                      BGP_PERS_ENABLE_EnvMon      |\
+                                      BGP_PERS_ENABLE_FPU         |\
+                                      BGP_PERS_ENABLE_StandAlone)
+
+// Default Setup for Hardware:
+//     Supports Stand-Alone CNA Applications.
+//     Bootloader-Extensions and XB's must turn off JTagLoader
+#define BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (BGP_PERS_ENABLE_JTagLoader  |\
+                                                   BGP_PERS_ENABLE_LockBox     |\
+                                                   BGP_PERS_ENABLE_BIC         |\
+                                                   BGP_PERS_ENABLE_DDR         |\
+                                                   BGP_PERS_ENABLE_GlobalInts  |\
+                                                   BGP_PERS_ENABLE_Collective  |\
+                                                   BGP_PERS_ENABLE_SerDes      |\
+                                                   BGP_PERS_ENABLE_UPC         |\
+                                                   BGP_PERS_ENABLE_EnvMon      |\
+                                                   BGP_PERS_ENABLE_FPU         |\
+                                                   BGP_PERS_ENABLE_StandAlone)
+
+// These fields are set by the control system, depending on compute/IO node:
+//                                                   BGP_PERS_ENABLE_Torus       |
+//                                                   BGP_PERS_ENABLE_TorusMeshX  |
+//                                                   BGP_PERS_ENABLE_TorusMeshY  |
+//                                                   BGP_PERS_ENABLE_TorusMeshZ  |
+
+
+
+// Personality.L1Config: Controls and Settings for L1 Cache
+#define BGP_PERS_L1CONFIG_L1I          _BN( 0)    // L1 Enabled for Instructions
+#define BGP_PERS_L1CONFIG_L1D          _BN( 1)    // L1 Enabled for Data
+#define BGP_PERS_L1CONFIG_L1SWOA       _BN( 2)    // L1 Store WithOut Allocate
+#define BGP_PERS_L1CONFIG_L1Recovery   _BN( 3)    // L1 Full Recovery Mode
+#define BGP_PERS_L1CONFIG_L1WriteThru  _BN( 4)    // L1 Write-Thru (not svc_host changeable (yet?))
+#define BGP_PERS_L1CONFIG_DO_L1ITrans  _BN( 5)    // Enable L1 Instructions Transient?
+#define BGP_PERS_L1CONFIG_DO_L1DTrans  _BN( 6)    // Enable L1 Data         Transient?
+                                                   // unused 9 bits: 7..15
+#define BGP_PERS_L1CONFIG_L1ITrans(x)  _B8(23,x)  // L1 Transient for Instructions in Groups of 16 Lines
+#define BGP_PERS_L1CONFIG_L1DTrans(x)  _B8(31,x)  // L1 Transient for Data         in Groups of 16 Lines
+
+#define BGP_PERS_L1CONFIG_DEFAULT (BGP_PERS_L1CONFIG_L1I         |\
+                                    BGP_PERS_L1CONFIG_L1D         |\
+                                    BGP_PERS_L1CONFIG_L1SWOA      |\
+                                    BGP_PERS_L1CONFIG_L1Recovery  |\
+                                    BGP_PERS_L1CONFIG_L1WriteThru)
+
+typedef union TBGP_Pers_L1Cfg
+               {
+               uint32_t l1cfg;
+               struct {
+                      unsigned l1i         :  1;
+                      unsigned l1d         :  1;
+                      unsigned l1swoa      :  1;
+                      unsigned l1recovery  :  1;
+                      unsigned l1writethru :  1;
+                      unsigned do_l1itrans :  1;
+                      unsigned do_l1dtrans :  1;
+                      unsigned l1rsvd      :  9;
+                      unsigned l1itrans    :  8;
+                      unsigned l1dtrans    :  8;
+                      };
+               }
+               BGP_Pers_L1Cfg;
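+
+/* Illustrative sketch (not in the original source): the union gives named
+ * access to the same bits the BGP_PERS_L1CONFIG_* masks describe, assuming
+ * the big-endian bit layout of the 450 core so the bitfields line up with
+ * the masks; `pers` is a hypothetical pointer to the node personality.
+ *
+ *   BGP_Pers_L1Cfg cfg;
+ *   cfg.l1cfg = pers->Kernel_Config.L1Config;
+ *   if (cfg.l1writethru)
+ *       ;  // set the L1 up for write-through operation
+ */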
+
+// Personality.L2Config: Controls and Settings for L2 and Snoop
+#define BGP_PERS_L2CONFIG_L2I                _BN( 0)  // L2 Instruction Caching Enabled
+#define BGP_PERS_L2CONFIG_L2D                _BN( 1)  // L2 Data        Caching Enabled
+#define BGP_PERS_L2CONFIG_L2PF               _BN( 2)  // L2 Automatic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFO              _BN( 3)  // L2 Optimistic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFA              _BN( 4)  // L2 Aggressive Prefetching Enabled (fewer deeper streams)
+#define BGP_PERS_L2CONFIG_L2PFS              _BN( 5)  // L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers)
+#define BGP_PERS_L2CONFIG_Snoop              _BN( 6)  // Just NULL Snoop Filter
+#define BGP_PERS_L2CONFIG_SnoopCache         _BN( 7)  // Snoop Caches
+#define BGP_PERS_L2CONFIG_SnoopStream        _BN( 8)  // Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata)
+#define BGP_PERS_L2CONFIG_SnoopRange         _BN( 9)  // Snoop Range Filter when possible
+#define BGP_PERS_L2CONFIG_BUG824LUMPY        _BN(10)  // BPC_BUGS 824: Fix with Lumpy Performance
+#define BGP_PERS_L2CONFIG_BUG824SMOOTH       _BN(11)  // BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory
+#define BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12)  // Special for Snoop diagnostics. See bgp_vmm.c
+                                              // additional bits may be used for Snoop setting tweaks
+
+// Default L2 Configuration:
+//   L2 Enabled with Multi-Stream Aggressive Prefetching
+//   Snoop Enabled with all filters except Range
+#define BGP_PERS_L2CONFIG_DEFAULT   (BGP_PERS_L2CONFIG_L2I        |\
+                                      BGP_PERS_L2CONFIG_L2D        |\
+                                      BGP_PERS_L2CONFIG_L2PF       |\
+                                      BGP_PERS_L2CONFIG_L2PFO      |\
+                                      BGP_PERS_L2CONFIG_L2PFS      |\
+                                      BGP_PERS_L2CONFIG_Snoop      |\
+                                      BGP_PERS_L2CONFIG_SnoopCache |\
+                                      BGP_PERS_L2CONFIG_SnoopStream|\
+                                      BGP_PERS_L2CONFIG_BUG824LUMPY)
+
+
+// Personality.L3Config: Controls and Settings for L3
+//   Note: Most bits match BGP_L3x_CTRL DCRs.
+//         See arch/include/bpcore/bgl_l3_dcr.h
+#define BGP_PERS_L3CONFIG_L3I        _BN( 0)    // L3 Enabled for Instructions
+#define BGP_PERS_L3CONFIG_L3D        _BN( 1)    // L3 Enabled for Data
+#define BGP_PERS_L3CONFIG_L3PFI      _BN( 2)    // Inhibit L3 Prefetch from DDR
+#define BGP_PERS_L3CONFIG_DO_Scratch _BN( 3)    // Set up Scratch?
+#define BGP_PERS_L3CONFIG_DO_PFD0    _BN( 4)    // Adjust PFD0?
+#define BGP_PERS_L3CONFIG_DO_PFD1    _BN( 5)    // Adjust PFD1?
+#define BGP_PERS_L3CONFIG_DO_PFDMA   _BN( 6)    // Adjust PFDMA?
+#define BGP_PERS_L3CONFIG_DO_PFQD    _BN( 7)    // Adjust PFQD?
+                                      // 8..15 unused/available
+#define BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x)  // Scratch 8ths: 0..8
+#define BGP_PERS_L3CONFIG_PFD0(x)    _B3(22,x)  // Prefetch Depth for DP0
+#define BGP_PERS_L3CONFIG_PFD1(x)    _B3(25,x)  // Prefetch Depth for DP1
+#define BGP_PERS_L3CONFIG_PFDMA(x)   _B3(28,x)  // Prefetch Depth for DMA
+#define BGP_PERS_L3CONFIG_PFQD(x)    _B3(31,x)  // Prefetch Queue Depth
+
+// General L3 Configuration
+typedef union TBGP_Pers_L3Cfg
+               {
+               uint32_t l3cfg;
+               struct {
+                      unsigned l3i        :  1;
+                      unsigned l3d        :  1;
+                      unsigned l3pfi      :  1;
+                      unsigned do_scratch :  1;
+                      unsigned do_pfd0    :  1;
+                      unsigned do_pfd1    :  1;
+                      unsigned do_pfdma   :  1;
+                      unsigned do_pfqd    :  1;
+                      unsigned rsvd       :  8;
+                      unsigned scratch    :  4;
+                      unsigned pfd0       :  3;
+                      unsigned pfd1       :  3;
+                      unsigned pfdma      :  3;
+                      unsigned pfqd       :  3;
+                      };
+               }
+               BGP_Pers_L3Cfg;
+
+// Default L3 Configuration:
+//   L3 Enabled for Instructions and Data
+//   DMA Prefetch Depth forced to 4; no other Prefetch overrides, No Scratch, No Scrambling.
+#define BGP_PERS_L3CONFIG_DEFAULT    (BGP_PERS_L3CONFIG_L3I |\
+                                       BGP_PERS_L3CONFIG_L3D |\
+                                       BGP_PERS_L3CONFIG_DO_PFDMA |\
+                                       BGP_PERS_L3CONFIG_PFDMA(4))
+
+
+// L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users)
+#define BGP_PERS_L3SELECT_DO_CacheSel _BN( 0)   // Adjust Cache Select setting?
+#define BGP_PERS_L3SELECT_DO_BankSel  _BN( 1)   // Adjust Bank  Select setting?
+#define BGP_PERS_L3SELECT_Scramble    _BN( 2)   // L3 Scramble
+#define BGP_PERS_L3SELECT_PFby2       _BN( 3)   // Prefetch by 2 if set, else by 1 (default) if clear.
+#define BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) // PhysAddr Bit for L3 Selection (0..26)
+#define BGP_PERS_L3SELECT_BankSel(x)  _B5(13,x) // PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel.
+
+typedef union TBGP_Pers_L3Select
+               {
+               uint32_t l3select;
+               struct {
+                      unsigned do_CacheSel :  1;
+                      unsigned do_BankSel  :  1;
+                      unsigned l3Scramble  :  1;
+                      unsigned l3_PF_by2   :  1; // default is PreFetch by 1.
+                      unsigned CacheSel    :  5; // Physical Address Bit for L3 Selection (0..26)
+                      unsigned BankSel     :  5; // 0..26 Must be strictly greater than CacheSel.
+                      unsigned rsvd        : 18;
+                      };
+               }
+               BGP_Pers_L3Select;
+
+// Default L3 Selection Configuration: Disable overrides, but set h/w default values.
+#define BGP_PERS_L3SELECT_DEFAULT  (BGP_PERS_L3SELECT_CacheSel(21) |\
+                                     BGP_PERS_L3SELECT_BankSel(26))
+
+// Tracing Masks and default trace configuration
+#define BGP_TRACE_CONFIG    _BN( 0)   // Display Encoded personality config on startup
+#define BGP_TRACE_ENTRY     _BN( 1)   // Function enter and exit
+#define BGP_TRACE_INTS      _BN( 2)   // Standard Interrupt Dispatch
+#define BGP_TRACE_CINTS     _BN( 3)   // Critical Interrupt Dispatch
+#define BGP_TRACE_MCHK      _BN( 4)   // Machine Check Dispatch
+#define BGP_TRACE_SYSCALL   _BN( 5)   // System Calls
+#define BGP_TRACE_VMM       _BN( 6)   // Virtual Memory Manager
+#define BGP_TRACE_DEBUG     _BN( 7)   // Debug Events (app crashes etc)
+#define BGP_TRACE_TORUS     _BN( 8)   // Torus Init
+#define BGP_TRACE_TREE      _BN( 9)   // Tree  Init
+#define BGP_TRACE_GLOBINT   _BN(10)   // Global Interrupts
+#define BGP_TRACE_DMA       _BN(11)   // DMA Setup
+#define BGP_TRACE_SERDES    _BN(12)   // SerDes Init
+#define BGP_TRACE_TESTINT   _BN(13)   // Test Interface, ECID, Config
+#define BGP_TRACE_ETHTX     _BN(14)   // Ethernet Transmit
+#define BGP_TRACE_ETHRX     _BN(15)   // Ethernet Receive
+#define BGP_TRACE_POWER     _BN(16)   // Power Control
+#define BGP_TRACE_PROCESS   _BN(17)   // Process/Thread Mapping
+#define BGP_TRACE_EXIT_SUM  _BN(18)   // Report Per-Core Interrupt and Error Summary on exit()
+#define BGP_TRACE_SCHED     _BN(19)   // Report Scheduler Information
+#define BGP_TRACE_RAS       _BN(20)   // Report RAS Events (in addition to sending to Host)
+#define BGP_TRACE_ECID      _BN(21)   // Report UCI and ECID on boot
+#define BGP_TRACE_FUTEX     _BN(22)   // Trace Futex operations
+#define BGP_TRACE_MemAlloc  _BN(23)   // Trace MMAP and Shared Memory operations
+#define BGP_TRACE_WARNINGS  _BN(30)   // Trace Warnings
+#define BGP_TRACE_VERBOSE   _BN(31)   // Verbose Tracing Modifier
+
+// Default tracing: display the encoded personality config and report UCI+ECID on boot
+#define BGP_PERS_TRACE_DEFAULT (BGP_TRACE_CONFIG | BGP_TRACE_ECID)
+
+
+typedef struct BGP_Personality_Kernel_t
+                {
+                uint32_t  UniversalComponentIdentifier; // see include/common/bgp_ras.h
+
+                uint32_t  FreqMHz;                      // Clock_X1 Frequency in MegaHertz (e.g. 1000)
+
+                uint32_t  RASPolicy;                    // Verbosity level, and other RAS Reporting Controls
+
+                // Process Config:
+                //   Each byte represents a process (1 to 4 processes supported)
+                //     No core can be assigned to more than 1 process.
+                //     Cores assigned to no process are disabled.
+                //     Cores within a process share the same address space.
+                //     Separate processes have distinct address spaces.
+                //   Within each process (0 to 4 cores assigned to a process):
+                //     Lower nibble is a bitmask of which cores belong to that process.
+                //     Upper nibble is a bitmask of which of those cores run privileged (vs user).
+                //     Processes with zero cores do not exist.
+                //   E.g., for Diagnostics, we use 0xFF000000, which means
+                //     that all 4 cores run privileged in process 0
+                //     (see the decoding sketch after this struct).
+                uint32_t  ProcessConfig;
+
+                uint32_t  TraceConfig;        // Kernel Tracing Enables
+                uint32_t  NodeConfig;         // Kernel Driver Enables
+                uint32_t  L1Config;           // L1 Config and setup controls
+                uint32_t  L2Config;           // L2 and Snoop Config and setup controls
+                uint32_t  L3Config;           // L3 Config and setup controls
+                uint32_t  L3Select;           // L3 Cache and Bank Selection controls
+
+                uint32_t  SharedMemMB;        // Memory to Reserve for Sharing among Processes
+
+                uint32_t  ClockStop0;        // Upper 11 bits of ClockStop, enabled if Non-zero
+                uint32_t  ClockStop1;        // Lower 32 bits of ClockStop, enabled if Non-zero
+                }
+                BGP_Personality_Kernel_t;
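+
+/* Decoding sketch for ProcessConfig (not part of the original patch).
+ * Each byte is one process: low nibble = cores in the process, high
+ * nibble = privileged mask, as the comment block above describes.
+ *
+ *   static inline unsigned bgp_process_cores(uint32_t cfg, unsigned p)
+ *   {
+ *       return (cfg >> (8 * (3 - p))) & 0x0F;   // core bitmask of process p
+ *   }
+ *
+ * With BGP_PERS_PROCESSCONFIG_SMP (0x0F000000) process 0 owns all four
+ * cores in user mode; with _VNM (0x08040201) each process owns one core.
+ */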
+
+
+// Defaults for DDR Config
+#define BGP_PERS_DDR_PBX0_DEFAULT             (0x411D1512)    // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_PBX1_DEFAULT             (0x40000000)    // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_MemConfig0_DEFAULT       (0x81fc4080)    // MemConfig
+#define BGP_PERS_DDR_MemConfig1_DEFAULT       (0x0C0ff800)    // MemConfig
+#define BGP_PERS_DDR_ParmCtl0_DEFAULT         (0x3216c008)    // Parm Control
+#define BGP_PERS_DDR_ParmCtl1_DEFAULT         (0x4168c323)    // Parm Control
+#define BGP_PERS_DDR_MiscCtl0_DEFAULT         (0)    // Misc. Control
+#define BGP_PERS_DDR_MiscCtl1_DEFAULT         (0)    // Misc. Control
+#define BGP_PERS_DDR_CmdBufMode0_DEFAULT      (0x00400fdf)    // Command Buffer Mode
+#define BGP_PERS_DDR_CmdBufMode1_DEFAULT      (0xffc80600)    // Command Buffer Mode
+#define BGP_PERS_DDR_RefrInterval0_DEFAULT    (0xD1000002)    // Refresh Interval
+#define BGP_PERS_DDR_RefrInterval1_DEFAULT    (0x04000000)    // Refresh Interval
+#define BGP_PERS_DDR_ODTCtl0_DEFAULT          (0)    // ODT Control
+#define BGP_PERS_DDR_ODTCtl1_DEFAULT          (0)    // ODT Control
+#define BGP_PERS_DDR_DataStrobeCalib0_DEFAULT (0x08028a64)    // Data Strobe Calibration
+#define BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805)    // Data Strobe Calibration
+#define BGP_PERS_DDR_DQSCtl_DEFAULT           (0x00000168)    // DQS Control
+#define BGP_PERS_DDR_Throttle_DEFAULT         (0)    // DDR Throttle
+//1#define BGP_PERS_DDR_DDRSizeMB_DEFAULT        (4096) // Total DDR size in MegaBytes (512MB - 16384MB).
+#define BGP_PERS_DDR_DDRSizeMB_DEFAULT        (1024) // Total DDR size in MegaBytes (512MB - 16384MB).
+//1#define BGP_PERS_DDR_Chips_DEFAULT            (0x0B) // Type of DDR chips
+#define BGP_PERS_DDR_Chips_DEFAULT            (0x09) // Type of DDR chips
+#define BGP_PERS_DDR_CAS_DEFAULT              (4)    // CAS Latency (3, 4, or 5)
+
+
+#define BGP_PERS_DDRFLAGS_ENABLE_Scrub        _BN(0) // Enable DDR Slow Scrub when 1
+
+// DDRFLAGS default: Enable Slow Scrub.
+#define BGP_PERS_DDRFLAGS_DEFAULT             (BGP_PERS_DDRFLAGS_ENABLE_Scrub)
+
+#define BGP_PERS_SRBS0_DEFAULT                (0)
+#define BGP_PERS_SRBS1_DEFAULT                (0)
+
+typedef struct BGP_Personality_DDR_t
+                {
+                uint32_t  DDRFlags;         // Misc. Flags and Settings
+                uint32_t  SRBS0;            // Controller 0 SRBS/CK Settings
+                uint32_t  SRBS1;            // Controller 1 SRBS/CK Settings
+                uint32_t  PBX0;             // PBX DCRs setting (in IBM bit numbering)
+                uint32_t  PBX1;             // PBX DCRs setting (in IBM bit numbering)
+                uint32_t  MemConfig0;       // MemConfig
+                uint32_t  MemConfig1;       // MemConfig
+                uint32_t  ParmCtl0;         // Parm Control
+                uint32_t  ParmCtl1;         // Parm Control
+                uint32_t  MiscCtl0;         // Misc. Control
+                uint32_t  MiscCtl1;         // Misc. Control
+                uint32_t  CmdBufMode0;      // Command Buffer Mode
+                uint32_t  CmdBufMode1;      // Command Buffer Mode
+                uint32_t  RefrInterval0;    // Refresh Interval
+                uint32_t  RefrInterval1;    // Refresh Interval
+                uint32_t  ODTCtl0;          // ODT Control
+                uint32_t  ODTCtl1;          // ODT Control
+                uint32_t  DataStrobeCalib0; // Data Strobe Calibration
+                uint32_t  DataStrobeCalib1; // Data Strobe Calibration
+                uint32_t  DQSCtl;           // DQS Control
+                uint32_t  Throttle;         // DDR Throttle
+                uint16_t  DDRSizeMB;        // Total DDR size in MegaBytes (512MB - 16384MB).
+                uint8_t   Chips;            // Type of DDR chips
+                uint8_t   CAS;              // CAS Latency (3, 4, or 5)
+                }
+                BGP_Personality_DDR_t;
+
+
+typedef struct BGP_Personality_Networks_t
+                {
+                uint32_t  BlockID;         // a.k.a. PartitionID
+
+                uint8_t   Xnodes,
+                          Ynodes,
+                          Znodes,
+                          Xcoord,
+                          Ycoord,
+                          Zcoord;
+
+                // PSet Support
+                uint16_t  PSetNum;
+                uint32_t  PSetSize;
+                uint32_t  RankInPSet;
+
+                uint32_t  IOnodes;
+                uint32_t  Rank;               // Rank in Block (or Partition)
+                uint32_t  IOnodeRank;         // Rank (and therefore P2P Addr) of my I/O Node
+                uint16_t  TreeRoutes[ 16 ];
+                }
+                BGP_Personality_Networks_t;
+
+
+typedef struct BGP_IP_Addr_t
+                {
+                // IPv6 Addresses are 16 bytes, where the
+                //  lower 4 (indices 12-15) can be used for IPv4 address.
+                uint8_t octet[ 16 ];
+                }
+                BGP_IP_Addr_t;
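+
+/* Illustrative sketch: storing an IPv4 address in the low four octets, per
+ * the comment above (the address value is hypothetical):
+ *
+ *   BGP_IP_Addr_t addr = { { 0 } };
+ *   addr.octet[12] = 172;
+ *   addr.octet[13] = 16;
+ *   addr.octet[14] = 3;
+ *   addr.octet[15] = 1;    // 172.16.3.1
+ */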
+
+
+typedef struct BGP_Personality_Ethernet_t
+                {
+                uint16_t       MTU;            // Initial emac MTU size
+                uint8_t        EmacID[6];      // MAC address for emac
+                BGP_IP_Addr_t IPAddress;      // IPv6/IPv4 address of this node
+                BGP_IP_Addr_t IPNetmask;      // IPv6/IPv4 netmask
+                BGP_IP_Addr_t IPBroadcast;    // IPv6/IPv4 broadcast address
+                BGP_IP_Addr_t IPGateway;      // IPv6/IPv4 initial gateway (zero if none)
+                BGP_IP_Addr_t NFSServer;      // IPv6/IPv4 NFS system software server address
+                BGP_IP_Addr_t serviceNode;    // IPv6/IPv4 address of service node
+
+                // NFS mount info
+                char      NFSExportDir[BGP_PERSONALITY_LEN_NFSDIR];
+                char      NFSMountDir[BGP_PERSONALITY_LEN_NFSDIR];
+
+                // Security Key for Service Node authentication
+                uint8_t   SecurityKey[BGP_PERSONALITY_LEN_SECKEY ];
+                }
+                BGP_Personality_Ethernet_t;
+
+
+
+#define BGP_PERS_BLKCFG_IPOverCollective	_BN(31)
+#define BGP_PERS_BLKCFG_IPOverTorus		_BN(30)
+#define BGP_PERS_BLKCFG_IPOverCollectiveVC	_BN(29)
+#define BGP_PERS_BLKCFG_CIOModeSel(x)		_B2(28,x)
+#define BGP_PERS_BLKCFG_bgsysFSSel(x)		_B3(26,x)
+#define BGP_PERS_BLKCFG_CIOMode_Full		0
+#define BGP_PERS_BLKCFG_CIOMode_MuxOnly		1
+#define BGP_PERS_BLKCFG_CIOMode_None		2
+#define BGP_PERS_BLKCFG_bgsys_NFSv3		0
+#define BGP_PERS_BLKCFG_bgsys_NFSv4		1
+#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \
+				 BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+
+typedef struct TBGP_Personality_t
+                {
+                uint16_t  CRC;
+                uint8_t   Version;
+                uint8_t   PersonalitySizeWords;
+
+                BGP_Personality_Kernel_t   Kernel_Config;
+
+                BGP_Personality_DDR_t      DDR_Config;
+
+                BGP_Personality_Networks_t Network_Config;
+
+                BGP_Personality_Ethernet_t Ethernet_Config;
+
+                uint8_t  Block_Config;
+                uint8_t  padd[7];          // Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr)
+                                           // to simplify jtag operations. See issue #140.
+                }
+                BGP_Personality_t;
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION)
+//  This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \
+           0,                                              /* CRC                  */ \
+           BGP_PERSONALITY_VERSION,                       /* Version              */ \
+           (sizeof(BGP_Personality_t)/sizeof(uint32_t)),  /* PersonalitySizeWords */ \
+           {  /* BGP_Personality_Kernel_t: */ \
+              0,                                   /* UniversalComponentIdentifier */ \
+              BGP_DEFAULT_FREQ,                   /* FreqMHz       */ \
+              BGP_PERS_RASPOLICY_DEFAULT,         /* RASPolicy     */ \
+              BGP_PERS_PROCESSCONFIG_DEFAULT,     /* ProcessConfig */ \
+              BGP_PERS_TRACE_DEFAULT,             /* TraceConfig   */ \
+              BGP_PERS_NODECONFIG_DEFAULT,        /* NodeConfig    */ \
+              BGP_PERS_L1CONFIG_DEFAULT,          /* L1Config      */ \
+              BGP_PERS_L2CONFIG_DEFAULT,          /* L2Config      */ \
+              BGP_PERS_L3CONFIG_DEFAULT,          /* L3Config      */ \
+              BGP_PERS_L3SELECT_DEFAULT,          /* L3Select      */ \
+              0,                                   /* SharedMemMB   */ \
+              0,                                   /* ClockStop0    */ \
+              0                                    /* ClockStop1    */ \
+              }, \
+           {  /* BGP_Personality_DDR_t: */ \
+              BGP_PERS_DDRFLAGS_DEFAULT,             /* DDRFlags         */ \
+              BGP_PERS_SRBS0_DEFAULT,                /* SRBS0            */ \
+              BGP_PERS_SRBS1_DEFAULT,                /* SRBS1            */ \
+              BGP_PERS_DDR_PBX0_DEFAULT,             /* PBX0             */ \
+              BGP_PERS_DDR_PBX1_DEFAULT,             /* PBX1             */ \
+              BGP_PERS_DDR_MemConfig0_DEFAULT,       /* MemConfig0       */ \
+              BGP_PERS_DDR_MemConfig1_DEFAULT,       /* MemConfig1       */ \
+              BGP_PERS_DDR_ParmCtl0_DEFAULT,         /* ParmCtl0         */ \
+              BGP_PERS_DDR_ParmCtl1_DEFAULT,         /* ParmCtl1         */ \
+              BGP_PERS_DDR_MiscCtl0_DEFAULT,         /* MiscCtl0         */ \
+              BGP_PERS_DDR_MiscCtl1_DEFAULT,         /* MiscCtl1         */ \
+              BGP_PERS_DDR_CmdBufMode0_DEFAULT,      /* CmdBufMode0      */ \
+              BGP_PERS_DDR_CmdBufMode1_DEFAULT,      /* CmdBufMode1      */ \
+              BGP_PERS_DDR_RefrInterval0_DEFAULT,    /* RefrInterval0    */ \
+              BGP_PERS_DDR_RefrInterval1_DEFAULT,    /* RefrInterval1    */ \
+              BGP_PERS_DDR_ODTCtl0_DEFAULT,          /* ODTCtl0          */ \
+              BGP_PERS_DDR_ODTCtl1_DEFAULT,          /* ODTCtl1          */ \
+              BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+              BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+              BGP_PERS_DDR_DQSCtl_DEFAULT,           /* DQSCtl           */ \
+              BGP_PERS_DDR_Throttle_DEFAULT,         /* Throttle         */ \
+              BGP_PERS_DDR_DDRSizeMB_DEFAULT,        /* DDRSizeMB        */ \
+              BGP_PERS_DDR_Chips_DEFAULT,            /* Chips            */ \
+              BGP_PERS_DDR_CAS_DEFAULT               /* CAS              */ \
+              }, \
+           {  /* BGP_Personality_Networks_t: */ \
+              0,                                   /* BlockID                */ \
+              1, 1, 1,                             /* Xnodes, Ynodes, Znodes */ \
+              0, 0, 0,                             /* Xcoord, Ycoord, Zcoord */ \
+              0,                                   /* PSetNum                */ \
+              0,                                   /* PSetSize               */ \
+              0,                                   /* RankInPSet             */ \
+              0,                                   /* IOnodes                */ \
+              0,                                   /* Rank                   */ \
+              0,                                   /* IOnodeRank             */ \
+              { 0, }                               /* TreeRoutes[ 16 ]       */ \
+              }, \
+           {  /* BGP_Personality_Ethernet_t: */ \
+              1536,                                /* mtu              */ \
+              { 0, },                              /* EmacID[6]        */ \
+              { { 0x00,0x00,0x00,0x00,             /* IPAddress        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPNetmask        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0xFF,0xFF,0xFF,0x70  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPBroadcast      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPGateway        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* NFSServer        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* serviceNode      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              "",                                  /* NFSExportDir[32] */ \
+              "",                                  /* NFSMountDir[32]  */ \
+              { 0x00, }                            /* SecurityKey[32]  */ \
+              }, \
+           0,                                      /* Block_Config     */ \
+           { 0, }                                  /* padd[7]          */ \
+           }
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE)
+//  This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \
+           0,                                             /* CRC                  */ \
+           BGP_PERSONALITY_VERSION,                      /* Version              */ \
+           (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+           {  /* BGP_Personality_Kernel_t: */ \
+              0,                                          /* UniversalComponentIdentifier */ \
+              BGP_DEFAULT_FREQ,                          /* FreqMHz       */ \
+              BGP_PERS_RASPOLICY_DEFAULT,                /* RASPolicy     */ \
+              BGP_PERS_PROCESSCONFIG_SMP,                /* ProcessConfig */ \
+              BGP_PERS_TRACE_DEFAULT,                    /* TraceConfig   */ \
+              BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE,  /* NodeConfig    */ \
+              BGP_PERS_L1CONFIG_DEFAULT,                 /* L1Config      */ \
+              BGP_PERS_L2CONFIG_DEFAULT,                 /* L2Config      */ \
+              BGP_PERS_L3CONFIG_DEFAULT,                 /* L3Config      */ \
+              BGP_PERS_L3SELECT_DEFAULT,                 /* L3Select      */ \
+              0,                                          /* SharedMemMB   */ \
+              0,                                          /* ClockStop0    */ \
+              0                                           /* ClockStop1    */ \
+              }, \
+           {  /* BGP_Personality_DDR_t: */ \
+              BGP_PERS_DDRFLAGS_DEFAULT,             /* DDRFlags         */ \
+              BGP_PERS_SRBS0_DEFAULT,                /* SRBS0            */ \
+              BGP_PERS_SRBS1_DEFAULT,                /* SRBS1            */ \
+              BGP_PERS_DDR_PBX0_DEFAULT,             /* PBX0             */ \
+              BGP_PERS_DDR_PBX1_DEFAULT,             /* PBX1             */ \
+              BGP_PERS_DDR_MemConfig0_DEFAULT,       /* MemConfig0       */ \
+              BGP_PERS_DDR_MemConfig1_DEFAULT,       /* MemConfig1       */ \
+              BGP_PERS_DDR_ParmCtl0_DEFAULT,         /* ParmCtl0         */ \
+              BGP_PERS_DDR_ParmCtl1_DEFAULT,         /* ParmCtl1         */ \
+              BGP_PERS_DDR_MiscCtl0_DEFAULT,         /* MiscCtl0         */ \
+              BGP_PERS_DDR_MiscCtl1_DEFAULT,         /* MiscCtl1         */ \
+              BGP_PERS_DDR_CmdBufMode0_DEFAULT,      /* CmdBufMode0      */ \
+              BGP_PERS_DDR_CmdBufMode1_DEFAULT,      /* CmdBufMode1      */ \
+              BGP_PERS_DDR_RefrInterval0_DEFAULT,    /* RefrInterval0    */ \
+              BGP_PERS_DDR_RefrInterval1_DEFAULT,    /* RefrInterval1    */ \
+              BGP_PERS_DDR_ODTCtl0_DEFAULT,          /* ODTCtl0          */ \
+              BGP_PERS_DDR_ODTCtl1_DEFAULT,          /* ODTCtl1          */ \
+              BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+              BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+              BGP_PERS_DDR_DQSCtl_DEFAULT,           /* DQSCtl           */ \
+              BGP_PERS_DDR_Throttle_DEFAULT,         /* Throttle         */ \
+              BGP_PERS_DDR_DDRSizeMB_DEFAULT,        /* DDRSizeMB        */ \
+              BGP_PERS_DDR_Chips_DEFAULT,            /* Chips            */ \
+              BGP_PERS_DDR_CAS_DEFAULT               /* CAS              */ \
+              }, \
+           {  /* BGP_Personality_Networks_t: */ \
+              0,                                   /* BlockID                */ \
+              1, 1, 1,                             /* Xnodes, Ynodes, Znodes */ \
+              0, 0, 0,                             /* Xcoord, Ycoord, Zcoord */ \
+              0,                                   /* PSetNum                */ \
+              0,                                   /* PSetSize               */ \
+              0,                                   /* RankInPSet             */ \
+              0,                                   /* IOnodes                */ \
+              0,                                   /* Rank                   */ \
+              0,                                   /* IOnodeRank             */ \
+              { 0, }                               /* TreeRoutes[ 16 ]       */ \
+              }, \
+           {  /* BGP_Personality_Ethernet_t: */ \
+              1536,                                /* mtu              */ \
+              { 0, },                              /* EmacID[6]        */ \
+              { { 0x00,0x00,0x00,0x00,             /* IPAddress        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPNetmask        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0xFF,0xFF,0xFF,0x70  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPBroadcast      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPGateway        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* NFSServer        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* serviceNode      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              "",                                  /* NFSExportDir[32] */ \
+              "",                                  /* NFSMountDir[32]  */ \
+              { 0x00, }                            /* SecurityKey[32]  */ \
+              }, \
+           0,                                      /* Block_Config     */ \
+           { 0, }                                  /* padd[7]          */ \
+           }
+
+
+
+
+#endif // Add nothing below this line.
diff --git a/arch/powerpc/include/asm/bluegene.h b/arch/powerpc/include/asm/bluegene.h
new file mode 100644
index 0000000..8462ea9
--- /dev/null
+++ b/arch/powerpc/include/asm/bluegene.h
@@ -0,0 +1,73 @@
+/*
+ * Blue Gene board definitions
+ *
+ * Todd Inglett <tinglett@us.ibm.com>
+ *
+ * Copyright 2005, 2007, 2009  International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_BLUEGENE_H__
+#define __ASM_BLUEGENE_H__
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+void __init bgp_init_cns(void);
+void bgp_udbg_putc(char c);
+unsigned int bgp_get_irq(void);
+void bgp_send_ipi(int cpu, int msg);
+void bgp_init_IPI(int cpu, int msg);
+void __init bgp_init_IRQ(void);
+
+/* Interrupt encoding for Blue Gene/P hardware.
+ * Given a BIC group and bit index within the group,
+ * bic_hw_to_irq(group, gint) returns the Linux IRQ number.
+ */
+static inline unsigned bic_hw_to_irq(unsigned group, unsigned gint)
+{
+	return ((group+1) << 5) | (gint & 0x1f);
+}
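+
+/* Worked example (illustrative): BIC group 0, bit 3 yields
+ * ((0+1) << 5) | 3 = IRQ 35, so every BIC source lands at IRQ 32 or
+ * above, clear of IRQs 0..31.
+ */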
+
+
+/* Wrappers for CNS calls.
+ * Any pointers must point to locations that will not take TLB misses.
+ */
+int bluegene_testInboxAttention(void);
+int bluegene_testForOutboxCompletion(void);
+int bluegene_writeRASEvent_nonBlocking(unsigned facility,
+				       unsigned unit,
+				       unsigned short err_code,
+				       unsigned numDetails,
+				       unsigned details[]);
+int bluegene_writeRASString(unsigned facility,
+			    unsigned unit,
+			    unsigned short err_code,
+			    char* str);
+int bluegene_writeRASString_nonBlocking(unsigned facility,
+					unsigned unit,
+					unsigned short err_code,
+					char* str);
+int bluegene_writeToMailboxConsole(char *msg, unsigned msglen);
+int bluegene_writeToMailboxConsole_nonBlocking(char *msg, unsigned msglen);
+unsigned bluegene_readFromMailboxConsole(char *buf, unsigned bufsize);
+
+int bluegene_macResetPHY(void);
+int bluegene_macTestRxLink(void);
+int bluegene_macTestTxLink(void);
+
+int bluegene_takeCPU(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg));
+
+int bluegene_getPersonality(void* buff, unsigned buffSize);
+
+int bluegene_isIONode(void);
+
+int bluegene_mapXEMAC(void* baseAddr);
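+
+/* Illustrative sketch: because arguments must not take TLB misses, pass
+ * statically allocated kernel storage (pinned in lowmem) rather than a
+ * stack or vmalloc'd buffer. The message below is hypothetical.
+ *
+ *   static char bg_msg[] = "hello from linux\n";
+ *   bluegene_writeToMailboxConsole(bg_msg, sizeof(bg_msg) - 1);
+ */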
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/powerpc/include/asm/bluegene_ras.h b/arch/powerpc/include/asm/bluegene_ras.h
new file mode 100644
index 0000000..492ab6f
--- /dev/null
+++ b/arch/powerpc/include/asm/bluegene_ras.h
@@ -0,0 +1,108 @@
+/*
+ * Andrew Tauferner
+ * 
+ * Copyright 2006, 2007 International Business Machines
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __BLUEGENE_RAS_H__
+#define __BLUEGENE_RAS_H__
+
+
+typedef enum {
+        bg_comp_none =                0x00,
+        bg_comp_kernel =              0x01,
+        bg_comp_application =         0x02,
+        bg_comp_card =                0x03,
+        bg_comp_mc =                  0x04,
+        bg_comp_mcserver =            0x05,
+        bg_comp_mmcs =                0x06,
+        bg_comp_diags =               0x07,
+
+        bg_comp_max                   // always last
+} bg_ras_comp;
+
+
+typedef enum {
+        bg_subcomp_none =             0x00,
+        bg_subcomp_ppc450 =           0x01,
+        bg_subcomp_fpu =              0x02,
+        bg_subcomp_snoop =            0x03,
+        bg_subcomp_dp0 =              0x04,
+        bg_subcomp_dp1 =              0x05,
+        bg_subcomp_l2 =               0x06,
+        bg_subcomp_l3 =               0x07,
+        bg_subcomp_ddr =              0x08,
+        bg_subcomp_sram =             0x09,
+        bg_subcomp_dma =              0x0a,
+        bg_subcomp_testint =          0x0b,
+        bg_subcomp_testint_dcr =      0x0c,
+        bg_subcomp_lockbox =          0x0d,
+        bg_subcomp_plb =              0x0e,
+        bg_subcomp_collective =       0x0f,
+        bg_subcomp_torus =            0x10,
+        bg_subcomp_globint =          0x11,
+        bg_subcomp_serdes =           0x12,
+        bg_subcomp_upc =              0x13,
+        bg_subcomp_dcr =              0x14,
+        bg_subcomp_bic =              0x15,
+        bg_subcomp_devbus =           0x16,
+        bg_subcomp_netbus =           0x17,
+        bg_subcomp_envmon =           0x18,
+        bg_subcomp_tomal =            0x19,
+        bg_subcomp_xemac =            0x1a,
+        bg_subcomp_phy =              0x1b,
+        bg_subcomp_bootloader =       0x1c,
+        bg_subcomp_cnk =              0x1d,
+        bg_subcomp_ciod =             0x1e,
+        bg_subcomp_svc_host =         0x1f,
+        bg_subcomp_diagnostic =       0x20,
+        bg_subcomp_application =      0x21,
+        bg_subcomp_linux =            0x22,
+        bg_subcomp_cns =              0x23,
+        bg_subcomp_e10000 =           0x24,
+
+        bg_subcomp_max                // always last
+} bg_ras_subcomp;
+
+
+typedef enum {
+	bg_code_none =				0x00,
+	bg_code_halted = 			0x01,
+	bg_code_script_error =			0x02,
+	bg_code_boot_complete = 		0x03,
+	bg_code_panic =				0x04,
+	bg_code_oops = 				0x05,
+	bg_code_tty_alloc_failure = 		0x06,
+	bg_code_tty_reg_failure	=		0x07,
+	bg_code_mbox_thread_create_failure = 	0x08,
+	bg_code_sysrq_thread_create_failure =	0x09,
+	bg_code_oom =				0x0a,
+	bg_ras_max			// always last
+} bg_ras_code;
+
+
+/*
+ * bg_ras -- RAS data structure
+ */
+#define BG_RAS_DATA_MAX  216
+typedef struct {
+        unsigned short comp;
+        unsigned short subcomp;
+        unsigned short code;
+        unsigned short length;
+        unsigned char  data[BG_RAS_DATA_MAX];
+} bg_ras;
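+
+/* Illustrative sketch: a minimal RAS record for a kernel panic, built from
+ * the enums above (initializer order matches the struct fields; reporting
+ * it through BG_RAS_FILE below is an assumption):
+ *
+ *   bg_ras r = { bg_comp_kernel, bg_subcomp_linux, bg_code_panic, 0, { 0 } };
+ */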
+
+
+#define BG_RAS_FILE "/proc/ras"
+#define BG_RAS_ASCII_FILE "/proc/ras_ascii"
+
+
+#endif   // __BLUEGENE_RAS_H__
+
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index cd46f02..cd523d3 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -281,7 +281,24 @@
  * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
  *   even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
  * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
+ *
+ * For BlueGene (450 processor), don't tell the app about the dcache line size
+ * because the 'dcbz' optimisation in glibc memset.S has to be emulated by the
+ * kernel trap handler and slows things down.
  */
+#if defined(CONFIG_BLUEGENE)
+#define ARCH_DLINFO							\
+do {									\
+	/* Handle glibc compatibility. */				\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	/* Cache size items */						\
+	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);			\
+	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);			\
+	VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base)	\
+} while (0)
+#else
 #define ARCH_DLINFO							\
 do {									\
 	/* Handle glibc compatibility. */				\
@@ -293,6 +310,7 @@
 	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);			\
 	VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base)	\
 } while (0)
+#endif
 
 /* PowerPC64 relocations defined by the ABIs */
 #define R_PPC64_NONE    R_PPC_NONE
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 27cc6fd..46a78c5 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -8,6 +8,9 @@
 
 #define PPC44x_MMUCR_TID	0x000000ff
 #define PPC44x_MMUCR_STS	0x00010000
+#define PPC44x_MMUCR_SWOA       0x01000000
+#define PPC44x_MMUCR_U1TE       0x00400000
+#define PPC44x_MMUCR_U2SWOAE    0x00200000
 
 #define	PPC44x_TLB_PAGEID	0
 #define	PPC44x_TLB_XLAT		1
@@ -25,6 +28,7 @@
 #define PPC44x_TLB_1M		0x00000050
 #define PPC44x_TLB_16M		0x00000070
 #define	PPC44x_TLB_256M		0x00000090
+#define PPC44x_TLB_1G           0x000000A0      /* Blue Gene */ 
 
 /* Translation fields */
 #define PPC44x_TLB_RPN_MASK	0xfffffc00      /* Real Page Number */
@@ -32,9 +36,15 @@
 
 /* Storage attribute and access control fields */
 #define PPC44x_TLB_ATTR_MASK	0x0000ff80
+#define PPC44x_TLB_IL1I         0x00080000      /* Inhibit L1 icache */
+#define PPC44x_TLB_IL1D         0x00040000      /* Inhibit L1 dcache */
+#define PPC44x_TLB_IL2I         0x00020000      /* Inhibit L2 icache */
+#define PPC44x_TLB_IL2D         0x00010000      /* Inhibit L2 dcache */
+#define PPC44x_TLB_WL1          0x00100000      /* Write-through L1 */
 #define PPC44x_TLB_U0		0x00008000      /* User 0 */
 #define PPC44x_TLB_U1		0x00004000      /* User 1 */
 #define PPC44x_TLB_U2		0x00002000      /* User 2 */
+#define PPC44x_TLB_SWOA         PPC44x_TLB_U2   /* SWOA when MMUCR U2SWOAE is enabled */
 #define PPC44x_TLB_U3		0x00001000      /* User 3 */
 #define PPC44x_TLB_W		0x00000800      /* Caching is write-through */
 #define PPC44x_TLB_I		0x00000400      /* Caching is inhibited */
@@ -66,12 +76,17 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#ifndef CONFIG_PPC_EARLY_DEBUG_44x
-#define PPC44x_EARLY_TLBS	1
+#ifdef CONFIG_BLUEGENE
+/* Bluegene maps firmware with an early TLB. */
+#define PPC44x_EARLY_TLBS	2  /* include kernel base TLB and CNS */
 #else
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
 #define PPC44x_EARLY_TLBS	2
 #define PPC44x_EARLY_DEBUG_VIRTADDR	(ASM_CONST(0xf0000000) \
 	| (ASM_CONST(CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW) & 0xffff))
+#else
+#define PPC44x_EARLY_TLBS	1
+#endif
 #endif
 
 /* Size of the TLBs used for pinning in lowmem */
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 1458d95..9256c1e 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -9,7 +9,8 @@
 
 #define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
 
-#ifdef CONFIG_NOT_COHERENT_CACHE
+/* For BGP, it is convenient for kmalloc to return 32-byte-aligned allocations for torus DMA */
+#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP)
 #define ARCH_KMALLOC_MINALIGN	L1_CACHE_BYTES
 #endif
 
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 1a0d628..471ab83 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -84,6 +84,9 @@
 #define STXVD2X(xs, ra, rb)	.long (0x7c000798 | VSX_XX1((xs), (ra), (rb)))
 #define LXVD2X(xs, ra, rb)	.long (0x7c000698 | VSX_XX1((xs), (ra), (rb)))
 
+#define LFPDX(frt,ra,rb)	.long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(462<<1)
+#define STFPDX(frt,ra,rb)	.long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(974<<1)
+
 #define SAVE_2GPRS(n, base)	SAVE_GPR(n, base); SAVE_GPR(n+1, base)
 #define SAVE_4GPRS(n, base)	SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
 #define SAVE_8GPRS(n, base)	SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
@@ -93,18 +96,26 @@
 #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
 #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
 
-#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
-#define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
-#define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
-#define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
-#define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
-#define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
-#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
-#define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
-#define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
-#define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
-#define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
-#define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
+#ifndef CONFIG_BGP
+/* Normal FPR save/restore. */
+#define SAVE_FPR(n, b, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define REST_FPR(n, b, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#else
+/* Blue Gene "double-hummer" FPR save/restore. */
+#define SAVE_FPR(n,b,base)	li b,THREAD_FPR0+(16*(n)); STFPDX(n,base,b)
+#define REST_FPR(n,b,base)      li b,THREAD_FPR0+(16*(n)); LFPDX(n,base,b)
+#endif
+
+#define SAVE_2FPRS(n, b, base)	SAVE_FPR(n, b, base); SAVE_FPR(n+1, b, base)
+#define SAVE_4FPRS(n, b, base)	SAVE_2FPRS(n, b, base); SAVE_2FPRS(n+2, b, base)
+#define SAVE_8FPRS(n, b, base)	SAVE_4FPRS(n, b, base); SAVE_4FPRS(n+4, b, base)
+#define SAVE_16FPRS(n, b, base)	SAVE_8FPRS(n, b, base); SAVE_8FPRS(n+8, b, base)
+#define SAVE_32FPRS(n, b, base)	SAVE_16FPRS(n, b, base); SAVE_16FPRS(n+16, b, base)
+#define REST_2FPRS(n, b, base)	REST_FPR(n, b, base); REST_FPR(n+1, b, base)
+#define REST_4FPRS(n, b, base)	REST_2FPRS(n, b, base); REST_2FPRS(n+2, b, base)
+#define REST_8FPRS(n, b, base)	REST_4FPRS(n, b, base); REST_4FPRS(n+4, b, base)
+#define REST_16FPRS(n, b, base)	REST_8FPRS(n, b, base); REST_8FPRS(n+8, b, base)
+#define REST_32FPRS(n, b, base)	REST_16FPRS(n, b, base); REST_16FPRS(n+16, b, base)
 
 #define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,b,base
 #define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index d346649..4dcd99a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -14,8 +14,15 @@
 
 #ifdef CONFIG_VSX
 #define TS_FPRWIDTH 2
+#define TS_FPRALIGN
+#else
+#ifdef CONFIG_BGP
+#define TS_FPRWIDTH 2
+#define TS_FPRALIGN  __attribute__((aligned(16)))
 #else
 #define TS_FPRWIDTH 1
+#define TS_FPRALIGN
+#endif
 #endif
 
 #ifndef __ASSEMBLY__
@@ -95,8 +102,12 @@
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
+#if defined(CONFIG_TASK_UNMAPPED_BASE)
+#define TASK_UNMAPPED_BASE	(CONFIG_TASK_UNMAPPED_BASE)
+#else
 #define TASK_UNMAPPED_BASE	(TASK_SIZE / 8 * 3)
 #endif
+#endif
 
 #ifdef CONFIG_PPC64
 /* 64-bit user address space is 44-bits (16TB user VM) */
@@ -166,7 +177,7 @@
 	unsigned long	dbcr1;
 #endif
 	/* FP and VSX 0-31 register set */
-	double		fpr[32][TS_FPRWIDTH];
+	double		fpr[32][TS_FPRWIDTH] TS_FPRALIGN;
 	struct {
 
 		unsigned int pad;
@@ -309,7 +320,7 @@
 
 #define spin_lock_prefetch(x)	prefetchw(x)
 
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || defined(CONFIG_BGP)
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 #endif
 
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index e07d0c7..f15a8a7 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -344,8 +344,26 @@
 
 #ifdef __KERNEL__
 
+#ifdef CONFIG_BLUEGENE
+
+/* See also arch/powerpc/kernel/systbl.S */
+
+#ifdef CONFIG_ZEPTO
+#define __NR_zepto_generic      1048  /* slots up to 1047 are filled with sys_ni_syscall */
+#define __NR_zepto_bigmem       1049
+#define __NR_zepto_lockbox      1050
+#define __NR_zepto_dma          1051
+#define __NR_syscalls           1052
+#else
+#define __NR_syscalls           1048
+#endif /* CONFIG_ZEPTO */
+
+#else
+
 #define __NR_syscalls		319
 
+#endif
+
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
 
diff --git a/arch/powerpc/include/asm/zepto_tlb.h b/arch/powerpc/include/asm/zepto_tlb.h
new file mode 100644
index 0000000..05dac4a
--- /dev/null
+++ b/arch/powerpc/include/asm/zepto_tlb.h
@@ -0,0 +1,90 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZEPTO_TLB_PARTITION_H_DEFINED__
+#define __ZEPTO_TLB_PARTITION_H_DEFINED__
+
+/*
+   Relevant functions and files:
+
+   _tlbil_all()          @ arch/powerpc/mm/tlb_nohash_low.S
+   DataTLBError()        @ arch/powerpc/kernel/head_44x.S
+   InstructionTLBError() @ arch/powerpc/kernel/head_44x.S
+
+   init_bigmem_pa()      @ arch/powerpc/mm/zepto_bigmem.c
+   install_bigmem_tlb()  @ arch/powerpc/mm/zepto_bigmem.c
+
+
+   NOTE:
+   tlb_44x_index, defined in arch/powerpc/mm/44x_mmu.c, keeps track of
+   the next available slot.
+
+   CONFIG_ZEPTO_LOCKBOX_UPC_TLB installs 3 TLB entries:
+   slot
+   0   lockbox super
+   1   lockbox user
+   2   UPC
+
+   CONFIG_ZEPTO_TREE_TORUS_TLB installs the LOCKBOX_UPC_TLB entries plus 3 more:
+   3   tree0 (CIO)
+   4   tree1 (MPI)
+   5   DMA
+
+   CONFIG_ZEPTO_COMPUTENODE depends on ZEPTO_TREE_TORUS_TLB (and ZEPTO_LOCKBOX_UPC_TLB)
+   NOTE: it does not depend on ZEPTO_MEMORY
+*/
+
+#ifdef CONFIG_ZEPTO_TREE_TORUS_TLB
+
+#define TLB_SLOT_AFTERDEV       6
+
+#else
+
+#ifdef  CONFIG_ZEPTO_LOCKBOX_UPC_TLB
+#define TLB_SLOT_AFTERDEV       3
+#else 
+#define TLB_SLOT_AFTERDEV       0
+#endif
+
+#endif
+
+
+#ifdef CONFIG_ZEPTO_MEMORY
+
+#define BIGMEM_TLB_START_SLOT       (TLB_SLOT_AFTERDEV)
+#define BIGMEM_TLB_END_SLOT         (BIGMEM_TLB_START_SLOT+8-1)   /* tentative: max 7 256MB TLBs + 16MB shm */
+#define BIGMEM_N_TLBS               (BIGMEM_TLB_END_SLOT-BIGMEM_TLB_START_SLOT+1)
+
+#define REGULAR_TLB_START_SLOT      (BIGMEM_TLB_END_SLOT+1)
+
+#else 
+
+#define REGULAR_TLB_START_SLOT      (TLB_SLOT_AFTERDEV)
+
+#endif
+
+#endif
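
A small sketch of the slot arithmetic this header pins down, evaluated for the
fullest configuration (ZEPTO_TREE_TORUS_TLB plus ZEPTO_MEMORY); the constants are
copied verbatim from the definitions above.

    #include <stdio.h>

    #define TLB_SLOT_AFTERDEV       6                 /* device TLBs use slots 0..5 */
    #define BIGMEM_TLB_START_SLOT   (TLB_SLOT_AFTERDEV)
    #define BIGMEM_TLB_END_SLOT     (BIGMEM_TLB_START_SLOT + 8 - 1)
    #define BIGMEM_N_TLBS           (BIGMEM_TLB_END_SLOT - BIGMEM_TLB_START_SLOT + 1)
    #define REGULAR_TLB_START_SLOT  (BIGMEM_TLB_END_SLOT + 1)

    int main(void)
    {
        printf("bigmem slots %d..%d (%d entries)\n",
               BIGMEM_TLB_START_SLOT, BIGMEM_TLB_END_SLOT, BIGMEM_N_TLBS);
        printf("normal TLB replacement starts at slot %d\n",
               REGULAR_TLB_START_SLOT);
        return 0;
    }
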
diff --git a/arch/powerpc/include/bpcore/bgp_dma_memmap.h b/arch/powerpc/include/bpcore/bgp_dma_memmap.h
new file mode 100644
index 0000000..9669081
--- /dev/null
+++ b/arch/powerpc/include/bpcore/bgp_dma_memmap.h
@@ -0,0 +1,208 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _BGP_DMA_MEMMAP_H_
+#define _BGP_DMA_MEMMAP_H_
+
+#define _BGP_DMA_NUM_INJ_FIFO_GROUPS		   4
+#define _BGP_DMA_NUM_INJ_FIFOS_PER_GROUP	   32
+#define _BGP_DMA_NUM_INJ_FIFOS			   (_BGP_DMA_NUM_INJ_FIFO_GROUPS * _BGP_DMA_NUM_INJ_FIFOS_PER_GROUP)
+
+#define _BGP_DMA_NUM_REC_FIFO_GROUPS		   4
+#define _BGP_DMA_NUM_REC_FIFOS_PER_GROUP	   9
+#define _BGP_DMA_NUM_REC_FIFOS			   (_BGP_DMA_NUM_REC_FIFO_GROUPS * _BGP_DMA_NUM_REC_FIFOS_PER_GROUP)
+
+/*   size = end - start - _BGP_DMA_FIFO_SAFETY_MARGIN, in 16-byte units,  */
+/*   so that a full fifo can be distinguished from an empty one */
+#define _BGP_DMA_FIFO_SAFETY_MARGIN		   1
+#define _BGP_DMA_QUADS_PER_PACKET		   16
+
+#define _BGP_DMA_NUM_COUNTER_GROUPS		   4
+#define _BGP_DMA_NUM_COUNTERS_PER_GROUP		   64
+#define _BGP_DMA_NUM_COUNTERS			   (_BGP_DMA_NUM_COUNTER_GROUPS * _BGP_DMA_NUM_COUNTERS_PER_GROUP)
+
+/*   these are the lower 12 bits */
+/* #define  _BGP_DMA_GROUP_A(g)                    ((g)*0x1000) */
+
+/*  ------------------------------------------------ */
+/*     Macros defining absolute virtual address */
+/*  ------------------------------------------------ */
+#define _BGP_VA_DMA_GROUP_A(g)			   (_BGP_VA_DMA + ((g)*0x1000))
+
+/*  offset start of iDMA */
+#define _BGP_VA_iDMA_GROUP_START(g)		   (_BGP_VA_DMA_GROUP_A(g) + 0x0 )
+
+/*  repeated 32 times i=0 to 31 */
+#define _BGP_VA_iDMA_START(g,i)                    (_BGP_VA_DMA_GROUP_A(g) + ((i)*0x0010) )
+#define _BGP_VA_iDMA_END(g,i)                      (_BGP_VA_DMA_GROUP_A(g) + (0x0004+(i)*0x0010) )
+#define _BGP_VA_iDMA_HEAD(g,i)                     (_BGP_VA_DMA_GROUP_A(g) + (0x0008+(i)*0x0010) )
+#define _BGP_VA_iDMA_TAIL(g,i)                     (_BGP_VA_DMA_GROUP_A(g) + (0x000C+(i)*0x0010) )
+#define _BGP_VA_iDMA_NOT_EMPTY(g)                  (_BGP_VA_DMA_GROUP_A(g) + 0x0200)
+						    /* HOLE:	   ( _BGP_VA_DMA_GROUP_A(g)+0x0204)    */
+#define _BGP_VA_iDMA_AVAILABLE(g)                  (_BGP_VA_DMA_GROUP_A(g) + 0x0208)
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x020C) */
+#define _BGP_VA_iDMA_THRESHOLD_CROSSED(g)          (_BGP_VA_DMA_GROUP_A(g) + 0x0210)
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0214) */
+#define _BGP_VA_iDMA_CLEAR_THRESHOLD_CROSSED(g)    (_BGP_VA_DMA_GROUP_A(g) + 0x0218)
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x021C)  */
+#define _BGP_VA_iDMA_ACTIVATED(g)                  (_BGP_VA_DMA_GROUP_A(g) + 0x0220)
+#define _BGP_VA_iDMA_ACTIVATE(g)                   (_BGP_VA_DMA_GROUP_A(g) + 0x0224)
+#define _BGP_VA_iDMA_DEACTIVATE(g)                 (_BGP_VA_DMA_GROUP_A(g) + 0x0228)
+						    /* HOLE:	   ( _BGP_VA_DMA_GROUP_A(g)+0x022C) to ( _BGP_VA_DMA_GROUP_A(g)+0x02FF) */
+/*  repeated twice, i=0 to 1 */
+#define _BGP_VA_iDMA_COUNTER_ENABLED(g,i)          (_BGP_VA_DMA_GROUP_A(g) + (0x0300 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_ENABLE(g,i)           (_BGP_VA_DMA_GROUP_A(g) + (0x0308 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_DISABLE(g,i)          (_BGP_VA_DMA_GROUP_A(g) + (0x0310 +(i)*0x0004) )
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0318) to ( _BGP_VA_DMA_GROUP_A(g)+0x031C) */
+/*  repeated twice, i=0 to 1 */
+#define _BGP_VA_iDMA_COUNTER_HIT_ZERO(g,i)         (_BGP_VA_DMA_GROUP_A(g) + (0x0320 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_CLEAR_HIT_ZERO(g,i)	(_BGP_VA_DMA_GROUP_A(g) + (0x0328 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_GRP_STATUS(g)         (_BGP_VA_DMA_GROUP_A(g) + 0x0330)
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0334) to ( _BGP_VA_DMA_GROUP_A(g)+0x03FC) */
+/*  repeated 64 times  i=0 to 63 */
+#define _BGP_VA_iDMA_COUNTER(g,i)                  ( _BGP_VA_DMA_GROUP_A(g) + (0x0400 +(i)*0x0010) )
+#define _BGP_VA_iDMA_COUNTER_INCREMENT(g,i)        ( _BGP_VA_DMA_GROUP_A(g) + (0x0404 +(i)*0x0010) )
+#define _BGP_VA_iDMA_COUNTER_BASE(g,i)             ( _BGP_VA_DMA_GROUP_A(g) + (0x0408 +(i)*0x0010) )
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x040C) to ( _BGP_VA_DMA_GROUP_A(g)+0x07FC) */
+
+/*  offset start of rDMA  */
+#define  _BGP_VA_rDMA_GROUP_START(g)               ( _BGP_VA_DMA_GROUP_A(g) + 0x0800 )
+
+/*  repeated 8 times  i=0 to 7 */
+#define _BGP_VA_rDMA_START(g,i)                    ( _BGP_VA_DMA_GROUP_A(g) + (0x0800 + (i)*0x0010) )
+#define _BGP_VA_rDMA_END(g,i)                      ( _BGP_VA_DMA_GROUP_A(g) + (0x0804 + (i)*0x0010) )
+#define _BGP_VA_rDMA_HEAD(g,i)                     ( _BGP_VA_DMA_GROUP_A(g) + (0x0808 + (i)*0x0010) )
+#define _BGP_VA_rDMA_TAIL(g,i)                     ( _BGP_VA_DMA_GROUP_A(g) + (0x080C + (i)*0x0010) )
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0890) to ( _BGP_VA_DMA_GROUP_A(g)+0x08FC) */
+/*  repeated 16 times, 0 to 15 */
+/*   below addresses have storage backing them, but are not used by the DMA */
+#define _BGP_NUM_rDMA_UNUSED                       16
+#define _BGP_VA_rDMA_UNUSED(g,i)                   ( _BGP_VA_DMA_GROUP_A(g) + (0x0900 + (i)*0x0004) )
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0940) to ( _BGP_VA_DMA_GROUP_A(g)+0x09FC) */
+
+/*  repeated 2 times, i=0 to 1 */
+#define _BGP_VA_rDMA_NOT_EMPTY(g,i)                ( _BGP_VA_DMA_GROUP_A(g) + (0x0A00 + (i)*0x0004) )
+#define _BGP_VA_rDMA_AVAILABLE(g,i)                ( _BGP_VA_DMA_GROUP_A(g) + (0x0A08 + (i)*0x0004) )
+#define _BGP_VA_rDMA_THRESHOLD_CROSSED(g,i)        ( _BGP_VA_DMA_GROUP_A(g) + (0x0A10 + (i)*0x0004) )
+#define _BGP_VA_rDMA_CLEAR_THRESHOLD_CROSSED(g,i)  ( _BGP_VA_DMA_GROUP_A(g) + (0x0A18 + (i)*0x0004) )
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0A1C) to ( _BGP_VA_DMA_GROUP_A(g)+0x0AFC) */
+/*  repeat 2 times, i=0 to 1 */
+#define _BGP_VA_rDMA_COUNTER_ENABLED(g,i)          ( _BGP_VA_DMA_GROUP_A(g) + (0x0B00 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_ENABLE(g,i)           ( _BGP_VA_DMA_GROUP_A(g) + (0x0B08 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_DISABLE(g,i)          ( _BGP_VA_DMA_GROUP_A(g) + (0x0B10 + (i)*0x0004) )
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0B18) to ( _BGP_VA_DMA_GROUP_A(g)+0x0B1C) */
+/*  repeat 2 times, i=0 to 1 */
+#define _BGP_VA_rDMA_COUNTER_HIT_ZERO(g,i)         ( _BGP_VA_DMA_GROUP_A(g) + (0x0B20 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_CLEAR_HIT_ZERO(g,i)   ( _BGP_VA_DMA_GROUP_A(g) + (0x0B28 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_GRP_STATUS(g)         ( _BGP_VA_DMA_GROUP_A(g) + 0x0B30)
+						    /* HOLE:         ( _BGP_VA_DMA_GROUP_A(g)+0x0B34) to ( _BGP_VA_DMA_GROUP_A(g)+0x0BFC) */
+/*  repeat 64 times, i=0 to 63 */
+#define _BGP_VA_rDMA_COUNTER(g,i)                  ( _BGP_VA_DMA_GROUP_A(g) + (0x0C00 + (i)*0x0010) )
+#define _BGP_VA_rDMA_COUNTER_INCREMENT(g,i)        ( _BGP_VA_DMA_GROUP_A(g) + (0x0C04 + (i)*0x0010) )
+#define _BGP_VA_rDMA_COUNTER_BASE(g,i)             ( _BGP_VA_DMA_GROUP_A(g) + (0x0C08 + (i)*0x0010) )
+#define _BGP_VA_rDMA_COUNTER_MAX(g,i)              ( _BGP_VA_DMA_GROUP_A(g) + (0x0C0C + (i)*0x0010) )
+
+
+
+/*  --------------------------------------- */
+/*     Macros defining address offset  */
+/*  --------------------------------------- */
+
+
+/*   these are the lower 12 bits */
+#define  _BGP_DMA_GROUP_A_OFFSET(g)                    ((g)*0x1000)
+
+/*  ---------------------- */
+/*  offset start of iDMA */
+/*  ---------------------- */
+#define  _BGP_iDMA_GROUP_START_OFFSET(g)               ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0 )
+
+/*  repeated 32 times i=0 to 31 */
+#define _BGP_iDMA_START_OFFSET(g,i)                    ( _BGP_DMA_GROUP_A_OFFSET(g)+(i)*0x0010)
+#define _BGP_iDMA_END_OFFSET(g,i)                      ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0004+(i)*0x0010)
+#define _BGP_iDMA_HEAD_OFFSET(g,i)                     ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0008+(i)*0x0010)
+#define _BGP_iDMA_TAIL_OFFSET(g,i)                     ( _BGP_DMA_GROUP_A_OFFSET(g)+0x000C+(i)*0x0010)
+#define _BGP_iDMA_NOT_EMPTY_OFFSET(g)                  ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0200)
+							 /* HOLE     ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0204)    */
+#define _BGP_iDMA_AVAILABLE_OFFSET(g)                  ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0208)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x020C) */
+#define _BGP_iDMA_THRESHOLD_CROSSED_OFFSET(g)          ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0210)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0214) */
+#define _BGP_iDMA_CLEAR_THRESHOLD_CROSSED_OFFSET(g)    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0218)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x021C)  */
+#define _BGP_iDMA_ACTIVATED_OFFSET(g)                  ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0220)
+#define _BGP_iDMA_ACTIVATE_OFFSET(g)                   ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0224)
+#define _BGP_iDMA_DEACTIVATE_OFFSET(g)                 ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0228)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x022C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x02FF) */
+/*  repeated twice, i=0 to 1 */
+#define _BGP_iDMA_COUNTER_ENABLED_OFFSET(g,i)          ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0300 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_ENABLE_OFFSET(g,i)           ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0308 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_DISABLE_OFFSET(g,i)          ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0310 +(i)*0x0004)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0318) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x031C) */
+/*  repeated twice, i=0 to 1 */
+#define _BGP_iDMA_COUNTER_HIT_ZERO_OFFSET(g,i)         ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0320 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_CLEAR_HIT_ZERO_OFFSET(g,i)   ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0328 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_GRP_STATUS_OFFSET(g)         ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0330)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0334) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x03FC) */
+/*  repeated 64 times  i=0 to 63 */
+#define _BGP_iDMA_COUNTER_OFFSET(g,i)                  ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0400 +(i)*0x0010)
+#define _BGP_iDMA_COUNTER_INCREMENT_OFFSET(g,i)        ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0404 +(i)*0x0010)
+#define _BGP_iDMA_COUNTER_BASE_OFFSET(g,i)             ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0408 +(i)*0x0010)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x040C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x07FC) */
+
+
+/* ----------------------- */
+/*  offset start of rDMA  */
+/* ----------------------- */
+#define  _BGP_rDMA_GROUP_START_OFFSET(g)               ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0800 )
+
+/*  repeated 8 times  i=0 to 7 */
+#define _BGP_rDMA_START_OFFSET(g,i)                    ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0800 + (i)*0x0010)
+#define _BGP_rDMA_END_OFFSET(g,i)                      ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0804 + (i)*0x0010)
+#define _BGP_rDMA_HEAD_OFFSET(g,i)                     ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0808 + (i)*0x0010)
+#define _BGP_rDMA_TAIL_OFFSET(g,i)                     ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x080C + (i)*0x0010)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0890) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x09FC) */
+/*  repeated 2 times, i=0 to 1 */
+#define _BGP_rDMA_NOT_EMPTY_OFFSET(g,i)                ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A00 + (i)*0x0004)
+#define _BGP_rDMA_AVAILABLE_OFFSET(g,i)                ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A08 + (i)*0x0004)
+#define _BGP_rDMA_THRESHOLD_CROSSED_OFFSET(g,i)        ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A10 + (i)*0x0004)
+#define _BGP_rDMA_CLEAR_THRESHOLD_CROSSED_OFFSET(g,i)  ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A18 + (i)*0x0004)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A1C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0AFC) */
+/*  repeat 2 times, i=0 to 1 */
+#define _BGP_rDMA_COUNTER_ENABLED_OFFSET(g,i)          ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B00 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_ENABLE_OFFSET(g,i)           ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B08 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_DISABLE_OFFSET(g,i)          ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B10 + (i)*0x0004)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B18) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B1C) */
+/*  repeat 2 times, i=0 to 1 */
+#define _BGP_rDMA_COUNTER_HIT_ZERO_OFFSET(g,i)         ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B20 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_CLEAR_HIT_ZERO_OFFSET(g,i)   ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B28 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_GRP_STATUS_OFFSET(g)         ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B30)
+							 /* HOLE:    ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B34) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0BFC) */
+/*  repeat 64 times, i=0 to 63 */
+#define _BGP_rDMA_COUNTER_OFFSET(g,i)                  ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C00 + (i)*0x0010)
+#define _BGP_rDMA_COUNTER_INCREMENT_OFFSET(g,i)        ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C04 + (i)*0x0010)
+#define _BGP_rDMA_COUNTER_BASE_OFFSET(g,i)             ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C08 + (i)*0x0010)
+#define _BGP_rDMA_COUNTER_MAX_OFFSET(g,i)              ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C0C + (i)*0x0010)
+
+#endif
+
+
+
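To show how the group and fifo macros compose, a standalone sketch follows;
_BGP_VA_DMA (the virtual address the DMA unit is mapped at) is not defined in this
header, so the base value below is an assumption made purely for illustration.

    #include <stdio.h>

    #define _BGP_VA_DMA             0xFFFD0000u   /* assumed base, illustration only */
    #define _BGP_VA_DMA_GROUP_A(g)  (_BGP_VA_DMA + ((g) * 0x1000))
    #define _BGP_VA_iDMA_HEAD(g,i)  (_BGP_VA_DMA_GROUP_A(g) + (0x0008 + (i) * 0x0010))
    #define _BGP_VA_iDMA_TAIL(g,i)  (_BGP_VA_DMA_GROUP_A(g) + (0x000C + (i) * 0x0010))

    int main(void)
    {
        /* head/tail registers of injection fifo 3 in group 1 */
        printf("iDMA head(1,3) = 0x%08X\n", _BGP_VA_iDMA_HEAD(1, 3));
        printf("iDMA tail(1,3) = 0x%08X\n", _BGP_VA_iDMA_TAIL(1, 3));
        return 0;
    }
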
diff --git a/arch/powerpc/include/bpcore/bgp_types.h b/arch/powerpc/include/bpcore/bgp_types.h
new file mode 100644
index 0000000..d940af7
--- /dev/null
+++ b/arch/powerpc/include/bpcore/bgp_types.h
@@ -0,0 +1,72 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file bpcore/bgp_types.h
+ */
+
+#ifndef _BGP_TYPES_H_   /*  Prevent multiple inclusion. */
+#define _BGP_TYPES_H_
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+
+#if !defined(__ASSEMBLY__) && !defined(__BGP_HIDE_STANDARD_TYPES__)
+
+#include <common/alignment.h>
+
+#ifdef _AIX
+#include <inttypes.h>
+#elif ! defined(__LINUX_KERNEL__)
+#include <stdint.h>
+#include <sys/types.h>
+#else
+#include <linux/types.h>
+#endif
+
+
+typedef  int8_t  _bgp_i8_t;
+typedef uint8_t  _bgp_u8_t;
+typedef  int16_t _bgp_i16_t;
+typedef uint16_t _bgp_u16_t;
+typedef  int32_t _bgp_i32_t;
+typedef uint32_t _bgp_u32_t;
+typedef  int64_t _bgp_i64_t;
+typedef uint64_t _bgp_u64_t;
+
+typedef union T_BGP_QuadWord
+               {
+               uint8_t    ub[16];
+               uint16_t   us[ 8];
+               uint32_t   ul[ 4];
+               uint64_t  ull[ 2];
+               float       f[ 4];
+               double      d[ 2];
+               }
+               ALIGN_QUADWORD _bgp_QuadWord_t;
+
+typedef _bgp_QuadWord_t _QuadWord_t;
+
+#endif  /*  !__ASSEMBLY__ && !__BGP_HIDE_STANDARD_TYPES__ */
+
+__END_DECLS
+
+#endif  /*  Add nothing below this line. */
+
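A trimmed, self-contained version of the quadword union above (ALIGN_QUADWORD
normally comes from common/alignment.h and is restated here so the sketch compiles
on its own): every view shares one 16-byte, quadword-aligned cell.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define ALIGN_QUADWORD __attribute__((aligned(16)))

    typedef union {
        uint8_t   ub[16];
        uint32_t  ul[4];
        uint64_t ull[2];
        double     d[2];
    } ALIGN_QUADWORD quadword_t;     /* name shortened for the sketch */

    int main(void)
    {
        quadword_t q;
        memset(&q, 0, sizeof q);
        q.ul[0] = 0xDEADBEEF;
        assert(sizeof q == 16 && _Alignof(quadword_t) == 16);
        assert(q.ub[0] == 0xDE || q.ub[0] == 0xEF);  /* depends on endianness */
        return 0;
    }
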
diff --git a/arch/powerpc/include/bpcore/ic_memmap.h b/arch/powerpc/include/bpcore/ic_memmap.h
new file mode 100644
index 0000000..5ff376f
--- /dev/null
+++ b/arch/powerpc/include/bpcore/ic_memmap.h
@@ -0,0 +1,803 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file bpcore/ic_memmap.h
+ */
+
+
+
+/**
+ * BGP Interrupt Controller Register mapping and bit definition.
+ *
+ * Note: preliminary register assignment.
+ */
+
+
+/* ************************************************************************* */
+/*      Architected BGP Interrupt Controller Registers                       */
+/* ************************************************************************* */
+/* Authors: Jose R. Brunheroto, Martin Ohmacht                               */
+/* Reflects the contents of the document http://w3vlsi.watson.ibm.com//      */
+/*                                                                           */
+/* ************************************************************************* */
+
+
+
+/*
+
+     BIC CRIT hierarchy register
+     +------------------------------------+
+     |0 1 2 3 4 5 6 7 8 9          ... 31 |
+     +------------------------------------+
+      | | | | | | | | | |
+      | | | | | | |                                    BIC GROUP 6
+      | | | | | | |                                  +-----+
+      | | | | | | +--------------------------------  |0-31 |
+      | | | | | |                                    +-----+
+      | | | | | |                                      BIC GROUP 5
+      | | | | | |                                    +-----+
+      | | | | | +----------------------------------  |0-31 |
+      | | | | |                                      +-----+
+      | | | | |                                        BIC GROUP 4
+      | | | | |                                      +-----+
+      | | | | +------------------------------------  |0-31 |
+      | | | |                                        +-----+
+      | | | |                                          BIC GROUP 3
+      | | | |                                        +-----+
+      | | | +--------------------------------------  |0-31 |
+      | | |                                          +-----+
+      | | |                                            BIC GROUP 2
+      | | |                                          +-----+
+      | | +----------------------------------------  |0-31 |
+      | |                                            +-----+
+      | |                                              BIC GROUP 1
+      | |                                            +-----+
+      | +------------------------------------------  |0-31 |
+      |                                              +-----+
+      |                                                BIC GROUP 0
+      |                                              +-----+
+      +--------------------------------------------  |0-31 |
+                                                     +-----+
+
+     The BIC NCRIT and MCCU hierarchy registers have the same layout as
+     the CRIT hierarchy register shown above.
+*/
+
+
+#ifndef _IC_MEMMAP_H_                     /*  Prevent multiple inclusion */
+#define _IC_MEMMAP_H_
+
+
+
+#define _BGP_IC_NUMBER_OF_GROUPS     (10)        /*  number of groups (0..9 inclusive) */
+
+
+
+#define _BGP_IC_TARGET_DISABLED      0x00        /*  disabled */
+#define _BGP_IC_TARGET_NCRIT_BCAST   0x01        /*  non-critical broadcast */
+#define _BGP_IC_TARGET_CRIT_BCAST    0x02        /*  critical broadcast */
+#define _BGP_IC_TARGET_MCHK_BCAST    0x03        /*  machine check */
+
+#define _BGP_IC_TARGET_NCRIT_CORE0   0x04        /*  non-critical core 0 */
+#define _BGP_IC_TARGET_NCRIT_CORE1   0x05        /*  non-critical core 1 */
+#define _BGP_IC_TARGET_NCRIT_CORE2   0x06        /*  non-critical core 2 */
+#define _BGP_IC_TARGET_NCRIT_CORE3   0x07        /*  non-critical core 3 */
+
+#define _BGP_IC_TARGET_CRIT_CORE0    0x08        /*  critical core 0 */
+#define _BGP_IC_TARGET_CRIT_CORE1    0x09        /*  critical core 1 */
+#define _BGP_IC_TARGET_CRIT_CORE2    0x0A        /*  critical core 2 */
+#define _BGP_IC_TARGET_CRIT_CORE3    0x0B        /*  critical core 3 */
+
+#define _BGP_IC_TARGET_MCHK_CORE0    0x0C        /*  machine check core 0 */
+#define _BGP_IC_TARGET_MCHK_CORE1    0x0D        /*  machine check core 1 */
+#define _BGP_IC_TARGET_MCHK_CORE2    0x0E        /*  machine check core 2 */
+#define _BGP_IC_TARGET_MCHK_CORE3    0x0F        /*  machine check core 3 */
+
+
+typedef struct _BGP_IC_Group_t
+{
+    volatile unsigned int status;                        /*  status (read and write) */
+    volatile unsigned int rd_clr_status;                 /*  status (read and clear) */
+    volatile unsigned int status_clr;                    /*  status (write and clear) */
+    volatile unsigned int status_set;                    /*  status (write and set) */
+
+    volatile unsigned int target_irq0_7;                 /*  target selector (IRQ 0:7) */
+    volatile unsigned int target_irq8_15;                /*  target selector (IRQ 8:15) */
+    volatile unsigned int target_irq16_23;               /*  target selector (IRQ 16:23) */
+    volatile unsigned int target_irq24_31;               /*  target selector (IRQ 24:31) */
+
+    union {
+          volatile unsigned int ncrit_masked_irq[ 4 ];        /*  array for easier access */
+          struct {
+                 volatile unsigned int ncrit_0_masked_irq;    /*  non-critical core 0 masked irq (RO) */
+                 volatile unsigned int ncrit_1_masked_irq;    /*  non-critical core 1 masked irq */
+                 volatile unsigned int ncrit_2_masked_irq;    /*  non-critical core 2 masked irq */
+                 volatile unsigned int ncrit_3_masked_irq;    /*  non-critical core 3 masked irq */
+                 };
+          };
+
+    union {
+          volatile unsigned int crit_masked_irq[ 4 ];         /*  array for easier access */
+          struct {
+                 volatile unsigned int crit_0_masked_irq;     /*  critical core 0 masked irq (RO) */
+                 volatile unsigned int crit_1_masked_irq;     /*  critical core 1 masked irq */
+                 volatile unsigned int crit_2_masked_irq;     /*  critical core 2 masked irq */
+                 volatile unsigned int crit_3_masked_irq;     /*  critical core 3 masked irq */
+                 };
+           };
+
+    union {
+          volatile unsigned int mchk_masked_irq[ 4 ];         /*  array for easier access */
+          struct {
+                 volatile unsigned int mchk_0_masked_irq;     /*  machine check core 0 masked irq (RO) */
+                 volatile unsigned int mchk_1_masked_irq;     /*  machine check core 1 masked irq */
+                 volatile unsigned int mchk_2_masked_irq;     /*  machine check core 2 masked irq */
+                 volatile unsigned int mchk_3_masked_irq;     /*  machine check core 3 masked irq */
+                 };
+           };
+
+    volatile unsigned int ti_mchk_mask;                       /*  (RW) TestInt MachineCheck Mask */
+    volatile unsigned int upc_time_stamp_mask;                /*  (RW) UPC Time Stamp Mask */
+    volatile unsigned int clock_sync_stop_mask;               /*  (RW) Clock Sync-Stop Mask */
+
+    volatile unsigned int ti_mchk_wof;                        /*  (RW) TestInt Mchk Who's on First */
+    volatile unsigned int upc_time_stamp_wof;                 /*  (RW) UPC Time Stamp Who's on First */
+    volatile unsigned int clock_sync_stop_wof;                /*  (RW) Clock Sync-Stop Who's on First */
+
+    volatile unsigned int ti_mchk;                            /*  (RO) TestInt Mchk */
+    volatile unsigned int upc_time_stamp;                     /*  (RO) UPC Time Stamp */
+    volatile unsigned int clock_sync_stop;                    /*  (RO) Clock Sync-Stop */
+
+
+} _BGP_IC_Group_t;
+
+
+
+#define _BGP_IC_MEM_GROUP_SIZE      (0x80)         /*  group size in bytes */
+
+/*  macros for indexed access to groups */
+#define _BGP_IC_MEM_GROUP_OFFSET(_grp)          ( _BGP_IC_MEM_GROUP0_OFFSET + (_grp)*_BGP_IC_MEM_GROUP_SIZE )
+
+
+/*  Defines BGP Interrupt Controller Register Offset (memory mapped access) */
+#define _BGP_IC_MEM_GROUP0_OFFSET  (0x0000)        /*  Group 0 offset */
+#define _BGP_IC_MEM_GROUP1_OFFSET  (0x0080)        /*  Group 1 offset */
+#define _BGP_IC_MEM_GROUP2_OFFSET  (0x0100)        /*  Group 2 offset */
+#define _BGP_IC_MEM_GROUP3_OFFSET  (0x0180)        /*  Group 3 offset */
+#define _BGP_IC_MEM_GROUP4_OFFSET  (0x0200)        /*  Group 4 offset */
+#define _BGP_IC_MEM_GROUP5_OFFSET  (0x0280)        /*  Group 5 offset */
+#define _BGP_IC_MEM_GROUP6_OFFSET  (0x0300)        /*  Group 6 offset */
+#define _BGP_IC_MEM_GROUP7_OFFSET  (0x0380)        /*  Group 7 offset */
+#define _BGP_IC_MEM_GROUP8_OFFSET  (0x0400)        /*  Group 8 offset */
+#define _BGP_IC_MEM_GROUP9_OFFSET  (0x0480)        /*  Group 9 offset */
+
+/*  reserved group offset */
+#define _BGP_IC_MEM_GROUP10_OFFSET (0x0500)        /*  Group 10 offset */
+#define _BGP_IC_MEM_GROUP11_OFFSET (0x0580)        /*  Group 11 offset */
+#define _BGP_IC_MEM_GROUP12_OFFSET (0x0600)        /*  Group 12 offset */
+#define _BGP_IC_MEM_GROUP13_OFFSET (0x0680)        /*  Group 13 offset */
+#define _BGP_IC_MEM_GROUP14_OFFSET (0x0700)        /*  Group 14 offset */
+
+
+
+
+/*  Hierarchy Registers offsets */
+#define _BGP_IC_MEM_HNCR_OFFSET   (0x0780)         /*  Hierarchy Non-Critical Register */
+#define _BGP_IC_MEM_HNCR0_OFFSET  (0x0780)         /*  Hierarchy Non-Critical Register (core 0) */
+#define _BGP_IC_MEM_HNCR1_OFFSET  (0x0784)         /*  Hierarchy Non-Critical Register (core 1) */
+#define _BGP_IC_MEM_HNCR2_OFFSET  (0x0788)         /*  Hierarchy Non-Critical Register (core 2) */
+#define _BGP_IC_MEM_HNCR3_OFFSET  (0x078C)         /*  Hierarchy Non-Critical Register (core 3) */
+
+
+#define _BGP_IC_MEM_HCR_OFFSET    (0x0790)         /*  Hierarchy Critical Register */
+#define _BGP_IC_MEM_HCR0_OFFSET   (0x0790)         /*  Hierarchy Critical Register (core 0) */
+#define _BGP_IC_MEM_HCR1_OFFSET   (0x0794)         /*  Hierarchy Critical Register (core 1) */
+#define _BGP_IC_MEM_HCR2_OFFSET   (0x0798)         /*  Hierarchy Critical Register (core 2) */
+#define _BGP_IC_MEM_HCR3_OFFSET   (0x079C)         /*  Hierarchy Critical Register (core 3) */
+
+
+#define _BGP_IC_MEM_HMCHKR_OFFSET  (0x07A0)        /*  Hierarchy Machine Check Register */
+#define _BGP_IC_MEM_HMCHKR0_OFFSET (0x07A0)        /*  Hierarchy Machine Check Register (core 0) */
+#define _BGP_IC_MEM_HMCHKR1_OFFSET (0x07A4)        /*  Hierarchy Machine Check Register (core 1) */
+#define _BGP_IC_MEM_HMCHKR2_OFFSET (0x07A8)        /*  Hierarchy Machine Check Register (core 2) */
+#define _BGP_IC_MEM_HMCHKR3_OFFSET (0x07AC)        /*  Hierarchy Machine Check Register (core 3) */
+
+
+#define _BGP_IC_MEM_HR_TI_MCHECK_OFFSET             (0x07B0)      /*  hierarchy register ti_m_check (RO) */
+#define _BGP_IC_MEM_HR_UPC_TIMESTAMP_OFFSET         (0x07B4)      /*  hierarchy register upc_timestamp_event (RO) */
+#define _BGP_IC_MEM_HR_CI_SYNC_STOP_OFFSET          (0x07B8)      /*  hierarchy register ci_sync_stop (RO) */
+
+
+#define _BGP_IC_MEM_ERR_RW_OFFSET                   (0x07C0)      /*  IC Error Register (RW) */
+#define _BGP_IC_MEM_ERR_RDCLR_OFFSET                (0x07C4)      /*  IC Error Register (RO) (Read Clear all bits) */
+#define _BGP_IC_MEM_ERR_ADDR_OFFSET                 (0x07C8)      /*  IC Error Address Register (RO) */
+#define _BGP_IC_MEM_ERR_DATA_OFFSET                 (0x07CC)      /*  IC Error Data Register (RO) */
+
+
+#define _BGP_IC_MEM_HR_TI_MCHECK_WOF_OFFSET         (0x07D0)    /*  hierarchy register ti_m_check_WOF (RW) */
+#define _BGP_IC_MEM_HR_UPC_TIMESTAMP_WOF_OFFSET     (0x07D4)    /*  hierarchy register upc_timestamp_event_WOF (RW) */
+#define _BGP_IC_MEM_HR_CI_SYNC_STOP_WOF_OFFSET      (0x07D8)    /*  hierarchy register ci_sync_stop_WOF (RW) */
+
+
+
+/* ************************************************************************* */
+/*              definitions for each interrupt generating device             */
+/* ************************************************************************* */
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 00:31                      */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_HIER_POS      0
+#define _BGP_IC_C2C_UNIT_NUM      0
+#define _BGP_IC_C2C_UNIT_POS      0
+#define _BGP_IC_C2C_UNIT_SIZE     32
+#define _BGP_IC_C2C_UNIT_MASK     0xffffffff
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 00:07  (Core 0)            */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C0_HIER_POS   0
+#define _BGP_IC_C2C_C0_UNIT_NUM   0
+#define _BGP_IC_C2C_C0_UNIT_POS   0
+#define _BGP_IC_C2C_C0_UNIT_SIZE  8
+#define _BGP_IC_C2C_C0_UNIT_MASK  0xff000000
+
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 08:15  (Core 1)            */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C1_HIER_POS   0
+#define _BGP_IC_C2C_C1_UNIT_NUM   0
+#define _BGP_IC_C2C_C1_UNIT_POS   8
+#define _BGP_IC_C2C_C1_UNIT_SIZE  8
+#define _BGP_IC_C2C_C1_UNIT_MASK  0x00ff0000
+
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 16:23  (Core 2)            */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C2_HIER_POS   0
+#define _BGP_IC_C2C_C2_UNIT_NUM   0
+#define _BGP_IC_C2C_C2_UNIT_POS   16
+#define _BGP_IC_C2C_C2_UNIT_SIZE  8
+#define _BGP_IC_C2C_C2_UNIT_MASK  0x0000ff00
+
+
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 24:31  (Core 3)            */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C3_HIER_POS   0
+#define _BGP_IC_C2C_C3_UNIT_NUM   0
+#define _BGP_IC_C2C_C3_UNIT_POS   24
+#define _BGP_IC_C2C_C3_UNIT_SIZE  8
+#define _BGP_IC_C2C_C3_UNIT_MASK  0x000000ff
+
+
+
+
+
+/* ************************************************************************* */
+/* DMA Fatal Interrupt Request: Group 1 bits 00:31                           */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_FT_HIER_POS   1
+#define _BGP_IC_DMA_FT_UNIT_NUM   1
+#define _BGP_IC_DMA_FT_UNIT_POS   0
+#define _BGP_IC_DMA_FT_UNIT_SIZE  32
+#define _BGP_IC_DMA_FT_UNIT_MASK  0xffffffff
+
+/* ************************************************************************* */
+/* DMA Non-Fatal Interrupt Request: Group 2 bits 00:31                       */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_NFT_G2_HIER_POS   2
+#define _BGP_IC_DMA_NFT_G2_UNIT_NUM   2
+#define _BGP_IC_DMA_NFT_G2_UNIT_POS   0
+#define _BGP_IC_DMA_NFT_G2_UNIT_SIZE  32
+#define _BGP_IC_DMA_NFT_G2_UNIT_MASK  0xffffffff
+
+/* ************************************************************************* */
+/* DMA Non-Fatal Interrupt Request: Group 3 bits 00:31                       */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_NFT_G3_HIER_POS   3
+#define _BGP_IC_DMA_NFT_G3_UNIT_NUM   3
+#define _BGP_IC_DMA_NFT_G3_UNIT_POS   0
+#define _BGP_IC_DMA_NFT_G3_UNIT_SIZE  32
+#define _BGP_IC_DMA_NFT_G3_UNIT_MASK  0xffffffff
+
+
+/* ************************************************************************* */
+/* DP0 PU0 Interrupt Request:  Group 4  bits 00:02                           */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP0_PU0_HIER_POS      4
+#define _BGP_IC_DP0_PU0_UNIT_NUM      4
+#define _BGP_IC_DP0_PU0_UNIT_POS      0
+#define _BGP_IC_DP0_PU0_UNIT_SIZE     3
+#define _BGP_IC_DP0_PU0_UNIT_MASK     0xE0000000
+
+/* ************************************************************************* */
+/* DP0 PU1 Interrupt Request:  Group 4  bits 03:05                           */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP0_PU1_HIER_POS      4
+#define _BGP_IC_DP0_PU1_UNIT_NUM      4
+#define _BGP_IC_DP0_PU1_UNIT_POS      3
+#define _BGP_IC_DP0_PU1_UNIT_SIZE     3
+#define _BGP_IC_DP0_PU1_UNIT_MASK     0x1C000000
+
+/* ************************************************************************* */
+/* DP1 PU0 Interrupt Request:  Group 4  bits 06:08                           */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP1_PU0_HIER_POS      4
+#define _BGP_IC_DP1_PU0_UNIT_NUM      4
+#define _BGP_IC_DP1_PU0_UNIT_POS      6
+#define _BGP_IC_DP1_PU0_UNIT_SIZE     3
+#define _BGP_IC_DP1_PU0_UNIT_MASK     0x03800000
+
+/* ************************************************************************* */
+/* DP1 PU1 Interrupt Request:  Group 4  bits 09:11                           */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP1_PU1_HIER_POS      4
+#define _BGP_IC_DP1_PU1_UNIT_NUM      4
+#define _BGP_IC_DP1_PU1_UNIT_POS      9
+#define _BGP_IC_DP1_PU1_UNIT_SIZE     3
+#define _BGP_IC_DP1_PU1_UNIT_MASK     0x00700000
+
+
+/* ************************************************************************* */
+/* Global Interrupt:           Group 4  bits 12:21                           */
+/* ************************************************************************* */
+
+#define _BGP_IC_GINT_HIER_POS         4
+#define _BGP_IC_GINT_UNIT_NUM         4
+#define _BGP_IC_GINT_UNIT_POS         12
+#define _BGP_IC_GINT_UNIT_SIZE        10
+#define _BGP_IC_GINT_UNIT_MASK        0x000FFC00
+
+
+/* ************************************************************************* */
+/* SRAM Interrupt Request:      Group 4  bits 22:24                          */
+/* ************************************************************************* */
+
+#define _BGP_IC_SRAM_HIER_POS         4
+#define _BGP_IC_SRAM_UNIT_NUM         4
+#define _BGP_IC_SRAM_UNIT_POS         22
+#define _BGP_IC_SRAM_UNIT_SIZE        3
+#define _BGP_IC_SRAM_UNIT_MASK        0x00000380
+
+
+/* ************************************************************************* */
+/* TI Global Attention Interrupt request:     Group 4 bit 25                 */
+/* ************************************************************************* */
+
+#define _BGP_IC_GLOB_ATT_HIER_POS     4
+#define _BGP_IC_GLOB_ATT_UNIT_NUM     4
+#define _BGP_IC_GLOB_ATT_UNIT_POS     25
+#define _BGP_IC_GLOB_ATT_UNIT_SIZE    1
+#define _BGP_IC_GLOB_ATT_UNIT_MASK    0x00000040
+
+
+/* ************************************************************************* */
+/* TI LB Scan Attention Interrupt request:    Group 4 bit 26                 */
+/* ************************************************************************* */
+
+#define _BGP_IC_LB_SCATTN_HIER_POS    4
+#define _BGP_IC_LB_SCATTN_UNIT_NUM    4
+#define _BGP_IC_LB_SCATTN_UNIT_POS    26
+#define _BGP_IC_LB_SCATTN_UNIT_SIZE   1
+#define _BGP_IC_LB_SCATTN_UNIT_MASK   0x00000020
+
+
+/* ************************************************************************* */
+/* TI AB Scan Attention Interrupt request:    Group 4 bit 27                 */
+/* ************************************************************************* */
+
+#define _BGP_IC_AB_SCATTN_HIER_POS    4
+#define _BGP_IC_AB_SCATTN_UNIT_NUM    4
+#define _BGP_IC_AB_SCATTN_UNIT_POS    27
+#define _BGP_IC_AB_SCATTN_UNIT_SIZE   1
+#define _BGP_IC_AB_SCATTN_UNIT_MASK   0x00000010
+
+
+/* ************************************************************************* */
+/* TI HB Scan Attention Interrupt request:    Group 4 bit 28                 */
+/* ************************************************************************* */
+
+#define _BGP_IC_HB_SCATTN_HIER_POS    4
+#define _BGP_IC_HB_SCATTN_UNIT_NUM    4
+#define _BGP_IC_HB_SCATTN_UNIT_POS    28
+#define _BGP_IC_HB_SCATTN_UNIT_SIZE   1
+#define _BGP_IC_HB_SCATTN_UNIT_MASK   0x00000008
+
+
+/* ************************************************************************* */
+/* TI DCR Read Timeout Interrupt request:    Group 4 bit 29                  */
+/* ************************************************************************* */
+
+#define _BGP_IC_DCR_RD_TO_HIER_POS    4
+#define _BGP_IC_DCR_RD_TO_UNIT_NUM    4
+#define _BGP_IC_DCR_RD_TO_UNIT_POS    29
+#define _BGP_IC_DCR_RD_TO_UNIT_SIZE   1
+#define _BGP_IC_DCR_RD_TO_UNIT_MASK   0x00000004
+
+
+/* ************************************************************************* */
+/* TI DCR Write Timeout Interrupt request:    Group 4 bit 30                 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DCR_WR_TO_HIER_POS    4
+#define _BGP_IC_DCR_WR_TO_UNIT_NUM    4
+#define _BGP_IC_DCR_WR_TO_UNIT_POS    30
+#define _BGP_IC_DCR_WR_TO_UNIT_SIZE   1
+#define _BGP_IC_DCR_WR_TO_UNIT_MASK   0x00000002
+
+
+
+/* ************************************************************************* */
+/* Collective Non-Critical interrupt:       Group 5 bits 00:19               */
+/* ************************************************************************* */
+
+#define _BGP_IC_COLNCRIT_HIER_POS     5
+#define _BGP_IC_COLNCRIT_UNIT_NUM     5
+#define _BGP_IC_COLNCRIT_UNIT_POS     0
+#define _BGP_IC_COLNCRIT_UNIT_SIZE    20
+#define _BGP_IC_COLNCRIT_UNIT_MASK    0xFFFFF000
+
+/* ************************************************************************* */
+/* Collective Critical interrupt:           Group 5 bits 20:23               */
+/* ************************************************************************* */
+
+#define _BGP_IC_COLCRIT_HIER_POS      5
+#define _BGP_IC_COLCRIT_UNIT_NUM      5
+#define _BGP_IC_COLCRIT_UNIT_POS      20
+#define _BGP_IC_COLCRIT_UNIT_SIZE     4
+#define _BGP_IC_COLCRIT_UNIT_MASK     0x00000f00
+
+
+/* ************************************************************************* */
+/* SerDes machine check:                    Group 6 bits 0:23                */
+/* ************************************************************************* */
+
+#define _BGP_IC_SERDES_MCK_HIER_POS   6
+#define _BGP_IC_SERDES_MCK_UNIT_NUM   6
+#define _BGP_IC_SERDES_MCK_UNIT_POS   0
+#define _BGP_IC_SERDES_MCK_UNIT_SIZE  24
+#define _BGP_IC_SERDES_MCK_UNIT_MASK  0xFFFFFF00
+
+
+/* ************************************************************************* */
+/* UPC interrupt request:                   Group 6 bit 24                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_UPC_HIER_POS          6
+#define _BGP_IC_UPC_UNIT_NUM          6
+#define _BGP_IC_UPC_UNIT_POS          24
+#define _BGP_IC_UPC_UNIT_SIZE         1
+#define _BGP_IC_UPC_UNIT_MASK         0x00000080
+
+
+/* ************************************************************************* */
+/* UPC Error interrupt request:             Group 6 bit 25                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_UPCERR_HIER_POS       6
+#define _BGP_IC_UPCERR_UNIT_NUM       6
+#define _BGP_IC_UPCERR_UNIT_POS       25
+#define _BGP_IC_UPCERR_UNIT_SIZE      1
+#define _BGP_IC_UPCERR_UNIT_MASK      0x00000040
+
+/* ************************************************************************* */
+/* DCR Bus interrupt request:               Group 6 bit 26                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DCRBUS_HIER_POS       6
+#define _BGP_IC_DCRBUS_UNIT_NUM       6
+#define _BGP_IC_DCRBUS_UNIT_POS       26
+#define _BGP_IC_DCRBUS_UNIT_SIZE      1
+#define _BGP_IC_DCRBUS_UNIT_MASK      0x00000020
+
+/* ************************************************************************* */
+/* BIC machine check:                      Group 6 bit 27                    */
+/* ************************************************************************* */
+
+#define _BGP_IC_BIC_MCHK_HIER_POS     6
+#define _BGP_IC_BIC_MCHK_UNIT_NUM     6
+#define _BGP_IC_BIC_MCHK_UNIT_POS     27
+#define _BGP_IC_BIC_MCHK_UNIT_SIZE    1
+#define _BGP_IC_BIC_MCHK_UNIT_MASK    0x00000010
+
+/* ************************************************************************* */
+/* BIC interrupt request:                   Group 6 bit 28                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_BIC_IRQ_HIER_POS      6
+#define _BGP_IC_BIC_IRQ_UNIT_NUM      6
+#define _BGP_IC_BIC_IRQ_UNIT_POS      28
+#define _BGP_IC_BIC_IRQ_UNIT_SIZE     1
+#define _BGP_IC_BIC_IRQ_UNIT_MASK     0x00000008
+
+/* ************************************************************************* */
+/* DEVBUS interrupt request:                Group 6 bit 29                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DEVBUS_IRQ_HIER_POS   6
+#define _BGP_IC_DEVBUS_IRQ_UNIT_NUM   6
+#define _BGP_IC_DEVBUS_IRQ_UNIT_POS   29
+#define _BGP_IC_DEVBUS_IRQ_UNIT_SIZE  1
+#define _BGP_IC_DEVBUS_IRQ_UNIT_MASK  0x00000004
+
+/* ************************************************************************* */
+/* Clockstop Stopped interrupt request:     Group 6 bit 30                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_CLK_STOP_HIER_POS     6
+#define _BGP_IC_CLK_STOP_UNIT_NUM     6
+#define _BGP_IC_CLK_STOP_UNIT_POS     30
+#define _BGP_IC_CLK_STOP_UNIT_SIZE    1
+#define _BGP_IC_CLK_STOP_UNIT_MASK    0x00000002
+
+/* ************************************************************************* */
+/* Environment Monitor interrupt request:   Group 6 bit 31                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_ENV_MON_HIER_POS      6
+#define _BGP_IC_ENV_MON_UNIT_NUM      6
+#define _BGP_IC_ENV_MON_UNIT_POS      31
+#define _BGP_IC_ENV_MON_UNIT_SIZE     1
+#define _BGP_IC_ENV_MON_UNIT_MASK     0x00000001
+
+
+/* ************************************************************************* */
+/* L30 machine check:                       Group 7 bits 0:10                */
+/* ************************************************************************* */
+
+#define _BGP_IC_L30_MCHK_HIER_POS     7
+#define _BGP_IC_L30_MCHK_UNIT_NUM     7
+#define _BGP_IC_L30_MCHK_UNIT_POS     0
+#define _BGP_IC_L30_MCHK_UNIT_SIZE    11
+#define _BGP_IC_L30_MCHK_UNIT_MASK    0xFFE00000
+
+/* ************************************************************************* */
+/* L30 interrupt request:                   Group 7 bit 11                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_L30_IRQ_HIER_POS     7
+#define _BGP_IC_L30_IRQ_UNIT_NUM     7
+#define _BGP_IC_L30_IRQ_UNIT_POS     11
+#define _BGP_IC_L30_IRQ_UNIT_SIZE    1
+#define _BGP_IC_L30_IRQ_UNIT_MASK    0x00100000
+
+/* ************************************************************************* */
+/* L31 machine check:                       Group 7 bits 12:22               */
+/* ************************************************************************* */
+
+#define _BGP_IC_L31_MCHK_HIER_POS     7
+#define _BGP_IC_L31_MCHK_UNIT_NUM     7
+#define _BGP_IC_L31_MCHK_UNIT_POS     12
+#define _BGP_IC_L31_MCHK_UNIT_SIZE    11
+#define _BGP_IC_L31_MCHK_UNIT_MASK    0x000FFE00
+
+/* ************************************************************************* */
+/* L31 interrupt request:                   Group 7 bit 23                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_L31_IRQ_HIER_POS      7
+#define _BGP_IC_L31_IRQ_UNIT_NUM      7
+#define _BGP_IC_L31_IRQ_UNIT_POS      23
+#define _BGP_IC_L31_IRQ_UNIT_SIZE     1
+#define _BGP_IC_L31_IRQ_UNIT_MASK     0x00000100
+
+
+/* ************************************************************************* */
+/* DDR 0 Recoverable error:                 Group 7 bit 24                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR0_RERR_HIER_POS    7
+#define _BGP_IC_DDR0_RERR_UNIT_NUM    7
+#define _BGP_IC_DDR0_RERR_UNIT_POS    24
+#define _BGP_IC_DDR0_RERR_UNIT_SIZE   1
+#define _BGP_IC_DDR0_RERR_UNIT_MASK   0x00000080
+
+/* ************************************************************************* */
+/* DDR 0 Special Attention:                 Group 7 bit 25                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR0_SATT_HIER_POS    7
+#define _BGP_IC_DDR0_SATT_UNIT_NUM    7
+#define _BGP_IC_DDR0_SATT_UNIT_POS    25
+#define _BGP_IC_DDR0_SATT_UNIT_SIZE   1
+#define _BGP_IC_DDR0_SATT_UNIT_MASK   0x00000040
+
+/* ************************************************************************* */
+/* DDR 0 Machine Check:                     Group 7 bit 26                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR0_MCHK_HIER_POS    7
+#define _BGP_IC_DDR0_MCHK_UNIT_NUM    7
+#define _BGP_IC_DDR0_MCHK_UNIT_POS    26
+#define _BGP_IC_DDR0_MCHK_UNIT_SIZE   1
+#define _BGP_IC_DDR0_MCHK_UNIT_MASK   0x00000020
+
+
+/* ************************************************************************* */
+/* DDR 1 Recoverable error:                 Group 7 bit 27                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR1_RERR_HIER_POS    7
+#define _BGP_IC_DDR1_RERR_UNIT_NUM    7
+#define _BGP_IC_DDR1_RERR_UNIT_POS    27
+#define _BGP_IC_DDR1_RERR_UNIT_SIZE   1
+#define _BGP_IC_DDR1_RERR_UNIT_MASK   0x00000010
+
+/* ************************************************************************* */
+/* DDR 1 Special Attention:                 Group 7 bit 28                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR1_SATT_HIER_POS    7
+#define _BGP_IC_DDR1_SATT_UNIT_NUM    7
+#define _BGP_IC_DDR1_SATT_UNIT_POS    28
+#define _BGP_IC_DDR1_SATT_UNIT_SIZE   1
+#define _BGP_IC_DDR1_SATT_UNIT_MASK   0x00000008
+
+/* ************************************************************************* */
+/* DDR 1 Machine Check:                     Group 7 bit 29                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR1_MCHK_HIER_POS    7
+#define _BGP_IC_DDR1_MCHK_UNIT_NUM    7
+#define _BGP_IC_DDR1_MCHK_UNIT_POS    29
+#define _BGP_IC_DDR1_MCHK_UNIT_SIZE   1
+#define _BGP_IC_DDR1_MCHK_UNIT_MASK   0x00000004
+
+
+/* ************************************************************************* */
+/* Test Interface interrupt request:        Group 7  bits 30:31              */
+/* ************************************************************************* */
+
+#define _BGP_IC_TESTINT_HIER_POS      7
+#define _BGP_IC_TESTINT_UNIT_NUM      7
+#define _BGP_IC_TESTINT_UNIT_POS      30
+#define _BGP_IC_TESTINT_UNIT_SIZE     2
+#define _BGP_IC_TESTINT_UNIT_MASK     0x00000003
+
+
+/* ************************************************************************* */
+/* Ethernet TOMAL interrupt request:        Group 8 bits 0:1                 */
+/* ************************************************************************* */
+
+#define _BGP_IC_TOMAL_HIER_POS        8
+#define _BGP_IC_TOMAL_UNIT_NUM        8
+#define _BGP_IC_TOMAL_UNIT_POS        0
+#define _BGP_IC_TOMAL_UNIT_SIZE       2
+#define _BGP_IC_TOMAL_UNIT_MASK       0xC0000000
+
+
+
+/* ************************************************************************* */
+/* Ethernet XEMAC interrupt request:         Group 9 bit 0                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_XEMAC_HIER_POS        9
+#define _BGP_IC_XEMAC_UNIT_NUM        9
+#define _BGP_IC_XEMAC_UNIT_POS        0
+#define _BGP_IC_XEMAC_UNIT_SIZE       1
+#define _BGP_IC_XEMAC_UNIT_MASK       0x80000000
+
+/* ************************************************************************* */
+/* Ethernet interrupt request:              Group 9 bit 1                    */
+/* ************************************************************************* */
+
+#define _BGP_IC_ETH_HIER_POS          9
+#define _BGP_IC_ETH_UNIT_NUM          9
+#define _BGP_IC_ETH_UNIT_POS          1
+#define _BGP_IC_ETH_UNIT_SIZE         1
+#define _BGP_IC_ETH_UNIT_MASK         0x40000000
+
+/* ************************************************************************* */
+/* Ethernet XENPAK interrupt request:       Group 9 bit 2                    */
+/* ************************************************************************* */
+
+#define _BGP_IC_XENPAK_HIER_POS       9
+#define _BGP_IC_XENPAK_UNIT_NUM       9
+#define _BGP_IC_XENPAK_UNIT_POS       2
+#define _BGP_IC_XENPAK_UNIT_SIZE      1
+#define _BGP_IC_XENPAK_UNIT_MASK      0x20000000
+
+
+
+
+#endif
diff --git a/arch/powerpc/include/common/alignment.h b/arch/powerpc/include/common/alignment.h
new file mode 100644
index 0000000..10bfd37
--- /dev/null
+++ b/arch/powerpc/include/common/alignment.h
@@ -0,0 +1,66 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/alignment.h
+ */
+
+#ifndef	_ALIGNMENT_H_  /*  Prevent multiple inclusion */
+#define	_ALIGNMENT_H_
+
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#if defined(__ASSEMBLY__)
+
+#define ALIGN_L1_DIRTYBIT  3
+#define ALIGN_QUADWORD     4
+#define ALIGN_L1_CACHE     5
+#define ALIGN_L1I_CACHE    5
+#define ALIGN_L1D_CACHE    5
+#define ALIGN_L3_CACHE     7
+
+#elif defined(__GNUC__) || defined(__xlC__)
+
+#define ALIGN_L1_DIRTYBIT __attribute__ ((aligned (  8)))
+#define ALIGN_QUADWORD    __attribute__ ((aligned ( 16)))
+#define ALIGN_L1_CACHE    __attribute__ ((aligned ( 32)))
+#define ALIGN_L1I_CACHE   __attribute__ ((aligned ( 32)))
+#define ALIGN_L1D_CACHE   __attribute__ ((aligned ( 32)))
+#define ALIGN_L3_CACHE    __attribute__ ((aligned (128)))
+
+#else
+
+#warning "Need alignment directives for your compiler!"
+
+#define ALIGN_L1_DIRTYBIT
+#define ALIGN_QUADWORD
+#define ALIGN_L1_CACHE
+#define ALIGN_L1I_CACHE
+#define ALIGN_L1D_CACHE
+#define ALIGN_L3_CACHE
+
+#endif  /*  __ASSEMBLY__ */
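+
+/*  Usage sketch (illustrative only, not part of the original header):
+ *  align a per-core counter to one 32-byte L1 line to avoid false sharing:
+ *
+ *      struct per_core_stat {
+ *          volatile unsigned int count;
+ *          unsigned char         pad[28];
+ *      } ALIGN_L1_CACHE;
+ */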
+
+__END_DECLS
+
+
+
+#endif  /*  Add nothing below this line */
diff --git a/arch/powerpc/include/common/bgp_bitnumbers.h b/arch/powerpc/include/common/bgp_bitnumbers.h
new file mode 100644
index 0000000..5acb4db
--- /dev/null
+++ b/arch/powerpc/include/common/bgp_bitnumbers.h
@@ -0,0 +1,114 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/bgp_bitnumbers.h
+ */
+
+#ifndef _BGP_BITNUMBERS_H_   /*  Prevent multiple inclusion */
+#define _BGP_BITNUMBERS_H_
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+/*  These defines allow use of IBM's bit numbering (MSb=0, LSb=31) for multi-bit fields */
+/*   b = IBM bit number of the least significant bit (highest number) */
+/*   x = value to set in field */
+/*   s = size of field in bits */
+#define _BS(b,x,s)   (((x) & (0x7FFFFFFF >> (31-(s)))) << (31-(b)))
+#define _BG(b,x,s)   ((_BS(b,0x7FFFFFFF,s) & (x)) >> (31-(b)))
+#define _BS64(b,x,s) (((x) & (0x7FFFFFFFFFFFFFFFLL >> (63-(s)))) << (63-(b)))
+#define _BG64(b,x,s) ((_BS64(b,0x7FFFFFFFFFFFFFFFLL,s) & (x)) >> (63-(b)))
+#define _BN(b)    ((1<<(31-(b))))
+#define _B1(b,x)  (((x)&0x1)<<(31-(b)))
+#define _B2(b,x)  (((x)&0x3)<<(31-(b)))
+#define _B3(b,x)  (((x)&0x7)<<(31-(b)))
+#define _B4(b,x)  (((x)&0xF)<<(31-(b)))
+#define _B5(b,x)  (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x)  (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x)  (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x)  (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x)  (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
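+
+/*  Worked example (illustrative): with MSb=0 numbering,                     */
+/*    _BN(2)       == 0x20000000  -- single bit at IBM position 2            */
+/*    _B2(1,0x3)   == 0xC0000000  -- 2-bit field whose LSb is IBM bit 1      */
+/*    _BS(1,0x3,2) == 0xC0000000  -- equivalent variable-width form          */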
+
+#ifndef __ASSEMBLY__
+
+/*  These defines ease extraction of bitfields.  (Not useful in assembler code.) */
+/*   x = 32 bit value */
+/*   b = IBM bit number of least significant bit of field */
+/*   when b is a const, compiler should generate a single rotate-and-mask instruction */
+#define _GN(x,b)  (((x)>>(31-(b)))&0x1)
+#define _G2(x,b)  (((x)>>(31-(b)))&0x3)
+#define _G3(x,b)  (((x)>>(31-(b)))&0x7)
+#define _G4(x,b)  (((x)>>(31-(b)))&0xF)
+#define _G5(x,b)  (((x)>>(31-(b)))&0x1F)
+#define _G6(x,b)  (((x)>>(31-(b)))&0x3F)
+#define _G7(x,b)  (((x)>>(31-(b)))&0x7F)
+#define _G8(x,b)  (((x)>>(31-(b)))&0xFF)
+#define _G9(x,b)  (((x)>>(31-(b)))&0x1FF)
+#define _G10(x,b) (((x)>>(31-(b)))&0x3FF)
+#define _G11(x,b) (((x)>>(31-(b)))&0x7FF)
+#define _G12(x,b) (((x)>>(31-(b)))&0xFFF)
+#define _G13(x,b) (((x)>>(31-(b)))&0x1FFF)
+#define _G14(x,b) (((x)>>(31-(b)))&0x3FFF)
+#define _G15(x,b) (((x)>>(31-(b)))&0x7FFF)
+#define _G16(x,b) (((x)>>(31-(b)))&0xFFFF)
+#define _G17(x,b) (((x)>>(31-(b)))&0x1FFFF)
+#define _G18(x,b) (((x)>>(31-(b)))&0x3FFFF)
+#define _G19(x,b) (((x)>>(31-(b)))&0x7FFFF)
+#define _G20(x,b) (((x)>>(31-(b)))&0xFFFFF)
+#define _G21(x,b) (((x)>>(31-(b)))&0x1FFFFF)
+#define _G22(x,b) (((x)>>(31-(b)))&0x3FFFFF)
+#define _G23(x,b) (((x)>>(31-(b)))&0x7FFFFF)
+#define _G24(x,b) (((x)>>(31-(b)))&0xFFFFFF)
+#define _G25(x,b) (((x)>>(31-(b)))&0x1FFFFFF)
+#define _G26(x,b) (((x)>>(31-(b)))&0x3FFFFFF)
+#define _G27(x,b) (((x)>>(31-(b)))&0x7FFFFFF)
+#define _G28(x,b) (((x)>>(31-(b)))&0xFFFFFFF)
+#define _G29(x,b) (((x)>>(31-(b)))&0x1FFFFFFF)
+#define _G30(x,b) (((x)>>(31-(b)))&0x3FFFFFFF)
+#define _G31(x,b) (((x)>>(31-(b)))&0x7FFFFFFF)
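+
+/*  Worked example (illustrative): extraction mirrors insertion, e.g.        */
+/*    _G2(0xC0000000,1) == 0x3  -- reads back the field set by _B2(1,0x3)    */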
+
+#endif  /*  __ASSEMBLY__ */
+
+__END_DECLS
+
+#endif  /*  Add nothing below this line. */
+
diff --git a/arch/powerpc/include/common/bgp_chipversion.h b/arch/powerpc/include/common/bgp_chipversion.h
new file mode 100644
index 0000000..eba213a
--- /dev/null
+++ b/arch/powerpc/include/common/bgp_chipversion.h
@@ -0,0 +1,52 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/bgp_chipversion.h
+ */
+
+#ifndef	_BGP_CHIPVERSION_H_  /*  Prevent multiple inclusion */
+#define	_BGP_CHIPVERSION_H_
+
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#define BGP_CHIPVERSION_DD2
+
+#if defined BGP_CHIPVERSION_DD1
+/*   Settings for DD1 */
+#define BGP_DD1_WORKAROUNDS 1
+
+#elif defined BGP_CHIPVERSION_DD2
+/*   Settings for DD2 */
+
+#else
+/*   */
+#error "Invalid chip version setting"
+
+#endif
+
+
+__END_DECLS
+
+
+
+#endif  /*  Add nothing below this line. */
diff --git a/arch/powerpc/include/common/bgp_personality.h b/arch/powerpc/include/common/bgp_personality.h
new file mode 100644
index 0000000..9d64516
--- /dev/null
+++ b/arch/powerpc/include/common/bgp_personality.h
@@ -0,0 +1,786 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/bgp_personality.h
+ */
+
+#ifndef	_BGP_PERSONALITY_H_  /*  Prevent multiple inclusion */
+#define	_BGP_PERSONALITY_H_
+
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#include <common/bgp_chipversion.h>
+#include <common/alignment.h>
+#include <common/bgp_bitnumbers.h>
+#include <bpcore/bgp_types.h>
+
+/*  */
+/*  I/O Node Linux currently hard-codes the personality address. */
+/*  */
+#define _BGP_HARDCODED_PERSONALITY_SRAM_ADDRESS (0xFFFFF800)
+
+#define _BGP_PERSONALITY_VERSION (0x0A)
+
+#define _BGP_DEFAULT_FREQ (850)   /*  Match the current DD2 hardware */
+
+#define _BGP_PERS_Unused_DEFAULT 0
+
+#define _BGP_PERS_PROCESSCONFIG_DIAGS      (0xFF000000)  /*  Diagnostic Mode: All Cores Enabled and Privileged in Process 0 */
+#define _BGP_PERS_PROCESSCONFIG_SMP        (0x0F000000)  /*  All Cores Enabled User-Space in Process 0 */
+#define _BGP_PERS_PROCESSCONFIG_VNM        (0x08040201)  /*  4 Single-Core Processes (a.k.a. Virtual Nodes) */
+#define _BGP_PERS_PROCESSCONFIG_2x2        (0x0C030000)  /*  2 Processes of 2 Cores each in same DP unit */
+#define _BGP_PERS_PROCESSCONFIG_DEFAULT    (_BGP_PERS_PROCESSCONFIG_DIAGS)
+#define _BGP_PERS_PROCESSCONFIG_PRIV_MSK   (0xF0F0F0F0)  /*  Mask to isolate privileged core flags */
+
+
+/*  Personality.Kernel_Config.RASPolicy */
+#define _BGP_PERS_RASPOLICY_VERBOSITY(x)   _B2( 1,x)   /*  Verbosity as shown below */
+#define _BGP_PERS_RASPOLICY_MINIMAL          _BGP_PERS_RASPOLICY_VERBOSITY(0)  /*  Benchmarking Level of Capture and Reporting */
+#define _BGP_PERS_RASPOLICY_NORMAL           _BGP_PERS_RASPOLICY_VERBOSITY(1)  /*  Normal Production Level of Capture and Reporting */
+#define _BGP_PERS_RASPOLICY_VERBOSE          _BGP_PERS_RASPOLICY_VERBOSITY(2)  /*  Manufacturing Test and Diagnostics */
+#define _BGP_PERS_RASPOLICY_EXTREME          _BGP_PERS_RASPOLICY_VERBOSITY(3)  /*  Report Every Event Immediately - Thresholds set to 1 */
+#define _BGP_PERS_RASPOLICY_FATALEXIT      _BN( 2)    /*  Fatal is Fatal, so exit. */
+
+#define _BGP_PERS_RASPOLICY_DEFAULT        (_BGP_PERS_RASPOLICY_VERBOSE | _BGP_PERS_RASPOLICY_FATALEXIT)
+
+
+#define _BGP_PERSONALITY_LEN_NFSDIR (32)  /*  32 bytes */
+
+#define _BGP_PERSONALITY_LEN_SECKEY (32)  /*  32 bytes */
+
+/*  Personality.NodeConfig Driver Enables and Configurations */
+#define _BGP_PERS_ENABLE_Simulation      _BN( 0)   /*  Running on VHDL Simulation */
+#define _BGP_PERS_ENABLE_LockBox         _BN( 1)
+#define _BGP_PERS_ENABLE_BIC             _BN( 2)
+#define _BGP_PERS_ENABLE_DDR             _BN( 3)   /*  DDR Controllers (not Fusion DDR model) */
+#define _BGP_PERS_ENABLE_LoopBack        _BN( 4)   /*  LoopBack: Internal TS/TR or SerDes Loopback */
+#define _BGP_PERS_ENABLE_GlobalInts      _BN( 5)
+#define _BGP_PERS_ENABLE_Collective      _BN( 6)   /*  Enable Collective Network */
+#define _BGP_PERS_ENABLE_Torus           _BN( 7)
+#define _BGP_PERS_ENABLE_TorusMeshX      _BN( 8)   /*  Torus is a Mesh in the X-dimension */
+#define _BGP_PERS_ENABLE_TorusMeshY      _BN( 9)   /*  Torus is a Mesh in the Y-dimension */
+#define _BGP_PERS_ENABLE_TorusMeshZ      _BN(10)   /*  Torus is a Mesh in the Z-dimension */
+#define _BGP_PERS_ENABLE_TreeA           _BN(11)   /*  Enable Collective Network A-link */
+#define _BGP_PERS_ENABLE_TreeB           _BN(12)   /*  Enable Collective Network B-link */
+#define _BGP_PERS_ENABLE_TreeC           _BN(13)   /*  Enable Collective Network C-link */
+#define _BGP_PERS_ENABLE_DMA             _BN(14)
+#define _BGP_PERS_ENABLE_SerDes          _BN(15)
+#define _BGP_PERS_ENABLE_UPC             _BN(16)
+#define _BGP_PERS_ENABLE_EnvMon          _BN(17)
+#define _BGP_PERS_ENABLE_Ethernet        _BN(18)
+#define _BGP_PERS_ENABLE_JTagLoader      _BN(19)   /*  Converse with JTag Host to load kernel */
+#define _BGP_PERS_ENABLE_MailBoxReceive  _BGP_PERS_ENABLE_JTagLoader
+#define _BGP_PERS_ENABLE_PowerSave       _BN(20)   /*  Turn off unused devices (Eth on CN, TS on ION) */
+#define _BGP_PERS_ENABLE_FPU             _BN(21)   /*  Enable Double-Hummers (not supported in EventSim) */
+#define _BGP_PERS_ENABLE_StandAlone      _BN(22)   /*  Disable "CIOD" interface, Requires Collective! */
+#define _BGP_PERS_ENABLE_TLBMisses       _BN(23)   /*  TLB Misses vs Wasting Memory (see bgp_AppSetup.c) */
+#define _BGP_PERS_ENABLE_Mambo           _BN(24)   /*  Running under Mambo? Used by Linux */
+#define _BGP_PERS_ENABLE_TreeBlast       _BN(25)   /*  Enable Tree "Blast" mode */
+#define _BGP_PERS_ENABLE_BlindStacks     _BN(26)   /*  For "XB" Tests, Lock 16K Stacks in Blind Device */
+#define _BGP_PERS_ENABLE_CNK_Malloc      _BN(27)   /*  Enable Malloc Support in CNK. */
+#define _BGP_PERS_ENABLE_Reproducibility _BN(28)   /*  Enable Cycle Reproducibility */
+#define _BGP_PERS_ENABLE_HighThroughput  _BN(29)   /*  Enable high throughput computing mode */
+#define _BGP_PERS_ENABLE_DiagnosticsMode _BN(30)   /*  Enable diagnostics mode */
+
+/*  Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back) */
+/*   This overrides most L1, L2, and Snoop settings. Careful! */
+#define _BGP_PERS_ENABLE_BGLMODE      _BN(31)   /*  (not yet fully implemented) */
+
+/*  Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave */
+
+#define _BGP_PERS_NODECONFIG_DEFAULT (_BGP_PERS_ENABLE_Simulation  |\
+                                      _BGP_PERS_ENABLE_LockBox     |\
+                                      _BGP_PERS_ENABLE_BIC         |\
+                                      _BGP_PERS_ENABLE_DDR         |\
+                                      _BGP_PERS_ENABLE_LoopBack    |\
+                                      _BGP_PERS_ENABLE_GlobalInts  |\
+                                      _BGP_PERS_ENABLE_Collective  |\
+                                      _BGP_PERS_ENABLE_Torus       |\
+                                      _BGP_PERS_ENABLE_UPC         |\
+                                      _BGP_PERS_ENABLE_EnvMon      |\
+                                      _BGP_PERS_ENABLE_FPU         |\
+                                      _BGP_PERS_ENABLE_TLBMisses   |\
+                                      _BGP_PERS_ENABLE_StandAlone)
+
+/*  Default Setup for Hardware: */
+/*      Supports Stand-Alone CNA Applications. */
+/*      Bootloader-Extensions and XB's must turn-off JTagLoader */
+#define _BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (_BGP_PERS_ENABLE_JTagLoader  |\
+                                                   _BGP_PERS_ENABLE_LockBox     |\
+                                                   _BGP_PERS_ENABLE_BIC         |\
+                                                   _BGP_PERS_ENABLE_DDR         |\
+                                                   _BGP_PERS_ENABLE_GlobalInts  |\
+                                                   _BGP_PERS_ENABLE_Collective  |\
+                                                   _BGP_PERS_ENABLE_SerDes      |\
+                                                   _BGP_PERS_ENABLE_UPC         |\
+                                                   _BGP_PERS_ENABLE_EnvMon      |\
+                                                   _BGP_PERS_ENABLE_FPU         |\
+                                                   _BGP_PERS_ENABLE_TLBMisses   |\
+                                                   _BGP_PERS_ENABLE_StandAlone)
+
+
+
+/*  These fields are set by the control system, depending on whether this is a compute or an I/O node: */
+/*                                                    _BGP_PERS_ENABLE_Torus       | */
+/*                                                    _BGP_PERS_ENABLE_TorusMeshX  | */
+/*                                                    _BGP_PERS_ENABLE_TorusMeshY  | */
+/*                                                    _BGP_PERS_ENABLE_TorusMeshZ  | */
+
+
+
+/*  Personality.L1Config: Controls and Settings for L1 Cache */
+#define _BGP_PERS_L1CONFIG_L1I          _BN( 0)     /*  L1 Enabled for Instructions */
+#define _BGP_PERS_L1CONFIG_L1D          _BN( 1)     /*  L1 Enabled for Data */
+#define _BGP_PERS_L1CONFIG_L1SWOA       _BN( 2)     /*  L1 Store WithOut Allocate */
+#define _BGP_PERS_L1CONFIG_L1Recovery   _BN( 3)     /*  L1 Full Recovery Mode */
+#define _BGP_PERS_L1CONFIG_L1WriteThru  _BN( 4)     /*  L1 Write-Thru (not svc_host changeable (yet?)) */
+#define _BGP_PERS_L1CONFIG_DO_L1ITrans  _BN( 5)     /*  Enable L1 Instructions Transient? */
+#define _BGP_PERS_L1CONFIG_DO_L1DTrans  _BN( 6)     /*  Enable L1 Data         Transient? */
+                                                    /*  unused 9bits: 7..15 */
+#define _BGP_PERS_L1CONFIG_L1ITrans(x)  _B8(23,x)   /*  L1 Transient for Instructions in Groups of 16 Lines */
+#define _BGP_PERS_L1CONFIG_L1DTrans(x)  _B8(31,x)   /*  L1 Transient for Data         in Groups of 16 Lines */
+
+#define _BGP_PERS_L1CONFIG_DEFAULT (_BGP_PERS_L1CONFIG_L1I         |\
+                                    _BGP_PERS_L1CONFIG_L1D         |\
+                                    _BGP_PERS_L1CONFIG_L1SWOA      |\
+				    _BGP_PERS_L1CONFIG_L1Recovery  |\
+                                    _BGP_PERS_L1CONFIG_L1WriteThru)
+
+typedef union T_BGP_Pers_L1Cfg
+               {
+               uint32_t l1cfg;
+               struct {
+                      unsigned l1i         :  1;
+                      unsigned l1d         :  1;
+                      unsigned l1swoa      :  1;
+                      unsigned l1recovery  :  1;
+                      unsigned l1writethru :  1;
+                      unsigned do_l1itrans :  1;
+                      unsigned do_l1dtrans :  1;
+                      unsigned l1rsvd      :  9;
+                      unsigned l1itrans    :  8;
+                      unsigned l1dtrans    :  8;
+                      };
+               }
+               _BGP_Pers_L1Cfg;
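+
+/*  Usage sketch (illustrative): the union decodes a raw L1Config word       */
+/*  field by field, e.g.                                                     */
+/*      _BGP_Pers_L1Cfg cfg = { .l1cfg = _BGP_PERS_L1CONFIG_DEFAULT };       */
+/*      if (cfg.l1writethru) { ... program L1 for write-through ... }        */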
+
+/*  Personality.L2Config: Controls and Settings for L2 and Snoop */
+#define _BGP_PERS_L2CONFIG_L2I                _BN( 0)   /*  L2 Instruction Caching Enabled */
+#define _BGP_PERS_L2CONFIG_L2D                _BN( 1)   /*  L2 Data        Caching Enabled */
+#define _BGP_PERS_L2CONFIG_L2PF               _BN( 2)   /*  L2 Automatic Prefetching Enabled */
+#define _BGP_PERS_L2CONFIG_L2PFO              _BN( 3)   /*  L2 Optimistic Prefetching Enabled */
+#define _BGP_PERS_L2CONFIG_L2PFA              _BN( 4)   /*  L2 Aggressive Prefetching Enabled (fewer deeper streams) */
+#define _BGP_PERS_L2CONFIG_L2PFS              _BN( 5)   /*  L2 Aggressive Many-Stream Prefetching Enabled (deeper only when buffers are available) */
+#define _BGP_PERS_L2CONFIG_Snoop              _BN( 6)   /*  Just NULL Snoop Filter */
+#define _BGP_PERS_L2CONFIG_SnoopCache         _BN( 7)   /*  Snoop Caches */
+#define _BGP_PERS_L2CONFIG_SnoopStream        _BN( 8)   /*  Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata) */
+#define _BGP_PERS_L2CONFIG_SnoopRange         _BN( 9)   /*  Snoop Range Filter when possible */
+#define _BGP_PERS_L2CONFIG_BUG824LUMPY        _BN(10)   /*  BPC_BUGS 824: Fix with Lumpy Performance */
+#define _BGP_PERS_L2CONFIG_BUG824SMOOTH       _BN(11)   /*  BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory */
+#define _BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12)   /*  Special for Snoop diagnostics. See bgp_vmm.c */
+                                               /*  additional bits may be used for Snoop setting tweaks */
+
+/*  Default L2 Configuration: */
+/*    L2 Enabled with Multi-Stream Aggressive Prefetching */
+/*    Snoop Enabled with all filters except Range */
+#define _BGP_PERS_L2CONFIG_DEFAULT   (_BGP_PERS_L2CONFIG_L2I        |\
+                                      _BGP_PERS_L2CONFIG_L2D        |\
+                                      _BGP_PERS_L2CONFIG_L2PF       |\
+                                      _BGP_PERS_L2CONFIG_L2PFO      |\
+                                      _BGP_PERS_L2CONFIG_L2PFS      |\
+                                      _BGP_PERS_L2CONFIG_Snoop      |\
+                                      _BGP_PERS_L2CONFIG_SnoopCache |\
+                                      _BGP_PERS_L2CONFIG_SnoopStream)
+
+/*  Personality.L3Config: Controls and Settings for L3 */
+/*    Note: Most bits match _BGP_L3x_CTRL DCRs. */
+/*          See arch/include/bpcore/bgl_l3_dcr.h */
+#define _BGP_PERS_L3CONFIG_L3I        _BN( 0)     /*  L3 Enabled for Instructions */
+#define _BGP_PERS_L3CONFIG_L3D        _BN( 1)     /*  L3 Enabled for Data */
+#define _BGP_PERS_L3CONFIG_L3PFI      _BN( 2)     /*  Inhibit L3 Prefetch from DDR */
+#define _BGP_PERS_L3CONFIG_DO_Scratch _BN( 3)     /*  Set up Scratch? */
+#define _BGP_PERS_L3CONFIG_DO_PFD0    _BN( 4)     /*  Adjust PFD0? */
+#define _BGP_PERS_L3CONFIG_DO_PFD1    _BN( 5)     /*  Adjust PFD1? */
+#define _BGP_PERS_L3CONFIG_DO_PFDMA   _BN( 6)     /*  Adjust PFDMA? */
+#define _BGP_PERS_L3CONFIG_DO_PFQD    _BN( 7)     /*  Adjust PFQD? */
+                                       /*  8..15 unused/available */
+#define _BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x)   /*  Scratch 8ths: 0..8 */
+#define _BGP_PERS_L3CONFIG_PFD0(x)    _B3(22,x)   /*  Prefetch Depth for DP0 */
+#define _BGP_PERS_L3CONFIG_PFD1(x)    _B3(25,x)   /*  Prefetch Depth for DP1 */
+#define _BGP_PERS_L3CONFIG_PFDMA(x)   _B3(28,x)   /*  Prefetch Depth for DMA */
+#define _BGP_PERS_L3CONFIG_PFQD(x)    _B3(31,x)   /*  Prefetch Queue Depth */
+
+/*  General L3 Configuration */
+typedef union T_BGP_Pers_L3Cfg
+               {
+               uint32_t l3cfg;
+               struct {
+                      unsigned l3i        :  1;
+                      unsigned l3d        :  1;
+                      unsigned l3pfi      :  1;
+                      unsigned do_scratch :  1;
+                      unsigned do_pfd0    :  1;
+                      unsigned do_pfd1    :  1;
+                      unsigned do_pfdma   :  1;
+                      unsigned do_pfqd    :  1;
+                      unsigned rsvd       :  8;
+                      unsigned scratch    :  4;
+                      unsigned pfd0       :  3;
+                      unsigned pfd1       :  3;
+                      unsigned pfdma      :  3;
+                      unsigned pfqd       :  3;
+                      };
+               }
+               _BGP_Pers_L3Cfg;
+
+/*  Default L3 Configuration: */
+/*    L3 Enabled for Instructions and Data */
+/*    No Prefetch Depth overrides, No Scratch, No Scrambling. */
+#define _BGP_PERS_L3CONFIG_DEFAULT    (_BGP_PERS_L3CONFIG_L3I |\
+                                       _BGP_PERS_L3CONFIG_L3D |\
+				       _BGP_PERS_L3CONFIG_DO_PFDMA |\
+                                       _BGP_PERS_L3CONFIG_PFDMA(4))
+
+
+/*  L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users) */
+#define _BGP_PERS_L3SELECT_DO_CacheSel _BN( 0)    /*  Adjust Cache Select setting? */
+#define _BGP_PERS_L3SELECT_DO_BankSel  _BN( 1)    /*  Adjust Bank  Select setting? */
+#define _BGP_PERS_L3SELECT_Scramble    _BN( 2)    /*  L3 Scramble */
+#define _BGP_PERS_L3SELECT_PFby2       _BN( 3)    /*  Prefetch by 2 if set, else by 1 (default) if clear. */
+#define _BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x)  /*  PhysAddr Bit for L3 Selection (0..26) */
+#define _BGP_PERS_L3SELECT_BankSel(x)  _B5(13,x)  /*  PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel. */
+
+typedef union T_BGP_Pers_L3Select
+               {
+               uint32_t l3select;
+               struct {
+                      unsigned do_CacheSel :  1;
+                      unsigned do_BankSel  :  1;
+                      unsigned l3Scramble  :  1;
+                      unsigned l3_PF_by2   :  1;  /*  default is PreFetch by 1. */
+                      unsigned CacheSel    :  5;  /*  Physical Address Bit for L3 Selection (0..26) */
+                      unsigned BankSel     :  5;  /*  0..26 Must be strictly greater than CacheSel. */
+                      unsigned rsvd        : 18;
+                      };
+               }
+               _BGP_Pers_L3Select;
+
+/*  Default L3 Selection Configuration: Disable overrides, but set h/w default values. */
+#define _BGP_PERS_L3SELECT_DEFAULT  (_BGP_PERS_L3SELECT_CacheSel(21) |\
+                                     _BGP_PERS_L3SELECT_BankSel(26))
+
+/*  Tracing Masks and default trace configuration */
+/*    See also arch/include/cnk/Trace.h */
+#define _BGP_TRACE_CONFIG    _BN( 0)    /*  Display Encoded personality config on startup */
+#define _BGP_TRACE_ENTRY     _BN( 1)    /*  Function enter and exit */
+#define _BGP_TRACE_INTS      _BN( 2)    /*  Standard Interrupt Dispatch */
+#define _BGP_TRACE_CINTS     _BN( 3)    /*  Critical Interrupt Dispatch */
+#define _BGP_TRACE_MCHK      _BN( 4)    /*  Machine Check Dispatch */
+#define _BGP_TRACE_SYSCALL   _BN( 5)    /*  System Calls */
+#define _BGP_TRACE_VMM       _BN( 6)    /*  Virtual Memory Manager */
+#define _BGP_TRACE_DEBUG     _BN( 7)    /*  Debug Events (app crashes etc) */
+#define _BGP_TRACE_TORUS     _BN( 8)    /*  Torus Init */
+#define _BGP_TRACE_TREE      _BN( 9)    /*  Tree  Init */
+#define _BGP_TRACE_GLOBINT   _BN(10)    /*  Global Interrupts */
+#define _BGP_TRACE_DMA       _BN(11)    /*  DMA Setup */
+#define _BGP_TRACE_SERDES    _BN(12)    /*  SerDes Init */
+#define _BGP_TRACE_TESTINT   _BN(13)    /*  Test Interface, ECID, Config */
+#define _BGP_TRACE_ETHTX     _BN(14)    /*  Ethernet Transmit */
+#define _BGP_TRACE_ETHRX     _BN(15)    /*  Ethernet Receive */
+#define _BGP_TRACE_POWER     _BN(16)    /*  Power Control */
+#define _BGP_TRACE_PROCESS   _BN(17)    /*  Process/Thread Mapping */
+#define _BGP_TRACE_EXIT_SUM  _BN(18)    /*  Report Per-Core Interrupt and Error Summary on exit() */
+#define _BGP_TRACE_SCHED     _BN(19)    /*  Report Scheduler Information */
+#define _BGP_TRACE_RAS       _BN(20)    /*  Report RAS Events (in addition to sending to Host) */
+#define _BGP_TRACE_ECID      _BN(21)    /*  Report UCI and ECID on boot */
+#define _BGP_TRACE_FUTEX     _BN(22)    /*  Trace Futex operations */
+#define _BGP_TRACE_MemAlloc  _BN(23)    /*  Trace MMAP and Shared Memory operations */
+#define _BGP_TRACE_CONTROL   _BN(24)    /*  Trace control messages exchanged with I/O node */
+#define _BGP_TRACE_MSGS      _BN(25)    /*  Trace messages and packets sent on virtual channel 0 */
+#define _BGP_TRACE_DEBUGGER  _BN(26)    /*  Trace debugger messages exchanged with I/O node */
+#define _BGP_TRACE_WARNINGS  _BN(30)    /*  Trace Warnings */
+#define _BGP_TRACE_VERBOSE   _BN(31)    /*  Verbose Tracing Modifier */
+
+/*  Default tracing is off. To track Regression Suite coverage and report */
+/*  UCI+ECID on boot, set this to (_BGP_TRACE_CONFIG | _BGP_TRACE_ECID). */
+#define _BGP_PERS_TRACE_DEFAULT 0
+
+
+typedef struct _BGP_Personality_Kernel_t
+                {
+                uint32_t  UniversalComponentIdentifier;  /*  see include/common/bgp_ras.h */
+
+                uint32_t  FreqMHz;                       /*  Clock_X1 Frequency in MegaHertz (eg 1000) */
+
+                uint32_t  RASPolicy;                     /*  Verbosity level, and other RAS Reporting Controls */
+
+                 /*  Process Config: */
+                 /*    Each byte represents a process (1 to 4 processes supported) */
+                 /*      No core can be assigned to more than 1 process. */
+                 /*      Cores assigned to no process are disabled. */
+                 /*      Cores within a process share the same address space. */
+                 /*      Separate processes have distinct address spaces. */
+                 /*    Within each process (0 to 4 cores assigned to a process): */
+                 /*      Lower nibble is bitmask of which core belongs to that process. */
+                 /*      Upper nibble is bitmask whether that thread is privileged or user. */
+                 /*      Processes with zero cores do not exist. */
+                 /*    E.g., for Diagnostics, we sometimes use 0xFF000000, which means */
+                 /*      that all 4 cores run privileged in process 0. */
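+                 /*    Likewise, _BGP_PERS_PROCESSCONFIG_SMP (0x0F000000): byte 0 has */
+                 /*      low nibble 0xF (cores 0-3 in process 0) and upper nibble 0x0 */
+                 /*      (all user-mode); bytes 1-3 are zero, so processes 1-3 do not */
+                 /*      exist. */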
+                uint32_t  ProcessConfig;
+
+                uint32_t  TraceConfig;         /*  Kernel Tracing Enables */
+                uint32_t  NodeConfig;          /*  Kernel Driver Enables */
+                uint32_t  L1Config;            /*  L1 Config and setup controls */
+                uint32_t  L2Config;            /*  L2 and Snoop Config and setup controls */
+                uint32_t  L3Config;            /*  L3 Config and setup controls */
+                uint32_t  L3Select;            /*  L3 Cache and Bank Selection controls */
+
+                uint32_t  SharedMemMB;         /*  Memory to Reserve for Sharing among Processes */
+
+                uint32_t  ClockStop0;         /*  Upper 11 bits of ClockStop, enabled if non-zero */
+                uint32_t  ClockStop1;         /*  Lower 32 bits of ClockStop, enabled if non-zero */
+                }
+                _BGP_Personality_Kernel_t;
+
+
+/*  Defaults for DDR Config */
+#define _BGP_PERS_DDR_PBX0_DEFAULT             (0x411D1512)     /*  PBX DCRs setting (in IBM bit numbering) */
+#define _BGP_PERS_DDR_PBX1_DEFAULT             (0x40000000)     /*  PBX DCRs setting (in IBM bit numbering) */
+#define _BGP_PERS_DDR_MemConfig0_DEFAULT       (0x81fc4080)     /*  MemConfig */
+#define _BGP_PERS_DDR_MemConfig1_DEFAULT       (0x0C0ff800)     /*  MemConfig */
+#define _BGP_PERS_DDR_ParmCtl0_DEFAULT         (0x3216c008)     /*  Parm Control */
+#define _BGP_PERS_DDR_ParmCtl1_DEFAULT         (0x4168c323)     /*  Parm Control */
+#define _BGP_PERS_DDR_MiscCtl0_DEFAULT         (0)     /*  Misc. Control */
+#define _BGP_PERS_DDR_MiscCtl1_DEFAULT         (0)     /*  Misc. Control */
+#define _BGP_PERS_DDR_CmdBufMode0_DEFAULT      (0x00400fdf)     /*  Command Buffer Mode */
+#define _BGP_PERS_DDR_CmdBufMode1_DEFAULT      (0xffc80600)     /*  Command Buffer Mode */
+#define _BGP_PERS_DDR_RefrInterval0_DEFAULT    (0xD1000002)     /*  Refresh Interval */
+#define _BGP_PERS_DDR_RefrInterval1_DEFAULT    (0x04000000)     /*  Refresh Interval */
+#define _BGP_PERS_DDR_ODTCtl0_DEFAULT          (0)     /*  ODT Control */
+#define _BGP_PERS_DDR_ODTCtl1_DEFAULT          (0)     /*  ODT Control */
+#define _BGP_PERS_DDR_TimingTweaks_DEFAULT     (0)     /*  DRAM timing tweaks to use */
+#define _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805)     /*  Data Strobe Calibration */
+#define _BGP_PERS_DDR_DQSCtl_DEFAULT           (0x00000168)     /*  DQS Control */
+#define _BGP_PERS_DDR_Throttle_DEFAULT         (0)     /*  DDR Throttle */
+
+#define _BGP_PERS_DDR_CAS_DEFAULT              (4)     /*  CAS Latency (3, 4, or 5) */
+#define _BGP_PERS_DDR_DDRSizeMB_DEFAULT        (2048)  /*  Total DDR size in MegaBytes (512MB - 16384MB). */
+#define _BGP_PERS_DDR_Chips_DEFAULT            (0x01)  /*  Type of DDR chips: 512GBx8 */
+
+#define _BGP_PERS_DDRFLAGS_ENABLE_Scrub        _BN(0)  /*  Enable DDR Slow Scrub when 1 */
+
+/*  DDRFLAGS default: Enable Slow Scrub. */
+#define _BGP_PERS_DDRFLAGS_DEFAULT             (_BGP_PERS_DDRFLAGS_ENABLE_Scrub)
+
+#define _BGP_PERS_SRBS0_DEFAULT                (0xFFFFFFFF)
+#define _BGP_PERS_SRBS1_DEFAULT                (0xFFFFFFFF)
+
+typedef struct _BGP_Personality_DDR_t
+                {
+                uint32_t  DDRFlags;          /*  Misc. Flags and Settings */
+                uint32_t  SRBS0;             /*  Controller 0 SRBS/CK Settings */
+                uint32_t  SRBS1;             /*  Controller 1 SRBS/CK Settings */
+                uint32_t  PBX0;              /*  PBX DCRs setting (in IBM bit numbering) */
+                uint32_t  PBX1;              /*  PBX DCRs setting (in IBM bit numbering) */
+                uint32_t  MemConfig0;        /*  MemConfig */
+                uint32_t  MemConfig1;        /*  MemConfig */
+                uint32_t  ParmCtl0;          /*  Parm Control */
+                uint32_t  ParmCtl1;          /*  Parm Control */
+                uint32_t  MiscCtl0;          /*  Misc. Control */
+                uint32_t  MiscCtl1;          /*  Misc. Control */
+                uint32_t  CmdBufMode0;       /*  Command Buffer Mode */
+                uint32_t  CmdBufMode1;       /*  Command Buffer Mode */
+                uint32_t  RefrInterval0;     /*  Refresh Interval */
+                uint32_t  RefrInterval1;     /*  Refresh Interval */
+                uint32_t  ODTCtl0;           /*  ODT Control */
+                uint32_t  ODTCtl1;           /*  ODT Control */
+                uint8_t   TimingTweaks;      /*  DRAM timing tweak type */
+                uint8_t   Unused0;
+                uint8_t   Unused1;
+                uint8_t   Unused2;
+                uint32_t  DataStrobeCalib1;  /*  Data Strobe Calibration */
+                uint32_t  DQSCtl;            /*  DQS Control */
+                uint32_t  Throttle;          /*  DDR Throttle */
+                uint16_t  DDRSizeMB;         /*  Total DDR size in MegaBytes (512MB - 16384MB). */
+                uint8_t   Chips;             /*  Type of DDR chips */
+                uint8_t   CAS;               /*  CAS Latency (3, 4, or 5) */
+                }
+                _BGP_Personality_DDR_t;
+
+
+typedef struct _BGP_Personality_Networks_t
+                {
+                uint32_t  BlockID;          /*  a.k.a. PartitionID */
+
+                uint8_t   Xnodes,
+                          Ynodes,
+                          Znodes,
+                          Xcoord,
+                          Ycoord,
+                          Zcoord;
+
+                 /*  PSet Support */
+                uint16_t  PSetNum;
+                uint32_t  PSetSize;
+                uint32_t  RankInPSet;
+
+                uint32_t  IOnodes;
+                uint32_t  Rank;                /*  Rank in Block (or Partition) */
+                uint32_t  IOnodeRank;          /*  Rank (and therefore P2P Addr) of my I/O Node */
+                uint16_t  TreeRoutes[ 16 ];
+                }
+                _BGP_Personality_Networks_t;
+
+
+typedef struct _BGP_IP_Addr_t
+                {
+                 /*  IPv6 Addresses are 16 bytes, where the */
+                 /*   lower 4 (indices 12-15) can be used for IPv4 address. */
+                uint8_t octet[ 16 ];
+                }
+                _BGP_IP_Addr_t;
+
+
+typedef struct _BGP_Personality_Ethernet_t
+                {
+                uint16_t       MTU;             /*  Initial emac MTU size */
+                uint8_t        EmacID[6];       /*  MAC address for emac */
+                _BGP_IP_Addr_t IPAddress;       /*  IPv6/IPv4 address of this node */
+                _BGP_IP_Addr_t IPNetmask;       /*  IPv6/IPv4 netmask */
+                _BGP_IP_Addr_t IPBroadcast;     /*  IPv6/IPv4 broadcast address */
+                _BGP_IP_Addr_t IPGateway;       /*  IPv6/IPv4 initial gateway (zero if none) */
+                _BGP_IP_Addr_t NFSServer;       /*  IPv6/IPv4 NFS system software server address */
+                _BGP_IP_Addr_t serviceNode;     /*  IPv6/IPv4 address of service node */
+
+                 /*  NFS mount info */
+                char      NFSExportDir[_BGP_PERSONALITY_LEN_NFSDIR];
+                char      NFSMountDir[ _BGP_PERSONALITY_LEN_NFSDIR];
+
+                 /*  Security Key for Service Node authentication */
+                uint8_t   SecurityKey[ _BGP_PERSONALITY_LEN_SECKEY ];
+                }
+                _BGP_Personality_Ethernet_t;
+
+
+#define BGP_PERS_BLKCFG_IPOverCollective        _BN(31)
+#define BGP_PERS_BLKCFG_IPOverTorus             _BN(30)
+#define BGP_PERS_BLKCFG_IPOverCollectiveVC      _BN(29)
+#define BGP_PERS_BLKCFG_CIOModeSel(x)           _B2(28,x)
+#define BGP_PERS_BLKCFG_bgsysFSSel(x)           _B3(26,x)
+#define BGP_PERS_BLKCFG_CIOMode_Full            0
+#define BGP_PERS_BLKCFG_CIOMode_MuxOnly         1
+#define BGP_PERS_BLKCFG_CIOMode_None            2
+#define BGP_PERS_BLKCFG_bgsys_NFSv3             0
+#define BGP_PERS_BLKCFG_bgsys_NFSv4             1
+#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \
+                                 BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+
+
+typedef struct T_BGP_Personality_t
+                {
+                uint16_t  CRC;
+                uint8_t   Version;
+                uint8_t   PersonalitySizeWords;
+
+                _BGP_Personality_Kernel_t   Kernel_Config;
+
+                _BGP_Personality_DDR_t      DDR_Config;
+
+                _BGP_Personality_Networks_t Network_Config;
+
+                _BGP_Personality_Ethernet_t Ethernet_Config;
+
+                uint8_t   Block_Config;
+                uint8_t padd[7];  /*  Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr) */
+                                   /*  to simplify jtag operations. See issue #140. */
+                }
+                _BGP_Personality_t;
+
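+/*  Illustrative sketch (not part of the original interface): on an I/O node
+ *  the kernel conceptually locates the personality at the hard-coded SRAM
+ *  address and sanity-checks the version field, e.g.
+ *
+ *      _BGP_Personality_t *pers =
+ *          (_BGP_Personality_t *)_BGP_HARDCODED_PERSONALITY_SRAM_ADDRESS;
+ *      if (pers->Version != _BGP_PERSONALITY_VERSION)
+ *          ...reject or re-fetch...
+ */
+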
+#define Network_Config_treeInfo0 DDR_Config.ODTCtl0
+#define Network_Config_treeInfo1 DDR_Config.ODTCtl1
+#define Network_Config_treeInfo2 DDR_Config.CmdBufMode0
+
+/* _BGP_PersonalityTreeInfo provides information about one of the tree
+ * ports (A,B or C) on this node.  It is a 32-bit value.
+ * See accessor methods below which interpret these fields with this layout:
+ *
+ *  .-.-.--.--.--.------------------------.
+ *  |V|R|LT|CW|DP|   destP2Paddr          |
+ *  `-'-'--'--'--'------------------------'
+ *   1 1 2  2  2  24	<- bits in field
+ *
+ * V	Valid bit.  Use is deprecated.  Was used for forward compatibility
+ * R	Wire is redundant
+ * LT	Link type (2 bit).  0->no wire, 1->compute node, 2->I/O node, 3->reserved
+ * CW   CommWorld wire interpretation (2 bit): 0->unused wire, 1->child, 2->parent
+ * DP	Destination Port on other end of wire (2 bit) 0,1,2 -> A,B,C
+ * destP2Paddr (24 bit) Tree address of node on other end of the wire
+ */
+
+#define _BGP_PERS_TREEINFO_VALID             0x80000000
+#define _BGP_PERS_TREEINFO_REDUNDANT         0x40000000
+#define _BGP_PERS_TREEINFO_LINKTYPE_MASK     0x30000000
+#define _BGP_PERS_TREEINFO_LINKTYPE_SHIFT    28
+#define _BGP_PERS_TREEINFO_COMMWORLD_MASK    0x0c000000
+#define _BGP_PERS_TREEINFO_COMMWORLD_SHIFT   26
+#define _BGP_PERS_TREEINFO_DESTPORT_MASK     0x03000000
+#define _BGP_PERS_TREEINFO_DESTPORT_SHIFT    24
+#define _BGP_PERS_TREEINFO_DESTP2P_MASK      0x00ffffff
+
+#define _BGP_PERS_TREEINFO_LINKTYPE_NOWIRE       0
+#define _BGP_PERS_TREEINFO_LINKTYPE_COMPUTE      1
+#define _BGP_PERS_TREEINFO_LINKTYPE_IO           2
+
+#define _BGP_PERS_TREEINFO_COMMWORLD_UNUSED      0 /* unused wire */
+#define _BGP_PERS_TREEINFO_COMMWORLD_CHILD       1
+#define _BGP_PERS_TREEINFO_COMMWORLD_PARENT      2
+
+#define _BGP_PERS_TREE_PORT_0    0
+#define _BGP_PERS_TREE_PORT_1    1
+#define _BGP_PERS_TREE_PORT_2    2
+
+/* This struct is the layout on big endian architectures (ppc) */
+typedef struct {
+    unsigned valid:1;  /* 1 -> this info is valid */
+    unsigned redundant:1; /* 1 -> redundant wire */
+    unsigned linkType:2; /* 0 -> no wire, 1 -> compute node, 2 -> I/O */
+    unsigned commWorld:2; /* 1 -> child port, 2 -> parent port on comm_world tree */
+    unsigned destPort:2; /* dest port 0,1,2 -> A,B,C */
+    unsigned destP2Paddr:24; /* destination tree addr on this port */
+} _BGP_PersonalityTreeInfo_t;
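+
+/* Illustrative helpers (names are examples only, using the masks and shifts
+ * above): extract the destination port and peer tree address from a raw
+ * 32-bit tree-info word. */
+static inline unsigned _BGP_Pers_TreeInfo_DestPort(uint32_t ti)
+{
+    return (ti & _BGP_PERS_TREEINFO_DESTPORT_MASK) >> _BGP_PERS_TREEINFO_DESTPORT_SHIFT;
+}
+
+static inline unsigned _BGP_Pers_TreeInfo_DestP2Paddr(uint32_t ti)
+{
+    return ti & _BGP_PERS_TREEINFO_DESTP2P_MASK;
+}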
+
+
+
+/*  Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION) */
+/*   This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c */
+#define _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \
+           0,                                              /* CRC                  */ \
+           _BGP_PERSONALITY_VERSION,                       /* Version              */ \
+           (sizeof(_BGP_Personality_t)/sizeof(uint32_t)),  /* PersonalitySizeWords */ \
+           {  /* _BGP_Personality_Kernel_t: */ \
+              0,                                   /* UniversalComponentIdentifier */ \
+              _BGP_DEFAULT_FREQ,                   /* FreqMHz       */ \
+              _BGP_PERS_RASPOLICY_DEFAULT,         /* RASPolicy     */ \
+              _BGP_PERS_PROCESSCONFIG_DEFAULT,     /* ProcessConfig */ \
+              _BGP_PERS_TRACE_DEFAULT,             /* TraceConfig   */ \
+              _BGP_PERS_NODECONFIG_DEFAULT,        /* NodeConfig    */ \
+              _BGP_PERS_L1CONFIG_DEFAULT,          /* L1Config      */ \
+              _BGP_PERS_L2CONFIG_DEFAULT,          /* L2Config      */ \
+              _BGP_PERS_L3CONFIG_DEFAULT,          /* L3Config      */ \
+              _BGP_PERS_L3SELECT_DEFAULT,          /* L3Select      */ \
+              0,                                   /* SharedMemMB   */ \
+              0,                                   /* ClockStop0    */ \
+              0                                    /* ClockStop1    */ \
+              }, \
+           {  /* _BGP_Personality_DDR_t: */ \
+              _BGP_PERS_DDRFLAGS_DEFAULT,             /* DDRFlags         */ \
+              _BGP_PERS_SRBS0_DEFAULT,                /* SRBS0            */ \
+              _BGP_PERS_SRBS1_DEFAULT,                /* SRBS1            */ \
+              _BGP_PERS_DDR_PBX0_DEFAULT,             /* PBX0             */ \
+              _BGP_PERS_DDR_PBX1_DEFAULT,             /* PBX1             */ \
+              _BGP_PERS_DDR_MemConfig0_DEFAULT,       /* MemConfig0       */ \
+              _BGP_PERS_DDR_MemConfig1_DEFAULT,       /* MemConfig1       */ \
+              _BGP_PERS_DDR_ParmCtl0_DEFAULT,         /* ParmCtl0         */ \
+              _BGP_PERS_DDR_ParmCtl1_DEFAULT,         /* ParmCtl1         */ \
+              _BGP_PERS_DDR_MiscCtl0_DEFAULT,         /* MiscCtl0         */ \
+              _BGP_PERS_DDR_MiscCtl1_DEFAULT,         /* MiscCtl1         */ \
+              _BGP_PERS_DDR_CmdBufMode0_DEFAULT,      /* CmdBufMode0      */ \
+              _BGP_PERS_DDR_CmdBufMode1_DEFAULT,      /* CmdBufMode1      */ \
+              _BGP_PERS_DDR_RefrInterval0_DEFAULT,    /* RefrInterval0    */ \
+              _BGP_PERS_DDR_RefrInterval1_DEFAULT,    /* RefrInterval1    */ \
+              _BGP_PERS_DDR_ODTCtl0_DEFAULT,          /* ODTCtl0          */ \
+              _BGP_PERS_DDR_ODTCtl1_DEFAULT,          /* ODTCtl1          */ \
+              _BGP_PERS_DDR_TimingTweaks_DEFAULT,     /* TimingTweaks     */ \
+              _BGP_PERS_Unused_DEFAULT,               /* Unused0          */ \
+              _BGP_PERS_Unused_DEFAULT,               /* Unused1          */ \
+              _BGP_PERS_Unused_DEFAULT,               /* Unused2          */ \
+              _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+              _BGP_PERS_DDR_DQSCtl_DEFAULT,           /* DQSCtl           */ \
+              _BGP_PERS_DDR_Throttle_DEFAULT,         /* Throttle         */ \
+              _BGP_PERS_DDR_DDRSizeMB_DEFAULT,        /* DDRSizeMB        */ \
+              _BGP_PERS_DDR_Chips_DEFAULT,            /* Chips            */ \
+              _BGP_PERS_DDR_CAS_DEFAULT               /* CAS              */ \
+              }, \
+           {  /* _BGP_Personality_Networks_t: */ \
+              0,                                   /* BlockID                */ \
+              1, 1, 1,                             /* Xnodes, Ynodes, Znodes */ \
+              0, 0, 0,                             /* Xcoord, Ycoord, Zcoord */ \
+              0,                                   /* PSetNum                */ \
+              0,                                   /* PSetSize               */ \
+              0,                                   /* RankInPSet             */ \
+              0,                                   /* IOnodes                */ \
+              0,                                   /* Rank                   */ \
+              0,                                   /* IOnodeRank             */ \
+              { 0, }                               /* TreeRoutes[ 16 ]       */ \
+              }, \
+           {  /* _BGP_Personality_Ethernet_t: */ \
+              1536,                                /* mtu              */ \
+              { 0, },                              /* EmacID[6]        */ \
+              { { 0x00,0x00,0x00,0x00,             /* IPAddress        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPNetmask        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0xFF,0xFF,0xFF,0x70  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPBroadcast      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPGateway        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* NFSServer        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* serviceNode      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              "",                                  /* NFSExportDir[32] */ \
+              "",                                  /* NFSMountDir[32]  */ \
+              { 0x00, }                            /* SecurityKey[32]  */ \
+              }, \
+           0,                                      /* Block_Config     */ \
+           { 0, }                                  /* padd[7]          */ \
+           }
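+
+/*  Usage sketch (illustrative): the initializer yields a complete default
+ *  personality object, e.g. in a bootloader:
+ *
+ *      static _BGP_Personality_t pers =
+ *          _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER;
+ */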
+
+
+/*  Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE) */
+/*   This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c */
+#define _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \
+           0,                                             /* CRC                  */ \
+           _BGP_PERSONALITY_VERSION,                      /* Version              */ \
+           (sizeof(_BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+           {  /* _BGP_Personality_Kernel_t: */ \
+              0,                                          /* UniversalComponentIdentifier */ \
+              _BGP_DEFAULT_FREQ,                          /* FreqMHz       */ \
+              _BGP_PERS_RASPOLICY_DEFAULT,                /* RASPolicy     */ \
+              _BGP_PERS_PROCESSCONFIG_SMP,                /* ProcessConfig */ \
+              _BGP_PERS_TRACE_DEFAULT,                    /* TraceConfig   */ \
+              _BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE,  /* NodeConfig    */ \
+              _BGP_PERS_L1CONFIG_DEFAULT,                 /* L1Config      */ \
+              _BGP_PERS_L2CONFIG_DEFAULT,                 /* L2Config      */ \
+              _BGP_PERS_L3CONFIG_DEFAULT,                 /* L3Config      */ \
+              _BGP_PERS_L3SELECT_DEFAULT,                 /* L3Select      */ \
+              0,                                          /* SharedMemMB   */ \
+              0,                                          /* ClockStop0    */ \
+              0                                           /* ClockStop1    */ \
+              }, \
+           {  /* _BGP_Personality_DDR_t: */ \
+              _BGP_PERS_DDRFLAGS_DEFAULT,             /* DDRFlags         */ \
+              _BGP_PERS_SRBS0_DEFAULT,                /* SRBS0            */ \
+              _BGP_PERS_SRBS1_DEFAULT,                /* SRBS1            */ \
+              _BGP_PERS_DDR_PBX0_DEFAULT,             /* PBX0             */ \
+              _BGP_PERS_DDR_PBX1_DEFAULT,             /* PBX1             */ \
+              _BGP_PERS_DDR_MemConfig0_DEFAULT,       /* MemConfig0       */ \
+              _BGP_PERS_DDR_MemConfig1_DEFAULT,       /* MemConfig1       */ \
+              _BGP_PERS_DDR_ParmCtl0_DEFAULT,         /* ParmCtl0         */ \
+              _BGP_PERS_DDR_ParmCtl1_DEFAULT,         /* ParmCtl1         */ \
+              _BGP_PERS_DDR_MiscCtl0_DEFAULT,         /* MiscCtl0         */ \
+              _BGP_PERS_DDR_MiscCtl1_DEFAULT,         /* MiscCtl1         */ \
+              _BGP_PERS_DDR_CmdBufMode0_DEFAULT,      /* CmdBufMode0      */ \
+              _BGP_PERS_DDR_CmdBufMode1_DEFAULT,      /* CmdBufMode1      */ \
+              _BGP_PERS_DDR_RefrInterval0_DEFAULT,    /* RefrInterval0    */ \
+              _BGP_PERS_DDR_RefrInterval1_DEFAULT,    /* RefrInterval1    */ \
+              _BGP_PERS_DDR_ODTCtl0_DEFAULT,          /* ODTCtl0          */ \
+              _BGP_PERS_DDR_ODTCtl1_DEFAULT,          /* ODTCtl1          */ \
+              _BGP_PERS_DDR_TimingTweaks_DEFAULT,     /* TimingTweaks     */ \
+              _BGP_PERS_Unused_DEFAULT,               /* Unused0          */ \
+              _BGP_PERS_Unused_DEFAULT,               /* Unused1          */ \
+              _BGP_PERS_Unused_DEFAULT,               /* Unused2          */ \
+              _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+              _BGP_PERS_DDR_DQSCtl_DEFAULT,           /* DQSCtl           */ \
+              _BGP_PERS_DDR_Throttle_DEFAULT,         /* Throttle         */ \
+              _BGP_PERS_DDR_DDRSizeMB_DEFAULT,        /* DDRSizeMB        */ \
+              _BGP_PERS_DDR_Chips_DEFAULT,            /* Chips            */ \
+              _BGP_PERS_DDR_CAS_DEFAULT               /* CAS              */ \
+              }, \
+           {  /* _BGP_Personality_Networks_t: */ \
+              0,                                   /* BlockID                */ \
+              1, 1, 1,                             /* Xnodes, Ynodes, Znodes */ \
+              0, 0, 0,                             /* Xcoord, Ycoord, Zcoord */ \
+              0,                                   /* PSetNum                */ \
+              0,                                   /* PSetSize               */ \
+              0,                                   /* RankInPSet             */ \
+              0,                                   /* IOnodes                */ \
+              0,                                   /* Rank                   */ \
+              0,                                   /* IOnodeRank             */ \
+              { 0, }                               /* TreeRoutes[ 16 ]       */ \
+              }, \
+           {  /* _BGP_Personality_Ethernet_t: */ \
+              1536,                                /* mtu              */ \
+              { 0, },                              /* EmacID[6]        */ \
+              { { 0x00,0x00,0x00,0x00,             /* IPAddress        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPNetmask        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0xFF,0xFF,0xFF,0x70  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPBroadcast      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* IPGateway        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* NFSServer        */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              { { 0x00,0x00,0x00,0x00,             /* serviceNode      */ \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00, \
+                  0x00,0x00,0x00,0x00  \
+                  } }, \
+              "",                                  /* NFSExportDir[32] */ \
+              "",                                  /* NFSMountDir[32]  */ \
+              { 0x00, }                            /* SecurityKey[32]  */ \
+              }, \
+           0,                                      /* Block_Config     */ \
+           { 0, }                                  /* padd[7]          */ \
+           }
+
+
+__END_DECLS
+
+
+
+#endif  /*  Add nothing below this line. */
diff --git a/arch/powerpc/include/common/namespace.h b/arch/powerpc/include/common/namespace.h
new file mode 100644
index 0000000..a5ee88e
--- /dev/null
+++ b/arch/powerpc/include/common/namespace.h
@@ -0,0 +1,47 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/namespace.h
+ */
+
+#ifndef	_NAMESPACE_H_  /*  Prevent multiple inclusion */
+#define	_NAMESPACE_H_
+
+
+
+
+#if !defined(__ASSEMBLY__) && defined(__cplusplus)
+#define __BEGIN_DECLS extern "C" {
+#define __C_LINKAGE extern "C"
+#else
+#define __BEGIN_DECLS
+#define __C_LINKAGE
+#endif
+
+
+#if !defined(__ASSEMBLY__) && defined(__cplusplus)
+#define __END_DECLS }
+#else
+#define __END_DECLS
+#endif
+
+
+
+
+#endif  /*  Add nothing below this line */
diff --git a/arch/powerpc/include/spi/DMA_Assert.h b/arch/powerpc/include/spi/DMA_Assert.h
new file mode 100644
index 0000000..5f21b64
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Assert.h
@@ -0,0 +1,276 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef __DMA_ASSERT_H_ /* Prevent multiple inclusion */
+#define __DMA_ASSERT_H_
+
+#ifndef __LINUX_KERNEL__
+
+/*!
+ * \file spi/DMA_Assert.h
+ *
+ * \brief DMA SPI Assert Macros
+ *
+ * Two sets of assert macros are provided:
+ * - Kernel Asserts
+ * - User-mode Asserts
+ *
+ * When DMA SPIs are used within the kernel, a special assert routine is called
+ * that does NOT abort.  It just prints the assertion and the location and
+ * continues.
+ *
+ * When DMA SPIs are used within user-mode code, the normal assert routine is
+ * called, which prints the assertion and location and aborts.
+ *
+ * Several levels of asserts are provided, and #define variables control which
+ * levels are activated.  The following assert macros are available:
+ *
+ * SPI_abort  - Always active and always issues assert(0).
+ *              Primarily used for unimplemented code paths.
+ *              Not available in the kernel.
+ * SPI_assert - Active by default, or when ASSERT_PROD is defined.
+ *              Meant to flag user errors.
+ * SPI_assert_debug - Active by default.  Meant to flag coding
+ *                    errors before shipping.
+ *
+ * The following #defines control which level of asserts are compiled into
+ * the code.  Only one of ASSERT_ABORT, ASSERT_PROD (or nothing) should
+ * be specified.
+ * - ASSERT_ABORT means that the "abort" level is the only level
+ *   of asserts that is active.  Other levels are turned off.
+ * - ASSERT_PROD means that "abort" and "assert" levels are active.
+ *   "assert_debug" is turned off.
+ * - Not specifying ASSERT_ABORT or ASSERT_PROD means that all
+ *   levels of asserts ("abort", "assert", "assert_debug") are
+ *   active.
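+ *
+ * As an illustrative sketch (not part of this header), user-mode code
+ * compiled with -DASSERT_PROD behaves as follows:
+ *
+ * \verbatim
+   SPI_abort();               /* active: issues assert(0)               */
+   SPI_assert(ptr != NULL);   /* active: standard assert                */
+   SPI_assert_debug(n > 0);   /* compiled out: expression not evaluated */
+   \endverbatim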
+ */
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+#include <stdio.h>
+
+/* ============================================================ */
+
+#ifdef __CNK__
+
+/*!
+ * \brief Production-level Kernel Assert.
+ *
+ * This production level of assert will be active during normal production
+ * code execution.
+ *
+ * When in the kernel, just do a printf, but don't exit.
+ */
+#define SPI_assert(x)       DMA_KernelAssert(x)
+
+/*!
+ * \brief Debug-level Kernel Assert.
+ *
+ * This debug level of assert will only be active during in-house debugging.
+ *
+ * When in the kernel, just do a printf, but don't exit.
+ */
+#define SPI_assert_debug(x) DMA_KernelAssert(x)
+
+#ifdef NDEBUG
+
+/*!
+ * \brief No Debug Kernel Assert Internal Macro
+ *
+ * This macro is used internally for when asserts are turned off via the NDEBUG
+ * flag.  It does nothing.
+ */
+#define DMA_KernelAssert( __assert_test ) ((void)0)
+
+/* ============================================================ */
+
+#else /* not NDEBUG */
+
+/*!
+ * \brief Kernel Assert Internal Function
+ *
+ * This function is called when the kernel determines that it needs to assert.
+ * It prints the assertion that failed and the code location, but does not
+ * abort.  The kernel should continue executing.
+ *
+ * \param[in]  __assertion  Pointer to the assertion string that failed the test
+ * \param[in]  __file       Pointer to the name of the source file that coded the assert
+ * \param[in]  __line       Line number within the source file that coded the assert
+ */
+extern inline void __DMA_KernelAssert( const char *__assertion,
+				       const char *__file,
+				       int __line )
+{
+   printf("Assertion Failed: %s, file %s, line %d.\n",
+	  __assertion,
+	  __file,
+	  __line );
+}
+
+
+/*!
+ * \brief Kernel Assert Internal Macro
+ *
+ * This macro is used internally when asserts are turned on (the NDEBUG flag
+ * is not specified).  It tests the assertion.  If the assertion is true, it
+ * does nothing.  If the assertion is false, it invokes the __DMA_KernelAssert
+ * internal function to print out the assert information.
+ *
+ * \param[in]  __assert_test  The assertion expression to test
+ */
+#define DMA_KernelAssert( __assert_test ) \
+                           ((__assert_test) ? ((void)0) : \
+                           __DMA_KernelAssert( #__assert_test, __FILE__, __LINE__ ))
+
+
+#endif /* NDEBUG */
+
+/* ============================================================ */
+
+#else /* not __CNK__ */
+
+#include <assert.h>
+
+#ifdef ASSERT_ABORT
+
+/*!
+ * \brief Abort-level Abort Assert
+ *
+ * This macro is defined when the ASSERT_ABORT level of asserts is active.
+ *
+ * This macro will assert(0).
+ *
+ */
+#define SPI_abort()         assert(0)
+
+/*!
+ * \brief Abort-level Production Assert
+ *
+ * This macro is defined when the ASSERT_ABORT level of asserts is active.
+ * This macro compiles to nothing: the assert expression is not evaluated
+ * and no assertion occurs, because abort-level-only asserts are active.
+ *
+ */
+#define SPI_assert(x)
+
+/*!
+ * \brief Abort-level Debug Assert
+ *
+ * This macro is defined when the ASSERT_ABORT level of asserts is active.
+ * This macro compiles to nothing: the assert expression is not evaluated
+ * and no assertion occurs, because abort-level-only asserts are active.
+ *
+ */
+#define SPI_assert_debug(x)
+
+/* ============================================================ */
+
+#else /* Not ASSERT_ABORT */
+
+#ifdef ASSERT_PROD
+
+/*!
+ * \brief Production-level Abort Assert
+ *
+ * This macro is defined when the ASSERT_PROD level of asserts is active.
+ *
+ * This macro will assert(0).
+ *
+ */
+#define SPI_abort()         assert(0)
+
+/*!
+ * \brief Production-level Production Assert
+ *
+ * This macro is defined when the ASSERT_PROD level of asserts is active.
+ *
+ * This macro invokes the standard assert() function with the specified
+ * assert test.
+ */
+#define SPI_assert(x)       assert(x)
+
+/*!
+ * \brief Production-level Debug Assert
+ *
+ * This macro is defined when the ASSERT_PROD level of asserts is active.
+ *
+ * This macro compiles to nothing: the assert expression is not evaluated
+ * and no assertion occurs, because only production-level asserts are active.
+ */
+#define SPI_assert_debug(x)
+
+/* ============================================================ */
+
+#else /* Not ASSERT_PROD */
+
+/*!
+ * \brief Debug-level Abort Assert
+ *
+ * This macro is defined when all levels of asserts are desired (neither the
+ * ASSERT_ABORT nor ASSERT_PROD level of asserts is active.  This is the
+ * default).
+ *
+ * This macro will assert(0).
+ *
+ */
+#define SPI_abort()         assert(0)
+
+/*!
+ * \brief Debug-level Production Assert
+ *
+ * This macro is defined when all levels of asserts are desired (neither the
+ * ASSERT_ABORT nor ASSERT_PROD level of asserts is active.  This is the
+ * default).
+ *
+ * This macro invokes the standard assert() function with the specified
+ * assert test.
+ */
+#define SPI_assert(x)       assert(x)
+
+/*!
+ * \brief Debug-level Debug Assert
+ *
+ * This macro is defined when all levels of asserts are desired (neither the
+ * ASSERT_ABORT nor ASSERT_PROD level of asserts is active.  This is the
+ * default).
+ *
+ * This macro invokes the standard assert() function with the specified
+ * assert test.
+ */
+#define SPI_assert_debug(x) assert(x)
+
+#endif
+
+#endif
+
+#endif /* __CNK__ */
+
+
+__END_DECLS
+
+
+#endif /* ! __LINUX_KERNEL__ */
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Counter.h b/arch/powerpc/include/spi/DMA_Counter.h
new file mode 100644
index 0000000..1e854cb
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Counter.h
@@ -0,0 +1,2988 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef	_DMA_COUNTER_H_ /* Prevent multiple inclusion */
+#define	_DMA_COUNTER_H_
+
+
+/*!
+ * \file spi/DMA_Counter.h
+ *
+ * \brief DMA SPI Counter Definitions and Inline Functions
+ *
+ * This include file contains inline functions that are used to interface with
+ * BG/P DMA injection and reception counters at the lowest level.
+ * Functions include
+ * - set and get a counter's value and base address
+ * - enable and disable a counter or group of counters
+ * - query whether a counter or group of counters has hit zero
+ * - clear a counter's or group of counters' hit-zero state
+ * - set and get a reception counter's maximum address
+ *
+ * Definitions:
+ * - A counter is a 32-bit value containing the number of bytes being
+ *   transferred from/to memory
+ * - Associated with a counter is a base address indicating where the data is
+ *   being transferred from/to
+ * - Associated with a reception counter is a max address bounding the DMA
+ *   transfer.
+ * - There are injection (iDMA) and reception (rDMA) counters
+ * - There are DMA_NUM_COUNTERS iDMA counters and DMA_NUM_COUNTERS rDMA
+ *   counters
+ * - A counter group consists of DMA_NUM_COUNTERS_PER_GROUP counters
+ * - There are DMA_NUM_COUNTER_GROUPS iDMA counter groups and
+ *   DMA_NUM_COUNTER_GROUPS rDMA counter groups
+ * - A subgroup consists of DMA_NUM_COUNTERS_PER_SUBGROUP counters.  This is
+ *   the unit of counter allocation.
+ * - The highest-level counter inlines in this include file work with virtual
+ *   addresses.  They are converted to physical addresses and placed into the
+ *   counter.
+ * - The counter's base and max addresses reside in the DMA memory map (DMA
+ *   SRAM).  The DMA_CounterHw_t structure, known as the hardware counter
+ *   structure maps a single counter in this storage.  They are "shadowed" by
+ *   these inline functions to a DMA_Counter_t structure in DDR memory,
+ *   known as the software counter structure, and their associated virtual
+ *   address is also stored in that structure for easy retrieval.  The
+ *   physical addresses really don't have to reside in this shadow structure,
+ *   but it is faster to access them there than from the DMA's SRAM.
+ * - The counter's base and max addresses are stored in the DMA SRAM as
+ *   16B-aligned 4-bit shifted physical addresses.  That is, the 36-bit
+ *   physical address is right shifted 4 bits, aligning it on a 16B boundary
+ *   leaving 32 bits.  The following naming conventions are used to store
+ *   addresses:
+ *   - pa_xxxx: Physical address (32-bit, 16B-aligned 4-bit shifted)
+ *   - va_xxxx: Virtual address (32 bits).
+ *
+ * \verbatim Picture of data structures:
+
+   ========DDR MEMORY===================|==========DMA SRAM MEMORY=============
+   ------------------------------       |
+   | DMA_CounterGroup_t         |       |
+   |                            |       |     --------------------------------
+   | status --------------------|-------|---->| DMA_CounterStatus_t          |
+   | counter[0..63]             |       |     --------------------------------
+   |   ------------------------ |       |
+   |   | DMA_Counter_t        | |       |     -----------------------------
+   | 0 | (software counter)   | |       |     | DMA_CounterHw_t           |
+   |   | counter_hw_ptr-------|-|-------|---->| (hardware counter)        |
+   |   ------------------------ |       |     -----------------------------
+   |             .              |       |
+   |             .              |       |
+   |             .              |       |
+   |   ------------------------ |       |
+   |   | DMA_Counter_t        | |       |     -----------------------------
+   |63 | (software counter)   | |       |     | DMA_CounterHw_t           |
+   |   | counter_hw_ptr-------|-|-------|---->| (hardware counter)        |
+   |   ------------------------ |       |     -----------------------------
+   |             .              |       |
+   ------------------------------       |
+
+   \endverbatim
+ *
+ * \note Memory consistency/coherency inside these inlines is achieved using
+ *       mbar and msync.
+ *
+ *       MBAR is used to make sure that all writes to memory issued by the
+ *       calling core have been accepted by the memory system before
+ *       continuing.  This guarantees that writes and reads to/from different
+ *       addresses go in a defined order.
+ *
+ *       MBAR EXAMPLE 1:  When a store is done to DMA SRAM, it may not complete
+ *       for a period of time.  If a counter value is set, and then an injection
+ *       fifo tail pointer is set, DMA may see the tail pointer update and begin
+ *       the operation before the counter value has been set.  Inserting an mbar
+ *       between the setting of the counter and the setting of the tail pointer
+ *       guarantees that the counter will be set before the tail pointer is
+ *       updated.
+ *
+ *       MBAR EXAMPLE 2:  A counter hits zero.  We process the hit-zero and write
+ *       a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero
+ *       status (different address).  The hit-zero status will still indicate
+ *       that it hit zero, even though we have already processed it, unless an
+ *       mbar is inserted between clearing the hit-zero and reading the hit-zero
+ *       status.
+ *
+ *       MBAR PHILOSOPHY:  After DMA SRAM is updated in the DMA inline functions,
+ *       they always do at least an mbar (possibly an msync instead...see below).
+ *
+ *       MSYNC does what mbar does, plus ensures consistency across cores.  That
+ *       is, it waits for snoops (invalidations of L1 cache) on the other cores
+ *       to complete before continuing.  This guarantees that all of the cores
+ *       will see a consistent view of memory after the msync.
+ *
+ *       MSYNC EXAMPLE:  When a reception counter has hit zero, we assume the
+ *       DMA'd data is available to be read by any core.  However, old copies of
+ *       that data may still be in the L1 caches.  Inserting an msync after
+ *       detecting that a counter has hit zero guarantees that the old data has
+ *       been removed from the L1 caches.
+ *
+ *       MSYNC PHILOSOPHY:  After the inline functions detect that a counter has
+ *       hit zero, they always do an msync.
+ *
+ *       SPECULATIVE EXECUTION OF MSYNC:  There are cases where msync is done
+ *       conditionally.  The CPU will begin execution of both sides of the
+ *       condition before the result of the condition has been determined.
+ *       Then, it will cancel the execution of one side once the result of the
+ *       condition has been determined.  This speculation is unwanted when
+ *       the first instruction on one side of the condition is msync because
+ *       cancelling an msync is similar to executing the complete msync.
+ *       To avoid this speculative execution of msync, we call
+ *       _bgp_msync_nonspeculative().  This will trick the CPU so it won't begin
+ *       the msync until the result of the condition is known.
+ *
+ *       CALLER ADVICE:  Users of these functions should not need to do
+ *       mbar/msync themselves, unless they are doing something like the
+ *       following:  Read a counter and operate on the result when the counter
+ *       hasn't reached zero.  The caller will need to perform an msync after
+ *       reading the counter in order to ensure that snoops have completed
+ *       on all CPUs before operating on the DMA'd data.
+ *
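+ *       As a minimal sketch of that advice (DMA_CounterGetValueHw() is
+ *       defined below; the polling loop and consume() are hypothetical):
+ *
+ * \verbatim
+   while ( DMA_CounterGetValueHw( c_hw ) != 0 )
+     ;                              /* poll until the counter reaches zero */
+   _bgp_msync_nonspeculative();     /* wait for L1 snoops on all cores     */
+   consume( va_base );              /* DMA'd data is now safe to read      */
+   \endverbatim
+ *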
+ */
+
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ *   placed in the translation unit (e.g., xxx.c) for use within that unit.
+ *   The function is not externalized for use by another unit...we want this
+ *   so we don't end up with multiple units exporting the same function,
+ *   which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ * \verbatim
+   #ifndef INLINE
+   # define INLINE extern inline
+   #endif
+   INLINE int max(int a, int b) {
+     return a > b ? a : b;
+   }
+   \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ * \verbatim
+   #define INLINE
+   #include "header.h"
+   \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+#ifndef __LINUX_KERNEL__
+
+#include <errno.h>
+#include <bpcore/ppc450_inlines.h>  /* For _bgp_msync_nonspeculative() */
+
+#endif /* ! __LINUX_KERNEL__ */
+
+#include <spi/DMA_Assert.h>
+#include <spi/bpcore_interface.h>   /* For _BGP_IC_DMA_NFT_G3_HIER_POS*/
+#include <spi/kernel_interface.h>   /* For Kernel_Virtual2Physical()  */
+#include <spi/linux_kernel_spi.h>
+#include <common/alignment.h>
+
+/* #include <asm/bluegene.h> */
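+/* Maps a BIC interrupt group and an interrupt number within that group to a
+ * flat IRQ number: group g, interrupt i -> ((g+1) << 5) | (i & 0x1f).  For
+ * example, group 3, interrupt 2 yields IRQ 0x82 (decimal 130).
+ */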
+static inline unsigned bic_hw_to_irq(unsigned group, unsigned gint)
+{
+        return ((group+1) << 5) | (gint & 0x1f);
+}
+
+
+/*
+ * ------------------------------------------------------------------------------
+ * Definitions
+ * ------------------------------------------------------------------------------
+ */
+
+/*!
+ * \brief Number of DMA counter groups
+ *
+ * There are 4 counter groups.
+ *
+ */
+#define DMA_NUM_COUNTER_GROUPS     4
+
+
+/*!
+ * \brief Number of DMA counters in a counter group
+ *
+ * There are 64 counters in a counter group.
+ *
+ */
+#define DMA_NUM_COUNTERS_PER_GROUP 64
+
+
+/*!
+ * \brief Number of DMA counters in a counter subgroup
+ *
+ * There are 8 counters in a counter subgroup.
+ *
+ */
+#define DMA_NUM_COUNTERS_PER_SUBGROUP 8
+
+
+/*!
+ * \brief Number of DMA counter subgroups in a group
+ *
+ * There are 8 subgroups in a counter group.
+ *
+ */
+#define DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP (DMA_NUM_COUNTERS_PER_GROUP / DMA_NUM_COUNTERS_PER_SUBGROUP)
+
+
+/*!
+ * \brief Number of DMA counter subgroups, in total, across all groups
+ *
+ * There are 32 subgroups in total.
+ *
+ */
+#define DMA_NUM_COUNTER_SUBGROUPS (DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP * DMA_NUM_COUNTER_GROUPS)
+
+
+/*!
+ * \brief Total number of DMA counters
+ *
+ * There are DMA_NUM_COUNTER_GROUPS * DMA_NUM_COUNTERS_PER_GROUP = 256
+ * counters in total.
+ *
+ */
+#define DMA_NUM_COUNTERS ( DMA_NUM_COUNTER_GROUPS * DMA_NUM_COUNTERS_PER_GROUP)
+
+
+/*!
+ * \brief Initial value for a DMA counter
+ *
+ * This value is somewhat arbitrary, but is chosen to be different from zero,
+ * because zero means the counter has hit zero, and may cause false interrupts.
+ *
+ */
+#define DMA_COUNTER_INIT_VAL 0xFFFFFFFF
+
+
+/*!
+ * \brief Max Number of Cores Per Node
+ *
+ * This is the maximum number of cores that can run on a compute node.
+ */
+#define DMA_MAX_NUM_CORES 4
+
+
+/*!
+ * \brief Returns the word number that the specified counter is in
+ *
+ * \param[in]  counter_id  The ID of the counter (0 to
+ *                         DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return The number of the word that the specified counter is in (0 or 1)
+ *
+ * Used as an index in the "enabled", "enable", "disable", "hit_zero", and
+ * "clear_hit_zero" fields of the DMA_CounterStatus_t structure, and
+ * the permissions field of the DMA_CounterGroup_t structure.
+ *
+ */
+#define DMA_COUNTER_GROUP_WORD_ID(counter_id) ((counter_id)>>5)
+
+
+/*!
+ * \brief Returns the bit within the word that the specified counter is in
+ *
+ * \param[in]  counter_id  The ID of the counter (0 to
+ *                         DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return The bit position within the word that the specified counter is
+ *         in (0-31)
+ *
+ * Used with the "enabled", "enable", "disable", "hit_zero", and
+ * "clear_hit_zero" fields of the DMA_CounterStatus_t structure, and
+ * the "permissions" field of the DMA_CounterGroup_t structure.
+ *
+ */
+#define DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id) ((counter_id) & 0x0000001F)
+
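+/*
+ * Illustrative sketch (status_ptr and the final bit test are hypothetical;
+ * bit numbering within the word follows the hardware specification):
+ * locating the hit-zero state for counter id 37 within a group:
+ *
+ * \verbatim
+   unsigned word_id = DMA_COUNTER_GROUP_WORD_ID(37);      /* 37 >> 5   = 1 */
+   unsigned bit_id  = DMA_COUNTER_GROUP_WORD_BIT_ID(37);  /* 37 & 0x1F = 5 */
+   unsigned word    = status_ptr->hit_zero[ word_id ];
+   /* ...test bit bit_id of word... */
+   \endverbatim
+ */
+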
+
+/*
+ * -----------------------------------------------------------------------------
+ * Structures
+ * -----------------------------------------------------------------------------
+ */
+
+/*!
+ * \brief Hardware DMA Counter
+ *
+ * This maps a DMA counter as it is in the DMA memory map (DMA SRAM).
+ *
+ */
+typedef struct DMA_CounterHw_t
+{
+  volatile unsigned  counter;   /*!< RW Value of the counter                */
+  volatile unsigned  increment; /*!< W Increment the counter by this value  */
+  volatile unsigned  pa_base;   /*!< RW Base address of the counter, 32 bit
+                                        16B-aligned 4-bit shifted address   */
+  volatile unsigned  pa_max;    /*!< RW Maximum payload address (rDMA only),
+				     16B-aligned 4-bit shifted address      */
+}
+DMA_CounterHw_t;
+
+
+/*!
+ * \brief DMA Counter Hardware Status structure
+ *
+ * This structure maps the DMA SRAM for a particular group of
+ * DMA_NUM_COUNTERS_PER_GROUP counters.
+ *
+ * This is a common structure between iDMA and rDMA.
+ *
+ * \see DMA_COUNTER_GROUP_WORD_ID
+ * \see DMA_COUNTER_GROUP_WORD_BIT_ID
+ *
+ */
+typedef struct DMA_CounterStatus_t
+{
+  volatile unsigned enabled[2];        /*!< R bitmask (1 bit/counter):
+                                              Counter is enabled (1=enabled)   */
+  volatile unsigned enable[2];         /*!< W bitmask (1 bit/counter):
+                                              Counter enable: writing a 1 to
+                                              bit i enables counter i.  This
+                                              changes the corresponding bit
+                                              in enabled.                      */
+  volatile unsigned disable[2];        /*!< W bitmask (1 bit/counter):
+                                              Counter disable: writing a 1 to
+                                              bit i disables counter i.  This
+                                              changes the corresponding bit
+                                              in enabled.                      */
+  volatile unsigned reserved[2];       /*!< HOLE                               */
+  volatile unsigned hit_zero[2];       /*!< R bitmask (1 bit/counter):
+                                              Counter hit zero
+                                              (1=counter hit zero)             */
+  volatile unsigned clear_hit_zero[2]; /*!< W bitmask (1 bit/counter):
+                                              Clear counter hit zero: writing
+                                              a 1 to bit i clears the
+                                              corresponding bit in hit_zero.   */
+  volatile unsigned grp_status;        /*!< R bitmask (1 bit/subgroup):
+                                              bit i is 1 if the OR-reduce over
+                                              sub-group i of the hit_zero bits
+                                              ANDed with the enabled bits is 1.
+                                              Note this includes info about
+                                              all DMA_NUM_COUNTERS counters,
+                                              not just those in this group.    */
+}
+DMA_CounterStatus_t;
+
+
+/*!
+ * \brief Software DMA Counter Structure
+ *
+ * This structure provides a shadow (recent copy) of the hardware counter's
+ * base and max.  While accessing the actual hardware DMA counter's base and
+ * max is equivalent, it is slower than accessing them from here.
+ *
+ * Additionally, it stores the corresponding virtual addresses, for easy
+ * retrieval, since the hardware counter does not maintain the virtual
+ * address.
+ *
+ * Finally, it contains a pointer to the corresponding hardware counter in
+ * DMA SRAM.
+ *
+ */
+typedef struct DMA_Counter_t
+{
+  void         *va_base;    /*!< Shadow virtual address of the base            */
+  unsigned int  pa_base;    /*!< Shadow physical address of the base.
+                                 16B-aligned 4-bit shifted address.            */
+  void         *va_max;     /*!< Shadow virtual address of the max (rDMA only) */
+  unsigned int  pa_max;     /*!< Shadow physical address of the max (rDMA only)
+                                 16B-aligned 4-bit shifted address.            */
+  DMA_CounterHw_t *counter_hw_ptr; /*!< Pointer to the hardware counter        */
+}
+ALIGN_L1D_CACHE  DMA_Counter_t;
+/*!
+ * \todo  Re-think whether we need to align this structure on an L1 cache line boundary
+ *
+ */
+
+
+/*!
+ * \enum DMA_Type_t
+ * \brief DMA type (injection/reception) enum
+ *
+ */
+typedef enum DMA_Type_t
+{
+  DMA_Type_Injection = 0,  /*!< Injection type of DMA */
+  DMA_Type_Reception = 1   /*!< Reception type of DMA */
+
+}
+DMA_Type_t;
+
+
+/*!
+ * \brief DMA Counter Group Structure
+ *
+ * This structure defines a DMA Counter Group.  It is filled in by the kernel
+ * during the DMA_CounterGroupAllocate system call.  It points to a
+ * DMA Counter Status structure, and contains up to DMA_NUM_COUNTERS_PER_GROUP
+ * software DMA Counter structures making up this group.
+ *
+ * It also contains permission bits to use the counters, one bit per counter.
+ * When the permission bit is on, the corresponding counter belongs to this
+ * group and can be used.  Otherwise, the counter should not be used as part
+ * of this group.  These permission bits are used as follows:
+ *   1. Inline functions will ASSERT when an attempt is made
+ *      to use a counter that is not part of this group.
+ *   2. Inline functions will use the permission bits as a mask
+ *      to return status information only for the counters allocated
+ *      to this group.
+ * Use the DMA_COUNTER_GROUP_WORD_ID and DMA_COUNTER_GROUP_WORD_BIT_ID
+ * macros to locate the appropriate "permitted_counters" bit.
+ *
+ * Allocations are done in subgroups (groups of DMA_NUM_COUNTERS_PER_SUBGROUP
+ * counters).  This structure contains a bit mask of the subgroups that belong
+ * to this group.
+ *
+ * \see DMA_COUNTER_GROUP_WORD_ID
+ * \see DMA_COUNTER_GROUP_WORD_BIT_ID
+ *
+ */
+typedef struct DMA_CounterGroup_t
+{
+
+  DMA_CounterStatus_t *status_ptr;        /*!< Pointer to counter status       */
+  unsigned int permissions[2];            /*!< Bit i is 1 if permitted to use
+                                               counter i, 0 otherwise. One bit
+                                               per counter,
+                                               DMA_NUM_COUNTERS_PER_GROUP
+                                               counters.                       */
+  unsigned int grp_permissions;           /*!< Bit i is 1 if permitted to use
+                                               subgroup i, 0 otherwise. One
+                                               bit per subgroup, 8 subgroups.  */
+  unsigned int group_id;                  /*!< The id of this group (0 to
+					       DMA_NUM_COUNTER_GROUPS-1).      */
+  DMA_Type_t type;                        /*!< The type of the DMA (injection
+                                               or reception)                   */
+  DMA_Counter_t counter[DMA_NUM_COUNTERS_PER_GROUP]; /*!<
+                                               Software Counter Structures.
+                                               i-th structure's hardware
+                                               pointer is non-NULL if
+                                               permissions[i]=1, NULL if
+                                               permissions[i]=0.               */
+}
+DMA_CounterGroup_t;
+
+
+/*!
+ *
+ * \brief Counter Application Segment
+ *
+ * A segment of user-addressable memory.
+ * Each segment consists of a virtual address, physical address, and length
+ * defining a contiguous segment of storage that is accessible from the
+ * application.
+ */
+typedef struct DMA_CounterAppSegment_t
+{
+  unsigned int length;     /*!< Length in bytes of the segment                */
+  uint32_t     va_base;    /*!< Virtual address of the segment base           */
+  uint32_t     pa_base;    /*!< Shifted physical address of the segment base  */
+  uint32_t     va_max;     /*!< Virtual address of the last byte of segment   */
+} DMA_CounterAppSegment_t;
+
+
+/*!
+ *
+ * \brief Counter Application Segments
+ *
+ * An array of application segments.  There are N application segments per core
+ * on a node.  Thus there are N * (number of cores on a node) application
+ * segments in this array.  The first group of segments in the array correspond
+ * to core 0.  The second group, core 1, etc.
+ */
+extern DMA_CounterAppSegment_t *DMA_CounterAppSegmentArray;
+
+
+/*!
+ * \brief Number of application segments for a core
+ *
+ * The number of application segments is the same for all cores.
+ */
+extern uint32_t                  DMA_CounterNumAppSegments;
+
+
+/*!
+ * \brief The index of the last application segment accessed for a core.
+ */
+extern int                       DMA_CounterCachedAppSegmentIndex[DMA_MAX_NUM_CORES];
+
+
+/*!
+ * \brief The Minimum 4-bit Shifted Physical Address Accessible From User Mode
+ */
+extern uint32_t                  DMA_CounterMinPaAccessibleFromUserMode[DMA_MAX_NUM_CORES];
+
+/*!
+ *
+ * \brief Initialize Counter Application Segments
+ *
+ * Initialize the array of application segments and the global pointer to it.
+ * This identifies the memory regions that the application can access.
+ *
+ * Also, initialize the minimum physical address accessible from user mode
+ * for each core.
+ *
+ * \retval  0            Success
+ * \retval  errorNumber  Failure
+ */
+int DMA_CounterInitAppSegments(void);
+
+
+/*!
+ *
+ * \brief Get Number of Counter Application Segments
+ *
+ * \returns  Number of application segments for a core.
+ */
+__INLINE__ uint32_t DMA_CounterGetNumAppSegments( void )
+{
+  return ( DMA_CounterNumAppSegments );
+}
+
+
+/*!
+ *
+ * \brief Get Pointer to Counter Application Segments
+ *
+ * \param[in]  coreNum  Core number whose application segments pointer is to
+ *                      be returned.
+ *
+ * \returns  Pointer to application segments
+ */
+__INLINE__ DMA_CounterAppSegment_t * DMA_CounterGetAppSegments( unsigned int coreNum )
+{
+  SPI_assert ( coreNum < DMA_MAX_NUM_CORES );
+
+  {
+    unsigned int index = coreNum * DMA_CounterGetNumAppSegments();
+    return ( & ( DMA_CounterAppSegmentArray [ index ] ) );
+  }
+}
+
+
+/*!
+ *
+ * \brief Get Virtual Addresses for the Min and Max Physical Addresses
+ *        for User Space
+ *
+ * Based on information in the DMA_CounterAppSegments array, return the
+ * virtual addresses associated with the min and max physical addresses
+ * allowed for user space.
+ *
+ * \param[out]  va_min  Pointer to a pointer.  Upon return, the pointer is
+ *                      set to the virtual address associated with the
+ *                      minimum physical address allowed for user space.
+ * \param[out]  va_max  Pointer to a pointer.  Upon return, the pointer is
+ *                      set to the virtual address associated with the
+ *                      maximum physical address allowed for user space.
+ *
+ * If the DMA_CounterAppSegmentArray has not been initialized yet
+ * (it is initialized in DMA_CounterGroupAllocate()), a value of 0 for the
+ * min and 0xFFFFFFFF for the max is returned.
+ */
+__INLINE__ void DMA_CounterGetMinMaxVa(void ** va_min,
+				       void ** va_max)
+{
+  /* Determine the core we are running on so the correct application
+   * segments are consulted
+   */
+  unsigned int              coreNum         = Kernel_PhysicalProcessorID();
+  DMA_CounterAppSegment_t * appSegmentArray = DMA_CounterGetAppSegments(coreNum);
+  uint32_t                  numAppSegments  = DMA_CounterGetNumAppSegments();
+
+  if ( appSegmentArray )
+  {
+    uint32_t minPaBase=0xFFFFFFFF, maxPa=0;
+    uint32_t segmentPaBase, segmentPaMax;
+    uint32_t i, minIndex=0, maxIndex=0;
+
+    for (i=0; i<numAppSegments; i++)
+    {
+      segmentPaBase = appSegmentArray[i].pa_base;
+      if ( segmentPaBase < minPaBase )
+      {
+	minPaBase = segmentPaBase;
+	minIndex  = i;
+      }
+
+      segmentPaMax = appSegmentArray[i].pa_base + (appSegmentArray[i].length >> 4);
+      if ( segmentPaMax > maxPa )
+      {
+	maxPa     = segmentPaMax;
+	maxIndex  = i;
+      }
+    }
+
+    *va_min = (void*)(appSegmentArray[minIndex].va_base);
+    *va_max = (void*)(appSegmentArray[maxIndex].va_max);
+
+/*        printf("coreNum=%d, va_min = 0x%08x, minIndex=%d, va_max = 0x%08x, maxIndex=%d, minPa=0x%08x maxPa=0x%08x\n",coreNum,(unsigned)*va_min, minIndex, (unsigned)*va_max, maxIndex, minPaBase, maxPa); */
+/*        fflush(stdout); */
+  }
+  else
+  {
+    *va_min = (void*)0;
+    *va_max = (void*)0xFFFFFFFF;
+  }
+}
+
+
+/*!
+ * \brief Convert a 32-bit virtual address to a 32-bit physical address
+ *
+ * This function is a wrapper around _bgp_Virtual2Physical(), only it combines
+ * its 36-bit output into a 32-bit physical address by right-shifting it 4 bits.
+ * Thus, the physical address returned corresponds to the input virtual address
+ * rounded down to the next lowest 16-byte boundary.
+ *
+ * \param[in]   VA     32-bit virtual address to be converted
+ * \param[in]   vsize  Size in bytes of virtual range
+ * \param[out]  pPA    Pointer to 32-bit physical address.  The output physical
+ *                     address is returned in the storage pointed to by pPA.
+ *
+ * \retval   0  Successful.  The output physical address is in *pPA
+ * \retval  -1  Invalid Virtual Address for this process.  *pPA unmodified.
+ * \retval  -2  The range from VA to (VA+vsize-1) is not physically
+ *              contiguous
+ * \retval  -3  Virtual Address is in Scratch, but no Scratch, or not enough
+ *              Scratch, is enabled.  *pPA unmodified.
+ *
+ */
+__INLINE__ int Kernel_VaTo4bitShiftedPa(void     *VA,
+					size_t    vsize,
+					uint32_t *pPA )
+{
+  int rc;
+  uint32_t ua_out, pa_out;
+
+  SPI_assert( pPA != NULL );
+
+  rc = Kernel_Virtual2Physical(VA,
+			       vsize,
+			       &ua_out,
+			       &pa_out );
+
+  if ( rc == 0 )
+    {
+      *pPA = (ua_out << 28) | (pa_out >> 4);
+    }
+
+  return (rc);
+}
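+/*
+ * Worked example (illustrative): for the 36-bit physical address
+ * 0x1_23456780, Kernel_Virtual2Physical() returns ua_out = 0x1 and
+ * pa_out = 0x23456780, so
+ *   *pPA = (0x1 << 28) | (0x23456780 >> 4) = 0x12345678,
+ * i.e. the full physical address right-shifted 4 bits (16B-aligned).
+ */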
+
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * The following are inline function wrappers around system calls that
+ * operate on DMA counters.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Query Free DMA Counter Subgroups within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available) subgroups within the specified group.
+ *
+ * \param[in]   type           Specifies whether this is an injection or
+ *                             reception counter group (DMA_Type_Injection
+ *                             or DMA_Type_Reception)
+ * \param[in]   grp            Group number being queried (0 to
+ *                             DMA_NUM_COUNTER_GROUPS-1)
+ * \param[out]  num_subgroups  Pointer to an int where the number of free
+ *                             subgroups in the specified group is returned
+ * \param[out]  subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of num_subgroups subgroups is returned.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP-1).
+ *                             The caller must provide space for
+ *                             DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ints, in case the
+ *                             entire counter group is free.
+ *
+ * \retval  0  Successful.  num_subgroups and subgroups array set as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \note The kernel may need to synchronize with other cores performing
+ *       allocate or free syscalls.
+ *
+ */
+__INLINE__ int DMA_CounterGroupQueryFree(
+					 DMA_Type_t  type,
+					 int         grp,
+					 int        *num_subgroups,
+					 int        *subgroups
+					)
+{
+  return Kernel_CounterGroupQueryFree( (uint32_t)type,
+				       grp,
+				       (uint32_t*)num_subgroups,
+				       (uint32_t*)subgroups);
+}
+
+
+/*!
+ * \brief Allocate DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that allocates DMA counters
+ * from the specified group.  Counters may be allocated in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.  Parameters specify how interrupts,
+ * generated when a counter hits zero, are to be handled.  A
+ * DMA_CounterGroup_t structure is returned for use in other inline
+ * functions to operate on the allocated counters.
+ *
+ * \param[in]   type           Specifies whether this is an injection or
+ *                             reception counter group (DMA_Type_Injection
+ *                             or DMA_Type_Reception)
+ * \param[in]   grp            Group number whose counters are being allocated
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   num_subgroups  Number of subgroups to be allocated from the group
+ *                             (1 to DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP)
+ * \param[in]   subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of subgroups to be allocated is provided.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP-1).
+ * \param[in]   target         The core that will receive the interrupt when a
+ *                             counter in this allocation hits zero
+ *                             (0 to DMA_MAX_NUM_CORES-1)
+ * \param[in]   handler        A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             counter in this allocation hits zero.  This
+ *                             function must be coded to take 4 uint32_t
+ *                             parameters:
+ *                             - A pointer to storage specific to this
+ *                               handler.  This is the handler_parm
+ *                               specified on this allocation function.
+ *                             - Three uint32_t parameters that are not used.
+ *                             If handler is NULL, hit-zero interrupts will not
+ *                             be enabled for these counters.
+ * \param[in]   handler_parm   A pointer to storage that should be passed to the
+ *                             interrupt handling function (see handler
+ *                             parameter)
+ * \param[in]   interruptGroup A Kernel_InterruptGroup_t that identifies the
+ *                             group of interrupts that the counters being
+ *                             allocated will become part of.
+ * \param[out]  cg_ptr         Pointer to a structure that is filled in upon
+ *                             successful return for use in other inline
+ *                             functions to operate on the allocated counters.
+ *                             \li counter -     Array of software counter
+ *                                               structures.  Each element
+ *                                               points to the corresponding
+ *                                               hardware counter in DMA SRAM.
+ *                                               Pointers are null if not
+ *                                               allocated.
+ *                                               Counters are initialized to
+ *                                               DMA_COUNTER_INIT_VAL,
+ *                                               disabled, their hit_zero bit
+ *                                               is off, base and max are NULL.
+ *                             \li status_ptr  - Points to status area within the
+ *                                               DMA memory map.
+ *                             \li permissions - Bits set for each allocated
+ *                                               counter
+ *                             \li grp_permissions - Permissions for each
+ *                                                   subgroup
+ *                             \li group_id    - The group number
+ *                             \li type        - The type of DMA (injection or
+ *                                               reception)
+ *
+ * \retval  0  Successful.  Counters allocated and cg_ptr structure filled in as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.  Nothing has been
+ *                            allocated.
+ *
+ * \note The kernel may need to synchronize with other cores performing queries
+ *       or frees.
+ *
+ */
+__INLINE__ int  DMA_CounterGroupAllocate(
+		        DMA_Type_t                type,
+			int                       grp,
+			int                       num_subgroups,
+			int                      *subgroups,
+			int                       target,
+			Kernel_CommThreadHandler  handler,
+			void                     *handler_parm,
+			Kernel_InterruptGroup_t   interruptGroup,
+			DMA_CounterGroup_t       *cg_ptr
+		       )
+{
+  int rc;
+  /*
+   * Initialize the Counter Application Segment array and its global pointer if
+   * it has not been initialized yet.
+   */
+  if ( DMA_CounterAppSegmentArray == NULL )
+  {
+    rc = DMA_CounterInitAppSegments();
+    if (rc) return(rc);
+  }
+
+  /*
+   * If an interrupt handler has been specified, invoke the system call
+   * to configure the kernel to invoke the handler when the hit zero
+   * interrupt fires.
+   */
+
+  if (handler)
+  {
+    /*
+     * Calculate the IRQ to be one of
+     * - 0: inj counter hit zero vector 0
+     * - 1: inj counter hit zero vector 1
+     * - 2: inj counter hit zero vector 2
+     * - 3: inj counter hit zero vector 3
+     *
+     * - 4: rec counter hit zero vector 0
+     * - 5: rec counter hit zero vector 1
+     * - 6: rec counter hit zero vector 2
+     * - 7: rec counter hit zero vector 3
+     * based on the counter type and the DMA group number.
+     */
+    unsigned irqInGroup = (type == DMA_Type_Injection) ? 0 + grp : 4 + grp;
+
+    /*
+     * Calculate an interrupt ID, which is the BIC interrupt group (3)
+     * combined with the IRQ number.
+     */
+/*     int interruptID = Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G3_HIER_POS, */
+/* 					   irqInGroup); */
+    int interruptID = bic_hw_to_irq(_BGP_IC_DMA_NFT_G3_HIER_POS,
+					   irqInGroup);
+
+    /*
+     * Calculate the opcode indicating
+     * - the target core for interrupt
+     * - to call the specified function when the interrupt fires
+     * - to disable interrupts before calling the specified function
+     * - to enable interrupts after calling the specified function
+     */
+    int opcode = ( COMMTHRD_OPCODE_CORE0 + target ) |
+                   COMMTHRD_OPCODE_CALLFUNC |
+                   COMMTHRD_OPCODE_DISABLEINTONENTRY |
+                   COMMTHRD_OPCODE_ENABLEINTONPOOF  ;
+
+    /*
+     * Configure this interrupt with the kernel.
+     */
+    rc = Kernel_SetCommThreadConfig(interruptID,
+				    opcode,
+				    (uint32_t*)interruptGroup,
+				    handler,
+				    (uint32_t)handler_parm,
+				    (uint32_t)NULL,
+				    (uint32_t)NULL,
+				    (uint32_t)NULL);
+    if (rc) return rc;
+  }
+
+  /*
+   * Invoke the system call to allocate the counters.
+   * This system call also sets up the DMA DCRs to interrupt when the
+   * counters hit zero.
+   */
+  rc = Kernel_CounterGroupAllocate( (uint32_t)type,
+				    grp,
+				    num_subgroups,
+				    (uint32_t*)subgroups,
+				    target,
+				    (uint32_t) NULL, /* Handler.        Not used */
+				    (uint32_t*)NULL, /* Handler_parm.   Not used */
+				    (uint32_t) NULL, /* InterruptGroup. Not used */
+				    (uint32_t*)cg_ptr);
+  return rc;
+}
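+/*
+ * A minimal allocation sketch (illustrative only: return codes should be
+ * checked, and the interruptGroup value is unused when no handler is given):
+ *
+ * \verbatim
+   DMA_CounterGroup_t cg;
+   int subgroups[DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP];
+   int num_subgroups;
+
+   DMA_CounterGroupQueryFree( DMA_Type_Injection, 0,
+                              &num_subgroups, subgroups );
+
+   DMA_CounterGroupAllocate( DMA_Type_Injection,  /* injection counters     */
+                             0,                   /* group 0                */
+                             num_subgroups,       /* all free subgroups     */
+                             subgroups,
+                             0,                   /* target core 0          */
+                             NULL,                /* no hit-zero handler    */
+                             NULL,                /* no handler parameter   */
+                             0,                   /* interrupt group unused */
+                             &cg );
+   \endverbatim
+ */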
+
+
+/*!
+ * \brief Free DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA counters
+ * from the specified group.  Counters may be freed in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.
+ *
+ * \param[in]   grp            Group number whose counters are being freed
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   num_subgroups  Number of subgroups to be freed from the group
+ *                             (1 to DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP)
+ * \param[in]   subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of subgroups to be freed is provided.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP-1).
+ * \param[out]  cg_ptr         Pointer to the structure previously filled in when
+ *                             these counters were allocated.  Upon successful
+ *                             return, this structure is updated to reflect the
+ *                             freed counters:
+ *                             \li counter[]  -  Counter structures: pointers
+ *                                               to freed counters are nulled.
+ *                             \li permissions - Bits cleared for each freed
+ *                                               counter.
+ *
+ * \retval  0  Successful.  Counters freed and cg_ptr structure updated as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \note The kernel may need to synchronize with other cores performing allocates
+ *       or queries.
+ */
+__INLINE__ int  DMA_CounterGroupFree(
+				     int                 grp,
+				     int                 num_subgroups,
+				     int                *subgroups,
+				     DMA_CounterGroup_t *cg_ptr
+				    )
+{
+  return Kernel_CounterGroupFree( grp,
+				  num_subgroups,
+				  (uint32_t*)subgroups,
+				  (uint32_t*)cg_ptr);
+}
+
+
+
+/*!
+ * \brief Enable or Disable Counter Overflow and Underflow Interrupts
+ *
+ * This function is a wrapper around a system call that enables or disables
+ * the 4 counter overflow/underflow interrupts for all counters:
+ * 1. Injection counter overflow
+ * 2. Injection counter underflow
+ * 3. Reception counter overflow
+ * 4. Reception counter underflow
+ *
+ * \param[in]  enable  Specifies whether to enable or disable the interrupts
+ *                     0 = Disable, 1 = Enable.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int DMA_CounterInterruptControl(unsigned int enable)
+{
+  return Kernel_ChgCounterInterruptEnables( (uint32_t)enable );
+
+}
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * The following inline functions operate directly on the Hardware DMA Counter.
+ * Note that MSYNC and MBAR are not performed by these hardware functions...
+ * it is up to the caller to perform them.
+ *------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Set DMA Hardware Counter Value
+ *
+ * Set a DMA hardware counter's value, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw   Pointer to the hardware counter structure
+ * \param[in]  value  The value to be set into the counter
+ *
+ * \return None
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueHw(
+				      DMA_CounterHw_t *c_hw,
+				      unsigned int     value
+				     )
+{
+  SPI_assert( c_hw != NULL );
+
+  c_hw->counter = value;
+}
+
+
+/*!
+ * \brief Set DMA Hardware Counter Base
+ *
+ * Set a DMA hardware counter's base, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ * \param[in]  pa_base  The base physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetBaseHw(
+				     DMA_CounterHw_t *c_hw,
+				     unsigned int     pa_base
+				     )
+{
+  SPI_assert( c_hw != NULL );
+
+  c_hw->pa_base = pa_base;
+}
+
+
+/*!
+ * \brief Increment DMA Hardware Counter Value
+ *
+ * Increment a DMA hardware counter's value, given a pointer to the hardware
+ * counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ * \param[in]  incr  The amount to increment the counter by
+ *
+ * \return None
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterIncrementHw(
+				       DMA_CounterHw_t *c_hw,
+				       unsigned int     incr
+				      )
+{
+  SPI_assert( c_hw != NULL );
+
+  c_hw->increment = incr;
+}
+
+
+/*!
+ * \brief Decrement DMA Hardware Counter Value
+ *
+ * Decrement a DMA hardware counter's value, given a pointer to the hardware
+ * counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ * \param[in]  decr  The amount to decrement the counter by
+ *
+ * \return None
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ * \note The counter overflow interrupt will fire as a result of this operation.
+ *       Consider disabling this interrupt.
+ *
+ */
+__INLINE__ void DMA_CounterDecrementHw(
+				       DMA_CounterHw_t *c_hw,
+				       unsigned int     decr
+				      )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Decrement the counter by incrementing with a large value, which will
+   * cause the counter to wrap (e.g., decr = 4 writes 0xFFFFFFFC, which
+   * wraps the 32-bit counter down by 4).
+   */
+  c_hw->increment = (0 - decr);
+}
+
+
+/*!
+ * \brief Set Reception DMA Hardware Counter Max
+ *
+ * Set a reception DMA hardware counter's maximum payload address, given a
+ * pointer to the hardware counter.
+ *
+ * \param[in]  c_hw    Pointer to the hardware counter structure
+ * \param[in]  pa_max  The max physical address to be associated with the
+ *                     counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \pre The caller has ASSERTed that (c_hw != NULL)
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetMaxHw(
+				    DMA_CounterHw_t *c_hw,
+				    unsigned int     pa_max
+				   )
+{
+  c_hw->pa_max = pa_max;
+}
+
+
+/*!
+ * \brief Set DMA Hardware Counter Value and Base
+ *
+ * Set a DMA hardware counter's value and base, given a pointer to the hardware
+ * counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ * \param[in]  value    The value to be set into the counter
+ * \param[in]  pa_base  The base physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueBaseHw(
+					  DMA_CounterHw_t *c_hw,
+					  unsigned int     value,
+					  unsigned int     pa_base
+					 )
+{
+  SPI_assert( c_hw != NULL );
+
+  c_hw->counter = value;
+  c_hw->pa_base = pa_base;
+
+}
+
+
+/*!
+ * \brief Set Reception DMA Hardware Counter Value, Base, and Max
+ *
+ * Set a reception DMA hardware counter's value, base, and max, given a pointer
+ * to the hardware counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ * \param[in]  value    The value to be set into the counter
+ * \param[in]  pa_base  The base physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ * \param[in]  pa_max   The max physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueBaseMaxHw(
+					     DMA_CounterHw_t *c_hw,
+					     unsigned int     value,
+					     unsigned int     pa_base,
+					     unsigned int     pa_max
+					    )
+{
+  SPI_assert( c_hw != NULL );
+  SPI_assert( pa_max >= pa_base);
+
+  c_hw->counter = value;
+  c_hw->pa_base = pa_base;
+  c_hw->pa_max  = pa_max;
+}
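+/*
+ * Illustrative use (the names pa_recv and nbytes are hypothetical): arm a
+ * reception counter for an expected message of nbytes bytes landing at
+ * 4-bit shifted physical address pa_recv.  Addresses are in 16B units, so
+ * the byte length is shifted right 4 bits to form the max:
+ *
+ * \verbatim
+   DMA_CounterSetValueBaseMaxHw( c_hw, nbytes, pa_recv,
+                                 pa_recv + (nbytes >> 4) );
+   _bgp_mbar();   /* the caller is responsible for the mbar */
+   \endverbatim
+ */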
+
+
+/*!
+ * \brief Get DMA Hardware Counter Value
+ *
+ * Get a DMA hardware counter's value, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw   Pointer to the hardware counter structure
+ *
+ * \retval  value  The current value of the counter
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetValueHw(
+					      const DMA_CounterHw_t *c_hw
+					     )
+{
+  SPI_assert( c_hw != NULL );
+
+  return( c_hw->counter );
+}
+
+
+/*!
+ * \brief Get DMA Hardware Counter Base
+ *
+ * Get a DMA hardware counter's base, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ *
+ * \retval  pa_base  The base physical address associated with the counter.
+ *                   16B-aligned 4-bit shifted physical address.
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetBaseHw(
+					     const DMA_CounterHw_t *c_hw
+					    )
+{
+  SPI_assert( c_hw != NULL );
+
+  return( c_hw->pa_base );
+}
+
+
+/*!
+ * \brief Get Reception DMA Hardware Counter Max
+ *
+ * Get a reception DMA hardware counter's max payload address, given a pointer
+ * to the hardware counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ *
+ * \retval  pa_max  The max physical address associated with the counter.
+ *                  16B-aligned 4-bit shifted physical address.
+ *
+ * \note No MSYNC or MBAR is done in this function.  It is the responsibility
+ *       of the caller to do it.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetMaxHw(
+					    const DMA_CounterHw_t *c_hw
+					   )
+{
+  SPI_assert( c_hw != NULL );
+
+  return( c_hw->pa_max );
+}
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * The following inline functions operate indirectly on a hardware DMA counter
+ * through the Software DMA Counter structure.
+ *------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Counter Value
+ *
+ * Set a DMA counter's value, given a pointer to the software DMA counter
+ * structure.
+ *
+ * \param[in]  c_sw   Pointer to the software counter structure
+ * \param[in]  value  The value to be set into the counter
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_CounterSetValue(
+				    DMA_Counter_t *c_sw,
+				    unsigned int   value
+				   )
+{
+  SPI_assert( c_sw != NULL );
+
+  DMA_CounterSetValueHw(c_sw->counter_hw_ptr,
+			value);
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+
+}
+
+
+/*!
+ * \brief Set DMA Counter Base Address
+ *
+ * Set a DMA counter's base address, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw        Pointer to the software counter structure
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_base_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_base and pa_base are set.
+ *       In the hardware counter structure, pa_base is set.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ * \note The va_base in the software counter structure is the va_base_in rounded
+ *       down to the next lowest 16B-aligned address.  The pa_base is the 4-bit
+ *       shifted version of va_base.
+ *
+ */
+__INLINE__ int DMA_CounterSetBase(
+				  DMA_Counter_t *c_sw,
+				  void          *va_base_in
+				 )
+{
+  int rc;
+
+  SPI_assert( c_sw != NULL );
+
+  /*
+   * 16-B align the virtual address and store result in software counter
+   * structure
+   */
+  c_sw->va_base = (char*)( (unsigned)va_base_in & 0xFFFFFFF0 );
+
+  rc =  Kernel_VaTo4bitShiftedPa(c_sw->va_base,
+				 1,
+				 &(c_sw->pa_base) );
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  /*
+   * Write physical address to the hardware counter
+   */
+  DMA_CounterSetBaseHw(c_sw->counter_hw_ptr,
+		       c_sw->pa_base);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+
+  return (0);
+
+}
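+
+/*
+ * Illustrative usage sketch (not part of the original interface; the buffer
+ * name and size are assumptions for the example): an injection counter is
+ * typically armed by pairing a base address with a byte count.
+ *
+ *   char inj_buf[1024];                          // hypothetical send buffer
+ *
+ *   if ( DMA_CounterSetBase( c_sw, inj_buf ) != 0 )
+ *     return -1;                                 // EFAULT on a bad address
+ *   DMA_CounterSetValue( c_sw, sizeof(inj_buf) );
+ *   // Both calls MBAR internally, so no extra barrier is needed here.
+ */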
+
+
+/*!
+ * \brief Increment DMA Counter
+ *
+ * Increment a DMA counter's value, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw   Pointer to the software counter structure
+ * \param[in]  incr   The amount to increment the counter by
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterIncrement(
+				     DMA_Counter_t *c_sw,
+				     unsigned int   incr
+				    )
+{
+  SPI_assert( c_sw != NULL );
+
+  DMA_CounterIncrementHw(c_sw->counter_hw_ptr,
+			 incr);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+}
+
+
+/*!
+ * \brief Decrement DMA Counter
+ *
+ * Decrement a DMA counter's value, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw   Pointer to the software counter structure
+ * \param[in]  decr   The amount to decrement the counter by
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterDecrement(
+				     DMA_Counter_t *c_sw,
+				     unsigned int   decr
+				    )
+{
+  SPI_assert( c_sw != NULL );
+
+  DMA_CounterDecrementHw(c_sw->counter_hw_ptr,
+			 decr);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+}
+
+
+/*!
+ * \brief Set DMA Counter Max Address
+ *
+ * Set a DMA counter's max address, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw       Pointer to the software counter structure
+ * \param[in]  va_max_in  The max virtual address to be associated with the
+ *                        counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_max_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_max and pa_max are set.
+ *       In the hardware counter structure, pa_max is set.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ * \note The va_max in the software counter structure is the va_max_in rounded
+ *       up to the next highest 16B-aligned address.  The pa_max is the 4-bit
+ *       shifted version of va_max.
+ *
+ */
+__INLINE__ int DMA_CounterSetMax(
+				 DMA_Counter_t *c_sw,
+				 void          *va_max_in
+				)
+{
+  int rc;
+
+  SPI_assert( c_sw != NULL );
+
+  /*
+   * Round the virtual address up to the next 16B boundary (already-aligned
+   * addresses are unchanged) and store the result in the software counter
+   * structure.
+   */
+  c_sw->va_max = (char*) ( (unsigned)va_max_in & 0xFFFFFFF0 );
+  if ( c_sw->va_max != va_max_in )  c_sw->va_max = (char*)c_sw->va_max + 0x00000010;
+
+  /*
+   * Get the 16B-aligned 4-bit shifted physical address from the virtual address.
+   */
+  rc = Kernel_VaTo4bitShiftedPa(c_sw->va_max,
+				1,
+				&(c_sw->pa_max) );
+
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  /*
+   * Write physical address to the hardware counter
+   */
+  DMA_CounterSetMaxHw(c_sw->counter_hw_ptr,
+		      c_sw->pa_max);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+
+  return (0);
+
+}
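+
+/*
+ * Worked example of the round-up above (illustrative numbers):
+ * va_max_in = 0x10002345 masks down to 0x10002340, which differs from the
+ * input, so 0x10 is added: va_max = 0x10002350, the next 16B boundary.
+ * An already-aligned input such as 0x10002340 is left unchanged.
+ */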
+
+
+/*!
+ * \brief Set DMA Counter Value and Base Address
+ *
+ * Set a DMA counter's value and base address, given a pointer to the software
+ * counter structure.
+ *
+ * \param[in]  c_sw        Pointer to the software counter structure
+ * \param[in]  value       The value to be set into the counter
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_base_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_base and pa_base are set.
+ *       In the hardware counter structure, pa_base and value are set.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ * \note The va_base in the software counter structure is the va_base_in rounded
+ *       down to the next lowest 16B-aligned address.  The pa_base is the 4-bit
+ *       shifted version of va_base.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBase(
+				       DMA_Counter_t *c_sw,
+				       unsigned int   value,
+				       void          *va_base_in
+				      )
+{
+  int rc=0;
+
+  SPI_assert( c_sw != NULL );
+
+  /*
+   * 16-B align the virtual address and store result in software counter
+   * structure
+   */
+  c_sw->va_base = (char*) ( (unsigned)va_base_in & 0xFFFFFFF0 );
+
+  /*
+   * Get the 16B-aligned 4-bit shifted physical address from the virtual address.
+   */
+  rc = Kernel_VaTo4bitShiftedPa(c_sw->va_base,
+				1,
+				&(c_sw->pa_base) );
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  /*
+   * Write the value and physical address to the hardware counter
+   */
+  DMA_CounterSetValueBaseHw(c_sw->counter_hw_ptr,
+			    value,
+			    c_sw->pa_base );
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+
+  return (0);
+}
+
+
+/*!
+ * \brief Set DMA Counter Value, Base Address, and Max Address
+ *
+ * Set a reception DMA counter's value, base address, and max address, given a
+ * pointer to the software counter structure.
+ *
+ * \param[in]  c_sw        Pointer to the software counter structure
+ * \param[in]  value       The value to be set into the counter
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ * \param[in]  va_max_in   The max virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_base_in or va_max_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_base, pa_base, va_max, and pa_max
+ *       are set.  In the hardware counter structure, pa_base, pa_max, and value
+ *       are set.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ * \note The va_base in the software counter structure is the va_base_in rounded
+ *       down to the next lowest 16B-aligned address.  The pa_base is the 4-bit
+ *       shifted version of va_base.
+ *
+ * \note The va_max in the software counter structure is the va_max_in rounded
+ *       down to the previous 16B-aligned address (rounding up could push the
+ *       max one byte out of range).  The pa_max is the 4-bit shifted version
+ *       of va_max.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBaseMax(
+					  DMA_Counter_t *c_sw,
+					  unsigned int   value,
+					  void          *va_base_in,
+					  void          *va_max_in
+					 )
+{
+  int rc=0;
+  void *va_base, *va_max;
+  unsigned int pa_base, pa_max;
+
+  SPI_assert( c_sw != NULL );
+
+  /*
+   * Process the base address:
+   * - 16-B align the virtual address and store result in software counter
+   *   structure
+   * - Get the 16B-aligned 4-bit shifted physical address from the virtual
+   *   address.
+   */
+  va_base = c_sw->va_base = (char*) ( (unsigned)va_base_in & 0xFFFFFFF0 );
+
+  rc = Kernel_VaTo4bitShiftedPa(va_base,
+				1,
+				&pa_base );
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  c_sw->pa_base = pa_base;
+
+  /*
+   * Process the max address:
+   * - 16B align the virtual address and store result in software counter structure.
+   *   Note: we can't round up or the address may be one byte out of range.
+   * - Get the 16B-aligned 4-bit shifted physical address from the virtual
+   *   address.
+   */
+  va_max = c_sw->va_max = (char*) ( (unsigned)va_max_in & 0xFFFFFFF0 );
+
+  rc = Kernel_VaTo4bitShiftedPa(va_max,
+				1,
+				&pa_max );
+/*    printf("SetValueBaseMax: va_max_in=0x%08x, va_max=0x%08x, pa_max=0x%08x, rc=%d\n",(unsigned)va_max_in, (unsigned)va_max,pa_max,rc); */
+/*    fflush(stdout); */
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  c_sw->pa_max = pa_max;
+
+  /*
+   * Write the value, base, and max to the hardware counter
+   */
+  DMA_CounterSetValueBaseMaxHw(c_sw->counter_hw_ptr,
+			       value,
+			       pa_base,
+			       pa_max);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+
+  return (0);
+}
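+
+/*
+ * Illustrative sketch (the buffer name and size are assumptions for the
+ * example): a reception counter is typically armed in a single call so
+ * value, base, and max are consistent before the first packet can land.
+ * The max passed here is the last byte of the buffer, not one past the
+ * end, since the max is rounded down rather than up.
+ *
+ *   char rcv_buf[4096];                          // hypothetical receive buffer
+ *
+ *   if ( DMA_CounterSetValueBaseMax( c_sw,
+ *                                    sizeof(rcv_buf),      // bytes expected
+ *                                    rcv_buf,              // base
+ *                                    rcv_buf + sizeof(rcv_buf) - 1 ) != 0 )
+ *     return -1;                                 // EFAULT on a bad address
+ */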
+
+
+/*!
+ * \brief Get DMA Counter Value
+ *
+ * Get a DMA counter's value, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval  value  The value of the specified counter
+ *
+ * \note This function does an MSYNC after fetching the counter's value
+ *       to ensure that the data that was just DMA'd is available to all
+ *       cores.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetValue(
+					    const DMA_Counter_t *c_sw
+					   )
+{
+  unsigned int val;
+
+  SPI_assert( c_sw != NULL );
+
+  val = DMA_CounterGetValueHw( c_sw->counter_hw_ptr );
+
+  _bgp_msync();
+
+  return val;
+
+}
+
+
+/*!
+ * \brief Get DMA Counter Value with No Msync
+ *
+ * Get a DMA counter's value, given a pointer to the software counter
+ * structure.  No Msync is done.  It is up to the caller to do it,
+ * if necessary.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval  value  The value of the specified counter
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetValueNoMsync(
+						   const DMA_Counter_t *c_sw
+						  )
+{
+  unsigned int val;
+
+  SPI_assert( c_sw != NULL );
+
+  val = DMA_CounterGetValueHw( c_sw->counter_hw_ptr );
+
+  return val;
+
+}
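+
+/*
+ * Illustrative polling sketch: spin on the cheap no-msync variant and do a
+ * single msync'd read once the counter reaches zero, so the DMA'd data is
+ * made visible to all cores exactly once.
+ *
+ *   while ( DMA_CounterGetValueNoMsync( c_sw ) != 0 )
+ *     ;                                      // transfer still in progress
+ *   (void) DMA_CounterGetValue( c_sw );      // final read performs the msync
+ */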
+
+
+/*!
+ * \brief Get DMA Base Address
+ *
+ * Get a DMA counter's base virtual address, given a pointer to the software
+ * counter structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval  va_base  The base virtual address associated with the specified
+ *                   counter
+ *
+ * \note This returns the shadow va_base directly out of the software counter
+ *       structure.  This should correspond with the physical address in the
+ *       hardware counter, but it is a rounded-down-to-the-previous-16B-boundary
+ *       version of the actual base virtual address of the buffer the caller is
+ *       working with.
+ *
+ */
+__INLINE__ void * DMA_CounterGetBase(
+				     const DMA_Counter_t *c_sw
+				    )
+{
+  SPI_assert( c_sw != NULL );
+
+  return( c_sw->va_base );
+}
+
+
+/*!
+ * \brief Get Reception DMA Max Address
+ *
+ * Get a reception DMA counter's max virtual address, given a pointer to
+ * the software counter structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval  va_max  The max virtual address associated with the specified
+ *                  counter
+ *
+ * \note This returns the shadow va_max directly out of the software counter
+ *       structure.  This should correspond with the physical address in the
+ *       hardware counter, but it is a rounded-up-to-the-next-16B-boundary
+ *       version of the actual max virtual address of the buffer the caller is
+ *       working with.
+ *
+ */
+__INLINE__ void *DMA_CounterGetMax(
+				   const DMA_Counter_t *c_sw
+				  )
+{
+  SPI_assert( c_sw != NULL );
+
+  return( c_sw->va_max );
+}
+
+
+/*!
+ * \brief Get Offset from DMA Base Address
+ *
+ * Given a virtual address, get the offset from the base address associated with
+ * a counter.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ * \param[in]  va    Virtual address whose offset from the counter's base is
+ *                   to be returned.
+ * \param[in]  length   The number of bytes in the buffer pointed to by va.
+ * \param[in]  coreNum  The number of the core in which the virtual
+ *                      address resides (0 to DMA_MAX_NUM_CORES-1).
+ *
+ * \retval  offset   The offset of the va from the counter's base.
+ *
+ * \note This uses the counter's physical base address and the application's
+ *       memory segments (see DMA_CounterAppSegment_t).
+ *
+ * \note It is assumed that if the coreNum is not our core, then the counter's
+ *       base address (used in calculating the offset) is the smallest physical
+ *       address accessible from user space on coreNum
+ *       (DMA_CounterMinPaAccessibleFromUserMode[coreNum]).
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetOffsetFromBase(
+						     const DMA_Counter_t *c_sw,
+						     void                *va,
+						     unsigned int         length,
+						     unsigned int         coreNum
+						    )
+{
+  SPI_assert( c_sw != NULL );
+  SPI_assert( va   != NULL );
+  SPI_assert ( coreNum < DMA_MAX_NUM_CORES );
+  {
+  DMA_CounterAppSegment_t *appSegmentArray = DMA_CounterGetAppSegments( coreNum );
+  uint32_t                 numAppSegments;
+  uint32_t                 i;
+  uint32_t                 segmentVaBase;
+  uint32_t                 offset;
+  uint32_t                 ourCoreNum = Kernel_PhysicalProcessorID();
+  uint32_t                 counterPaBase;
+
+
+  /* Determine which application segment the virtual address is in. */
+  /* First, check the last app segment accessed.                    */
+  i             = DMA_CounterCachedAppSegmentIndex[coreNum];
+  segmentVaBase = appSegmentArray[i].va_base;
+  if ( ! ( ((uint32_t)va >= segmentVaBase) &&
+	   ((uint32_t)va - segmentVaBase < appSegmentArray[i].length) ) )
+  {
+    /* It is not the last app segment accessed.  Search them. */
+    numAppSegments = DMA_CounterGetNumAppSegments();
+    for (i=0; i<numAppSegments; i++)
+    {
+      segmentVaBase = appSegmentArray[i].va_base;
+      if ( ((uint32_t)va >= segmentVaBase) &&
+	   ((uint32_t)va - segmentVaBase < appSegmentArray[i].length) )
+	break;
+    }
+    SPI_assert(i < numAppSegments);
+    DMA_CounterCachedAppSegmentIndex[coreNum] = i;
+  }
+
+  /*
+   * Make sure buffer fits in app segment.
+   */
+  if ( ( (uint32_t)va + length - 1 ) > appSegmentArray[i].va_max )
+  {
+    printf("DMA_CounterGetOffsetFromBase: Buffer 0x%08x of length %d is out of bounds.  Check length.\n",
+	   (unsigned)va, length);
+    SPI_assert(0);
+  }
+
+  /*
+   * If coreNum is our core, use the offset from our core's counter base to
+   * calculate the DMA offset.
+   * Otherwise, assume the counter base is the smallest physical address
+   * accessible from user space on coreNum and use that.
+   */
+  if ( ourCoreNum == coreNum )
+    counterPaBase = c_sw->pa_base;
+  else
+    counterPaBase = DMA_CounterMinPaAccessibleFromUserMode[coreNum];
+
+  /*
+   * If the base physical address of the application segment found above is
+   * greater than or equal to the counter's base physical address (typical
+   * case), proceed with the calculation based on that.
+   *
+   * Otherwise, use a slightly different calculation (see else leg).
+   */
+  if ( appSegmentArray[i].pa_base >= counterPaBase )
+  {
+    /*
+     * Calculate the offset from the counter base:
+     * - offset from app segment's virtual address base (va - segmentVaBase) +
+     * - segment's physical base (shifted) - counter's base (shifted) * 16
+     */
+    offset =
+      ((unsigned)va - segmentVaBase) +
+      ( (appSegmentArray[i].pa_base - counterPaBase) << 4 );
+
+/*     printf("GetOffsetFromBase:  va=0x%08x, length=%d, offset=0x%08x, index=%d, segmentVaBase=0x%08x, appSegmentArrayPaBase=0x%08x, counterBase=0x%08x\n",(unsigned)va, length, offset, i, */
+/* 	   segmentVaBase, appSegmentArray[i].pa_base, counterPaBase); */
+/*     fflush(stdout); */
+  }
+  /*
+   * Handle the case where the counter's base exceeds the app segment's base.
+   * This occurs when the counter's base is set to the base of the buffer
+   * rather than the min base of all the app segments.
+   */
+  else
+  {
+    offset =
+      ((unsigned)va - segmentVaBase) -
+      ( (counterPaBase - appSegmentArray[i].pa_base) << 4 );
+
+/*     printf("GetOffsetFromBase2:  va=0x%08x, length=%d, offset=0x%08x, index=%d, segmentVaBase=0x%08x, appSegmentArrayPaBase=0x%08x, counterBase=0x%08x\n",(unsigned)va, length, offset, i, */
+/* 	   segmentVaBase, appSegmentArray[i].pa_base, counterPaBase); */
+/*     fflush(stdout); */
+  }
+
+  return ( offset );
+  }
+}
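+
+/*
+ * Worked example of the typical-case calculation above (illustrative
+ * numbers): for va = 0x40001000 in an app segment with
+ * segmentVaBase = 0x40000000 and pa_base (shifted) = 0x05000000, and a
+ * counter whose pa_base (shifted) is 0x04FF0000:
+ *
+ *   offset = (0x40001000 - 0x40000000)
+ *          + ((0x05000000 - 0x04FF0000) << 4)
+ *          = 0x1000 + 0x100000
+ *          = 0x101000
+ */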
+
+
+
+
+/*
+ * ------------------------------------------------------------------------------
+ *
+ * The following functions access counters by specifying the group pointer and
+ * counter_id.
+ *
+ * ------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Counter Value using a Counter ID
+ *
+ * Set a DMA counter's value, given a counter group structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  value       The value to be set into the counter
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueById(
+					DMA_CounterGroup_t *cg_ptr,
+					int                 counter_id,
+					unsigned int        value
+				       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  DMA_CounterSetValue( &cg_ptr->counter[counter_id],
+		       value );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+}
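+
+/*
+ * Note on the permission check used by these ById wrappers (a sketch; the
+ * macros are defined elsewhere): DMA_COUNTER_GROUP_WORD_ID() and
+ * DMA_COUNTER_GROUP_WORD_BIT_ID() are assumed to split counter_id into a
+ * word index and a bit within that word, e.g. counter_id 37 -> word 1,
+ * bit 5, tested as ( permissions[1] & _BN(5) ) != 0.
+ */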
+
+
+/*!
+ * \brief Set DMA Counter Base Address using a Counter ID
+ *
+ * Set a DMA counter's base address, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_base_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetBaseById(
+				      DMA_CounterGroup_t *cg_ptr,
+				      int                 counter_id,
+				      void               *va_base_in
+				     )
+{
+  int rc;
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  rc = DMA_CounterSetBase( &cg_ptr->counter[counter_id],
+			   va_base_in );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+
+  return rc;
+}
+
+
+/*!
+ * \brief Increment DMA Counter using a Counter ID
+ *
+ * Increment a DMA counter's value, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being incremented
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  incr        The amount to increment the counter by
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterIncrementById(
+					 DMA_CounterGroup_t *cg_ptr,
+					 int                 counter_id,
+					 unsigned int        incr
+					)
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  DMA_CounterIncrement( &cg_ptr->counter[counter_id],
+			incr );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+}
+
+
+/*!
+ * \brief Decrement DMA Counter using a Counter ID
+ *
+ * Decrement a DMA counter's value, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being decremented
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  decr        The amount to decrement the counter by
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterDecrementById(
+					 DMA_CounterGroup_t *cg_ptr,
+					 int                 counter_id,
+					 unsigned int        decr
+					)
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  DMA_CounterDecrement( &cg_ptr->counter[counter_id],
+			decr );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+}
+
+
+/*!
+ * \brief Set Reception DMA Counter Max Address using a Counter ID
+ *
+ * Set a reception DMA counter's max address, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  va_max_in   The max virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_max_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetMaxById(
+				     DMA_CounterGroup_t *cg_ptr,
+				     int                 counter_id,
+				     void               *va_max_in
+				    )
+{
+  int rc;
+
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  rc = DMA_CounterSetMax( &cg_ptr->counter[counter_id],
+			  va_max_in );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+
+  return rc;
+
+}
+
+
+/*!
+ * \brief Get DMA Counter Value using a Counter ID
+ *
+ * Get a DMA counter's value, given a counter group structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval  value  The value of the counter
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetValueById(
+					const DMA_CounterGroup_t *cg_ptr,
+			                const int                 counter_id
+				       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  return ( DMA_CounterGetValue( &cg_ptr->counter[counter_id] ) );
+}
+
+
+/*!
+ * \brief Get DMA Counter Base Address using a Counter ID
+ *
+ * Get a DMA counter's base virtual address, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval  va_base  The base virtual address associated with the specified
+ *                   counter
+ *
+ * \note This returns the shadow va_base directly out of the software counter
+ *       structure.  This should correspond with the physical address in the
+ *       hardware counter, but it is a rounded-down-to-the-previous-16B-boundary
+ *       version of the actual base virtual address of the buffer the caller is
+ *       working with.
+ *
+ */
+__INLINE__ void * DMA_CounterGetBaseById(
+				const DMA_CounterGroup_t *cg_ptr,
+				int                       counter_id
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  return( DMA_CounterGetBase( &cg_ptr->counter[counter_id] ) );
+}
+
+
+/*!
+ * \brief Get Offset from DMA Base Address using a Counter ID
+ *
+ * Given a virtual address, get the offset from the base address associated with
+ * the specified counter.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when
+ *                          the counter was allocated
+ * \param[in]  counter_id   Identifier of the counter
+ *                          (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  va           Virtual address whose offset from the counter's base
+ *                          is to be returned.
+ * \param[in]  length       The number of bytes in the buffer pointed to by va.
+ * \param[in]  coreNum      The number of the core in which the virtual
+ *                          address resides (0 to DMA_MAX_NUM_CORES-1).
+ *
+ * \retval  offset   The offset of the va from the counter's base.
+ *
+ * \note This uses the counter's physical base address and the application's
+ *       memory segments (see DMA_CounterAppSegment_t), as described for
+ *       DMA_CounterGetOffsetFromBase().
+ *
+ * \note No check is made for the case where va is less than the base
+ *       address.  In that case, (va - va_base) is returned, whatever that is.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetOffsetFromBaseById(
+				const DMA_CounterGroup_t *cg_ptr,
+				int                       counter_id,
+   			        void                     *va,
+				unsigned int              length,
+				unsigned int              coreNum
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+/*   printf("Getting offset from counter %d for core %d\n",counter_id,coreNum); */
+  return( DMA_CounterGetOffsetFromBase( &cg_ptr->counter[counter_id],
+					va,
+					length,
+					coreNum ) );
+}
+
+
+/*!
+ * \brief Get Reception DMA Counter Max Address Using a Counter ID
+ *
+ * Get a reception DMA counter's maximum virtual address, given a counter group
+ * structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval  va_max  The virtual address of the max of the counter
+ *
+ * \note This returns the shadow va_max directly out of the software counter
+ *       structure.  This should correspond with the physical address in the
+ *       hardware counter, but it is a rounded-up-to-the-next-16B-boundary
+ *       version of the actual max virtual address of the buffer the caller is
+ *       working with.
+ *
+ */
+__INLINE__ void * DMA_CounterGetMaxById(
+				const DMA_CounterGroup_t *cg_ptr,
+				const int                 counter_id
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  return ( DMA_CounterGetMax( &cg_ptr->counter[counter_id] ) );
+}
+
+
+/*!
+ * \brief Set DMA Counter Value and Base Address using a Counter ID
+ *
+ * Set a DMA counter's value and base address, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  value       The value to be set into the counter
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_base_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBaseById(
+					   DMA_CounterGroup_t *cg_ptr,
+					   int                 counter_id,
+					   unsigned int        value,
+					   void               *va_base_in
+					  )
+{
+  int rc;
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  rc =  DMA_CounterSetValueBase( &cg_ptr->counter[counter_id],
+				 value,
+				 va_base_in );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+
+  return rc;
+}
+
+
+
+
+/*!
+ * \brief Set Reception DMA Counter Value, Base Address, and Max Address  using
+ *        a Counter ID
+ *
+ * Set a reception DMA counter's value, base address, and max address, given a
+ * counter group structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  value       The value to be set into the counter
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ * \param[in]  va_max_in   The max virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0   Success
+ * \retval  -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *              the va_base_in or va_max_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBaseMaxById(
+				DMA_CounterGroup_t *cg_ptr,
+				int                 counter_id,
+				unsigned int        value,
+				void               *va_base_in,
+				void               *va_max_in
+			       )
+{
+  int rc;
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  rc =  DMA_CounterSetValueBaseMax( &cg_ptr->counter[counter_id],
+				    value,
+				    va_base_in,
+				    va_max_in );
+
+   /*  Note: it is assumed that the above function call performs an MBAR */
+
+  return rc;
+
+}
+
+
+/*!
+ * \brief Enable DMA Counter using a Counter ID
+ *
+ * Enable a DMA counter, given a counter group structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being enabled
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetEnableById(
+				DMA_CounterGroup_t *cg_ptr,
+				int                 counter_id
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  {
+  /* Enable the counter by writing 1 to the appropriate bit */
+  int r =  DMA_COUNTER_GROUP_WORD_ID(counter_id);
+  int c =  DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+  cg_ptr->status_ptr->enable[r] = _BN(c);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+  }
+}
+
+
+/*!
+ * \brief Disable DMA Counter using a Counter ID
+ *
+ * Disable a DMA counter, given a counter group structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being disabled
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetDisableById(
+				DMA_CounterGroup_t *cg_ptr,
+				int                 counter_id
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  {
+  /* Disable the counter by writing 1 to the appropriate bit */
+  int r =  DMA_COUNTER_GROUP_WORD_ID(counter_id);
+  int c =  DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+  cg_ptr->status_ptr->disable[r] = _BN(c);
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+  }
+}
+
+
+/*!
+ * \brief Determine Whether a DMA Counter is Enabled using a Counter ID
+ *
+ * Determine whether a DMA counter is enabled, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being queried
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval  0  The counter is disabled
+ * \retval  1  The counter is enabled
+ *
+ */
+__INLINE__ int DMA_CounterGetEnabledById(
+				const DMA_CounterGroup_t *cg_ptr,
+				int                       counter_id
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  {
+  /* Return 0 or 1 if counter is disabled/enabled */
+  int r =  DMA_COUNTER_GROUP_WORD_ID(counter_id);
+  int c =  DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+  if ( ( cg_ptr->status_ptr->enabled[r] & _BN(c) ) == 0 ) {return 0;}
+  else { return 1;}
+  }
+}
+
+
+/*!
+ * \brief Determine Whether a DMA Counter Has Hit Zero using a Counter ID
+ *
+ * Determine whether a DMA counter has hit zero, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being queried
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval  0  The counter has not hit zero
+ * \retval  1  The counter has hit zero
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ *       zero to ensure that the data that was just DMA'd is available to all
+ *       cores.  The msync is only done if this is a reception counter group,
+ *       since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ int DMA_CounterGetHitZeroById(
+				const DMA_CounterGroup_t *cg_ptr,
+				int                       counter_id
+			       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  {
+  /* Return 0 or 1 if counter has hit zero */
+  int r =  DMA_COUNTER_GROUP_WORD_ID(counter_id);
+  int c =  DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+  if ( ( cg_ptr->status_ptr->hit_zero[r] & _BN(c) ) == 0 ) {return 0;}
+  else {
+    /* By convention, we assume that if counter has hit zero, then it will be
+     * used.  This requires an msync to ensure snoops from the DMA arbiter
+     * have hit the cores.  That is, the data that was just DMA'd is available
+     * to all cores.
+     *
+     * Furthermore, if we just put a _bgp_msync() here, it could get
+     * speculatively executed and withdrawn even if the counter hasn't hit zero,
+     * so we call a special version of this function that ensures the speculation
+     * does not occur.
+     *
+     * It only needs to be done for reception counters since there is nothing
+     * to sync when sending data.
+     */
+    if ( cg_ptr->type == DMA_Type_Reception ) _bgp_msync_nonspeculative();
+    return 1;
+  }
+  }
+}
+
+
+/*!
+ * \brief Clear a DMA Counter's Hit Zero Status using a Counter ID
+ *
+ * Clear a DMA counter's "hit zero" status, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter whose "hit zero" status is
+ *                         being cleared (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterClearHitZeroById(
+					DMA_CounterGroup_t *cg_ptr,
+					int                 counter_id
+				       )
+{
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  {
+  /* Clear the hit zero bit of a counter */
+  int r =  DMA_COUNTER_GROUP_WORD_ID(counter_id);
+  int c =  DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+  cg_ptr->status_ptr->clear_hit_zero[r] = _BN(c) ;
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+  }
+}
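+
+/*
+ * Illustrative lifecycle sketch using the Id-based calls (cg_ptr, id, buf,
+ * and nbytes are assumptions for the example):
+ *
+ *   DMA_CounterSetEnableById( cg_ptr, id );
+ *   DMA_CounterSetValueBaseById( cg_ptr, id, nbytes, buf );
+ *
+ *   while ( !DMA_CounterGetHitZeroById( cg_ptr, id ) )
+ *     ;                                          // wait for the transfer
+ *   DMA_CounterClearHitZeroById( cg_ptr, id );   // re-arm for the next use
+ */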
+
+
+/*
+ * ------------------------------------------------------------------------------
+ *
+ * The following functions manipulate or get the status of multiple counters
+ *
+ * ------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Enable Multiple DMA Counters
+ *
+ * Enable multiple DMA counters, given a counter group structure and mask.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counter was allocated.
+ * \param[in]  reg          Identifies the "word" (0 or 1) of the counters
+ *                          being manipulated.  This is the index into the
+ *                          enable array.
+ * \param[in]  counterBits  Identifies which counters in the "word" are being
+ *                          manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetEnable(
+				     DMA_CounterGroup_t *cg_ptr,
+				     int                 reg,
+				     unsigned int        counterBits
+				    )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+  SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  cg_ptr->status_ptr->enable[reg] = counterBits;
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+}
+
+
+/*!
+ * \brief Disable Multiple DMA Counters
+ *
+ * Disable multiple DMA counters, given a counter group structure and mask.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counter was allocated.
+ * \param[in]  reg          Identifies the "word" (0 or 1) of the counters
+ *                          being manipulated.  This is the index into the
+ *                          disable array.
+ * \param[in]  counterBits  Identifies which counters in the "word" are being
+ *                          manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetDisable(DMA_CounterGroup_t *cg_ptr,
+				      int                 reg,
+				      unsigned int        counterBits
+				     )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+  SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  cg_ptr->status_ptr->disable[reg] = counterBits;
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+}
+
+
+/*!
+ * \brief Get Enabled DMA Counters
+ *
+ * Get the enabled status of DMA counters, given a counter group structure
+ * and "word".
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counter was allocated.
+ * \param[in]  reg          Identifies the "word" (0 or 1) of the counters
+ *                          being queried.  This is the index into the
+ *                          enabled array.
+ *
+ * \return  32 bit mask indicating which counters in the specified word are enabled.
+ *          Only the counters that the caller has allocated will have their status
+ *          returned.  The status for other counters will be 0.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetEnabled(
+				const DMA_CounterGroup_t *cg_ptr,
+				int                       reg
+			       )
+{
+  SPI_assert( ( ( cg_ptr != NULL ) &&
+	    ( ( reg == 0 ) || ( reg == 1 ) ) ) );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  return (cg_ptr->permissions[reg] & cg_ptr->status_ptr->enabled[reg]);
+}
+
+
+/*!
+ * \brief Get Hit Zero Status of DMA Counters
+ *
+ * Get the "hit zero" status of DMA counters, given a counter group structure
+ * and "word".
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counter was allocated.
+ * \param[in]  reg          Identifies the "word" (0 or 1) of the counters
+ *                          being queried.  This is the index into the
+ *                          hit zero array.
+ *
+ * \return  32 bit mask indicating which counters in the specified word hit zero.
+ *          Only the counters that the caller has allocated will have their status
+ *          returned.  The status for other counters will be 0.
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ *       zero to ensure that the data that was just DMA'd is available to all
+ *       cores.  The msync is only done if this is a reception counter group,
+ *       since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetHitZero(
+					      const DMA_CounterGroup_t *cg_ptr,
+					      int                       reg
+					     )
+{
+  unsigned int x;
+
+  SPI_assert( ( ( cg_ptr != NULL ) &&
+	    ( ( reg == 0 ) || ( reg == 1 ) ) ) );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  x =  cg_ptr->status_ptr->hit_zero[reg];
+
+  if ( x != 0 ) {
+
+    x &= cg_ptr->permissions[reg];
+
+    if ( ( cg_ptr->type == DMA_Type_Reception ) &&
+	 ( x != 0 ) )
+      _bgp_msync_nonspeculative();
+
+  }
+
+  return (x);
+}
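+
+/*
+ * Illustrative sketch: decoding the returned mask into individual counters.
+ * The bit ordering is whatever _BN() defines, so the test below reuses
+ * _BN() rather than assuming a direction; handle_counter() is hypothetical.
+ *
+ *   unsigned int hz = DMA_CounterGetHitZero( cg_ptr, 0 );
+ *   int c;
+ *   for ( c = 0; c < 32; c++ )
+ *     if ( hz & _BN(c) )
+ *       handle_counter( cg_ptr, c );             // counter c in word 0
+ */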
+
+
+/*!
+ * \brief Get Hit Zero Status of All DMA Counters In the Specified Group
+ *
+ * Get the "hit zero" status of all DMA counters in the group specified by the
+ * counter group structure.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when
+ *                          the counter was allocated.
+ * \param[in,out]  word0    Pointer to the first status word, for the first 32
+ *                          counters.
+ * \param[in,out]  word1    Pointer to the second status word, for the second 32
+ *                          counters.
+ *
+ * \return  word0 and word1 are set to the status of the counters.
+ *          Only the counters that the caller has allocated will have their
+ *          status returned.  The status for other counters will be 0.
+ *
+ * \note This function does an MSYNC after determining that at least 1 counter
+ *       has hit zero to ensure that the data that was just DMA'd is available
+ *       to all cores.  The msync is only done if this is a reception counter
+ *       group, since there is nothing to sync for injection counters that have
+ *       hit zero.
+ *
+ */
+__INLINE__ void DMA_CounterGetAllHitZero(
+					 const DMA_CounterGroup_t *cg_ptr,
+					 unsigned int             *word0,
+					 unsigned int             *word1
+					)
+{
+  unsigned int x,y;
+
+  SPI_assert( ( cg_ptr != NULL ) &&
+	      ( word0  != NULL ) &&
+	      ( word1  != NULL ) );
+  SPI_assert( cg_ptr->status_ptr != 0 );
+
+  x = cg_ptr->status_ptr->hit_zero[0];
+  y = cg_ptr->status_ptr->hit_zero[1];
+
+  if ( (x | y) != 0 ) {
+    x &= cg_ptr->permissions[0];
+    y &= cg_ptr->permissions[1];
+
+    if ( ( cg_ptr->type == DMA_Type_Reception ) &&
+	 ( (x | y) != 0 ) )
+      _bgp_msync_nonspeculative();
+  }
+
+  *word0 = x;
+  *word1 = y;
+
+  return;
+}
+
+
+/*!
+ * \brief Clear Hit Zero Status of DMA Counters
+ *
+ * Clear the "hit zero" status of DMA counters, given a counter group structure,
+ * a "word", and a mask of counters.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counter was allocated.
+ * \param[in]  reg          Identifies the "word" (0 or 1) of the counters
+ *                          being manipulated.  This is the index into the
+ *                          clear_hit_zero array.
+ * \param[in]  counterBits  Identifies which counters in the "word" are being
+ *                          manipulated.
+ *
+ * \return  None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterGroupClearHitZero(
+				DMA_CounterGroup_t *cg_ptr,
+				int                 reg,
+				unsigned int        counterBits
+			       )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+  SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  cg_ptr->status_ptr->clear_hit_zero[reg] = counterBits;
+
+  _bgp_mbar();    /* Make sure these writes have been accepted by the memory */
+                  /* system before continuing                                */
+}
+
+
+/*!
+ * \brief Get DMA Counter Group Status
+ *
+ * Get the DMA Counter Group Status, given a counter group structure.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counters were allocated.
+ *
+ * \return  32 bit mask indicating which subgroups have counters that are enabled and
+ *          have hit zero.  Only the subgroups that the caller has allocated will have
+ *          their status returned.  The status for other subgroups will be 0.
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ *       zero to ensure that the data that was just DMA'd is available to all
+ *       cores.  The msync is only done if this is a reception counter group,
+ *       since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetGroupStatus(
+				const DMA_CounterGroup_t *cg_ptr
+			       )
+{
+  unsigned int x;
+
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( cg_ptr->status_ptr != 0);
+
+  x = cg_ptr->status_ptr->grp_status;
+
+  if ( x != 0 ) {
+
+    x &= cg_ptr->grp_permissions;
+
+    if (  ( cg_ptr->type == DMA_Type_Reception ) &&
+	  ( x != 0 ) )
+      _bgp_msync_nonspeculative();
+
+  }
+
+  return x;
+}
+
+
+/*!
+ * \brief Get DMA Counter Group Number
+ *
+ * Get the DMA Counter Group number, given a counter group structure.
+ *
+ * \param[in]  cg_ptr       Pointer to the structure previously filled in when the
+ *                          counters were allocated.
+ *
+ * \return  The DMA Counter Group number
+ *
+ */
+__INLINE__ int DMA_CounterGetGroupNum(
+				      const DMA_CounterGroup_t *cg_ptr
+				     )
+{
+  SPI_assert( cg_ptr != NULL );
+
+  return cg_ptr->group_id;
+}
+
+
+/*!
+ * \brief Get DMA Counter Global Id
+ *
+ * Get the global Id of a DMA Counter, given a counter group structure and a counter Id.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when the
+ *                         counters were allocated.
+ * \param[in]  counter_id  Identifier of the counter
+ *
+ * \return  The DMA Counter Global Id (0 to DMA_NUM_COUNTERS-1)
+ *
+ */
+__INLINE__ int DMA_CounterGetGlobalId(
+				      const DMA_CounterGroup_t *cg_ptr,
+				      int                       counter_id
+				     )
+{
+  SPI_assert( ( cg_ptr != NULL ) &&
+	  ( counter_id >= 0 ) &&
+	  ( counter_id < DMA_NUM_COUNTERS_PER_GROUP ) );
+
+  return( counter_id + (DMA_NUM_COUNTERS_PER_GROUP * cg_ptr->group_id) );
+}
+
+
+/*!
+ * \brief Get DMA Counter Local Id
+ *
+ * Get the local Id of a DMA Counter, given a global counter Id.
+ *
+ * \param[in]  counter_id  Global Identifier of the counter (0 to DMA_NUM_COUNTERS-1)
+ *
+ * \return  The DMA Counter Local Id (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ */
+__INLINE__ int DMA_CounterGetLocalId(
+				     int counter_id
+				    )
+{
+  return( counter_id % DMA_NUM_COUNTERS_PER_GROUP );
+}
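+
+/*
+ * Worked example of the Id mapping (illustrative; assumes
+ * DMA_NUM_COUNTERS_PER_GROUP is 64): local counter 5 in group 2 has
+ * global Id 2*64 + 5 = 133, and DMA_CounterGetLocalId(133) = 133 % 64 = 5.
+ */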
+
+
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Descriptors.h b/arch/powerpc/include/spi/DMA_Descriptors.h
new file mode 100644
index 0000000..ae9fc11
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Descriptors.h
@@ -0,0 +1,1505 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef _DMA_DESCRIPTORS_H_ /* Prevent multiple inclusion */
+#define _DMA_DESCRIPTORS_H_
+
+/*!
+ * \file spi/DMA_Descriptors.h
+ *
+ * \brief DMA SPI Descriptor Definitions and Inline Functions
+ *
+ * This header file contains the definition of the DMA_InjDescriptor_t, which is
+ * put into the tail of an injection fifo to initiate a DMA transfer.
+ *
+ * The following defines the terms used for describing the various kinds of
+ * descriptors:
+ * - "Torus" means the transfer is between nodes.
+ * - "Local" means the transfer is within the same node.
+ * - "Direct-put" means the data is put directly into the destination node's
+ *   memory.
+ * - "MemFifo" means the packets are put into the destination node's reception
+ *   fifo.
+ * - "Remote-get" means the packet payload contains an injection descriptor
+ *   to be injected into the destination node's injection fifo.
+ * - Prefetch-only" means the payload is just pre-fetched into L3.  It is not
+ *   transferred to the destination node.
+ *
+ * The following are the functions provided for creating injection descriptors:
+ * - DMA_TorusDirectPutDescriptor
+ * - DMA_LocalDirectPutDescriptor
+ * - DMA_LocalPrefetchOnlyDescriptor
+ * - DMA_TorusRemoteGetDescriptor
+ * - DMA_LocalRemoteGetDescriptor
+ * - DMA_TorusMemFifoDescriptor
+ * - DMA_LocalMemFifoDescriptor
+ * - DMA_TorusDirectPutBcastDescriptor
+ * - DMA_TorusMemFifoBcastDescriptor
+ *
+ *
+ * There are also functions for setting or changing specific values in the
+ * injection descriptors.
+ *
+ */
+
+
+
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ *   placed in the translation unit (e.g., xxx.c) for use within that unit.
+ *   The function is not externalized for use by another unit...we want this
+ *   so we don't end up with multiple units exporting the same function,
+ *   which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ * \verbatim
+   #ifndef INLINE
+   # define INLINE extern inline
+   #endif
+   INLINE int max(int a, int b) {
+     return a > b ? a : b;
+   }
+   \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ * \verbatim
+   #define INLINE
+   #include "header.h"
+   \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+
+#include <bpcore/bgp_types.h>
+#include <common/alignment.h>
+#include <common/bgp_bitnumbers.h>
+#include <spi/DMA_Packet.h>
+#include <spi/DMA_Assert.h>
+
+
+
+
+/*!
+ * \brief Packet Header - Checksum Skip Bytes
+ *
+ * Default number of 2 byte units to skip from the top of a packet before
+ * including the packet bytes into the running checksum of the torus
+ * injection fifo where this packet is injected.
+ *
+ * 8 corresponds to skipping 16 bytes, which is the DMA packet header size
+ * (hardware header + software header).
+ */
+#define DMA_CSUM_SKIP  8
+
+
+/*!
+ * \brief Packet Header - Checksum Skip Packet
+ *
+ * Default value for the torus injection checksum skip packet bit.
+ *   - 0 includes the packet (excluding the portion designated by DMA_CSUM_SKIP)
+ *     in the checksum.
+ *   - 1 excludes the entire packet from the checksum.
+ */
+#define DMA_CSUM_BIT   0
+
+
+
+
+/*!
+ * \brief DMA Injection Descriptor Structure
+ *
+ */
+typedef struct DMA_InjDescriptor_t
+{
+  union {
+    unsigned word1;                 /*!< For accessing fields as 32-bit word  */
+
+    struct {
+      unsigned rsvd0          : 24; /*!< 3 bytes: unused                      */
+
+      unsigned rsvd1          :  6; /*!< Bits 0-5: unused flags               */
+
+      unsigned prefetch_only  :  1; /*!< Bit 6: prefetch only, on local
+                                         memcopy:
+                                         0 = Data is both read and written,
+                                         1 = Data is only read.
+                                         This bit is ignored for torus
+                                         packets.                             */
+
+      unsigned local_memcopy  :  1; /*!< Bit 7: local memory copy bit:
+                                         0 = The message is a torus message,
+                                         1 = The message is a local copy.     */
+    };
+  };
+
+  union {
+    unsigned word2;                 /*!< For accessing fields as 32-bit word  */
+
+    struct {
+      unsigned rsvd2          : 24; /*!< 3 bytes: unused                      */
+
+      unsigned idma_counterId :  8; /*!< 1 byte: Injection Counter Id.        */
+    };
+  };
+
+  unsigned base_offset        : 32; /*!< 4 bytes: pointer to base address of
+                                         message payload.  This gets added to
+                                         the base address associated with the
+                                         idma_counterId injection counter.    */
+
+  unsigned msg_length         : 32; /*!< 4 bytes: message length (in bytes)   */
+
+  DMA_PacketHeader_t hwHdr;         /*!< DMA Hardware Packet Header           */
+
+}
+DMA_InjDescriptor_t ALIGN_QUADWORD;
+/*!
+ * \todo Change to ALIGN_L1D_CACHE when it works.
+ *
+ */
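+
+/*
+ * Layout sketch: from the field descriptions above, a descriptor is
+ * 16 bytes of control information plus the 16-byte DMA packet header
+ * (8-byte hardware header + 8-byte software header), 32 bytes in all.
+ * A compile-time check of that assumption:
+ *
+ * \verbatim
+   typedef char DMA_InjDescriptor_size_check
+                [ ( sizeof(DMA_InjDescriptor_t) == 32 ) ? 1 : -1 ];
+   \endverbatim
+ */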
+
+
+/*!
+ * \brief Static Info from Personality
+ *
+ * The following structure defines information from the personality.
+ * It is intended to be static so, once the info is retrieved from
+ * the personality, it does not need to be retrieved again (it is a
+ * system call to retrieve personality info).
+ *
+ */
+typedef struct DMA_PersonalityInfo_t
+{
+  unsigned int personalityRetrieved; /*!< 0 = Personality Info not
+                                              retrieved into this
+                                              structure yet.
+                                          1 = Personality Info in this
+                                              structure is valid.             */
+  uint8_t      nodeXCoordinate;      /*!< X coord of the calling node.        */
+  uint8_t      nodeYCoordinate;      /*!< Y coord of the calling node.        */
+  uint8_t      nodeZCoordinate;      /*!< Z coord of the calling node.        */
+  uint8_t      xNodes;               /*!< X dimension of the block.           */
+  uint8_t      yNodes;               /*!< Y dimension of the block.           */
+  uint8_t      zNodes;               /*!< Z dimension of the block.           */
+}
+DMA_PersonalityInfo_t;
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Message
+ *
+ * A torus direct put message is one that is sent to another node and its data
+ * is directly put into memory by the DMA on the destination node...it does
+ * not go into a reception fifo.
+ *
+ * A torus direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_ctr_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a direct-put packet).
+ *   - Dynamic         = Set based on caller's "vc".
+ *   - VC              = Set to caller's "vc".
+ *   - X,Y,Z           = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = Destination message offset (from the reception
+ *                       counter's base address).  Set to caller's recv_offset.
+ *   - rDMA_Counter    = Reception counter ID.  This counter is located on the
+ *                       destination node and contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       recv_ctr_grp_id and recv_ctr_id.
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      x                The destination's x coordinate (8 bits).
+ * \param[in]      y                The destination's y coordinate (8 bits).
+ * \param[in]      z                The destination's z coordinate (8 bits).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  x+ and x- cannot both be set at the same
+ *                                  time; the same applies to y and z.
+ * \param[in]      vc               The virtual channel that the packet must go
+ *                                  into if it fails to win the bypass
+ *                                  arbitration in the receiving node.
+ *                                  - 0 = Virtual channel dynamic 0
+ *                                  - 1 = Virtual channel dynamic 1
+ *                                  - 2 = Virtual channel deterministic bubble
+ *                                  - 3 = Virtual channel deterministic priority
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_ctr_grp_id  Reception counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      recv_ctr_id      Reception counter ID (within the recv counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      recv_offset      Offset of the payload from the pa_base
+ *                                  associated with the specified reception
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209 to 240 bytes of payload + 16 bytes of header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_ctr_grp_id:
+ *       - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+ */
+int  DMA_TorusDirectPutDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         x,
+				  unsigned int         y,
+				  unsigned int         z,
+				  unsigned int         hints,
+				  unsigned int         vc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_ctr_grp_id,
+				  unsigned int         recv_ctr_id,
+				  unsigned int         recv_offset,
+				  unsigned int         msg_len
+				 );
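+
+/*
+ * Usage sketch: build a direct-put descriptor that sends msg_len bytes to
+ * node (x,y,z), letting the hardware choose the hint bits and using dynamic
+ * virtual channel 0.  The counter group/Id values and the x, y, z,
+ * send_offset, recv_offset, and msg_len variables are assumptions made for
+ * the example.
+ *
+ * \verbatim
+   DMA_InjDescriptor_t desc;
+   int rc = DMA_TorusDirectPutDescriptor( &desc,
+                                          x, y, z,
+                                          0,            // hints: hardware picks
+                                          0,            // vc: dynamic 0
+                                          0, 3,         // inj ctr group 0, Id 3
+                                          send_offset,
+                                          0, 7,         // recv ctr group 0, Id 7
+                                          recv_offset,
+                                          msg_len );
+   if ( rc != 0 ) {
+     // handle failure
+   }
+   \endverbatim
+ */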
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Direct Put Message
+ *
+ * A local direct put message is one that is targeted within the same node, and
+ * its data is directly put into memory by the DMA...it does not go into a
+ * reception fifo.  This is essentially a memcpy via DMA.
+ *
+ * A local direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = Set based on caller's "recv_ctr_grp_id".
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a direct-put packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = Destination message offset (from the reception
+ *                       counter's base address).  Set to caller's recv_offset.
+ *   - rDMA_Counter    = Reception counter ID.  This counter is located on the
+ *                       destination node and contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       recv_ctr_grp_id and recv_ctr_id.
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_ctr_grp_id  Reception counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      recv_ctr_id      Reception counter ID (within the recv counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      recv_offset      Offset of the payload from the pa_base
+ *                                  associated with the specified reception
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209 to 240 bytes of payload + 16 bytes of header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_ctr_grp_id:
+ *       - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       The only use for the pid bits is for debug, i.e., if headers are
+ *       being saved.
+ */
+int  DMA_LocalDirectPutDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_ctr_grp_id,
+				  unsigned int         recv_ctr_id,
+				  unsigned int         recv_offset,
+				  unsigned int         msg_len
+				 );
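+
+/*
+ * Usage sketch: a DMA "memcpy" within the node.  It assumes the injection
+ * counter's base address points at the source buffer and the reception
+ * counter's base address at the destination; both offsets are 0 here for
+ * brevity, and the group/Id values are illustrative.
+ *
+ * \verbatim
+   DMA_InjDescriptor_t desc;
+   int rc = DMA_LocalDirectPutDescriptor( &desc,
+                                          0, 3,     // inj ctr group/Id
+                                          0,        // send_offset
+                                          0, 7,     // recv ctr group/Id
+                                          0,        // recv_offset
+                                          nbytes ); // bytes to copy
+   \endverbatim
+ */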
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local L3 Prefetch Only Message
+ *
+ * A local prefetch is one in which the DMA simply prefetches the send buffer
+ * into L3.
+ *
+ * A local prefetch DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 1
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the message being
+ *                       prefetched.  This counter contains the base address of
+ *                       the message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = 0 (not used).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a DMA packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209 to 240 bytes of payload + 16 bytes of header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ */
+int  DMA_LocalPrefetchOnlyDescriptor(
+				     DMA_InjDescriptor_t *desc,
+				     unsigned int         inj_ctr_grp_id,
+				     unsigned int         inj_ctr_id,
+				     unsigned int         send_offset,
+				     unsigned int         msg_len
+				    );
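+
+/*
+ * Usage sketch: warm the L3 with the next send buffer while the current
+ * message is in flight.  Only the injection side is specified and nothing
+ * is written anywhere; the group/Id and offset/length values are
+ * illustrative.
+ *
+ * \verbatim
+   DMA_InjDescriptor_t desc;
+   int rc = DMA_LocalPrefetchOnlyDescriptor( &desc,
+                                             0, 3,         // inj ctr group/Id
+                                             next_offset,  // within ctr buffer
+                                             next_len );   // bytes to prefetch
+   \endverbatim
+ */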
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Remote-Get Message
+ *
+ * A torus remote-get message is one that is sent to another node and its data
+ * is directly put by the DMA into an injection fifo on the destination
+ * node...it does not go into a reception fifo.  Therefore, the payload of this
+ * message is one (or more) descriptors for another message that is to be sent
+ * back to the originating node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor.  Thus, Chunks = 2-1 = 1 (a 64-byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * It is also assumed that the payload is NOT checksummed, since it is not
+ * always reproducible.  Things like idma_counterId and base_offset may be
+ * different on another run, making checksumming inconsistent.
+ *
+ * A torus remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = 32.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used because Sk is 1).
+ *   - Sk              = 1 (do not checksum this packet).
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_inj_fifo_grp_id" (see note).
+ *   - Chunks          = Set to (2)-1 = 1.
+ *   - Dm              = 1 (Indicates a DMA packet).
+ *   - Dynamic         = Set based on caller's "vc".
+ *   - VC              = Set to caller's "vc".
+ *   - X,Y,Z           = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 1.
+ *   - iDMA_Fifo_ID    = Injection fifo ID where the payload will be injected.
+ *                       Set based on caller's recv_inj_ctr_grp_id and
+ *                       recv_inj_ctr_id.
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      x                The destination's x coordinate (8 bits).
+ * \param[in]      y                The destination's y coordinate (8 bits).
+ * \param[in]      z                The destination's z coordinate (8 bits).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  x+ and x- cannot both be set at the same
+ *                                  time; the same applies to y and z.
+ * \param[in]      vc               The virtual channel that the packet must go
+ *                                  into if it fails to win the bypass
+ *                                  arbitration in the receiving node.
+ *                                  - 0 = Virtual channel dynamic 0
+ *                                  - 1 = Virtual channel dynamic 1
+ *                                  - 2 = Virtual channel deterministic bubble
+ *                                  - 3 = Virtual channel deterministic priority
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_inj_fifo_grp_id  Injection fifo group ID where payload
+ *                                       will be injected on destination node
+ *                                       (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in]      recv_inj_fifo_id      Injection fifo ID (within the
+ *                                       recv_inj_fifo_grp_id group)
+ *                                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_inj_fifo_grp_id:
+ *       - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+ */
+int  DMA_TorusRemoteGetDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         x,
+				  unsigned int         y,
+				  unsigned int         z,
+				  unsigned int         hints,
+				  unsigned int         vc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_inj_fifo_grp_id,
+				  unsigned int         recv_inj_fifo_id
+				 );
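+
+/*
+ * Usage sketch: a remote get is a descriptor whose payload is itself a
+ * descriptor.  First build the descriptor the remote node will inject (for
+ * example with DMA_TorusDirectPutDescriptor) and store it in the injection
+ * counter's buffer at send_offset; then describe the get.  Group/Id values
+ * are illustrative.
+ *
+ * \verbatim
+   DMA_InjDescriptor_t get;
+   int rc = DMA_TorusRemoteGetDescriptor( &get,
+                                          x, y, z,
+                                          0,            // hints: hardware picks
+                                          0,            // vc: dynamic 0
+                                          0, 3,         // inj ctr group/Id
+                                          send_offset,  // where payload lives
+                                          0, 5 );       // remote inj fifo grp/Id
+   \endverbatim
+ */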
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Remote-Get Message
+ *
+ * A local remote-get message is one whose data is directly put by the DMA into
+ * an injection fifo on the local node...it does not go into a reception fifo.
+ * Therefore, the payload of this message is one (or more) descriptors for
+ * another message that is to be injected on the local node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor.  Thus, Chunks = 2-1 = 1 (a 64-byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * A local remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = 32.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = Set based on caller's "recv_inj_fifo_grp_id" (see note).
+ *   - Chunks          = Set to (2)-1 = 1.
+ *   - Dm              = 1 (Indicates a DMA packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 1.
+ *   - iDMA_Fifo_ID    = Injection fifo ID where the payload will be injected.
+ *                       Set based on caller's inj_ctr_grp_id and inj_ctr_id.
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_inj_fifo_grp_id  Injection fifo group ID where payload
+ *                                       will be injected on local node
+ *                                       (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in]      recv_inj_fifo_id      Injection fifo ID (within the
+ *                                       recv_inj_fifo_grp_id group)
+ *                                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ *       hardware packet header are determined by the recv_inj_fifo_grp_id:
+ *       - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *
+ */
+int  DMA_LocalRemoteGetDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_inj_fifo_grp_id,
+				  unsigned int         recv_inj_fifo_id
+				 );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Message
+ *
+ * A torus memory fifo message is one that is sent to another node and its data
+ * is put into a reception memory fifo by the DMA on the destination node.
+ *
+ * A torus memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_fifo_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 0 (Indicates a memory fifo packet).
+ *   - Dynamic         = Set based on caller's "vc".
+ *   - VC              = Set to caller's "vc".
+ *   - X,Y,Z           = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - SW_Arg          = User-defined 24 bits.  Set to caller's sw_arg.
+ *   - Func_Id         = The registration ID of a function to receive control
+ *                       on the destination node to process the packet.
+ *                       Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      x                The destination's x coordinate (8 bits).
+ * \param[in]      y                The destination's y coordinate (8 bits).
+ * \param[in]      z                The destination's z coordinate (8 bits).
+ * \param[in]      recv_fifo_grp_id Reception fifo group ID
+ *                                  (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  x+ and x- cannot both be set at the same
+ *                                  time; the same applies to y and z.
+ * \param[in]      vc               The virtual channel that the packet must go
+ *                                  into if it fails to win the bypass
+ *                                  arbitration in the receiving node.
+ *                                  - 0 = Virtual channel dynamic 0
+ *                                  - 1 = Virtual channel dynamic 1
+ *                                  - 2 = Virtual channel deterministic bubble
+ *                                  - 3 = Virtual channel deterministic priority
+ * \param[in]      sw_arg           User-defined 24 bits to be placed into the
+ *                                  packets (bits 8-31).
+ * \param[in]      function_id      Function id (8 bit registration ID) of the
+ *                                  function to receive control on the
+ *                                  destination node to process packets for this
+ *                                  message.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209 to 240 bytes of payload + 16 bytes of header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ * \note By default, for DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_fifo_grp_id:
+ *       - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+*/
+int  DMA_TorusMemFifoDescriptor(
+				DMA_InjDescriptor_t *desc,
+				unsigned int         x,
+				unsigned int         y,
+				unsigned int         z,
+				unsigned int         recv_fifo_grp_id,
+				unsigned int         hints,
+				unsigned int         vc,
+				unsigned int         sw_arg,
+				unsigned int         function_id,
+				unsigned int         inj_ctr_grp_id,
+				unsigned int         inj_ctr_id,
+				unsigned int         send_offset,
+				unsigned int         msg_len
+			       );
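+
+/*
+ * Usage sketch: a memory fifo send.  "my_func_id" is assumed to be the
+ * 8-bit Id under which a packet handler was registered on the destination
+ * node, and "tag" rides in the 24-bit SW_Arg field; both names, and the
+ * group/Id values, are illustrative.
+ *
+ * \verbatim
+   DMA_InjDescriptor_t desc;
+   int rc = DMA_TorusMemFifoDescriptor( &desc,
+                                        x, y, z,
+                                        0,           // recv fifo group
+                                        0,           // hints: hardware picks
+                                        0,           // vc: dynamic 0
+                                        tag,         // sw_arg (24 bits)
+                                        my_func_id,  // registered handler
+                                        0, 3,        // inj ctr group/Id
+                                        send_offset,
+                                        msg_len );
+   \endverbatim
+ */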
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Memory Fifo Message
+ *
+ * A local memory fifo message is one whose data is put into a reception
+ * memory fifo on the same node by the DMA.
+ *
+ * A local memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = Set based on caller's "recv_fifo_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 0 (Indicates a memory fifo packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - SW_Arg          = User-defined 24 bits.  Set to caller's sw_arg.
+ *   - Func_Id         = The registration ID of a function to receive control
+ *                       on this local node to process the packet.
+ *                       Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      recv_fifo_grp_id Reception fifo group ID
+ *                                  (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]      sw_arg           User-defined 24 bits to be placed into the
+ *                                  packets (bits 8-31).
+ * \param[in]      function_id      Function id (8 bit registration ID) of the
+ *                                  function to receive control on this
+ *                                  local node to process packets for this
+ *                                  message.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209 to 240 bytes of payload + 16 bytes of header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ * \note By default, for memory fifo DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_fifo_grp_id:
+ *       - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+*/
+int  DMA_LocalMemFifoDescriptor(
+				DMA_InjDescriptor_t *desc,
+				unsigned int         recv_fifo_grp_id,
+				unsigned int         sw_arg,
+				unsigned int         function_id,
+				unsigned int         inj_ctr_grp_id,
+				unsigned int         inj_ctr_id,
+				unsigned int         send_offset,
+				unsigned int         msg_len
+			       );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Broadcast Message
+ *
+ * A torus direct put broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, and its data is directly
+ * put into memory by the DMA on the nodes along that line...it does not go
+ * into a reception fifo.  Only one hint bit can be
+ * specified, dictating the direction (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum.  Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus direct-put broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_ctr_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a direct-put packet).
+ *   - Dynamic         = 0 (Deterministic).
+ *   - VC              = Virtual Channel: Deterministic Bubble Normal.
+ *   - X,Y,Z           = Set according to the hints:
+ *                       Two of the directions are set to this node's
+ *                       coordinates (no movement in those directions).
+ *                       One direction is set to the dest specified
+ *                       by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = Destination message offset (from the reception
+ *                       counter's base address).  Set to caller's recv_offset.
+ *   - rDMA_Counter    = Reception counter ID.  This counter is located on the
+ *                       destination node and contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       recv_ctr_grp_id and recv_ctr_id.
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      dest             The final torus destination coordinate
+ *                                  along the line specified by the hints.
+ *                                  Should not exceed the number of nodes in
+ *                                  the direction of travel.
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  Only one bit may be specified.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_ctr_grp_id  Reception counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      recv_ctr_id      Reception counter ID (within the recv counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      recv_offset      Offset of the payload from the pa_base
+ *                                  associated with the specified reception
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209 to 240 bytes of payload + 16 bytes of header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_ctr_grp_id:
+ *       - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+*/
+int  DMA_TorusDirectPutBcastDescriptor(
+				       DMA_InjDescriptor_t *desc,
+				       unsigned int         dest,
+				       unsigned int         hints,
+				       unsigned int         inj_ctr_grp_id,
+				       unsigned int         inj_ctr_id,
+				       unsigned int         send_offset,
+				       unsigned int         recv_ctr_grp_id,
+				       unsigned int         recv_ctr_id,
+				       unsigned int         recv_offset,
+				       unsigned int         msg_len
+				      );
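+
+/*
+ * Usage sketch: broadcast along the +x line.  Exactly one hint bit is set;
+ * the 0x20 encoding assumes the x+, x-, y+, y-, z+, z- bits map high-to-low
+ * within the 6-bit field.  "xNodes" would come from the personality info
+ * (see DMA_PersonalityInfo_t); group/Id values are illustrative.
+ *
+ * \verbatim
+   DMA_InjDescriptor_t desc;
+   int rc = DMA_TorusDirectPutBcastDescriptor( &desc,
+                                               xNodes - 1,   // last x coordinate
+                                               0x20,         // hint: x+ only
+                                               0, 3,         // inj ctr group/Id
+                                               send_offset,
+                                               0, 7,         // recv ctr group/Id
+                                               recv_offset,
+                                               msg_len );
+   \endverbatim
+ */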
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Broadcast Message
+ *
+ * A torus memory fifo broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, and its data is
+ * put into a reception memory fifo by the DMA on the destination nodes along
+ * that line.  Only one hint bit can be specified, dictating the direction
+ * (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum.  Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus memory fifo broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_fifo_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 0 (Indicates a memory fifo packet).
+ *   - Dynamic         = 0 (Deterministic).
+ *   - VC              = Virtual Channel: Deterministic Bubble Normal.
+ *   - X,Y,Z           = Set according to the hints:
+ *                       Two of the directions are set to this node's
+ *                       coordinates (no movement in those directions).
+ *                       One direction is set to the dest specified
+ *                       by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - SW_Arg          = User-defined 24 bits.  Set to caller's sw_arg.
+ *   - Func_Id         = The registration ID of a function to receive control
+ *                       on the destination node to process the packet.
+ *                       Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      dest             The final torus destination coordinate
+ *                                  along the line specified by the hints.
+ *                                  Should not exceed the number of nodes in
+ *                                  the direction of travel.
+ * \param[in]      recv_fifo_grp_id Reception fifo group ID
+ *                                  (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  Only one bit may be specified.
+ * \param[in]      sw_arg           User-defined 24 bits to be placed into the
+ *                                  packets (bits 8-31).
+ * \param[in]      function_id      Function id (8 bit registration ID) of the
+ *                                  function to receive control on the
+ *                                  destination node to process packets for this
+ *                                  message.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (a full 8-chunk packet carries 240 bytes of payload
+ *         plus the 16-byte header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in each packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_fifo_grp_id:
+ *       - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+ */
+int  DMA_TorusMemFifoBcastDescriptor(
+				     DMA_InjDescriptor_t *desc,
+				     unsigned int         dest,
+				     unsigned int         recv_fifo_grp_id,
+				     unsigned int         hints,
+				     unsigned int         sw_arg,
+				     unsigned int         function_id,
+				     unsigned int         inj_ctr_grp_id,
+				     unsigned int         inj_ctr_id,
+				     unsigned int         send_offset,
+				     unsigned int         msg_len
+				    );
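+
+/*
+ * Example usage (a minimal sketch, not part of the interface):  the hint
+ * mask, IDs, and lengths below are illustrative values, and the x+ bit is
+ * assumed to be the high-order bit of the 6-bit hint mask.
+ *
+   \verbatim
+   DMA_InjDescriptor_t desc;
+   int rc;
+
+   rc = DMA_TorusMemFifoBcastDescriptor( &desc,
+                                         4,        // dest coordinate on the line
+                                         0,        // recv_fifo_grp_id
+                                         0x20,     // hints: x+ only (assumed bit order)
+                                         0x123456, // sw_arg (24 bits)
+                                         7,        // function_id
+                                         0,        // inj_ctr_grp_id
+                                         0,        // inj_ctr_id
+                                         0,        // send_offset from pa_base
+                                         1024 );   // msg_len in bytes
+   if ( rc != 0 )
+     {
+       // handle failure
+     }
+   \endverbatim
+ */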
+
+
+/*!
+ * \brief Set or Change the Hint Bits in a Fifo Descriptor
+ *
+ * \param[in,out]  desc   Pointer to descriptor to be set or changed.
+ * \param[in]      hints  Hint bits to be set.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetHints(
+			     DMA_InjDescriptor_t *desc,
+			     unsigned int         hints
+			    )
+{
+  SPI_assert( desc != NULL );
+  desc->hwHdr.Hint = hints;
+
+}
+
+
+/*!
+ * \brief Set or Change the Virtual Channel and Dynamic Bit in a Descriptor
+ *
+ * \param[in,out]  desc  Pointer to descriptor to be set or changed.
+ * \param[in]      vc    Input virtual channel
+ *                       - 0 = Virtual channel dynamic 0
+ *                       - 1 = Virtual channel dynamic 1
+ *                       - 2 = Virtual channel deterministic bubble
+ *                       - 3 = Virtual channel deterministic priority
+ *
+ * \return None
+ *
+ * \post The Dynamic bit is set according to the specified virtual channel.
+ *
+ */
+__INLINE__ void DMA_SetVc(
+			  DMA_InjDescriptor_t *desc,
+			  unsigned int         vc
+			 )
+{
+  SPI_assert( desc != NULL );
+
+  switch(vc) {
+   case DMA_PACKET_VC_D0:
+   case DMA_PACKET_VC_D1:
+     desc->hwHdr.Dynamic = 1;
+     break;
+
+   case DMA_PACKET_VC_BN:
+   case DMA_PACKET_VC_BP:
+     desc->hwHdr.Dynamic = 0;
+     break;
+
+   default:
+     SPI_assert(0);
+  }
+  desc->hwHdr.VC = vc;
+
+}
+
+
+/*!
+ * \brief Set Descriptor Pid Bits
+ *
+ * Given a pointer to the descriptor and the receive-side counter group number,
+ * set the Pid0 and Pid1 bits in the torus hardware header portion of the
+ * descriptor.
+ *
+ * \param[in]  desc  Pointer to injection descriptor
+ * \param[in]  g     Reception-side counter group number
+ *                   (0 through DMA_NUM_COUNTER_GROUPS-1).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetDescriptorPids(
+				      DMA_InjDescriptor_t *desc,
+				      unsigned int         g
+				     )
+{
+  /* Set the pid bits according to the group id g */
+  desc->hwHdr.Pid0 = _GN(g,30);
+  desc->hwHdr.Pid1 = _GN(g,31);
+/* ---------------------------------
+  The above code performs the following:
+
+  switch(g) {
+  case 0:
+    desc->hwHdr.Pid0      = 0;
+    desc->hwHdr.Pid1      = 0;
+    break;
+
+  case 1:
+    desc->hwHdr.Pid0      = 0;
+    desc->hwHdr.Pid1      = 1;
+    break;
+
+  case 2:
+    desc->hwHdr.Pid0      = 1;
+    desc->hwHdr.Pid1      = 0;
+    break;
+
+  case 3:
+    desc->hwHdr.Pid0      = 1;
+    desc->hwHdr.Pid1      = 1;
+    break;
+
+  default:
+    SPI_assert(0);
+
+  }
+  --------------------------------- */
+}
+
+
+/*!
+ * \brief Set or Change the Number of Chunks in a Fifo Descriptor
+ *
+ * \param[in,out]  desc           Pointer to the descriptor to be set or
+ *                                changed.
+ * \param[in]      packet_chunks  Number of 32B chunks in the packet
+ *                                (1 through 8).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetChunks(
+			      DMA_InjDescriptor_t *desc,
+			      unsigned int         packet_chunks
+			     )
+{
+  SPI_assert( desc != NULL );
+  SPI_assert( packet_chunks >= 1 );
+  SPI_assert( packet_chunks <= 8 );
+  desc->hwHdr.Chunks = (packet_chunks-1);
+}
+
+
+/*!
+ * \brief Set or Change the Message Length in a Fifo Descriptor
+ *
+ * \param[in,out]  desc     Pointer to the descriptor to be set or changed.
+ * \param[in]      msg_len  Number of bytes in the payload of the message.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetMessageLength(
+				     DMA_InjDescriptor_t *desc,
+				     unsigned int msg_len
+				    )
+{
+  SPI_assert( desc != NULL );
+
+  desc->msg_length = msg_len;
+}
+
+
+/*!
+ * \brief Change the Checksum Characteristics in a Fifo Descriptor
+ *
+ * \param[in,out]  desc       Pointer to the descriptor to be changed.
+ * \param[in]      csum_skip  The number of 2-byte units to skip in the
+ *                            injection checksum (7 bits).
+ * \param[in]      skip       The checksum skip attribute:
+ *                            0 = The packet participates in the injection
+ *                                checksum.
+ *                            1 = The packet does not participate in the
+ *                                injection checksum.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetInjCsum(
+			       DMA_InjDescriptor_t *desc,
+			       unsigned int         csum_skip,
+			       unsigned int         skip
+			      )
+{
+  SPI_assert( desc != NULL );
+  SPI_assert( skip <=1 );
+
+  desc->hwHdr.CSum_Skip = csum_skip;
+  desc->hwHdr.Sk        = skip;
+
+}
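+
+/*
+ * Example (a sketch): reproduce the documented default, in which the 16-byte
+ * packet header is skipped and all payload bytes participate in the
+ * injection checksum.  CSum_Skip counts 2-byte units, so 0x8 = 16 bytes.
+ *
+   \verbatim
+   DMA_SetInjCsum( &desc, 0x8, 0 );  // skip 16B header, Sk = 0
+   \endverbatim
+ */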
+
+
+/*!
+ * \brief Determine the Number of Packet Chunks for the First Packet of a
+ *        Message
+ *
+ * Compute the best (largest) packet size in units of 32B chunks given the
+ * message length.
+ *
+ * \param[in]  msg_len  Message length
+ *
+ * \retval  numPacketChunks  Number of 32B chunks needed in the first packet
+ *                           of a message whose length is msg_len.
+ *                           This will be a number from 1 through 8.
+ * \retval  0                This is considered an error, resulting from a
+ *                           msg_len = 0.  The DMA must send at least 1 byte.
+ */
+__INLINE__ int  DMA_PacketChunks(
+				 unsigned msg_len
+				)
+{
+  /* Do most common case first */
+  if (msg_len > 208) return 8;
+
+  /* Error case...the DMA must send at least one byte of data */
+  SPI_assert( msg_len > 0);
+
+  /* Basically add in the packet header and round to 32B multiple */
+  {
+  int chunks = ( msg_len - 1 + sizeof(DMA_PacketHeader_t) ) / 32;
+  return (1+chunks);
+  }
+
+}
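+
+/*
+ * Worked example of the chunk arithmetic above (illustrative values only,
+ * with sizeof(DMA_PacketHeader_t) = 16):
+ *
+   \verbatim
+   // msg_len = 100:  chunks = (100 - 1 + 16) / 32 = 3, returns 4.
+   //   A 4-chunk packet holds 4*32 = 128 bytes, enough for the 16-byte
+   //   header plus 100 bytes of payload (116 bytes total).
+   // msg_len = 208:  chunks = (208 - 1 + 16) / 32 = 6, returns 7.
+   // msg_len = 209:  caught by the early test, returns 8.
+   \endverbatim
+ */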
+
+
+/*!
+ * \brief Zero Out All Fields of a Descriptor
+ *
+ * \param[in]  desc  Pointer to descriptor to be zero'd.
+ *
+ * \post The descriptor is zero'd.
+ *
+ */
+__INLINE__ void  DMA_ZeroOutDescriptor(
+				       DMA_InjDescriptor_t *desc
+				      )
+{
+  /*
+   * Possible optimizations:
+   * There are 32 bytes in the descriptor and it should be L1 aligned.
+   * SPI_assert(( desc & 0x000000FF) == 0); // check alignment; only needed
+   *                                        // if the double hummer is used.
+   * _bgp_dcache_zero_line(desc);           // Not allowed with SWOA.
+   * There should be a better way to do this.
+   */
+
+  SPI_assert( desc != NULL );
+
+  {
+  int *addr = (int *) desc ;
+
+  /* Generates 8 stw's */
+  addr[0] = 0;
+  addr[1] = 0;
+  addr[2] = 0;
+  addr[3] = 0;
+  addr[4] = 0;
+  addr[5] = 0;
+  addr[6] = 0;
+  addr[7] = 0;
+  }
+
+}
+
+
+
+/*!
+ * \brief Update the Offset and Length in a Descriptor
+ *
+ * \param[in]  desc    Pointer to descriptor to be updated.
+ * \param[in]  offset  The new offset value.
+ * \param[in]  length  The new length value.
+ *
+ * \post The descriptor is updated.
+ *
+ */
+__INLINE__ void DMA_DescriptorUpdateOffsetLength (DMA_InjDescriptor_t *desc,
+						  unsigned offset,
+						  unsigned length)
+{
+  desc->base_offset = offset;
+  desc->msg_length  = length;
+}
+
+
+
+/*!
+ * \brief Set the Put Offset in a Descriptor
+ *
+ * This sets the "put_offset" field of the software packet header in the
+ * provided descriptor.  This field is placed into the packet header by
+ * the DMA.  In the first packet, this field is placed into the packet
+ * unchanged.  In each subsequent packet, the DMA adds to this field
+ * the number of payload bytes from the previous packet.
+ *
+ * \param[in]  desc    Pointer to descriptor.
+ * \param[in]  offset  The offset value to be set.
+ *
+ * \post The Put Offset in the descriptor is set.
+ *
+ */
+__INLINE__ void DMA_DescriptorSetPutOffset (DMA_InjDescriptor_t *desc,
+					    unsigned offset)
+{
+  desc->hwHdr.Put_Offset = offset;
+}
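+
+/*
+ * Example of how the DMA advances Put_Offset across the packets of a
+ * message (a sketch, assuming full 8-chunk packets carrying 240 payload
+ * bytes each):
+ *
+   \verbatim
+   DMA_DescriptorSetPutOffset( &desc, 0 );
+   // Packet 1 carries put_offset = 0.
+   // Packet 2 carries put_offset = 0 + 240 = 240.
+   // Packet 3 carries put_offset = 240 + 240 = 480, and so on.
+   \endverbatim
+ */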
+
+__END_DECLS
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Fifo.h b/arch/powerpc/include/spi/DMA_Fifo.h
new file mode 100644
index 0000000..c8e7f9e
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Fifo.h
@@ -0,0 +1,1011 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef	_DMA_FIFO_H_ /* Prevent multiple inclusion */
+#define	_DMA_FIFO_H_
+
+
+/*!
+ * \file spi/DMA_Fifo.h
+ *
+ * \brief DMA SPI Fifo Definitions and Inline Functions Common to Injection
+ *        and Reception Fifos
+ *
+ * This include file contains data structures and inline functions that are
+ * common among injection and reception fifos.  The inlines are used to
+ * interface with the fifos at the lowest level.
+ *
+ * There are two levels of access:  hardware and software.  For direct
+ * hardware access, the DMA_FifoHW_t structure describes fields that reside
+ * in the "hardware fifo" in DMA SRAM.  For normal software access, the
+ * DMA_Fifo_t structure contains a pointer to the hardware structure,
+ * shadows (snapshot copies) of the fields in the hardware structure, and
+ * size information calculated from the shadows.
+ *
+ * \verbatim Picture of fifo structures
+
+   ========DDR MEMORY===================|==========DMA SRAM MEMORY==========
+   ------------------------------       |
+   | DMA_Fifo_t                 |       |
+   |                            |       |
+   |   Software Fifo            |       |
+   |                            |       |
+   |                            |       |     -----------------------------
+   |   fifo_hw_ptr--------------|-------|---->| DMA_FifoHW_t              |
+   |                            |       |     |                           |
+   |                            |       |     |   Hardware Fifo           |
+   |   Shadow Pointers          |       |     -----------------------------
+   |             .              |       |
+   ------------------------------       |
+
+   \endverbatim
+ *
+ * For normal messaging software, one should access the DMA using the
+ * DMA_Fifo_t, DMA_InjFifo_t, or DMA_RecFifo_t structures since
+ * they maintain shadows.  This include file contains inline functions that
+ * operate on the DMA_Fifo_t for this purpose.  Functions include:
+ * - get va_start, va_head, va_tail, va_end, fifo size, fifo free_space
+ * - set va_head, va_tail
+ * - update fifo free-space based upon current shadows
+ *
+ * However, for bringup or diagnostic software, there is a need for direct
+ * access to the hardware fifos.  This include file contains functions that
+ * operate on the DMA_FifoHW_t for this purpose.  Functions include:
+ * - get pa_start, pa_head, pa_tail, pa_end
+ * - set pa_start, pa_head, pa_tail, pa_end
+ * It normally doesn't make sense to call a stand-alone
+ * DMA_FifoSetStartPa() or DMA_FifoSetEndPa(), since dynamically changing
+ * these values corrupts the fifo and causes unpredictable results.
+ * However, bringup or diagnostic software will need this capability (with
+ * the DMA disabled, or the fifo disabled).  Therefore we provide direct
+ * interfaces that use physical addresses and no shadows (for speed).
+ *
+ * Definitions:
+ * - A fifo represents a contiguous block of DDR memory
+ * - A fifo has a starting address and an ending address (defines the memory
+ *   block)
+ * - An injection fifo is a series of 32-byte descriptors.
+ * - Injection consists of copying a 32-byte descriptor into the next available
+ *   slot (pointed to by the tail) and incrementing the tail pointer.
+ * - The DMA engine asynchronously processes descriptors, beginning with the
+ *   descriptor pointed to by head, and ending with the descriptor just prior
+ *   to tail.
+ * - There are injection (DMA InjFifo) and reception (DMA RecFifo) fifos
+ *   (separate interfaces)
+ * - There are DMA_NUM_INJ_FIFO_GROUPS injection fifo groups
+ * - There are DMA_NUM_INJ_FIFOS_PER_GROUP injection fifos per group
+ * - Thus, there are DMA_NUM_INJ_FIFOS injection fifos per node
+ * - There are DMA_NUM_REC_FIFO_GROUPS reception fifo groups
+ * - There are DMA_NUM_REC_FIFOS_PER_GROUP reception fifos per group
+ * - Thus, there are DMA_NUM_REC_FIFOS reception fifos per node
+ * - A "shadow" refers to a copy of the elements of the fifo (start, end, head,
+ *   tail) that is maintained by these inline functions.  The shadows may be
+ *   used to calculate other values such as free space.  The shadows are updated
+ *   by these inlines whenever the hardware fifo is read or written.
+ *
+ * \note Memory consistency/coherency inside these inlines is achieved using
+ *       mbar and msync.
+ *
+ *       MBAR is used to make sure that all writes to memory issued by the
+ *       calling core have been accepted by the memory system before
+ *       continuing.  This guarantees that writes and reads to/from different
+ *       addresses go in a defined order.
+ *
+ *       MBAR EXAMPLE 1:  When a store is done to DMA SRAM, it may not complete
+ *       for a period of time.  If a counter value is set, and then an injection
+ *       fifo tail pointer is set, DMA may see the tail pointer update and begin
+ *       the operation before the counter value has been set.  Inserting an mbar
+ *       between the setting of the counter and the setting of the tail pointer
+ *       guarantees that the counter will be set before the tail pointer is
+ *       updated.
+ *
+ *       MBAR EXAMPLE 2:  A counter hits zero.  We process the hit-zero and write
+ *       a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero
+ *       status (different address).  The hit-zero status will still indicate
+ *       that it hit zero, even though we have already processed it, unless an
+ *       mbar is inserted between clearing the hit-zero and reading the hit-zero
+ *       status.
+ *
+ *       MBAR PHILOSOPHY:  After DMA SRAM is updated in the DMA inline functions,
+ *       they always do at least an mbar (possibly an msync instead...see below).
+ *
+ *       MSYNC does what mbar does, plus ensures consistency across cores.  That
+ *       is, it waits for snoops (invalidations of L1 cache) on the other cores
+ *       to complete before continuing.  This guarantees that all of the cores
+ *       will see a consistent view of memory after the msync.
+ *
+ *       MSYNC EXAMPLE:  When a reception counter has hit zero, we assume the
+ *       DMA'd data is available to be read by any core.  However, old copies of
+ *       that data may still be in the L1 caches.  Inserting an msync after
+ *       detecting that a counter has hit zero guarantees that the old data has
+ *       been removed from the L1 caches.
+ *
+ *       MSYNC PHILOSOPHY:  After the inline functions detect that a counter has
+ *       hit zero, they always do an msync.
+ *
+ *       SPECULATIVE EXECUTION OF MSYNC:  There are cases where msync is done
+ *       conditionally.  The CPU will begin execution of both sides of the
+ *       condition before the result of the condition has been determined.
+ *       Then, it will cancel the execution of one side once the result of the
+ *       condition has been determined.  This speculation is unwanted when
+ *       the first instruction on one side of the condition is msync because
+ *       cancelling an msync is similar to executing the complete msync.
+ *       To avoid this speculative execution of msync, we call
+ *       _bgp_msync_nonspeculative().  This will trick the CPU so it won't begin
+ *       the msync until the result of the condition is known.
+ *
+ *       CALLER ADVICE:  Users of these functions should not need to do
+ *       mbar/msync themselves, unless they are doing something like the
+ *       following:  Read a counter and operate on the result when the counter
+ *       hasn't reached zero.  The caller will need to perform an msync after
+ *       reading the counter in order to ensure that snoops have completed
+ *       on all CPUs before operating on the DMA'd data.
+ */
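+
+/*
+ * Ordering sketch for MBAR EXAMPLE 1 above.  The counter store is shown as
+ * a raw write through a hypothetical pointer into DMA SRAM
+ * (counter_value_sram_ptr); the real counter interface is in the counter
+ * SPI header.
+ *
+   \verbatim
+   *counter_value_sram_ptr = msg_len;   // set the counter value
+   _bgp_mbar();                         // counter store is accepted first...
+   DMA_FifoSetTail( f_ptr, new_tail );  // ...before the DMA sees the tail
+   \endverbatim
+ */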
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ *   placed in the translation unit (e.g., xxx.c) for use within that unit.
+ *   The function is not externalized for use by another unit...we want this
+ *   so we don't end up with multiple units exporting the same function,
+ *   which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+   \verbatim
+   #ifndef INLINE
+   # define INLINE extern inline
+   #endif
+   INLINE int max(int a, int b) {
+     return a > b ? a : b;
+   }
+   \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+   \verbatim
+   #define INLINE
+   #include "header.h"
+   \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+
+#include <spi/DMA_Assert.h>
+#include <spi/kernel_interface.h>
+
+
+
+/*!
+ * \brief Number of fifo groups
+ */
+#define DMA_NUM_FIFO_GROUPS 4
+
+
+/*!
+ * \brief Hardware DMA Fifo
+ *
+ * This maps the hardware fifo (the DMA SRAM) for a fifo.  These fields are
+ * common for injection and reception fifos.
+ *
+ * The fifo represents a physically contiguous block of memory.
+ *
+ */
+typedef struct DMA_FifoHW_t
+{
+  volatile unsigned  pa_start; /*!< RW fifo start address.
+                                       16B-aligned 4-bit shifted address.     */
+
+  volatile unsigned  pa_end;   /*!< RW fifo end address.
+                                       16B-aligned 4-bit shifted address.     */
+
+  volatile unsigned  pa_head;  /*!< RW fifo head pointer.
+                                       16B-aligned 4-bit shifted address.
+                                       Injection fifo head moved by DMA.
+                                       Reception fifo head moved by cores.
+                                       Remote get injection fifo head moved
+                                       by DMA.                                */
+
+  volatile unsigned  pa_tail;  /*!< RW fifo tail pointer.
+                                       16B-aligned 4-bit shifted address.
+                                       Injection fifo tail moved by cores.
+                                       Reception fifo tail moved by DMA.
+                                       Remote get injection fifo tail moved
+                                       by DMA.                                */
+}
+DMA_FifoHW_t;
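+
+/*
+ * The "16B-aligned 4-bit shifted address" convention (a sketch; dma_phys is
+ * an illustrative physical address, not a field of the structure):
+ *
+   \verbatim
+   unsigned dma_phys = 0x12345670;     // 16B-aligned physical byte address
+   unsigned shifted  = dma_phys >> 4;  // the value stored in the pa_* fields
+   unsigned restored = shifted  << 4;  // back to the physical byte address
+   \endverbatim
+ */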
+
+
+/*!
+ * \brief Software DMA Fifo structure
+ *
+ * This structure contains a pointer to the hardware fifo, and other fields that
+ * describe software's view of the fifo.  These fields are common for injection
+ * and reception fifos.
+ *
+ * \todo   Some more careful thought should be given how to group these so as to
+ *         get best memory system performance.
+ *         eg.  Probably want to ALIGN_L3_CACHE the fifo_hw_ptr.
+ *         Might want to have an assert to check that sizeof( DMA_Fifo_t)
+ *         is 32.
+ *         COMMENT:  I think below definition puts the entire structure in one
+ *                   L1 line.
+ */
+typedef struct DMA_Fifo_t
+{
+  DMA_FifoHW_t *fifo_hw_ptr;     /*!< Pointer to hardware fifo.               */
+
+  unsigned int free_space;       /*!< Shadow of how much free space is in the
+                                      fifo, in units of 16B quads.            */
+
+  unsigned int fifo_size;        /*!< Shadow of how much total space is in the
+                                      fifo, in units of 16B quads.            */
+
+  unsigned int pa_start;         /*!< Physical address of the start. (shadow)
+                                      16B-aligned 4-bit shifted address.
+                                      Enables simple calculation of va_head,
+                                      va_tail, and va_end.                    */
+  /*!
+   * \note  The following 4 fields are shadows of the hardware fifo.
+   *        They should be in the same L1 cache line for performance.
+   *        They are updated by the inline functions in this file upon each
+   *        read or write to the fifo.
+   */
+  void *va_start;                /*!< Shadow of the virtual address start of
+                                      the fifo.  Must be 32B aligned.         */
+
+  void *va_head;                 /*!< Shadow of the virtual address head of
+                                      the fifo.                               */
+
+  void *va_tail;                 /*!< Shadow of the virtual address tail of
+                                      the fifo.                               */
+
+  void *va_end;                  /*!< Shadow of the virtual address end  of
+                                      the fifo.  Must be 32B aligned.         */
+
+}
+/*!
+ * With above, there should be 8 fields x 4 bytes/field = 32 bytes in the
+ * structure.  Below ensures these 32 bytes are in the same cache line.
+ */
+ALIGN_L1D_CACHE DMA_Fifo_t;
+
+/*
+ *------------------------------------------------------------------------------
+ * The following functions operate on fields in the hardware and software fifo
+ * structures.
+ *------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Update DMA Fifo Free Space from the Shadow
+ *
+ * Force a recalculation of a DMA fifo's amount of free space, given a software
+ * fifo structure.
+ *
+ * \param[in]  f_ptr      Pointer to the software fifo structure
+ *
+ * \return  None
+ *
+ * \note  WARNING:  The calculation is based on the current shadow values of the
+ *                  head and tail, not the actual hardware values.
+ *
+ */
+__INLINE__ void DMA_FifoUpdateFreeSpaceFromShadow(
+						  DMA_Fifo_t *f_ptr
+						 )
+{
+  SPI_assert( f_ptr           != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /*
+   * Recompute the amount of free space in the fifo, given the current shadows.
+   */
+
+  if ( f_ptr->va_tail >= f_ptr->va_head)
+    {
+      f_ptr->free_space = f_ptr->fifo_size -
+	                    ( ( (unsigned)(f_ptr->va_tail) -
+			        (unsigned)(f_ptr->va_head) ) >> 4 );
+    }
+  else
+    {
+      f_ptr->free_space = ( (unsigned)(f_ptr->va_head) -
+                            (unsigned)(f_ptr->va_tail) ) >> 4;
+    }
+
+}
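+
+/*
+ * Worked example of the free-space calculation (illustrative values; fifo
+ * size in 16B quads, offsets in bytes):
+ *
+   \verbatim
+   // fifo_size = 64 quads (1024 bytes).
+   // Case 1: tail >= head.  va_head = start+0x100, va_tail = start+0x200.
+   //   used = (0x200 - 0x100) >> 4 = 16 quads; free = 64 - 16 = 48 quads.
+   // Case 2: tail wrapped.  va_head = start+0x200, va_tail = start+0x100.
+   //   free = (0x200 - 0x100) >> 4 = 16 quads.
+   \endverbatim
+ */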
+
+
+/*!
+ * \brief Get DMA Fifo Start Virtual Address from the Shadow
+ *
+ * Get a DMA fifo's "start" virtual address, given a software fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_start  The virtual address of the start of the fifo
+ *
+ * \note WARNING: This function does not read the DMA SRAM, but instead returns
+ *                the current shadow va_start.  To actually issue a read to the
+ *                DMA, use DMA_FifoGetStartPa().
+ */
+__INLINE__ void * DMA_FifoGetStartFromShadow(
+					     DMA_Fifo_t *f_ptr
+					    )
+{
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return  f_ptr->va_start;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Head Virtual Address
+ *
+ * Get a DMA fifo's "head" virtual address, given a software fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_head  The virtual address of the head of the fifo
+ *
+ * \post va_head is recalculated from the current hardware head, updated in
+ *       the software fifo structure, and returned.  Additionally, the free
+ *       space in the software fifo structure is updated.
+ *
+ */
+__INLINE__ void * DMA_FifoGetHead(
+				  DMA_Fifo_t *f_ptr
+				 )
+{
+  unsigned int val;
+
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Read the DMA to get the head.
+   * Recompute va_head based upon the va_start and the current hardware head.
+   * Update free_space.
+   */
+
+  val = f_ptr->fifo_hw_ptr->pa_head;
+
+  f_ptr->va_head = (char*)( (unsigned)f_ptr->va_start +
+			    ( ( val - f_ptr->pa_start ) << 4 ) );
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+  return  f_ptr->va_head;
+
+}
+
+
+/*!
+ * \brief Get DMA Fifo Head Virtual Address Without Updating Free Space
+ *
+ * Get a DMA fifo's "head" virtual address, given a software fifo structure,
+ * without updating the fifo's free space.  It is up to the caller to ensure
+ * this update occurs later, if necessary.
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_head  The virtual address of the head of the fifo
+ *
+ * \post va_head is recalculated from the current hardware head, updated in
+ *       the software fifo structure, and returned.
+ *
+ */
+__INLINE__ void * DMA_FifoGetHeadNoFreeSpaceUpdate(
+						   DMA_Fifo_t *f_ptr
+						  )
+{
+  unsigned int val;
+
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Read the DMA to get the head.
+   * Recompute va_head based upon the va_start and the current hardware head.
+   */
+
+  val = f_ptr->fifo_hw_ptr->pa_head;
+
+  f_ptr->va_head = (char*)( (unsigned)f_ptr->va_start +
+			    ( ( val - f_ptr->pa_start ) << 4 ) );
+
+  return  f_ptr->va_head;
+
+}
+
+
+/*!
+ * \brief Get DMA Fifo Tail Virtual Address
+ *
+ * Get a DMA fifo's "tail" virtual address, given a software fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_tail  The virtual address of the tail of the fifo
+ *
+ * \post va_tail is recalculated from the current hardware tail, updated in
+ *       the software fifo structure, and returned.  Additionally, the free
+ *       space in the software fifo structure is updated.
+ *
+ */
+__INLINE__ void * DMA_FifoGetTail(
+				  DMA_Fifo_t *f_ptr
+				 )
+{
+  unsigned int val;
+
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Read the DMA to get the tail.
+   * Recompute va_tail based upon the va_start and the current hardware tail.
+   * Update free_space.
+   */
+
+  val = f_ptr->fifo_hw_ptr->pa_tail;
+
+  f_ptr->va_tail = (char*)( (unsigned)f_ptr->va_start +
+			    ( ( val - f_ptr->pa_start ) << 4 ) );
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+  return  f_ptr->va_tail;
+
+}
+
+
+/*!
+ * \brief Get DMA Fifo Tail Virtual Address Without Updating Free Space
+ *
+ * Get a DMA fifo's "tail" virtual address, given a software fifo structure,
+ * without updating the fifo's free space.  It is up to the caller to
+ * invoke DMA_FifoUpdateFreeSpaceFromShadow() at a later time.
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_tail  The virtual address of the tail of the fifo
+ *
+ * \post va_tail is recalculated from the current hardware tail, updated in
+ *       the software fifo structure, and returned.
+ *
+ */
+__INLINE__ void * DMA_FifoGetTailNoFreeSpaceUpdate(
+						   DMA_Fifo_t *f_ptr
+						  )
+{
+  unsigned int val;
+
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Read the DMA to get the tail.
+   * Recompute va_tail based upon the va_start and the current hardware tail.
+   */
+
+  val = f_ptr->fifo_hw_ptr->pa_tail;
+
+  f_ptr->va_tail = (char*)( (unsigned)f_ptr->va_start +
+			    ( ( val - f_ptr->pa_start ) << 4 ) );
+
+  return  f_ptr->va_tail;
+
+}
+
+
+/*!
+ * \brief Get DMA Fifo Tail Virtual Address from Shadow
+ *
+ * Get a DMA fifo's "tail" virtual address, given a software fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_tail  The virtual address of the tail of the fifo
+ *
+ * \post va_tail is obtained from the shadow, NOT recalculated from the
+ *       current hardware tail.  The free space in the software fifo
+ *       structure is NOT updated.
+ *
+ */
+__INLINE__ void * DMA_FifoGetTailFromShadow(
+					    DMA_Fifo_t *f_ptr
+					   )
+{
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return  f_ptr->va_tail;
+
+}
+
+
+/*!
+ * \brief Get DMA Fifo End Virtual Address from the Shadow
+ *
+ * Get a DMA fifo's "end" virtual address, given a software fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  va_end  The virtual address of the end of the fifo
+ *
+ * \note WARNING: This function does not read the DMA SRAM, but instead returns
+ *                the current shadow va_end.  To actually issue a read to the
+ *                DMA, use DMA_FifoGetEndPa().
+ */
+__INLINE__ void * DMA_FifoGetEndFromShadow(
+					   DMA_Fifo_t *f_ptr
+					  )
+{
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return  f_ptr->va_end;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Size
+ *
+ * Get a DMA fifo's size, given a software fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the software fifo structure
+ *
+ * \retval  size  The size of the DMA fifo, in units of 16B quads.
+ *
+ * \note WARNING: This function does not calculate the size based on the DMA
+ *                SRAM's current start and end values, but instead returns the
+ *                size that was calculated when the fifo was initialized.
+ */
+__INLINE__ unsigned int DMA_FifoGetSize(
+					DMA_Fifo_t *f_ptr
+				       )
+{
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return  f_ptr->fifo_size;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Free Space With No Update Calculation
+ *
+ * Get a DMA fifo's amount of free space, given a software fifo structure.
+ * Do not perform update calculations.
+ *
+ * \param[in]  f_ptr      Pointer to the software fifo structure
+ *
+ * \retval  freeSpace  The amount of free space in the fifo, in units of
+ *                     16B quads.
+ */
+__INLINE__ unsigned int DMA_FifoGetFreeSpaceNoUpdateCalculation(
+                                             DMA_Fifo_t   *f_ptr
+                                            )
+{
+  SPI_assert( f_ptr              != NULL );
+
+  return f_ptr->free_space;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Free Space
+ *
+ * Get a DMA fifo's amount of free space, given a software fifo structure
+ *
+ * \param[in]  f_ptr      Pointer to the software fifo structure
+ * \param[in]  read_head  Indicates whether to read the head from the hardware
+ *                        fifo before calculating the free space.
+ *                          - 1 means to read the hardware head
+ *                          - 0 means to use the current head shadow
+ * \param[in]  read_tail  Indicates whether to read the tail from the hardware
+ *                        fifo before calculating the free space.
+ *                          - 1 means to read the hardware tail
+ *                          - 0 means to use the current tail shadow
+ *
+ * \retval  freeSpace  The amount of free space in the fifo, in units of
+ *                     16B quads.
+ *
+ * \note If both read_head and read_tail are false, the amount of free space is
+ *       calculated based on the current shadow values of head and tail.
+ */
+__INLINE__ unsigned int DMA_FifoGetFreeSpace(
+					     DMA_Fifo_t   *f_ptr,
+					     unsigned int  read_head,
+					     unsigned int  read_tail
+					    )
+{
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+  SPI_assert( read_head == 1 || read_head == 0 );
+  SPI_assert( read_tail == 1 || read_tail == 0 );
+
+  /*
+   * If both read_head and read_tail are 0, return the current shadow.
+   * If read_head != 0, read the head of the fifo first and recompute free space.
+   * If read_tail != 0, read the tail of the fifo first and recompute free space.
+   */
+
+  if ( (read_head == 0) && ( read_tail == 0) )
+    DMA_FifoUpdateFreeSpaceFromShadow( f_ptr);
+  else
+    {
+      if ( read_head == 1) DMA_FifoGetHead(f_ptr);    /* This does an update    */
+                                                      /* of the free space.     */
+      if ( read_tail == 1) DMA_FifoGetTail(f_ptr);    /* This does an update    */
+                                                      /* of the free space.     */
+    }
+
+  return f_ptr->free_space;
+
+}
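+
+/*
+ * Typical polling pattern (a sketch; needed_quads is a caller-chosen
+ * value):  consult the cheap shadow calculation first, and only re-read the
+ * hardware head when the shadow says there is not enough room.
+ *
+   \verbatim
+   unsigned int free = DMA_FifoGetFreeSpace( f_ptr, 0, 0 );  // shadows only
+   if ( free < needed_quads )
+     free = DMA_FifoGetFreeSpace( f_ptr, 1, 0 );  // refresh head from SRAM
+   \endverbatim
+ */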
+
+
+/*!
+ * \brief Set DMA Fifo Head
+ *
+ * Set a DMA fifo's "head", given a software fifo structure
+ *
+ * \param[in]  f_ptr    Pointer to the software fifo structure
+ * \param[in]  va_head  Virtual address of the head to be set
+ *
+ * \return  None
+ *
+ * \post va_head is set in both the hardware and software fifo structures,
+ *       and the fifo free space is recalculated.
+ *
+ * \note Normally, for an injection fifo, the DMA manipulates the head, but in
+ *       optimized persistent communications the core can do it if it is sure
+ *       the fifo is empty at the time this is called.
+ */
+__INLINE__ void DMA_FifoSetHead(
+				DMA_Fifo_t *f_ptr,
+				void       *va_head
+			       )
+{
+  unsigned int pa_head;
+
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+  SPI_assert( va_head >= f_ptr->va_start &&
+              va_head <  f_ptr->va_end );
+
+  /*
+   * Calculate new pa_head based on the shadow pa_start and va_start.
+   */
+  pa_head = f_ptr->pa_start + ( ( (unsigned)va_head -
+				  (unsigned)f_ptr->va_start ) >> 4 );
+
+  /*
+   * Set the hardware head
+   */
+  f_ptr->fifo_hw_ptr->pa_head = pa_head;
+  _bgp_mbar();
+
+  /*
+   * Update the software fifo structure's head and free space.
+   */
+  f_ptr->va_head             = va_head;
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+}
+
+
+/*!
+ * \brief Set DMA Fifo Tail
+ *
+ * Set a DMA fifo's "tail", given a software fifo structure
+ *
+ * \param[in]  f_ptr    Pointer to the software fifo structure
+ * \param[in]  va_tail  Virtual address of the tail to be set
+ *
+ * \return  None
+ *
+ * \post va_tail is set in both the hardware and software fifo structures,
+ *       and the fifo free space is recalculated.
+ *
+ */
+__INLINE__ void DMA_FifoSetTail(
+				DMA_Fifo_t *f_ptr,
+				void       *va_tail
+			       )
+{
+  unsigned int pa_tail;
+
+  SPI_assert( f_ptr              != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+  SPI_assert( va_tail >= f_ptr->va_start &&
+              va_tail <  f_ptr->va_end );
+
+  /*
+   * Calculate new pa_tail based on the shadow pa_start and va_start.
+   */
+  pa_tail = f_ptr->pa_start + ( ( (unsigned)va_tail -
+				  (unsigned)f_ptr->va_start ) >> 4 );
+
+  /*
+   * Set the hardware tail
+   */
+  f_ptr->fifo_hw_ptr->pa_tail = pa_tail;
+  _bgp_mbar();
+
+  /*
+   * Update the software fifo structure's tail and free space.
+   */
+  f_ptr->va_tail             = va_tail;
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+}
+
+
+
+
+/*
+ *------------------------------------------------------------------------------
+ * The following functions operate directly on the hardware fifo.  Normally,
+ * users should use the software fifo routines (previously defined), but for
+ * bringup or diagnostics, it may be desirable to use these.
+ *------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Start
+ *
+ * Set a DMA fifo's "start", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in]  pa_start     Physical address of the start to be set.
+ *                          16B-aligned 4-bit shifted physical address.
+ *
+ * \return  None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetStartPa(
+				   DMA_FifoHW_t *fifo_hw_ptr,
+				   unsigned int  pa_start
+				  )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_start = pa_start;
+
+  _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Head
+ *
+ * Set a DMA fifo's "head", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in]  pa_head      Physical address of the head to be set.
+ *                          16B-aligned 4-bit shifted physical address.
+ *
+ * \return  None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetHeadPa(
+				  DMA_FifoHW_t *fifo_hw_ptr,
+				  unsigned int  pa_head
+				 )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_head = pa_head;
+
+  _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Tail
+ *
+ * Set a DMA fifo's "tail", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in]  pa_tail      Physical address of the tail to be set.
+ *                          16B-aligned 4-bit shifted physical address.
+ *
+ * \return  None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetTailPa(
+				  DMA_FifoHW_t *fifo_hw_ptr,
+				  unsigned int  pa_tail
+				 )
+
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_tail = pa_tail;
+
+  _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo End
+ *
+ * Set a DMA fifo's "end", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in]  pa_end       Physical address of the end to be set.
+ *                          16B-aligned 4-bit shifted physical address.
+ *
+ * \return  None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetEndPa(
+				 DMA_FifoHW_t *fifo_hw_ptr,
+				 unsigned int  pa_end
+				)
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_end = pa_end;
+
+  _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Start
+ *
+ * Get a DMA fifo's "start", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval  pa_start  Physical address of the fifo start.
+ *                    16B-aligned 4-bit shifted physical address.
+ *
+ */
+__INLINE__ unsigned int DMA_FifoGetStartPa(
+					   DMA_FifoHW_t *fifo_hw_ptr
+					  )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_start;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Head
+ *
+ * Get a DMA fifo's "head", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval  pa_head  Physical address of the fifo head.
+ *                   16B-aligned 4-bit shifted physical address.
+ *
+ */
+__INLINE__ unsigned int DMA_FifoGetHeadPa(
+					  DMA_FifoHW_t *fifo_hw_ptr
+					 )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_head;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Tail
+ *
+ * Get a DMA fifo's "tail", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval  pa_tail  Physical address of the fifo tail.
+ *                   16B-aligned 4-bit shifted physical address.
+ *
+ */
+__INLINE__ unsigned int DMA_FifoGetTailPa(
+					  DMA_FifoHW_t *fifo_hw_ptr
+					 )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_tail;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo End
+ *
+ * Get a DMA fifo's "end", given a hardware fifo structure
+ *
+ * \param[in]  fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval  pa_end  Physical address of the fifo end.
+ *                  16B-aligned 4-bit shifted physical address.
+ *
+ */
+__INLINE__ unsigned int DMA_FifoGetEndPa(
+					 DMA_FifoHW_t *fifo_hw_ptr
+					)
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_end;
+}
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_InjFifo.h b/arch/powerpc/include/spi/DMA_InjFifo.h
new file mode 100644
index 0000000..ca68879
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_InjFifo.h
@@ -0,0 +1,2475 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef	_DMA_INJFIFO_H_  /*  Prevent multiple inclusion */
+#define	_DMA_INJFIFO_H_
+
+
+/*!
+ * \file spi/DMA_InjFifo.h
+ *
+ * \brief DMA SPI Injection Fifo Definitions and Inline Functions
+ *
+ * This include file contains inline functions that are used to interface with
+ * BG/P DMA injection fifos at the lowest level.
+ * Functions include
+ * - initialize
+ * - get fifo start, head, tail, end, size, free space, descriptor count
+ * - set fifo head, tail, start PA, head PA, tail PA, end PA
+ * - increment tail
+ * - inject descriptor(s)
+ * - query status: not empty, available, threshold crossed, activated,
+ *   descriptor done.
+ * - set status: clear threshold crossed, activate, deactivate
+ *
+ * Data structures are defined to manipulate the injection fifos:
+ * - An injection fifo group structure defining a group of injection fifos
+ * - Within the group are injection fifo structures
+ * - Within each injection fifo structure is a software fifo structure
+ * - Each software fifo structure points to its corresponding hardware
+ *   fifo structure in the DMA SRAM
+ *
+ * \verbatim Picture of data structures:
+
+   ========DDR MEMORY===================|==========DMA SRAM MEMORY==========
+   ------------------------------       |
+   | DMA_InjFifoGroup_t         |       |
+   |                            |       |     -----------------------------
+   | status --------------------|-------|---->| DMA_InjFifoStatus_t       |
+   | fifo[0..31]                |       |     -----------------------------
+   |   ------------------------ |       |
+   |   | DMA_InjFifo_t        | |       |
+   |   |                      | |       |
+   | 0 |  ------------------- | |       |     -----------------------------
+   |   |  | DMA_Fifo_t      |-|-|-------|---->| DMA_FifoHW_t              |
+   |   |  ------------------- | |       |     -----------------------------
+   |   ------------------------ |       |
+   |             .              |       |
+   |             .              |       |
+   |             .              |       |
+   |   ------------------------ |       |
+   |   | DMA_InjFifo_t        | |       |
+   |   |                      | |       |
+   |31 |  ------------------- | |       |     -----------------------------
+   |   |  | DMA_Fifo_t      |-|-|-------|---->| DMA_FifoHW_t              |
+   |   |  ------------------- | |       |     -----------------------------
+   |   ------------------------ |       |
+   ------------------------------       |
+
+   \endverbatim
+ *
+ * Definitions:
+ * - A fifo represents a contiguous block of DDR memory
+ * - A fifo has a starting address and an ending address (defines the memory
+ *   block)
+ * - An injection fifo is a series of 32-byte descriptors.  There is a count
+ *   of the number of descriptors ever injected into this fifo.  It will never
+ *   wrap in the expected lifetime of a job.
+ * - Injection consists of copying a 32-byte descriptor into the next available
+ *   slot (pointed to by the tail), incrementing the tail pointer, and
+ *   incrementing the descriptor count for the fifo.
+ * - The DMA engine asynchronously processes descriptors, beginning with the
+ *   descriptor pointed to by head, and ending with the descriptor just prior
+ *   to tail.
+ * - There are injection (DMA InjFifo) and reception (DMA RecFifo) fifos
+ *   (separate interfaces)
+ * - There are DMA_NUM_INJ_FIFO_GROUPS injection fifo groups
+ * - There are DMA_NUM_INJ_FIFOS_PER_GROUP injection fifos per group
+ * - Thus, there are DMA_NUM_INJ_FIFOS injection fifos per node
+ * - There are DMA_NUM_REC_FIFO_GROUPS reception fifo groups
+ * - There are DMA_NUM_REC_FIFOS_PER_GROUP reception fifos per group
+ * - Thus, there are DMA_NUM_REC_FIFOS reception fifos per node
+ * - A "shadow" refers to a copy of the elements of the fifo (start, end, head,
+ *   tail) that is maintained by these inline functions.  The shadows may be
+ *   used to calculate other values such as free space.  The shadows are updated
+ *   by these inlines whenever the hardware fifo is read or written.
+ *
+ * \note These functions do not try to detect things that software shouldn't do,
+ *       like injecting a descriptor into a remote_get fifo, since the hardware
+ *       doesn't distinguish between remote get fifos and normal injection
+ *       fifos.  That sort of checking should be done in a higher level.
+ *
+ * \note Memory consistency/coherency inside these inlines is achieved using
+ *       mbar and msync.
+ *
+ *       MBAR is used to make sure that all writes to memory issued by the
+ *       calling core have been accepted by the memory system before
+ *       continuing.  This guarantees that writes and reads to/from different
+ *       addresses go in a defined order.
+ *
+ *       MBAR EXAMPLE 1:  When a store is done to DMA SRAM, it may not complete
+ *       for a period of time.  If a counter value is set, and then an injection
+ *       fifo tail pointer is set, DMA may see the tail pointer update and begin
+ *       the operation before the counter value has been set.  Inserting an mbar
+ *       between the setting of the counter and the setting of the tail pointer
+ *       guarantees that the counter will be set before the tail pointer is
+ *       updated.
+ *
+ *       MBAR EXAMPLE 2:  A counter hits zero.  We process the hit-zero and write
+ *       a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero
+ *       status (different address).  The hit-zero status will still indicate
+ *       that it hit zero, even though we have already processed it, unless an
+ *       mbar is inserted between clearing the hit-zero and reading the hit-zero
+ *       status.
+ *
+ *       MBAR PHILOSOPHY:  After DMA SRAM is updated in the DMA inline functions,
+ *       they always do at least an mbar (possibly an msync instead...see below).
+ *
+ *       MSYNC does what mbar does, plus ensures consistency across cores.  That
+ *       is, it waits for snoops (invalidations of L1 cache) on the other cores
+ *       to complete before continuing.  This guarantees that all of the cores
+ *       will see a consistent view of memory after the msync.
+ *
+ *       MSYNC EXAMPLE:  When a reception counter has hit zero, we assume the
+ *       DMA'd data is available to be read by any core.  However, old copies of
+ *       that data may still be in the L1 caches.  Inserting an msync after
+ *       detecting that a counter has hit zero guarantees that the old data has
+ *       been removed from the L1 caches.
+ *
+ *       MSYNC PHILOSOPHY:  After the inline functions detect that a counter has
+ *       hit zero, they always do an msync.
+ *
+ *       SPECULATIVE EXECUTION OF MSYNC:  There are cases where msync is done
+ *       conditionally.  The CPU will begin execution of both sides of the
+ *       condition before the result of the condition has been determined.
+ *       Then, it will cancel the execution of one side once the result of the
+ *       condition has been determined.  This speculation is unwanted when
+ *       the first instruction on one side of the condition is msync because
+ *       cancelling an msync is similar to executing the complete msync.
+ *       To avoid this speculative execution of msync, we call
+ *       _bgp_msync_nonspeculative().  This will trick the CPU so it won't begin
+ *       the msync until the result of the condition is known.
+ *
+ *       CALLER ADVICE:  Users of these functions should not need to do
+ *       mbar/msync themselves, unless they are doing something like the
+ *       following:  Read a counter and operate on the result when the counter
+ *       hasn't reached zero.  The caller will need to perform an msync after
+ *       reading the counter in order to ensure that snoops have completed
+ *       on all CPUs before operating on the DMA'd data.
+ *
+ * \note General discussion on injection fifo interrupts, covering both the
+ * warning-threshold-crossed and fifo-full interrupts.
+ *
+ * For remote gets, a fifo is considered available if it has at least 512 bytes
+ * free (32 16B quads). An arriving remote get can be written if there are 512
+ * bytes free, but after that the available goes low and no further remote gets
+ * can be written to any fifo.  Furthermore, if any injection fifo has less than
+ * 512 bytes free, the fifo becomes unavailable and any arriving remote get
+ * packet will cause an interrupt to fire and the rDMA will stop.
+ *
+ * Specifically, if an injection fifo has less than 512 B free (due to either
+ * normal injection or remote gets), the iDMA will continue to operate and the
+ * rDMA will continue to operate until a remote get packet arrives at any
+ * fifo, at which point
+ * an interrupt fires and the rDMA stops.
+ *
+ * Note that these interrupts were put in for warnings of remote get fifos
+ * becoming nearly full.  However, the time between when the warning fires
+ * and the condition is cleared may be long, reconfiguring an almost-full
+ * remote get
+ * fifo is difficult, and recovery from full remote get injection fifos is very
+ * difficult.  Since software can prevent this, and since recovery is so
+ * difficult, we consider injection fifo threshold crossing interrupts and
+ * injection fifo full interrupts to be fatal. Thus there is no handler function
+ * in the injection fifo allocation routine.
+ *
+ * So software needs to manage injection and remote get fifo space so that there
+ * are always at least 512 bytes of free space in every fifo. To accomplish
+ * this, software needs to guarantee it won't inject descriptors if doing so
+ * would trigger an interrupt or make the fifo unavailable.
+ *
+ * This can be done by setting the interrupt threshold to 0 (the interrupt
+ * fires if free space <= threshold), and not injecting if, after injection,
+ * fewer than DMA_MIN_INJECT_SIZE_IN_QUADS (=32) quads would remain free.
+ * Furthermore, remote get space should not be allocated if doing so might
+ * result in strictly fewer than DMA_MIN_INJECT_SIZE_IN_QUADS free quads.
+ *
+ */
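+
+/*
+ * The guard described above (a sketch; f_ptr names the injection fifo's
+ * software fifo structure):
+ *
+   \verbatim
+   // Inject only if, after the 2-quad descriptor is written, at least
+   // DMA_MIN_INJECT_SIZE_IN_QUADS quads remain free.
+   if ( DMA_FifoGetFreeSpace( f_ptr, 1, 0 ) >=
+        DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS + DMA_MIN_INJECT_SIZE_IN_QUADS )
+     {
+       // safe to inject one descriptor
+     }
+   \endverbatim
+ */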
+
+
+
+#include <common/namespace.h>
+/* #include <memory.h> */
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ *   placed in the translation unit (e.g., xxx.c) for use within that unit.
+ *   The function is not externalized for use by another unit...we want this
+ *   so we don't end up with multiple units exporting the same function,
+ *   which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+   \verbatim
+   #ifndef INLINE
+   # define INLINE extern inline
+   #endif
+   INLINE int max(int a, int b) {
+     return a > b ? a : b;
+   }
+   \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+   \verbatim
+   #define INLINE
+   #include "header.h"
+   \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+#include <spi/DMA_Assert.h>
+#include <spi/DMA_Fifo.h>
+#include <spi/DMA_Descriptors.h>
+
+/*
+ * You can save a few cycles by using the parallel floating point unit to do the 'memcpy'
+ * as part of injecting a descriptor into a FIFO; but you then need to quadword-align the source memory
+ * and you may need to save/restore the FP context. Setting k_use_fp_to_inject to 0 arranges for the
+ * generated code to use integer registers for the 'memcpy'.
+ */
+enum {
+	k_use_fp_to_inject = 0
+};
+
+
+/*!
+ * \brief Number of Injection Fifo Groups
+ */
+#define DMA_NUM_INJ_FIFO_GROUPS     4
+
+
+/*!
+ * \brief Number of Injection Fifos per Group
+ */
+#define DMA_NUM_INJ_FIFOS_PER_GROUP 32
+
+
+/*!
+ * \brief Number of Injection Fifos (total)
+ */
+#define DMA_NUM_INJ_FIFOS (DMA_NUM_INJ_FIFO_GROUPS*DMA_NUM_INJ_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief Minimum Free Space Required After Injection
+ *
+ * This is the number of 16-byte quads that need to be free in a fifo after
+ * injection of a descriptor.
+ */
+#define DMA_MIN_INJECT_SIZE_IN_QUADS 32
+
+
+/*!
+ * \brief Number of 16-byte quads in a fifo descriptor
+ *
+ */
+#define DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS 2
+
+
+/*!
+ * \brief Number of bytes in a fifo descriptor
+ *
+ */
+#define DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS*16)
+
+
+/*!
+ * \brief Minimum size of a fifo, somewhat arbitrary
+ */
+#define DMA_MIN_INJ_FIFO_SIZE_IN_BYTES (256*4)
+
+
+/*!
+ * \brief Injection DMA Fifo Structure
+ *
+ * This structure contains a software DMA fifo structure (defined in DMA_Fifo.h)
+ * and other fields that are specific to an injection fifo used by software.
+ *
+ * \todo   Some more careful thought should be given to how to group these so
+ *         as to get the best memory system performance,
+ *         e.g. we probably want to ALIGN_L3_CACHE the fifo_hw_ptr.
+ *
+ */
+typedef struct DMA_InjFifo_t
+{
+  DMA_Fifo_t         dma_fifo;   /*!< Common software fifo structure          */
+  unsigned short int fifo_id;    /*!< The fifo identifier (0 to
+                                      DMA_NUM_INJ_FIFOS_PER_GROUP-1).         */
+
+  unsigned long long desc_count; /*!< The number of descriptors that have
+                                      ever been injected into this fifo.      */
+
+  unsigned int occupiedSize;     /*!< The number of 16B quads in the fifo that
+                                      are logically occupied.  This does not
+                                      include the DMA_MIN_INJECT_SIZE_IN_QUADS
+                                      that always remains logically occupied. */
+  /*!
+   * \note The following fields contain info about the fifo that affects the
+   *       DCR values configuring the fifo.
+   */
+  unsigned short int priority;   /*!< 0 = Normal priority, 1 = High priority.
+                                      The DMA uses this to determine which
+                                      injection fifo to serve next.
+                                      Reflected in DCR addresses
+                                      _BGP_DCR_iDMA_FIFO_PRIORITY(i), where i
+                                      is the group_id.  0xD32 - 0xD35.
+                                      Fifo j is high priority if bit j in the
+                                      DCR is 1, otherwise it is normal
+                                      priority.                               */
+
+  unsigned short int local;      /*!< 0 = non-local, 1 = local.
+                                      If 0, this fifo uses the torus and
+                                      ts_inj_map must be non-zero.
+                                      If 1, this fifo is used for transfers
+                                      local to the node only.
+                                      Reflected in DCR addresses
+                                      _BGP_DCR_iDMA_LOCAL_COPY(i), where i
+                                      is the group_id.  0xD5C - 0xD5F.
+                                      Fifo j is for local transfers if bit j
+                                      in the DCR is 1, otherwise it is for
+                                      torus transfers.                        */
+
+  unsigned char ts_inj_map;      /*!< 8 bit vector mask indicating which torus
+                                      fifos can be used by this DMA fifo.
+                                      Reflected in DCR addresses
+                                      _BGP_DCR_iDMA_TS_INJ_FIFO_MAP(k) where k
+                                      is the fifo_id.  0xD3C - 0xD5B.
+                                      Fifo k can inject in torus fifo j if
+                                      bit j of the k'th DCR byte is 1.        */
+}
+DMA_InjFifo_t;
+
+
+/*!
+ * \brief DMA Injection Fifo Status structure
+ *
+ * This structure maps the DMA SRAM for a particular group of
+ * DMA_NUM_INJ_FIFOS_PER_GROUP fifos.
+ *
+ */
+typedef struct DMA_InjFifoStatus_t
+{
+  volatile unsigned  not_empty;              /*!< R bitmask, 1 bit/fifo:
+                                                    Injection FIFO not empty. */
+
+  volatile unsigned  reserved_0;             /*!< HOLE                        */
+
+  volatile unsigned  available;              /*!< R bitmask, 1 bit/fifo:
+                                                    Injection FIFO available. */
+
+  volatile unsigned  reserved_1;             /*!< HOLE                        */
+
+  volatile unsigned  threshold_crossed;      /*!< R bitmask, 1 bit/fifo:
+                                                    Threshold crossed.        */
+
+  volatile unsigned  reserved_2;             /*!< HOLE                        */
+
+  volatile unsigned  clear_threshold_crossed;/*!< W bitmask, 1 bit/fifo:
+                                                    Clear threshold crossed.  */
+
+  volatile unsigned  reserved_3;             /*!< HOLE                        */
+
+  volatile unsigned  activated;              /*!< R bitmask, 1 bit/fifo:
+                                                    Retrieve activated fifos. */
+
+  volatile unsigned  activate;               /*!< W bitmask, 1 bit/fifo:
+                                                    Set "1" to activate fifo. */
+
+  volatile unsigned  deactivate;             /*!< W bitmask, 1 bit/fifo:
+                                                    Set "1" to deactivate fifo*/
+}
+DMA_InjFifoStatus_t;
+
+
+/*!
+ * \brief DMA Injection Fifo Group Structure
+ *
+ * This structure defines a DMA InjFifo Group.  It points to a
+ * DMA InjFifo Status structure, and contains DMA_NUM_INJ_FIFOS_PER_GROUP
+ * DMA InjFifo structures.
+ *
+ * It is passed into the DMA_InjFifoGroupAllocate system call.
+ * The system call sets up the requested fifos, and fills in this fifo group
+ * structure, including the appropriate DMA InjFifo structures within it.
+ *
+ * It also contains permission bits to use the fifos, one bit per fifo.
+ * When the permission bit is on, the corresponding fifo belongs to this
+ * group and can be used.  Otherwise, the fifo should not be used as part
+ * of this group.  These permission bits are used as follows:
+ *   1. Inline functions will ASSERT when an attempt is made
+ *      to use a fifo that is not part of this group.
+ *   2. Inline functions will use the permission bits as a mask
+ *      to return status information only for fifos that are allocated
+ *      to this group.
+ *
+ */
+typedef struct DMA_InjFifoGroup_t
+{
+  DMA_InjFifoStatus_t *status_ptr;  /*!< Pointer to fifo status.              */
+
+  DMA_InjFifo_t        fifos[DMA_NUM_INJ_FIFOS_PER_GROUP];/*!< Array
+                                         of fifo structures.  The i-th struct
+                                         is defined and usable only if
+                                         bit i of permissions = 1.            */
+
+  unsigned int permissions;         /*!< Permissions bit vector.  Bit i is 1
+                                         if permitted to use fifo i.  The fifo
+                                         is allocated to this group.          */
+
+  unsigned int group_id;            /*!< The id of this group (0 to
+                                         DMA_NUM_INJ_FIFO_GROUPS-1).          */
+}
+DMA_InjFifoGroup_t;
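+
+
+/*!
+ * Example: iterating over only those fifos that this group has permission
+ * to use.  A minimal sketch, assuming "fg" was filled in by a successful
+ * DMA_InjFifoGroupAllocate(); it uses the same _BN() bit-numbering macro
+ * as the asserts in this file:
+ *
+   \verbatim
+   int i;
+   for ( i = 0; i < DMA_NUM_INJ_FIFOS_PER_GROUP; i++ )
+     {
+       if ( ( fg.permissions & _BN(i) ) == 0 ) continue; // not allocated to us
+
+       // fg.fifos[i] is defined and usable here
+     }
+   \endverbatim
+ */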
+
+
+/*!
+ * \brief Remote Get Fifo Full Handler Function Prototype
+ *
+ * A function with this signature receives control when one or more remote
+ * get fifos have filled.  This function should do the following to help
+ * make space in the fifo(s):
+ * 1. Determine if there are any remote get fifos full or nearly full.
+ * 2. For each such fifo:
+ *    1. Allocate a larger fifo
+ *    2. Copy the descriptors from the old fifo to the new fifo
+ *    3. Call DMA_InjFifoInitById() to register the new fifo with the DMA
+ *    4. Call DMA_InjFifoSetTailById() to set the new fifo's tail pointer
+ *    5. Free the old fifo
+ *
+ * A function of this type can be registered on DMA_InjFifoGroupAllocate().
+ *
+ * \param[in]  fg_ptr  Pointer to the fifo group associated with this fifo.
+ * \param[in]  f_num   The fifo number that has filled.  This is
+ *                     relative to the DMA fifo group
+ *                     (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  handler_parm  An opaque pointer provided by the caller who
+ *                            registered this handler.
+ */
+typedef void (*DMA_InjFifoRgetFifoFullHandler_t)(
+					  DMA_InjFifoGroup_t *fg_ptr,
+                                          int                 f_num,
+                                          void               *handler_parm
+					 );
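+
+
+/*!
+ * Example: a skeleton handler following the five steps above.  A hedged
+ * sketch only: my_allocate_larger_fifo() and my_copy_descriptors() are
+ * hypothetical helpers, not part of this API.
+ *
+   \verbatim
+   void my_rget_full_handler( DMA_InjFifoGroup_t *fg_ptr,
+                              int                 f_num,
+                              void               *handler_parm )
+   {
+     void *new_start, *new_end, *new_tail;
+
+     // Steps 1-2: allocate a larger fifo and copy the old fifo's
+     // descriptors into it (hypothetical helpers)
+     my_allocate_larger_fifo( &new_start, &new_end );
+     new_tail = my_copy_descriptors( fg_ptr, f_num, new_start );
+
+     // Step 3: register the new fifo with the DMA
+     DMA_InjFifoInitById( fg_ptr, f_num, new_start, new_start, new_end );
+
+     // Step 4: set the tail past the copied descriptors
+     DMA_InjFifoSetTailById( fg_ptr, f_num, new_tail );
+
+     // Step 5: free the old fifo's storage (elided)
+   }
+   \endverbatim
+ */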
+
+
+/*!
+ * \brief Remote Get Fifo Full Handler Table Entry
+ *
+ * This defines an entry in the Remote Get Fifo Full Handler Table.
+ * It identifies the fifo group associated with the full fifo, the handler
+ * function that receives control to handle the fifo full condition, and
+ * the opaque pointer to be passed to that handler when it is called.  Each
+ * entry also records the core number of the core that will process the
+ * condition.
+ */
+typedef struct DMA_InjFifoRgetFifoFullHandlerEntry_t
+{
+  DMA_InjFifoGroup_t               *fg_ptr;  /*!< Pointer to injection fifo group    */
+  DMA_InjFifoRgetFifoFullHandler_t  handler; /*!< Pointer to handler function */
+  void                             *handler_parm; /*!< Pointer to be passed to
+                                                       the handler.           */
+  uint32_t                          core_num;/*!< Core number of the core that
+                                                  will process the condition. */
+} DMA_InjFifoRgetFifoFullHandlerEntry_t;
+
+
+/*!
+ *
+ * \brief Remote Get Fifo Full Handler Table
+ *
+ * An array of entries, one per injection fifo.  Each entry specifies the fifo
+ * group structure and the handler function that will receive control to
+ * handle a remote get fifo full condition for fifos in that fifo group.
+ */
+extern DMA_InjFifoRgetFifoFullHandlerEntry_t DMA_RgetFifoFullHandlerTable[DMA_NUM_INJ_FIFOS];
+
+
+/*!
+ * \brief Remote Get Fifo Full Init Has Been Done Indicator
+ *
+ *  0 means the initialization has not been done.
+ *  1 means the initialization has been done.
+ */
+extern int DMA_InjFifoRgetFifoFullInitHasBeenDone;
+
+
+/*!
+ * \brief Remote Get Fifo Full Initialization
+ *
+ * Initialize data structures and interrupt handlers to handle a remote get
+ * fifo full condition.
+ *
+ * \param[in]  interruptGroup  The handle that identifies the remote get fifo
+ *                             full interrupts (only one interrupt, in this
+ *                             case, group 3, irq 24).
+ * \param[in] rget_barrier     A function pointer to a function that implements
+ *                             the barrier that is used by the handler function
+ *                             to synchronize all cores in the node as they
+ *                             each handle the interrupt (it is a broadcasted
+ *                             interrupt).
+ * \param[in] rget_barrier_arg The generic arg to pass to the barrier function.
+ */
+void DMA_InjFifoRgetFifoFullInit( Kernel_InterruptGroup_t   interruptGroup,
+				  void                    (*rget_barrier)(void *),
+                                  void                     *rget_barrier_arg );
+
+
+/*!
+ * \brief Query Free DMA InjFifos within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available to be allocated) fifos within the specified group.
+ *
+ * \param[in]   grp            Group number being queried
+ *                             (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[out]  num_fifos      Pointer to an int where the number of free
+ *                             fifos in the specified group is returned
+ * \param[out]  fifo_ids       Pointer to an array of num_fifos ints where
+ *                             the list of free fifos is returned.
+ *                             Each int is the fifo number
+ *                             (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *                             The caller must provide space for
+ *                             DMA_NUM_INJ_FIFOS_PER_GROUP ints,
+ *                             in case the entire fifo group is free.
+ *
+ * \retval  0  Successful.  num_fifos and fifo_ids array set as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ */
+__INLINE__ int  DMA_InjFifoGroupQueryFree(
+					  int  grp,
+					  int *num_fifos,
+					  int *fifo_ids
+					 )
+{
+  return Kernel_InjFifoGroupQueryFree( grp,
+				       (uint32_t*)num_fifos,
+				       (uint32_t*)fifo_ids);
+}
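+
+
+/*!
+ * Example: discovering which fifos in group 0 are free.  A minimal sketch;
+ * on failure errno gives the reason:
+ *
+   \verbatim
+   int num_fifos;
+   int fifo_ids[DMA_NUM_INJ_FIFOS_PER_GROUP]; // worst case: all free
+
+   if ( DMA_InjFifoGroupQueryFree( 0, &num_fifos, fifo_ids ) == 0 )
+     {
+       // fifo_ids[0..num_fifos-1] may now be passed to
+       // DMA_InjFifoGroupAllocate()
+     }
+   \endverbatim
+ */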
+
+
+/*!
+ * \brief Allocate DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that allocates specified
+ * DMA injection fifos from the specified group.  Parameters specify whether
+ * each fifo is high or normal priority, local or non-local, and which torus
+ * fifos it maps to.  A DMA_InjFifoGroup_t structure is returned for
+ * use in other inline functions to operate on the allocated fifos.
+ *
+ * Refer to the interrupt discussion at the top of this include file to see why
+ * there are no interrupt-related parameters.
+ *
+ * \param[in]   grp          Group number whose DMA injection fifos are being
+ *                           allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in]   num_fifos    Number of fifos to be allocated from the group
+ *                           (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in]   fifo_ids     Pointer to an array of num_fifos ints where
+ *                           the list of fifos to be allocated is provided.
+ *                           Each int is the fifo number
+ *                           (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]   priorities   Pointer to an array of num_fifos short ints where
+ *                           the list of priorities to be assigned to the fifos
+ *                           is provided.  Each short int indicates the priority
+ *                           to be assigned to each of the fifos identified in
+ *                           the fifo_ids array (0 is normal, 1 is high priority).
+ * \param[in]   locals       Pointer to an array of num_fifos short ints where
+ *                           an indication is provided of whether each fifo will
+ *                           be used for local transfers (within the same node)
+ *                           or torus transfers.  Each short int indicates the
+ *                           local/non-local attribute to be assigned to each of
+ *                           the fifos identified in the fifo_ids array (0 is
+ *                           non-local, 1 is local).  If 0, the corresponding
+ *                           array element in ts_inj_maps indicates which torus
+ *                           fifos can be injected.
+ * \param[in]   ts_inj_maps  Pointer to an array of num_fifos chars where
+ *                           the torus fifos that can be injected are specified
+ *                           for each fifo.  Each char specifies which of
+ *                           the 8 torus injection fifos can be injected when a
+ *                           descriptor is injected into the DMA injection fifo.
+ *                           Must be non-zero when the corresponding "locals"
+ *                           is 0.
+ *                           Bits 0-3 are for torus group 0.
+ *                           Bits 4-7 are for torus group 1.
+ *                           Bits 3 and 7 are the high priority fifos.
+ * \param[in]   rget_handler Pointer to a function with prototype
+ *                           DMA_InjFifoRgetFifoFullHandler_t that will handle
+ *                           a remote get fifo full condition for fifos in this
+ *                           fifo group.  If NULL is specified, the condition
+ *                           will not be handled.
+ * \param[in]   rget_handler_parm   A pointer to opaque storage that will be
+ *                                  passed to the rget_handler.
+ * \param[in]   rget_interruptGroup  A InterruptGroup_t that identifies the
+ *                           group of interrupts that handle the remote get
+ *                           fifo full condition.  It is only one interrupt:
+ *                           group 3, irq 24.
+ * \param[in]   rget_barrier Pointer to a function that implements
+ *                           a barrier that is used by the rget fifo full
+ *                           interrupt handler.  This barrier should be across
+ *                           all cores of all active processes on this compute node.
+ * \param[in]   rget_barrier_arg Generic arg to pass to barrier function.
+ * \param[out]  fg_ptr       Pointer to a structure that is filled in upon
+ *                           successful return for use in other inline functions
+ *                           to operate on the allocated fifos.
+ *                           \li fifos - Array of fifo structures.  Structures
+ *                                       for allocated fifos are initialized as
+ *                                       documented below.  Structures for
+ *                                       fifos not allocated by this instance of
+ *                                       this syscall are initialized to binary
+ *                                       zeros.  Allocated fifos are enabled.
+ *                           \li status_ptr  - Points to status area within the
+ *                                             DMA memory map.
+ *                           \li permissions - Bits indicating which fifos were
+ *                                             allocated during this syscall.
+ *                           \li group_id    - The id of this group.
+ *
+ * \retval  0  Successful.  Fifos allocated and fg_ptr structure filled in as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \return The group fifo structure pointed to by fg_ptr is completely
+ *         initialized as follows:
+ *         - status_ptr points to the appropriate fifo group DMA memory map
+ *         - fifo structures array.  Fifo structures for fifos not allocated
+ *           during this syscall are initialized to binary zeros.  Fifo
+ *           structures for fifos allocated during this syscall are initialized:
+ *             - fifo_hw_ptr points to the DMA memory map for this fifo.  The
+ *               hardware start, end, head, and tail are set to zero by the
+ *               kernel.
+ *             - All other fields in the structure are set to zero by the kernel
+ *               except priority, local, and ts_inj_map are set to reflect what
+ *               was requested in the priorities, locals, and ts_inj_maps
+ *               syscall parameters.
+ *
+ */
+__INLINE__ int  DMA_InjFifoGroupAllocate(
+			 int                               grp,
+			 int                               num_fifos,
+		         int                              *fifo_ids,
+			 unsigned short int               *priorities,
+			 unsigned short int               *locals,
+			 unsigned char                    *ts_inj_maps,
+                         DMA_InjFifoRgetFifoFullHandler_t  rget_handler,
+                         void                             *rget_handler_parm,
+                         Kernel_InterruptGroup_t           rget_interruptGroup,
+                         void                            (*rget_barrier)(void *),
+                         void                             *rget_barrier_arg,
+			 DMA_InjFifoGroup_t               *fg_ptr
+					)
+{
+  int rc;
+  int i, global_fifo_id;
+
+  rc = Kernel_InjFifoGroupAllocate( grp,
+				    num_fifos,
+				    (uint32_t*)fifo_ids,
+				    (uint16_t*)priorities,
+				    (uint16_t*)locals,
+				    (uint8_t*)ts_inj_maps,
+				    (uint32_t*)fg_ptr);
+
+  if ( rc == 0 )
+  {
+     /*
+      * If a remote get fifo full handler has been provided, update the table
+      * to indicate that this handler will handle full conditions on the fifos
+      * just allocated.
+      */
+     if ( rget_handler )
+     {
+        /*
+         * If rget handler init has not been done, do it:
+         */
+        if ( DMA_InjFifoRgetFifoFullInitHasBeenDone == 0 )
+          DMA_InjFifoRgetFifoFullInit( rget_interruptGroup,
+				       rget_barrier,
+				       rget_barrier_arg );
+
+        for (i=0; i<num_fifos; i++)
+        {
+           global_fifo_id = (grp * DMA_NUM_INJ_FIFOS_PER_GROUP) + fifo_ids[i];
+           DMA_RgetFifoFullHandlerTable[global_fifo_id].fg_ptr  = fg_ptr;
+           DMA_RgetFifoFullHandlerTable[global_fifo_id].handler = rget_handler;
+	   DMA_RgetFifoFullHandlerTable[global_fifo_id].handler_parm =
+                                                             rget_handler_parm;
+           DMA_RgetFifoFullHandlerTable[global_fifo_id].core_num=
+                                                  Kernel_PhysicalProcessorID();
+        }
+
+	/*
+	 * Indicate done with initialization.
+	 */
+	DMA_InjFifoRgetFifoFullInitHasBeenDone = 1;
+     }
+  }
+
+  return(rc);
+}
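+
+
+/*!
+ * Example: allocating one normal-priority, non-local fifo from group 0,
+ * without a remote get fifo full handler.  A hedged sketch: the ts_inj_map
+ * of 0xFF (all eight torus fifos) is an illustrative choice, and because
+ * rget_handler is NULL the interrupt group and barrier arguments are not
+ * used (a zero-valued Kernel_InterruptGroup_t is assumed to be an
+ * acceptable placeholder):
+ *
+   \verbatim
+   DMA_InjFifoGroup_t fg;
+   int                fifo_ids[1]    = { 0 };
+   unsigned short int priorities[1]  = { 0 };    // normal priority
+   unsigned short int locals[1]      = { 0 };    // torus, not local
+   unsigned char      ts_inj_maps[1] = { 0xFF }; // any torus fifo
+
+   int rc = DMA_InjFifoGroupAllocate( 0, 1, fifo_ids,
+                                      priorities, locals, ts_inj_maps,
+                                      NULL,  // no rget handler
+                                      NULL,  // no handler parm
+                                      (Kernel_InterruptGroup_t) 0,
+                                      NULL,  // no barrier
+                                      NULL,  // no barrier arg
+                                      &fg );
+   \endverbatim
+ *
+ * The matching call DMA_InjFifoGroupFree( 0, 1, fifo_ids, &fg ) returns
+ * the fifo to the kernel once it is empty.
+ */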
+
+
+/*!
+ * \brief Free DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA injection
+ * fifos from the specified group.
+ *
+ * \param[in]   grp          Group number whose DMA injection fifos are being
+ *                           freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in]   num_fifos    Number of fifos to be freed from the group
+ *                           (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in]   fifo_ids     Pointer to an array of num_fifos ints where
+ *                           the list of fifos to be freed is provided.
+ *                           Each int is the fifo number
+ *                           (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]   fg_ptr       Pointer to the structure previously filled in when
+ *                           these fifos were allocated.  Upon successful
+ *                           return, this structure is updated to reflect the
+ *                           freed fifos:
+ *                           \li fifos - Structures for freed fifos zero'd.
+ *                                       Freed fifos are disabled.
+ *                           \li permissions - Bits cleared for each freed fifo.
+ *
+ * \retval  0  Successful.  Fifos freed and fg_ptr structure updated as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \note  It is a fatal error if any of the fifos are non-empty and activated.
+ *
+ */
+__INLINE__ int  DMA_InjFifoGroupFree(
+				     int                 grp,
+				     int                 num_fifos,
+				     int                *fifo_ids,
+				     DMA_InjFifoGroup_t *fg_ptr
+				    )
+{
+  return Kernel_InjFifoGroupFree( grp,
+				  num_fifos,
+				  (uint32_t*)fifo_ids,
+				  (uint32_t*)fg_ptr);
+}
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a pointer to the injection fifo structure
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Injection Fifo Head
+ *
+ * Set a DMA injection fifo's "head", given an injection fifo structure
+ *
+ * \param[in]  f_ptr    Pointer to the injection fifo structure
+ * \param[in]  va_head  Virtual address of the head to be set
+ *
+ * \return  None
+ *
+ * \post va_head is set in both the hardware and software fifo structures,
+ *       and the fifo free space is recalculated.
+ *
+ * \note Normally, for an injection fifo, the DMA manipulates the head, but
+ *       in optimized persistent communications the core can do it if it is
+ *       sure the fifo is empty at the time this is called.
+ */
+__INLINE__ void DMA_InjFifoSetHead(
+				   DMA_InjFifo_t *f_ptr,
+				   void        *va_head
+				  )
+{
+  SPI_assert( f_ptr != NULL );
+
+  DMA_FifoSetHead( &f_ptr->dma_fifo,
+		   va_head );
+}
+
+
+/*!
+ * \brief Increment DMA Injection Fifo Tail
+ *
+ * Increment a DMA injection fifo's "tail", given an injection fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the injection fifo structure
+ * \param[in]  incr   The number of quads (16 byte units) to increment the
+ *                    tail pointer by.  This value must be even (i.e.
+ *                    descriptors are 32 bytes).
+ *
+ * \retval  None
+ *
+ * \post va_tail is set in both the hardware and software fifo structures,
+ *       the fifo free space is recalculated, and the fifo's descriptor count
+ *       is incremented according to incr.
+ *
+ * \note This function does not check if there is free space in the fifo
+ *       for this many quads.  It must be preceded by a check of the
+ *       free space.
+ */
+__INLINE__ void DMA_InjFifoIncrementTail(
+					 DMA_InjFifo_t *f_ptr,
+					 unsigned int   incr
+					)
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( (incr & 0x1) == 0 );
+
+  {
+  void *va_tail = DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
+
+  void *va_end  = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo );
+
+  unsigned int incr_bytes = incr << 4;
+
+  unsigned int bytes_to_end = (unsigned)va_end - (unsigned)va_tail;
+
+  /*
+   * Note:  The following check must be >= instead of just >.  We never want
+   *        the tail to be equal to the end so we can always copy a descriptor
+   *        to the tail, safely.
+   */
+  if ( incr_bytes >= bytes_to_end )
+    {
+      va_tail = (char *)
+	          ( (unsigned)DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ) +
+		    ( incr_bytes - bytes_to_end ) );
+    }
+  else
+    {
+      va_tail = (char *)( (unsigned)va_tail + incr_bytes );
+    }
+
+  DMA_FifoSetTail( &f_ptr->dma_fifo,
+		   va_tail );
+
+  f_ptr->desc_count += (incr >> 1);
+  }
+
+}
+
+
+/*!
+ * \brief Get DMA Injection Fifo Descriptor Count
+ *
+ * Get a DMA injection fifo's "descriptor count", given an injection fifo
+ * structure
+ *
+ * \param[in]  f_ptr  Pointer to the injection fifo structure
+ *
+ * \retval  desc_count  The descriptor count for the specified fifo
+ *
+ */
+__INLINE__ unsigned long long DMA_InjFifoGetDescriptorCount(
+						      DMA_InjFifo_t *f_ptr
+						     )
+{
+  SPI_assert( f_ptr != NULL );
+
+  return f_ptr->desc_count;
+}
+
+
+/*!
+ * \brief Is DMA Descriptor Done
+ *
+ * Return whether a specified descriptor is still in the specified injection
+ * fifo (not done).  The descriptor is identified by the descriptor count
+ * observed immediately after the descriptor was injected into the fifo
+ * (obtained via DMA_InjFifoGetDescriptorCount()).
+ *
+ * \param[in]  f_ptr       Pointer to the injection fifo structure
+ * \param[in]  desc_count  The descriptor count immediately after the
+ *                         descriptor in question was injected into
+ *                         the fifo.
+ * \param[in]  update      0  Do not update the fifo's shadow information.
+ *                         1  Update the fifo's shadow information.
+ *                         It is a performance optimization to only update the
+ *                         shadow information once for a group of descriptors
+ *                         being processed.
+ *
+ * \retval  0  False.  The descriptor identified by desc_count is not done.
+ *                     It is still in the fifo.
+ * \retval  1  True.   The descriptor identified by desc_count is done.
+ *                     It is no longer in the fifo.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoIsDescriptorDone(
+					    DMA_InjFifo_t      *f_ptr,
+					    unsigned long long  desc_count,
+					    unsigned int        update
+					   )
+{
+  unsigned long long num_desc_in_fifo;
+  unsigned int free_space;
+  DMA_Fifo_t  *fifo_ptr;
+
+  SPI_assert( f_ptr != NULL );
+
+  fifo_ptr = &(f_ptr->dma_fifo);
+
+  /* If caller wants a fresh look in the fifo, update its free space.
+   * Otherwise, fetch the free space based on shadows.
+   */
+  if (update)
+    free_space = DMA_FifoGetFreeSpace (fifo_ptr, 1, 0);
+  else
+    free_space = DMA_FifoGetFreeSpaceNoUpdateCalculation(fifo_ptr);
+
+  /* Compute the desc_count of the oldest descriptor in the fifo (minus 1)
+   * Note:  Each desc is a 32B unit and the below are 16B entities
+   */
+  num_desc_in_fifo = ( DMA_FifoGetSize(fifo_ptr) - free_space ) / 2;
+
+  /* Determine if the specified desc_count is still in the fifo.
+   * We take the current descriptor count for this fifo and subtract the
+   * number of descriptors still in the fifo.  This is the descriptor count
+   * of the oldest descriptor still remaining in the fifo (minus 1).
+   * We compare that with the caller's desc_count to determine if the
+   * caller's descriptor is still in the fifo.
+   */
+  if ( desc_count <= (DMA_InjFifoGetDescriptorCount(f_ptr) - num_desc_in_fifo) )
+    return (1); /* Descriptor is done */
+  else
+    return (0); /* Descriptor is not done */
+
+}
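+
+
+/*!
+ * Example: waiting for a specific descriptor to complete.  A minimal
+ * sketch, assuming "inj_fifo" is an initialized DMA_InjFifo_t and "desc"
+ * a prepared descriptor:
+ *
+   \verbatim
+   unsigned long long my_count;
+
+   if ( DMA_InjFifoInjectDescriptor( &inj_fifo, &desc ) == 1 )
+     {
+       // The count observed immediately after injection identifies the
+       // descriptor
+       my_count = DMA_InjFifoGetDescriptorCount( &inj_fifo );
+
+       // Poll, refreshing the shadow information each time (update=1)
+       while ( ! DMA_InjFifoIsDescriptorDone( &inj_fifo, my_count, 1 ) )
+         ;
+     }
+   \endverbatim
+ */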
+
+
+/*!
+ * \brief DMA Injection Fifo Reserve Descriptor Storage
+ *
+ * Reserve storage in a DMA injection fifo for a remote get descriptor, given
+ * an injection fifo structure.
+ *
+ * \param[in]  f_ptr   Pointer to the injection fifo structure
+ *
+ * \retval  0   Successful.  There was enough space in the fifo and the
+ *              storage was reserved.
+ * \retval -1   Unsuccessful.  There was not enough space in the fifo.
+ *
+ * \note Internally, this increments the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ int DMA_InjFifoReserveDescriptorStorage(
+						   DMA_InjFifo_t *f_ptr
+						  )
+{
+  SPI_assert( f_ptr != NULL );
+
+  if ( (DMA_FifoGetSize(&f_ptr->dma_fifo) - f_ptr->occupiedSize) >=
+       (DMA_MIN_INJECT_SIZE_IN_QUADS + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) ) {
+     f_ptr->occupiedSize += DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+    return (0);
+  }
+  else {
+     return (-1);
+  }
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Free Descriptor Storage Reservation
+ *
+ * Free a reservation for storage for a remote get descriptor in a DMA injection
+ * fifo, previously reserved using DMA_InjFifoReserveDescriptorStorage().
+ *
+ * \param[in]  f_ptr  Pointer to the injection fifo structure
+ *
+ * \return  None
+ *
+ * \note Internally, this decrements the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ void DMA_InjFifoFreeDescriptorStorageReservation(
+						 DMA_InjFifo_t *f_ptr
+						)
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->occupiedSize >= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );
+
+  f_ptr->occupiedSize -= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+}
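+
+
+/*!
+ * Example: pairing a remote get storage reservation with its release.
+ * A minimal sketch; injection of the remote get descriptor itself is
+ * elided, and "rget_fifo" is an initialized DMA_InjFifo_t:
+ *
+   \verbatim
+   if ( DMA_InjFifoReserveDescriptorStorage( &rget_fifo ) == 0 )
+     {
+       // ... inject the remote get descriptor ...
+
+       // Later, when the reserved slot is no longer needed:
+       DMA_InjFifoFreeDescriptorStorageReservation( &rget_fifo );
+     }
+   else
+     {
+       // Not enough space; back off and retry
+     }
+   \endverbatim
+ */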
+
+
+/*!
+ * \brief Check If An Injection Fifo Has Space For Injection
+ *
+ * Check if an injection fifo has enough space for a single descriptor to be
+ * injected.
+ *
+ * \param[in]  f_ptr  Pointer to the injection fifo structure
+ *
+ * \retval  hasSpace  An indicator of whether the fifo has space for a
+ *                    descriptor.
+ *                    - 0 (false) means the fifo is full.
+ *                    - 1 (true)  means the fifo has space.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoHasSpace(
+					    DMA_InjFifo_t       *f_ptr
+					   )
+{
+  SPI_assert( f_ptr != NULL );
+
+  {
+  unsigned int free_space;
+
+  /* Get the free space in the fifo using the shadow value */
+  free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+				     0, /* Use shadow head */
+				     0);/* use shadow tail */
+
+  /*
+   * If after injecting, (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS is the amount we
+   * are going to inject), there is still at least the minimum allowable free
+   * space left in the fifo, go ahead and inject.  We want at least
+   * DMA_MIN_INJECT_SIZE_IN_QUADS free space after injection.
+   *
+   * Otherwise, read the hardware head pointer and recalculate the free space,
+   * and check again.  Note:  We don't need to read the hardware tail
+   * pointer because only software updates that, and we recalculate the
+   * free space at that time.
+   *
+   * If there is still not enough room in the fifo, return 0, indicating that
+   * the descriptor could not be injected.
+   *
+   */
+  if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+                    DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS )
+    {
+      free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+					 1,  /* Use hardware head */
+					 0); /* Use shadow tail   */
+
+      if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+	                DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0;
+    }
+
+  return 1;  /*  There is space in the fifo. */
+  }
+}
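+
+
+/*!
+ * Example: the intended pairing of the space check with the unchecked
+ * injection defined below.  A minimal sketch:
+ *
+   \verbatim
+   if ( DMA_InjFifoHasSpace( &inj_fifo ) )
+     {
+       // Safe: space for one descriptor was verified above
+       DMA_InjFifoInjectDescriptorNoSpaceCheck( &inj_fifo, &desc );
+     }
+   \endverbatim
+ */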
+
+/*
+ * A 32-byte copy using integer registers only.  Loads and stores are
+ * interleaved so that consecutive instructions do not depend on one
+ * another, giving the compiler room to schedule around load latency.
+ */
+static inline void DMA_DescriptorToFifo(char *store_ptr, const char *load_ptr)
+{
+	int * store_int=(int *) store_ptr ;
+	int * load_int= (int *) load_ptr ;
+	int v0 = load_int[0] ;
+	int v1 = load_int[1] ;
+	store_int[0] = v0 ;
+	v0 = load_int[2] ;
+	store_int[1] = v1 ;
+	v1 = load_int[3] ;
+	store_int[2] = v0 ;
+	v0 = load_int[4] ;
+	store_int[3] = v1 ;
+	v1 = load_int[5] ;
+	store_int[4] = v0 ;
+	v0 = load_int[6] ;
+	store_int[5] = v1 ;
+	v1 = load_int[7] ;
+	store_int[6] = v0 ;
+	store_int[7] = v1 ;
+}
+/*!
+ * \brief Inject a Descriptor into a DMA Injection Fifo Without Checking for
+ *        Space
+ *
+ * Inject a descriptor into a DMA injection fifo, given an injection fifo
+ * structure, without checking to see if there is enough space in the fifo.
+ * It is assumed that the caller has already checked for enough space using
+ * the DMA_InjFifoHasSpace() function.
+ *
+ * \param[in]  f_ptr  Pointer to the injection fifo structure
+ * \param[in]  desc   A pointer to the descriptor to be injected.
+ *                    Must be 16-byte aligned.
+ *
+ * \retval  numDescInjected  The number of descriptors injected.
+ *                           - 1 means it was successfully injected.
+ *
+ * \see DMA_InjFifoHasSpace()
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorNoSpaceCheck(
+					   DMA_InjFifo_t       *f_ptr,
+					   DMA_InjDescriptor_t *desc
+					  )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( desc  != NULL );
+
+  {
+  char *load_ptr, *store_ptr;
+
+  /*
+   * Copy the descriptor to the current va_tail of the fifo.
+   * Msync to ensure the descriptor has been written to memory and the L1 caches
+   * are in sync.
+   * Move the tail past the descriptor so the DMA knows the descriptor is there.
+   *   - handle wrapping
+   *   - update free space
+   *
+   */
+
+  if( k_use_fp_to_inject)
+	  {
+		  if ( ( (unsigned)desc & 0xFFFFFFF0 ) == (unsigned)desc ) /* 16B aligned? */
+		    {
+		      load_ptr  = (char*)desc;
+		      store_ptr = (char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
+		      _bgp_QuadLoad ( load_ptr,     0 );
+		      _bgp_QuadLoad ( load_ptr+16,  1 );
+		      _bgp_QuadStore( store_ptr,    0 );
+		      _bgp_QuadStore( store_ptr+16, 1 );
+		    }
+		  else
+		    {
+		      memcpy( DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),
+			      desc,
+			      DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
+		    }
+	  }
+  else
+	  {
+		  DMA_DescriptorToFifo((char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),(char*)desc) ;
+	  }
+
+   /*  _bgp_msync();  mbar is good enough */
+  _bgp_mbar();
+
+  DMA_InjFifoIncrementTail( f_ptr,
+			    DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );
+
+  return 1; /* Success */
+  }
+}
+
+
+/*!
+ * \brief Inject a Descriptor into a DMA Injection Fifo
+ *
+ * Inject a descriptor into a DMA injection fifo, given an injection fifo
+ * structure
+ *
+ * \param[in]  f_ptr  Pointer to the injection fifo structure
+ * \param[in]  desc   A pointer to the descriptor to be injected.
+ *                    Must be 16-byte aligned.
+ *
+ * \retval  numDescInjected  The number of descriptors injected.
+ *                           - 0 means it was not injected, most likely because
+ *                             the fifo is full.
+ *                           - 1 means it was successfully injected
+ *
+ * Caution: If you call this function two or more times in quick
+ * succession to try to put descriptors into a FIFO, occasionally
+ * one of the descriptors appears not to be acted on by the hardware.
+ * An alternative is to use DMA_InjFifoInjectDescriptors with a vector
+ * of descriptors; this appears to do the job reliably.
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptor(
+					   DMA_InjFifo_t       *f_ptr,
+					   DMA_InjDescriptor_t *desc
+					  )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( desc  != NULL );
+
+  {
+  unsigned int free_space;
+  char *load_ptr, *store_ptr;
+
+  /* Get the free space in the fifo using the shadow value */
+  free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+				     0, /* Use shadow head */
+				     0);/* use shadow tail */
+
+  /*
+   * If after injecting, (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS is the amount we
+   * are going to inject), there is still at least the minimum allowable free
+   * space left in the fifo, go ahead and inject.  We want at least
+   * DMA_MIN_INJECT_SIZE_IN_QUADS free space after injection.
+   *
+   * Otherwise, read the hardware head pointer and recalculate the free space,
+   * and check again.  Note:  We don't need to read the hardware tail
+   * pointer because only software updates that, and we recalculate the
+   * free space at that time.
+   *
+   * If there is still not enough room in the fifo, return 0, indicating that
+   * the descriptor could not be injected.
+   *
+   */
+  if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+                    DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS )
+    {
+      free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+					 1,  /* Use hardware head */
+					 0); /* Use shadow tail   */
+
+      if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+	                DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0;
+    }
+
+  /*
+   * We have enough room in the fifo.
+   * Copy the descriptor to the current va_tail of the fifo.
+   * Msync to ensure the descriptor has been written to memory and the L1 caches
+   * are in sync.
+   * Move the tail past the descriptor so the DMA knows the descriptor is there.
+   *   - handle wrapping
+   *   - update free space
+   *
+   */
+
+  if( k_use_fp_to_inject)
+	  {
+		  if ( ( (unsigned)desc & 0xFFFFFFF0 ) == (unsigned)desc ) /* 16B aligned? */
+		    {
+		      load_ptr  = (char*)desc;
+		      store_ptr = (char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
+		      _bgp_QuadLoad ( load_ptr,     0 );
+		      _bgp_QuadLoad ( load_ptr+16,  1 );
+		      _bgp_QuadStore( store_ptr,    0 );
+		      _bgp_QuadStore( store_ptr+16, 1 );
+		    }
+		  else
+		    {
+		      memcpy( DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),
+			      desc,
+			      DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
+		    }
+	  }
+	else
+	  {
+		  DMA_DescriptorToFifo((char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),(char*)desc) ;
+	  }
+
+   /*  _bgp_msync();  mbar is good enough */
+  _bgp_mbar();
+
+  DMA_InjFifoIncrementTail( f_ptr,
+			    DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );
+
+  return 1; /* Success */
+  }
+}
+
+
+/*!
+ * \brief Inject Multiple Descriptors into a DMA Injection Fifo
+ *
+ * Inject multiple descriptors into a DMA injection fifo, given an injection fifo
+ * structure
+ *
+ * \param[in]  f_ptr     Pointer to the injection fifo structure
+ * \param[in]  num_desc  Number of descriptors to be injected
+ * \param[in]  desc      A pointer to an array of pointers to descriptors to be
+ *                       injected.  The descriptors must be 16-byte aligned.
+ *
+ * \retval  numDescInjected  The number of descriptors injected.
+ *                           - less than num_desc means some were not injected,
+ *                             most likely because the fifo is full.
+ *                           - num_desc means all were successfully injected
+ *
+ */
+#if 0
+__INLINE__ int DMA_InjFifoInjectDescriptors(
+					    DMA_InjFifo_t        *f_ptr,
+					    int                   num_desc,
+					    DMA_InjDescriptor_t **desc
+					   )
+{
+   int i;
+   int rc=0 ;
+   for(i=0;i<num_desc;i+=1)
+	   {
+		   int rc0=DMA_InjFifoInjectDescriptor(f_ptr,desc[i]) ;
+		   rc += rc0 ;
+	   }
+   return rc ;
+}
+#else
+__INLINE__ int DMA_InjFifoInjectDescriptors(
+					    DMA_InjFifo_t        *f_ptr,
+					    int                   num_desc,
+					    DMA_InjDescriptor_t **desc
+					   )
+{
+  unsigned int  free_space;
+  unsigned int  total_space_needed_in_quads;
+  void         *va_tail;
+  void         *va_end;
+  void         *va_start;
+  char         *target;
+  unsigned int  num_quads_to_inject, num_quads_remaining;
+  int           i;
+  char         *load_ptr, *store_ptr;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( desc  != NULL );
+  SPI_assert( num_desc > 0  );
+
+  /* Get the free space in the fifo using the shadow value */
+  free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+				     0, /* Use shadow head */
+				     0);/* Use shadow tail */
+
+  total_space_needed_in_quads = num_desc *
+                                  DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+
+  /*
+   * If after injecting all descriptors (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS
+   * per descriptor is the amount we are going to inject), there is still at
+   * least the minimum allowable free space left in the fifo, go ahead and
+   * inject.  We want at least DMA_MIN_INJECT_SIZE_IN_QUADS free space
+   * after injection.
+   *
+   * Otherwise, read the hardware head pointer and recalculate the free space,
+   * and check again.
+   *
+   * If there is still not enough room in the fifo for any descriptors,
+   * return 0.  Otherwise, continue and inject as many descriptors as possible.
+   *
+   */
+  if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+                      total_space_needed_in_quads )
+    {
+      free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+				         1,  /* Use hardware head */
+					 0); /* Use shadow tail   */
+
+      if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+	                  DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0;
+    }
+
+  /*
+   * We have enough room in the fifo for at least some descriptors.
+   * Copy the descriptors (as many as will fit) to the current va_tail of the
+   * fifo.
+   * Msync to ensure the descriptor has been written to memory and the L1 caches
+   * are in sync.
+   * Move the tail past the descriptor so the DMA knows the descriptor is there.
+   *   - handle wrapping
+   *   - update free space
+   *
+   */
+  va_tail             = DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
+  va_start            = DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo );
+  va_end              = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo );
+  target              = (char*)va_tail;
+
+  if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + total_space_needed_in_quads ) {
+     num_quads_to_inject = free_space - DMA_MIN_INJECT_SIZE_IN_QUADS;
+  }
+  else {
+    num_quads_to_inject = total_space_needed_in_quads;
+  }
+  num_quads_remaining = num_quads_to_inject;
+  i                   = 0;
+
+  while ( num_quads_remaining > 0 )
+    {
+      SPI_assert( desc[i] != NULL );
+
+      if( k_use_fp_to_inject)
+    	  {
+	      if ( ( (unsigned)desc[i] & 0xFFFFFFF0 ) == (unsigned)desc[i] ) /* 16B aligned? */
+		{
+		  load_ptr  = (char*)desc[i];
+		  store_ptr = (char*)target;
+		  _bgp_QuadLoad ( load_ptr,     0 );
+		  _bgp_QuadLoad ( load_ptr+16,  1 );
+		  _bgp_QuadStore( store_ptr,    0 );
+		  _bgp_QuadStore( store_ptr+16, 1 );
+		}
+	      else
+		{
+		  memcpy( (char*)target,
+			  desc[i],
+			  DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
+		}
+	  }
+      else
+	  {
+		  DMA_DescriptorToFifo(target,(char*)(desc[i])) ;
+	  }
+
+      i++;
+      num_quads_remaining -= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+      target += DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES;
+      if ( target >= (char*)va_end )
+	target = (char*)va_start;
+    }
+
+   /*  _bgp_msync();  mbar good enough */
+  _bgp_mbar();
+
+  DMA_InjFifoIncrementTail( f_ptr,
+			    num_quads_to_inject );
+
+  return i; /* Success */
+
+}
+#endif
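+
+/*!
+ * Example: injecting a batch of descriptors in one call, as the Caution
+ * note for DMA_InjFifoInjectDescriptor() recommends.  A minimal sketch
+ * assuming three prepared descriptors d0..d2:
+ *
+   \verbatim
+   DMA_InjDescriptor_t *batch[3] = { &d0, &d1, &d2 };
+   int injected = 0;
+
+   // Retry until the fifo drains enough to accept the whole batch
+   while ( injected < 3 )
+     injected += DMA_InjFifoInjectDescriptors( &inj_fifo,
+                                               3 - injected,
+                                               &batch[injected] );
+   \endverbatim
+ */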
+
+/*!
+ * \brief Get DMA Injection Fifo Group Number
+ *
+ * Get the DMA Injection Fifo Group number, given an injection fifo group
+ * structure.
+ *
+ * \param[in]  fg_ptr       Pointer to the structure previously filled in when the
+ *                          group was allocated.
+ *
+ * \return  The DMA Injection Fifo Group number
+ *
+ */
+__INLINE__ int DMA_InjFifoGetGroupNum(
+				      const DMA_InjFifoGroup_t *fg_ptr
+				     )
+{
+  SPI_assert( fg_ptr != NULL );
+
+  return fg_ptr->group_id;
+}
+
+
+/*!
+ * \brief Get the "Not Empty" Status of an Injection Fifo Group
+ *
+ * Get the "Not Empty" status of the fifos that the specified fifo group has
+ * permission to use.
+ *
+ * \param[in]  fg_ptr     Pointer to the injection fifo group structure
+ *
+ * \retval  notEmptyStatus  A 32-bit value, one bit per fifo.
+ *                          Bit i is 1 if the specified fifo group has
+ *                          permission to use fifo i and fifo i is not
+ *                          empty.
+ *                          Bit i is 0 if the specified fifo group either
+ *                          does not have permission to use fifo i, or fifo i
+ *                          is empty.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetNotEmpty(
+					   DMA_InjFifoGroup_t *fg_ptr
+					  )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  return ( fg_ptr->status_ptr->not_empty & fg_ptr->permissions );
+
+}
+
+
+/*!
+ * \brief Get the "available" Status of an Injection Fifo Group
+ *
+ * Get the "available" status of the fifos that the specified fifo group has
+ * permission to use.  "available" means that the fifo is enabled and
+ * activated.
+ *
+ * \param[in]  fg_ptr     Pointer to the injection fifo group structure
+ *
+ * \retval  availableStatus  A 32-bit value, one bit per fifo.
+ *                           Bit i is 1 if the specified fifo group has
+ *                           permission to use fifo i and fifo i is available
+ *                           Bit i is 0 if the specified fifo group either
+ *                           does not have permission to use fifo i, or fifo i
+ *                           is not available.
+ *
+ * \note Normally, there should be a 1 in every position except those that
+ *       the specified fifo group does not have permission to use.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetAvailable(
+					    DMA_InjFifoGroup_t *fg_ptr
+					   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  return ( fg_ptr->status_ptr->available & fg_ptr->permissions );
+
+}
+
+
+/*!
+ * \brief Get the "threshold crossed" Status of an Injection Fifo Group
+ *
+ * Get the "threshold crossed" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in]  fg_ptr     Pointer to the injection fifo group structure
+ *
+ * \retval  thresholdCrossedStatus  A 32-bit value, one bit per fifo.
+ *                           Bit i is 1 if the specified fifo group has
+ *                           permission to use fifo i and fifo i has crossed
+ *                           a threshold.
+ *                           Bit i is 0 if the specified fifo group either
+ *                           does not have permission to use fifo i, or fifo i
+ *                           has not crossed a threshold.
+ *
+ * \note Normally, there should be a 0 in every position.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetThresholdCrossed(
+						   DMA_InjFifoGroup_t *fg_ptr
+						  )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  return ( fg_ptr->status_ptr->threshold_crossed & fg_ptr->permissions );
+
+}
+
+
+/*!
+ * \brief Set the "clear threshold crossed" Status of an Injection Fifo Group
+ *
+ * Set the "clear threshold crossed" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in]  fg_ptr  Pointer to the injection fifo group structure
+ * \param[in]  clr     A 32-bit value, one bit per fifo.
+ *                     Only bits that the specified fifo group has
+ *                     permission to use should be set to 1.
+ *                     Set bit i to 1 to clear the threshold crossed status
+ *                     of fifo i.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetClearThresholdCrossed(
+						    DMA_InjFifoGroup_t *fg_ptr,
+						    unsigned int      clr
+						   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (clr & fg_ptr->permissions) == clr );
+
+  fg_ptr->status_ptr->clear_threshold_crossed = clr;
+
+}
+
+
+/*!
+ * \brief Get the "activated" Status of an Injection Fifo Group
+ *
+ * Get the "activated" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in]  fg_ptr     Pointer to the injection fifo group structure
+ *
+ * \retval  activatedStatus  A 32-bit value, one bit per fifo.
+ *                           Bit i is 1 if the specified fifo group has
+ *                           permission to use fifo i and fifo i is activated
+ *                           Bit i is 0 if the specified fifo group either
+ *                           does not have permission to use fifo i, or fifo i
+ *                           is not activated.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetActivated(
+					    DMA_InjFifoGroup_t *fg_ptr
+					   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  return ( fg_ptr->status_ptr->activated & fg_ptr->permissions );
+
+}
+
+
+/*!
+ * \brief Set the "activate" Status of an Injection Fifo Group
+ *
+ * Set the "activate" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in]  fg_ptr  Pointer to the injection fifo group structure
+ * \param[in]  act     A 32-bit value, one bit per fifo.
+ *                     Only bits that the specified fifo group has
+ *                     permission to use should be set to 1.
+ *                     Set bit i to 1 to activate fifo i.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetActivate(
+				       DMA_InjFifoGroup_t *fg_ptr,
+				       unsigned int        act
+				      )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (act & fg_ptr->permissions) == act );
+
+  fg_ptr->status_ptr->activate = act;
+
+}
+
+
+/*!
+ * \brief Set the "deactivate" Status of an Injection Fifo Group
+ *
+ * Set the "deactivate" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in]  fg_ptr     Pointer to the injection fifo group structure
+ * \param[in]  deact      A 32-bit value, one bit per fifo.
+ *                        Only bits that the specified fifo group has
+ *                        permission to use should be set to 1.
+ *                        Set bit i to 1 to deactivate fifo i.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetDeactivate(
+					 DMA_InjFifoGroup_t *fg_ptr,
+					 unsigned int        deact
+				      )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (deact & fg_ptr->permissions) == deact );
+
+  fg_ptr->status_ptr->deactivate = deact;
+
+}
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a fifo group and a fifo ID
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief DMA InjFifo Initialization By Id
+ *
+ * - For an allocated injection DMA fifo, initialize its start, head, tail, and
+ *   end.
+ * - Compute fifo size and free space.
+ * - Initialize descriptor count.
+ * - Activate the fifo.
+ *
+ * \param[in]  fg_ptr    Pointer to fifo group structure.
+ * \param[in]  fifo_id   Id of the fifo to be initialized
+ *                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  va_start  Virtual address of the start of the fifo.
+ * \param[in]  va_head   Virtual address of the head of the fifo (typically
+ *                       equal to va_start).
+ * \param[in]  va_end    Virtual address of the end of the fifo.
+ *
+ * \retval   0  Successful.
+ * \retval  -1  Unsuccessful.  Error checks include
+ *              - va_start < va_end
+ *              - va_start <= va_head <=
+ *                  (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *              - va_start and va_end are 32-byte aligned
+ *              - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS +
+ *                                          DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *
+ */
+__INLINE__ int   DMA_InjFifoInitById(
+				     DMA_InjFifoGroup_t *fg_ptr,
+				     int                 fifo_id,
+				     void               *va_start,
+				     void               *va_head,
+				     void               *va_end
+				    )
+{
+  int rc;
+
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 && fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( (fg_ptr->permissions & _BN(fifo_id)) != 0 );
+  SPI_assert( va_start < va_end );
+  SPI_assert( va_start <= va_head );
+  SPI_assert( ((uint32_t) va_head)  <= ( ((uint32_t) va_end) - DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES) );
+  SPI_assert( ( ( (uint32_t) va_start) & 0xFFFFFFE0)  == (uint32_t) va_start );
+  SPI_assert( ( ( (uint32_t) va_end  ) & 0xFFFFFFE0)  == (uint32_t) va_end );
+  SPI_assert( ( (unsigned)va_end - (unsigned)va_start ) >=
+	   ( (DMA_MIN_INJECT_SIZE_IN_QUADS + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) * 16 ) );
+
+  /*
+   * Initialize the fifo by invoking a system call.  This system call
+   * deactivates the fifo, initializes the start, end, head, and tail,
+   * and activates the fifo.
+   */
+
+   rc = Kernel_InjFifoInitById(
+			       (uint32_t*)fg_ptr,
+			       fifo_id,
+			       (uint32_t*)va_start,
+			       (uint32_t*)va_head,
+			       (uint32_t*) va_end
+			      );
+
+  if (rc) return rc;
+
+  /* Initialize the descriptor count */
+  fg_ptr->fifos[fifo_id].desc_count = 0;
+
+  return 0;
+}
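+
+
+/*!
+ * Example: giving an allocated fifo its storage.  A hedged sketch: the
+ * statically aligned buffer is an illustrative choice that satisfies the
+ * 32-byte alignment and minimum-size checks above, and "fg" is a group
+ * filled in by DMA_InjFifoGroupAllocate():
+ *
+   \verbatim
+   static char fifo_mem[DMA_MIN_INJ_FIFO_SIZE_IN_BYTES]
+                 __attribute__ (( aligned (32) ));
+
+   DMA_InjFifoInitById( &fg, 0,
+                        fifo_mem,                      // va_start
+                        fifo_mem,                      // va_head
+                        fifo_mem + sizeof(fifo_mem) ); // va_end
+   \endverbatim
+ */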
+
+
+/*!
+ * \brief Get DMA InjFifo Start Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA injection fifo's start pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_start  The virtual address of the start of the fifo
+ *
+ */
+__INLINE__ void * DMA_InjFifoGetStartFromShadowById(
+						    DMA_InjFifoGroup_t *fg_ptr,
+						    int                 fifo_id
+						   )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return DMA_FifoGetStartFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Head Pointer Using a Fifo Id
+ *
+ * Get a DMA injection fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_head  The virtual address of the head of the fifo.
+ *
+ */
+__INLINE__ void * DMA_InjFifoGetHeadById(
+					 DMA_InjFifoGroup_t *fg_ptr,
+					 int                 fifo_id
+					)
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return DMA_FifoGetHead( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Tail Pointer Using a Fifo Id
+ *
+ * Get a DMA injection fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_tail  The virtual address of the tail of the fifo
+ *
+ */
+__INLINE__ void *DMA_InjFifoGetTailById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return   DMA_FifoGetTail( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo End Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA injection fifo's end pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_end  The virtual address of the end of the fifo
+ *
+ */
+__INLINE__ void *DMA_InjFifoGetEndById(
+				       DMA_InjFifoGroup_t *fg_ptr,
+				       int                 fifo_id
+				      )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return   DMA_FifoGetEndFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Size Using a Fifo Id
+ *
+ * Get a DMA injection fifo's size, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  size   The size of the DMA fifo, in units of 16B quads.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoGetSizeById(
+					       DMA_InjFifoGroup_t *fg_ptr,
+					       int                 fifo_id
+					      )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return   DMA_FifoGetSize( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Free Space Using a Fifo Id
+ *
+ * Get a DMA injection fifo's free space, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  read_head  Indicates whether to read the head from the hardware
+ *                        fifo before calculating the free space.
+ *                          - 1 means to read the hardware head
+ *                          - 0 means to use the current head shadow
+ * \param[in]  read_tail  Indicates whether to read the tail from the hardware
+ *                        fifo before calculating the free space.
+ *                          - 1 means to read the hardware tail
+ *                          - 0 means to use the current tail shadow
+ *
+ * \retval  freeSpace  The amount of free space in the fifo, in units of
+ *                     16B quads.
+ *
+ */
+__INLINE__ unsigned int  DMA_InjFifoGetFreeSpaceById(
+					 DMA_InjFifoGroup_t *fg_ptr,
+					 int                 fifo_id,
+					 unsigned int        read_head,
+					 unsigned int        read_tail
+					)
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return DMA_FifoGetFreeSpace( &fg_ptr->fifos[fifo_id].dma_fifo,
+			       read_head,
+			       read_tail );
+}
+
+
+/*!
+ * \brief Set DMA InjFifo Head Pointer Using a Fifo Id
+ *
+ * Set a DMA injection fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  va_head  The virtual address of the head of the fifo.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetHeadById(
+				       DMA_InjFifoGroup_t *fg_ptr,
+				       int                 fifo_id,
+				       void               *va_head
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_InjFifoSetHead( &fg_ptr->fifos[fifo_id],
+		      va_head);
+}
+
+
+/*!
+ * \brief Set DMA InjFifo Tail Pointer Using a Fifo Id
+ *
+ * Set a DMA injection fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  va_tail  The virtual address of the tail of the fifo.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetTailById(
+				       DMA_InjFifoGroup_t *fg_ptr,
+				       int                 fifo_id,
+				       void               *va_tail
+				      )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_FifoSetTail( &fg_ptr->fifos[fifo_id].dma_fifo,
+		   va_tail);
+}
+
+
+/*!
+ * \brief Increment DMA InjFifo Tail Pointer Using a Fifo Id
+ *
+ * Increment a DMA injection fifo's tail pointer, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  incr     The number of quads (16 byte units) to increment the
+ *                      tail pointer by.
+ *
+ * \return  None
+ *
+ * \note This function does not check if there is free space in the fifo
+ *       for this many quads.  It must be preceded by a check of the
+ *       free space.
+ */
+__INLINE__ void DMA_InjFifoIncrementTailById(
+					     DMA_InjFifoGroup_t *fg_ptr,
+					     int                 fifo_id,
+					     unsigned int        incr
+					    )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_InjFifoIncrementTail( &fg_ptr->fifos[fifo_id],
+			    incr);
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Descriptor Count Using a Fifo Id
+ *
+ * Get a DMA injection fifo's descriptor count, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  desc_count  The fifo's descriptor count.
+ *
+ */
+__INLINE__ unsigned long long DMA_InjFifoGetDescriptorCountById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return  DMA_InjFifoGetDescriptorCount( &fg_ptr->fifos[fifo_id] );
+}
+
+
+/*!
+ * \brief Is DMA Descriptor Done Using a Fifo Id
+ *
+ * Return whether a specified descriptor is still in the specified injection
+ * fifo (not done).  The descriptor is identified by the descriptor count
+ * immediately after the descriptor was injected into the fifo (returned by
+ * DMA_InjFifoIncrementTail()).
+ *
+ * \param[in]  fg_ptr      Pointer to the fifo group structure
+ * \param[in]  fifo_id     Id of the fifo within the group
+ *                         (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  desc_count  The descriptor count immediately after the
+ *                         descriptor in question was injected into
+ *                         the fifo.
+ * \param[in]  update      0  Do not update the fifo's shadow information.
+ *                         1  Update the fifo's shadow information.
+ *                         It is a performance optimization to only update the
+ *                         shadow information once for a group of descriptors
+ *                         being processed.
+ *
+ * \retval  0  False.  The descriptor identified by desc_count is not done.
+ *                     It is still in the fifo.
+ * \retval  1  True.   The descriptor identified by desc_count is done.
+ *                     It is no longer in the fifo.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoIsDescriptorDoneById(
+					    DMA_InjFifoGroup_t *fg_ptr,
+					    int                 fifo_id,
+					    unsigned long long  desc_count,
+					    unsigned int        update
+					   )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return(DMA_InjFifoIsDescriptorDone( &fg_ptr->fifos[fifo_id],
+				      desc_count,
+				      update ) );
+
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Reserve Descriptor Storage Using a Fifo Id
+ *
+ * Reserve storage in a DMA injection fifo for a remote get descriptor, given
+ * a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0   Successful.  There was enough space in the fifo and the
+ *              storage was reserved.
+ * \retval -1   Unsuccessful.  There was not enough space in the fifo.
+ *
+ * \note Internally, this increments the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ int DMA_InjFifoReserveDescriptorStorageById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return ( DMA_InjFifoReserveDescriptorStorage( &fg_ptr->fifos[fifo_id] ) );
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Free Descriptor Storage Reservation Using a Fifo Id
+ *
+ * Free a reservation for storage for a remote get descriptor in a DMA injection
+ * fifo, previously reserved using DMA_InjFifoReserveDescriptorStorageById().
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  None
+ *
+ * \note Internally, this decrements the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ void DMA_InjFifoFreeDescriptorStorageReservationById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+						)
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_InjFifoFreeDescriptorStorageReservation( &fg_ptr->fifos[fifo_id] );
+  return;
+}
+
+
+/*!
+ * \brief Check If An Injection Fifo Has Space For Injection Using a Fifo Id
+ *
+ * Check if an injection fifo has enough space for a single descriptor to be
+ * injected, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  hasSpace  An indicator of whether the fifo has space for a
+ *                    descriptor.
+ *                    - 0 (false) means the fifo is full.
+ *                    - 1 (true)  means the fifo has space.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoHasSpaceById(
+						DMA_InjFifoGroup_t    *fg_ptr,
+						int                    fifo_id
+					       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return  DMA_InjFifoHasSpace( &fg_ptr->fifos[fifo_id] );
+}
+
+
+/*!
+ * \brief Inject a Descriptor into a DMA Injection Fifo Without Checking for
+ *        Space Using a Fifo Id
+ *
+ * Inject a descriptor into a DMA injection fifo, given a fifo group and
+ * fifo id, without checking to see if there is enough space in the fifo.
+ * It is assumed that the caller has already checked for enough space using
+ * the DMA_InjFifoHasSpaceById() function.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  desc   A pointer to the descriptor to be injected.
+ *                    Must be 16-byte aligned.
+ *
+ * \retval  numDescInjected  The number of descriptors injected.
+ *                           - 1 means it was successfully injected.
+ *
+ * \see DMA_InjFifoHasSpaceById()
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorNoSpaceCheckById(
+					DMA_InjFifoGroup_t    *fg_ptr,
+					int                    fifo_id,
+					DMA_InjDescriptor_t   *desc
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return  DMA_InjFifoInjectDescriptorNoSpaceCheck( &fg_ptr->fifos[fifo_id],
+						   desc );
+}
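+
+
+/*!
+ * \brief Example: Check-Then-Inject Pattern (illustrative)
+ *
+ * A minimal sketch, not part of the original interface, of the usage that
+ * the function above assumes: check for space first, then inject without a
+ * second space check.  The function name is hypothetical.
+ *
+ * \retval  1  The descriptor was injected.
+ * \retval  0  The fifo was full.  Nothing was injected.
+ */
+__INLINE__ int DMA_InjFifoExampleTryInject(
+					DMA_InjFifoGroup_t    *fg_ptr,
+					int                    fifo_id,
+					DMA_InjDescriptor_t   *desc
+				       )
+{
+  if ( !DMA_InjFifoHasSpaceById( fg_ptr, fifo_id ) )
+    return 0;  /* The fifo is full */
+
+  return DMA_InjFifoInjectDescriptorNoSpaceCheckById( fg_ptr,
+						      fifo_id,
+						      desc );
+}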
+
+
+/*!
+ * \brief Inject Descriptor into a DMA InjFifo Using a Fifo Id
+ *
+ * Inject a descriptor into a DMA injection fifo, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  desc     Pointer to the descriptor to be injected into the fifo.
+ *
+ * \retval  numDescInjected  The number of descriptors injected.
+ *                           - 0 means it was not injected, most likely because
+ *                             the fifo is full.
+ *                           - 1 means it was successfully injected
+ *
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorById(
+					DMA_InjFifoGroup_t    *fg_ptr,
+					int                    fifo_id,
+					DMA_InjDescriptor_t   *desc
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return  DMA_InjFifoInjectDescriptor( &fg_ptr->fifos[fifo_id],
+				       desc );
+}
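+
+
+/*!
+ * \brief Example: Inject a Descriptor and Poll for Completion (illustrative)
+ *
+ * A minimal sketch, not part of the original interface, of the
+ * descriptor-count protocol described for DMA_InjFifoIsDescriptorDoneById():
+ * inject, capture the descriptor count, and poll until the descriptor has
+ * left the fifo.  The function name is hypothetical, and a real caller
+ * would likely do useful work instead of spinning.
+ */
+__INLINE__ void DMA_InjFifoExampleInjectAndWait(
+					DMA_InjFifoGroup_t    *fg_ptr,
+					int                    fifo_id,
+					DMA_InjDescriptor_t   *desc
+				       )
+{
+  unsigned long long desc_count;
+
+  /* Retry until the fifo has space and the descriptor is injected */
+  while ( DMA_InjFifoInjectDescriptorById( fg_ptr, fifo_id, desc ) == 0 );
+
+  /* The count immediately after injection identifies this descriptor */
+  desc_count = DMA_InjFifoGetDescriptorCountById( fg_ptr, fifo_id );
+
+  /* Poll, updating the shadow (update=1), until the descriptor is done */
+  while ( DMA_InjFifoIsDescriptorDoneById( fg_ptr, fifo_id,
+					   desc_count, 1 ) == 0 );
+}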
+
+
+/*!
+ * \brief Inject Multiple Descriptors into a DMA InjFifo Using a Fifo Id
+ *
+ * Inject multiple descriptors into a DMA injection fifo, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr    Pointer to the fifo group structure
+ * \param[in]  fifo_id   Id of the fifo within the group
+ *                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  num_desc  Number of descriptors to be injected
+ * \param[in]  desc      Pointer to an array of pointers to the descriptors to
+ *                       be injected into the fifo.
+ *
+ * \retval  numDescInjected  The number of descriptors injected.
+ *                           - less than num_desc means that some were not
+ *                             injected, most likely because the fifo is full.
+ *                           - equal to num_desc means that all were
+ *                             successfully injected.
+ *
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorsById(
+					DMA_InjFifoGroup_t     *fg_ptr,
+                                        int                     fifo_id,
+                                        int                     num_desc,
+                                        DMA_InjDescriptor_t   **desc
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return  DMA_InjFifoInjectDescriptors ( &fg_ptr->fifos[fifo_id],
+					 num_desc,
+					 desc );
+}
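+
+
+/*!
+ * \brief Example: Remote Get With Storage Reservation (illustrative)
+ *
+ * A minimal sketch, not part of the original interface, pairing
+ * DMA_InjFifoReserveDescriptorStorageById() with its matching free, as
+ * described earlier.  The function name, and the choice to inject the
+ * remote get descriptor into the same fifo, are hypothetical.
+ *
+ * \retval  1  The remote get descriptor was injected.
+ * \retval  0  No reservation or no space.  Nothing was injected.
+ */
+__INLINE__ int DMA_InjFifoExampleTryRemoteGet(
+					DMA_InjFifoGroup_t    *fg_ptr,
+					int                    fifo_id,
+					DMA_InjDescriptor_t   *rget_desc
+				       )
+{
+  /* Reserve storage for the remote get descriptor */
+  if ( DMA_InjFifoReserveDescriptorStorageById( fg_ptr, fifo_id ) != 0 )
+    return 0;  /* Not enough space to reserve */
+
+  if ( DMA_InjFifoInjectDescriptorById( fg_ptr, fifo_id, rget_desc ) == 1 )
+    return 1;
+
+  /* Injection failed; release the reservation taken above */
+  DMA_InjFifoFreeDescriptorStorageReservationById( fg_ptr, fifo_id );
+  return 0;
+}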
+
+
+/*!
+ * \brief Get DMA InjFifo Not Empty Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's not empty status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  32-bit status.  status bit "fifo_id" is 1 if the
+ *          fifo is not empty, 0 if empty.  That is, the return value
+ *          is 0 if empty, non-zero if not empty.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetNotEmptyById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return ( DMA_InjFifoGetNotEmpty( fg_ptr ) & _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Available Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's available status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  32-bit status.  status bit fifo_id is 1 if the
+ *          fifo is available, 0 if not.
+ *
+ * \note  "available" status means the fifo is enabled and activated.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetAvailableById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return ( DMA_InjFifoGetAvailable( fg_ptr ) & _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Threshold Crossed Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's threshold crossed status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  32-bit status.  status bit fifo_id is 1 if the
+ *          fifo's threshold has been crossed, 0 if not.
+ *
+ * \note  This will always be zero.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetThresholdCrossedById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return ( DMA_InjFifoGetThresholdCrossed( fg_ptr ) & _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Clear DMA InjFifo Threshold Crossed Status Using a Fifo Id
+ *
+ * Clear a DMA injection fifo's threshold crossed status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetClearThresholdCrossedById(
+					DMA_InjFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_InjFifoSetClearThresholdCrossed( fg_ptr,
+				       _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Activated Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's activated status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  32-bit status.  status bit fifo_id is 1 if the
+ *          fifo is activated, 0 if not.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetActivatedById(
+				DMA_InjFifoGroup_t *fg_ptr,
+				int                 fifo_id
+			       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  return ( DMA_InjFifoGetActivated( fg_ptr ) & _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Activate DMA InjFifo Using a Fifo Id
+ *
+ * Activate a DMA injection fifo, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetActivateById(
+				DMA_InjFifoGroup_t *fg_ptr,
+				int                 fifo_id
+			       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_InjFifoSetActivate( fg_ptr,
+			  _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Deactivate DMA InjFifo Using a Fifo Id
+ *
+ * Deactivate a DMA injection fifo, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetDeactivateById(
+					     DMA_InjFifoGroup_t *fg_ptr,
+					     int                 fifo_id
+					    )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  DMA_InjFifoSetDeactivate( fg_ptr,
+			    _BN(fifo_id) );
+}
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Packet.h b/arch/powerpc/include/spi/DMA_Packet.h
new file mode 100644
index 0000000..f7649c3
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Packet.h
@@ -0,0 +1,347 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef	_DMA_PACKET_H_ /* Prevent multiple inclusion */
+#define	_DMA_PACKET_H_
+
+
+/*!
+ * \file spi/DMA_Packet.h
+ *
+ * \brief DMA SPI Packet Definitions
+ *
+ */
+
+
+#include <common/namespace.h>
+
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the X plus direction.
+ */
+#define DMA_PACKET_HINT_XP  (0x20)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the X minus direction.
+ */
+#define DMA_PACKET_HINT_XM  (0x10)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Y plus direction.
+ */
+#define DMA_PACKET_HINT_YP  (0x08)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Y minus direction.
+ */
+#define DMA_PACKET_HINT_YM  (0x04)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Z plus direction.
+ */
+#define DMA_PACKET_HINT_ZP  (0x02)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Z minus direction.
+ */
+#define DMA_PACKET_HINT_ZM  (0x01)
+
+
+/*!
+ * \brief Virtual Channel Bits:  Dynamic 0.
+ */
+#define DMA_PACKET_VC_D0 (0)
+
+
+/*!
+ * \brief Virtual Channel Bits:  Dynamic 1.
+ */
+#define DMA_PACKET_VC_D1 (1)
+
+
+/*!
+ * \brief Virtual Channel Bits:  Deterministic Bubble Normal.
+ */
+#define DMA_PACKET_VC_BN (2)
+
+
+/*!
+ * \brief Virtual Channel Bits:  Deterministic Bubble Priority.
+ */
+#define DMA_PACKET_VC_BP (3)
+
+
+/*!
+ * \brief Dynamic Routing Bit:  Follows deterministic Routing.
+ */
+#define DMA_PACKET_DETERMINSTIC (0)
+
+
+/*!
+ * \brief Dynamic Routing Bit:  Follows dynamic Routing.
+ */
+#define DMA_PACKET_DYNAMIC      (1)
+
+
+/*!
+ * \brief Torus Hardware Packet Header Constants for Routing: Point to Point.
+ */
+#define DMA_PACKET_POINT2POINT  (0)
+
+
+/*!
+ * \brief Torus Hardware Packet Header Constants for Routing: Class.
+ */
+#define DMA_PACKET_CLASS        (1)
+
+
+/*!
+ * \brief Torus DMA Hardware Packet Header.
+ *
+ * There are two sections of the packet header:  The hardware header
+ * and the software header.
+ *
+ * The same 8-byte hardware header as was used on Blue Gene/L is used
+ * for Blue Gene/P, except that two bits that were previously unused
+ * will be used as follows:
+ *
+ * - The Pid bit on Blue Gene/L indicates the logical destination group.
+ *   This determines the reception fifo group a packet ends up in.
+ *   This bit is now called Pid0.  The new Pid1 bit expands the logical
+ *   destination group from two to four.  This corresponds to the increase
+ *   in cores from two to four.
+ *
+ * - The new Dm bit indicates the DMA mode:  Memory fifo or direct.
+ *   In memory fifo mode, the DMA receives packets from the torus fifos into
+ *   reception fifos located in memory.  Then the core copies the payload
+ *   from the memory fifo to its final destination.  In direct mode, the DMA
+ *   moves the packet payload directly from the torus fifos to its final
+ *   destination.
+ *
+ * The 8-byte software header was used by the software on Blue Gene/L for
+ * its own purposes.  On Blue Gene/P, parts of it are used by the DMA,
+ * depending on the type of DMA transfer being used.  The usage of the fields
+ * in the software header is as follows for the typical types of DMA transfers:
+ *
+ * - In memory fifo mode,
+ *   - The first 4 bytes of the software header contain the "put offset".
+ *     This is the offset from the injection counter's base address, in bytes,
+ *     of the memory being transferred in this packet.
+ *   - The last 4 bytes of the software header is for use by software.
+ *
+ * - In direct put mode,
+ *   - The first 4 bytes of the software header contain the "put offset".
+ *     This is the offset from the reception counter's base address, in bytes,
+ *     of the memory where the payload in this packet is placed.
+ *   - The fifth byte of the software header is the reception counter Id.
+ *   - The sixth byte of the software header is the number of valid bytes of
+ *     payload in this packet.
+ *   - The seventh byte of the software header contains DMA flags. Specifically,
+ *     the remote-get flag is 0.
+ *   - The last byte of the software header is for use by software.
+ *
+ * - In remote get mode, the payload contains one or more injection descriptors
+ *   describing data to be transferred by the DMA.  When the DMA receives this
+ *   packet, it injects the descriptors into injection fifos to perform the
+ *   specified data transfer.
+ *   - The first 5 bytes of the software header are for use by software.
+ *   - The sixth byte of the software header is the number of valid bytes of
+ *     payload in this packet.  This will be a multiple of 32, since the payload
+ *     consists of one or more 32 byte DMA descriptors.
+ *   - The seventh byte of the software header contains DMA flags. Specifically,
+ *     the remote-get flag is 1.
+ *   - The eighth byte of the software header is the injection fifo Id where
+ *     the descriptors in the payload will be injected.
+ *
+ */
+typedef struct DMA_PacketHeader_t
+{
+  union {
+      unsigned word0;             /*!< First 4 bytes of packet header.        */
+
+      struct {
+	unsigned CSum_Skip : 7;   /*!< Number of 2 byte units to skip from
+				       the top of a packet before including
+                                       the packet bytes into the running
+                                       checksum of the torus injection fifo
+                                       where this packet is injected.
+				  */
+
+	  unsigned Sk        : 1; /*!< Torus injection checksum skip packet
+	                               bit.
+	                               - 0 includes the packet (excluding the
+                                         portion designated by DMA_CSUM_SKIP)
+                                         in the checksum.
+                                       - 1 excludes the entire packet from
+                                         the checksum.
+				  */
+
+	  unsigned Hint      : 6; /*!< Hint bits for torus routing (6 bits).
+	                               Each bit corresponds to x+, x-, y+, y-,
+	                               z+, z-.  If a bit is set, it indicates
+                                       that the packet wants to travel along
+                                       the corresponding direction.  If all
+                                       bits are zero, the hardware calculates
+                                       the hint bits.  x+ and x- cannot both
+                                       be set at the same time; the same
+                                       applies to y and z.
+				  */
+
+	  unsigned Dp        : 1; /*!< Deposit Bit for Class Routed MultiCast.
+	                               If this bit is set to 1, then as the
+                                       packet travels along a straight line
+                                       to its destination, it also deposits
+	                               a copy of itself into each node as it
+                                       goes through.  This feature must be
+                                       used only if the packet is set to
+	                               travel along a straight line.
+				  */
+
+	  unsigned Pid0      : 1; /*!< Destination Fifo Group Most Significant
+                                       Bit.  (Pid0,Pid1) specifies which of 4
+                                       reception fifo groups that this packet
+                                       is destined for.
+				  */
+
+	  unsigned Chunks    : 3; /*!< Size in Chunks of 32B (0 for 1 chunk,
+                                       ... , 7 for 8 chunks).
+				  */
+
+          unsigned Pid1      : 1; /*!< Destination Fifo Group Least
+                                       significant bit.  Refer to Pid0.
+				  */
+
+  	  unsigned Dm        : 1; /*!< 1=DMA Mode, 0=Fifo Mode.               */
+
+	  unsigned Dynamic   : 1; /*!< 1=Dynamic Routing,
+				       0=Deterministic Routing.
+				  */
+
+	  unsigned VC        : 2; /*!< Virtual Channel
+        	                       - 0=Dynamic 0
+                                       - 1=Dynamic 1
+                                       - 2=Deterministic Bubble Normal
+                                       - 3=Deterministic Bubble Priority
+				  */
+
+	  unsigned X         : 8; /*!< Destination X Physical Coordinate.     */
+
+      }; /* End of individual fields in Word 0 */
+
+  }; /* End of Word 0 */
+
+
+  union {
+
+        unsigned word1;           /*!< Second 4 bytes of packet header.       */
+
+        struct {
+	  unsigned Y         : 8; /*!< Destination Y Physical Coordinate.     */
+
+	  unsigned Z         : 8; /*!< Destination Z Physical Coordinate.     */
+
+	  unsigned Resvd0    : 8; /*!< Reserved (pkt crc).                    */
+
+	  unsigned Resvd1    : 8; /*!< Reserved (pkt crc).                    */
+
+        }; /* End of individual fields in Word 1 */
+
+  }; /* End of Word 1 */
+
+
+  union {
+
+    unsigned word2;               /*!< Third 4 bytes of packet header.        */
+
+    unsigned Put_Offset;          /*!< For a memory fifo packet, gives a
+                                       unique ID to each packet in a long
+                                       message.  Derived from the put offset
+                                       of the torus packet header in the
+                                       descriptor, and updated for each
+                                       packet.
+                                       For a direct-put packet, the rDMA
+                                       writes the first payload byte to this
+                                       offset plus the base address
+                                       corresponding to the rDMA counter ID.
+				  */
+
+    unsigned Single_Packet_Parameter; /*!< For a single memory fifo packet,
+					   this is essentially unused space
+					   that can be used to pass a
+					   parameter to the target node.
+				      */
+  }; /* End of Word 2 */
+
+
+  union {
+
+      unsigned word3;             /*!< Fourth 4 bytes of packet header.       */
+
+      struct {
+	unsigned rDMA_Counter  : 8; /*!< For a direct-put packet, this is the
+                                         number of the rDMA counter associated
+	                                 with this packet.
+				    */
+
+	unsigned Payload_Bytes : 8; /*!< For a direct-put packet, this is the
+                                         number of valid bytes in the payload.
+                                         This is set by the iDMA, based on the
+                 	                 message length in the injection
+                                         descriptor.
+				    */
+
+	unsigned Flags         : 8; /*!< Flags[6]=Pacing, Flags[7]=Remote-Get.*/
+
+	unsigned iDMA_Fifo_ID  : 8; /*!< For a remote-get packet, this is the
+	                                 iDMA fifo ID to be injected during
+	                                 remote-get processing.
+				    */
+      };
+
+      struct {                      /*   For memory fifo packets...           */
+
+	unsigned SW_Arg   : 24;     /*!< User-defined.                        */
+
+	unsigned Func_Id  : 8 ;     /*!< Function ID for dispatching receiver
+                                         functions from Polling reception
+	                                 fifos.
+				    */
+      };
+
+  }; /* End of Word 3 */
+
+}
+ALIGN_QUADWORD DMA_PacketHeader_t;
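+
+
+/*!
+ * \brief Example: Filling In a Memory Fifo Packet Header (illustrative)
+ *
+ * A minimal sketch, not part of the original interface, showing how the
+ * fields described above might be set for a memory fifo packet.  The
+ * destination coordinates, fifo group, and function ID are hypothetical
+ * values; in practice the header is built as part of an injection
+ * descriptor.
+ */
+static inline void DMA_PacketHeaderExampleInit( DMA_PacketHeader_t *hdr )
+{
+  hdr->word0 = 0;                    /* Also clears CSum_Skip and Sk        */
+  hdr->word1 = 0;
+
+  hdr->Hint    = 0;                  /* Let hardware compute hint bits      */
+  hdr->Dp      = 0;                  /* No deposit; point-to-point          */
+  hdr->Pid0    = 0;                  /* (Pid0,Pid1) = (0,1):                */
+  hdr->Pid1    = 1;                  /*   reception fifo group 1            */
+  hdr->Chunks  = 7;                  /* 8 chunks = 256 byte packet          */
+  hdr->Dm      = 0;                  /* Memory fifo mode                    */
+  hdr->Dynamic = DMA_PACKET_DYNAMIC; /* Dynamic routing ...                 */
+  hdr->VC      = DMA_PACKET_VC_D0;   /* ... on dynamic virtual channel 0    */
+  hdr->X       = 1;                  /* Destination coordinates (1,2,3)     */
+  hdr->Y       = 2;
+  hdr->Z       = 3;
+
+  hdr->Put_Offset = 0;               /* Offset of this packet's data        */
+  hdr->SW_Arg     = 0;               /* User-defined                        */
+  hdr->Func_Id    = 0;               /* Receive function dispatch ID        */
+}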
+
+
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_RecFifo.h b/arch/powerpc/include/spi/DMA_RecFifo.h
new file mode 100644
index 0000000..20c8b34
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_RecFifo.h
@@ -0,0 +1,1810 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _DMA_RECFIFO_H_ /* Prevent multiple inclusion */
+#define _DMA_RECFIFO_H_
+
+
+/*!
+ * \file spi/DMA_RecFifo.h
+ *
+ * \brief DMA SPI Reception Fifo Definitions and Inline Functions
+ *
+ * There are
+ * - 6 normal-priority torus hardware fifos
+ * - 1 high-priority torus hardware fifo
+ * - 1 local memcpy reception fifo
+ * If we assigned a reception fifo to each, there would be 8.  These are called
+ * "normal reception fifos".
+ *
+ * There is one reception fifo used to store packet headers only (for debug).
+ * This is called the "header reception fifo".
+ *
+ * The hardware packet header's (Pid0, Pid1) bits specify up to four processors.
+ * There is one set of "normal" and one "header" reception fifos per processor,
+ * called a "reception fifo group".  Thus, there are 4 groups.
+ *
+ */
+
+
+#include <common/namespace.h>
+/* #include <common/bgp_ras.h> */
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ *   placed in the translation unit (e.g., xxx.c) for use within that unit.
+ *   The function is not externalized for use by another unit...we want this
+ *   so we don't end up with multiple units exporting the same function,
+ *   which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+   \verbatim
+   #ifndef INLINE
+   # define INLINE extern inline
+   #endif
+   INLINE int max(int a, int b) {
+     return a > b ? a : b;
+   }
+   \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+   \verbatim
+   #define INLINE
+   #include "header.h"
+   \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+
+#include <spi/DMA_Fifo.h>
+#include <spi/DMA_Assert.h>
+#include <spi/DMA_Packet.h>
+
+
+
+
+/*!
+ * \brief Number of Normal (non-header) Reception Fifos Per Group
+ *
+ * These will have fifo IDs 0 through 7 in a group.
+ *
+ */
+#define  DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP 8
+
+
+/*!
+ * \brief Number of Header Reception Fifos Per Group
+ *
+ */
+#define  DMA_NUM_HEADER_REC_FIFOS_PER_GROUP 1
+
+
+/*!
+ * \brief Fifo ID of the Header Reception Fifo in a group.
+ *
+ * This will be fifo ID 8 in a group.
+ */
+#define  DMA_HEADER_REC_FIFO_ID  (DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief Number of Reception Fifos Per Group
+ *
+ * 8 Normal + 1 Header
+ */
+#define  DMA_NUM_REC_FIFOS_PER_GROUP (DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP + \
+                                      DMA_NUM_HEADER_REC_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief Number of Reception Fifo Groups
+ *
+ * One group for each processor, identified by (Pid0,Pid1) in the packet header.
+ */
+#define  DMA_NUM_REC_FIFO_GROUPS 4
+
+
+/*!
+ * \brief Total Number of Normal Reception Fifos
+ */
+#define  DMA_NUM_NORMAL_REC_FIFOS (DMA_NUM_REC_FIFO_GROUPS * \
+                                   DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP)
+
+/*!
+ * \brief Total Number of Header Reception Fifos
+ */
+#define  DMA_NUM_HEADER_REC_FIFOS (DMA_NUM_REC_FIFO_GROUPS * \
+                                   DMA_NUM_HEADER_REC_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief The maximum number of packets that can be processed by a polling
+ *        function before it must update the fifo's hardware head.  The
+ *        polling function can keep track of the head in the va_head shadow
+ *        and only update the hardware head when this limit is reached to
+ *        reduce overhead.
+ */
+#define  DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD 32
+
+
+/*!
+ * \brief Minimum Reception Fifo Size in bytes
+ *
+ * It is important that this size be enough to hold more packets than
+ * DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD.  Otherwise, the polling
+ * function may deadlock with the DMA (the DMA considers the fifo full,
+ * but we have actually processed all of the packets).
+ * Additionally, we add 512 bytes to this, since the DMA will only fill
+ *
+ */
+#define DMA_MIN_REC_FIFO_SIZE_IN_BYTES (512 + (256 * DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD))
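+/* A note on the arithmetic: with the values above this evaluates to
+ * 512 + (256 * 32) = 8704 bytes, i.e., room for 32 maximum-size (256 byte)
+ * packets plus the 512 byte pad.
+ */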
+
+/*!
+ * \brief DMA Reception Fifo Map Structure
+ *
+ * This structure defines the basic reception fifo configuration.
+ * It is common across all reception fifo groups.
+ *
+ * Example 1:
+ * In SMP mode you might have only two reception fifos:
+ * - One for normal-priority torus and local transfers, and
+ * - one for high-priority torus transfers.
+ * In this case, only one fifo group would be needed.
+ *
+ * Example 2:
+ * In virtual node mode, you might have two reception fifos per group (as
+ * described in Example 1), and 4 groups, one for each processor.
+ * All packets with the same (pid0,pid1) bits use the same group.
+*/
+typedef struct DMA_RecFifoMap_t
+{
+  unsigned short int save_headers; /*!< Flag that specifies whether header
+                                        fifos will be used to store packet
+                                        headers.
+                                        - 0 => Do not store headers
+                                        - 1 => Store headers (debug mode)     */
+
+  unsigned int fifo_types[ DMA_NUM_NORMAL_REC_FIFOS ]; /*!< The type of each
+                                        normal rec fifo.  If entry i is
+                                        - 0, rec fifo i is type 0
+                                        - 1, rec fifo i is type 1
+
+                                        For type i fifo, threshold interrupt
+                                        fires if fifo free space <=
+                                        threshold[i], in units of 16B quads.  */
+
+  unsigned int hdr_fifo_types[ DMA_NUM_HEADER_REC_FIFOS ]; /*!< The type of
+                                        each header reception fifo.  If entry
+                                        i is
+                                        - 0, header rec fifo i is type 0
+                                        - 1, header rec fifo i is type 1
+
+                                        For type i fifo, threshold interrupt
+                                        fires if fifo free space <=
+                                        threshold[i], in units of 16B quads.  */
+
+  unsigned int threshold[2];       /*!< For type i fifo, threshold interrupt
+                                        fires if fifo free space <=
+                                        threshold[i], in units of 16B quads.  */
+
+  unsigned char ts_rec_map[4][8];  /*!< Torus Reception Map.
+              This array contains the rec fifo ID into which torus
+              packets are deposited that originate from
+              - each hardware torus fifo (x+,x-,y+,y-,z+,z-) (0 through 5)
+              - high-priority hardware torus fifo (6)
+              - a local transfer (7)
+
+              for each group (0 through 3).
+
+              For ts_rec_map[i][j],
+              i is the rec fifo group ID, as defined by (pid0,pid1) pair.
+              j identifies the hardware torus fifo (0-5 for
+              x+,x-,y+,y-,z+,z- respectively), high-priority
+              torus fifo (6), and local transfer (7).
+              The value in each array element must be a global fifo ID
+              (between 0 and DMA_NUM_NORMAL_REC_FIFOS-1).  That is, the value
+              identifies the normal rec fifo that will receive packets
+              originating at i,j.
+              Note that the global fifo ID must be 0-7 for group 0,
+              8-15 for group 1, 16-23 for group 2, and 24-31 for group 3.
+
+              This affects DCRS 0xd60 to 0xd67 as defined by the following
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_XY  (_BGP_DCR_DMA+0x60)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_ZHL (_BGP_DCR_DMA+0x61)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_XY  (_BGP_DCR_DMA+0x62)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_ZHL (_BGP_DCR_DMA+0x63)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_XY  (_BGP_DCR_DMA+0x64)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_ZHL (_BGP_DCR_DMA+0x65)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_XY  (_BGP_DCR_DMA+0x66)
+              - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_ZHL (_BGP_DCR_DMA+0x67)
+
+              e.g., for (pid0,pid1) = (0,1)
+              - ts_rec_map[1][0] = fifo id for torus x+ receiver packets
+              - ts_rec_map[1][1] = fifo id for torus x- receiver packets
+              - ts_rec_map[1][2] = fifo id for torus y+ receiver packets
+              - ts_rec_map[1][3] = fifo id for torus y- receiver packets
+              - ts_rec_map[1][4] = fifo id for torus z+ receiver packets
+              - ts_rec_map[1][5] = fifo id for torus z- receiver packets
+              - ts_rec_map[1][6] = fifo id for torus high priority packets
+              - ts_rec_map[1][7] = fifo id for local transfer packets         */
+
+} DMA_RecFifoMap_t;
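+
+
+/*!
+ * \brief Example: Building an SMP-Mode Reception Fifo Map (illustrative)
+ *
+ * A minimal sketch, not part of the original interface, filling in a
+ * DMA_RecFifoMap_t along the lines of Example 1 above:  in each group,
+ * normal-priority torus and local packets feed local fifo 0, and
+ * high-priority packets feed local fifo 1.  The function name and the
+ * threshold values are hypothetical.
+ */
+__INLINE__ void DMA_RecFifoMapExampleInit( DMA_RecFifoMap_t *map )
+{
+  int g, j;
+
+  map->save_headers = 0;             /* Do not store headers (non-debug)    */
+  map->threshold[0] = 64;            /* Hypothetical type 0 threshold       */
+  map->threshold[1] = 8;             /* Hypothetical type 1 threshold       */
+
+  /* Default all fifos to type 0 */
+  for ( j = 0; j < DMA_NUM_NORMAL_REC_FIFOS; j++ )
+    map->fifo_types[j] = 0;
+
+  for ( j = 0; j < DMA_NUM_HEADER_REC_FIFOS; j++ )
+    map->hdr_fifo_types[j] = 0;
+
+  for ( g = 0; g < DMA_NUM_REC_FIFO_GROUPS; g++ )
+    {
+      map->fifo_types[ (8*g) + 1 ] = 1;  /* High-priority fifo is type 1    */
+
+      /* Global fifo IDs for group g must lie in the range 8g to 8g+7:
+       * receivers 0-5 (torus) and 7 (local) feed local fifo 0, and
+       * receiver 6 (high priority) feeds local fifo 1.
+       */
+      for ( j = 0; j < 8; j++ )
+        map->ts_rec_map[g][j] = (8*g) + ( (j == 6) ? 1 : 0 );
+    }
+}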
+
+
+/*!
+ * \brief DMA Reception Fifo Status Structure
+ *
+ * Defines the DMA SRAM Status Area for Reception Fifos
+ *
+ */
+typedef struct DMA_RecFifoStatus_t
+{
+  volatile unsigned  not_empty[2]; /*!< R bit mask, 1 bit/FIFO:
+                                        Reception FIFO not empty status.
+                                        Not_empty[0] contains 1 bit for each
+                                        of the 32 normal fifos.
+                                        Each bit corresponds to a
+                                        global fifo ID.
+                                        Not_empty[1] :
+                                        - bit 7  for group 0 header fifo,
+                                        - bit 15 for group 1 header fifo,
+                                        - bit 23 for group 2 header fifo,
+                                        - bit 31 for group 3 header fifo.     */
+
+  volatile unsigned  available[2]; /*!< R bitmask, 1 bit/FIFO:
+                                        Reception FIFO available status.
+                                        Bits are as above for available[0]
+                                        and available[1].                     */
+
+  volatile unsigned  threshold_crossed[2]; /*!< R bitmask, 1 bit/FIFO:
+                                                Threshold crossed status.
+                                                Bits are as above for
+                                                threshold_crossed[0] and
+                                                threshold_crossed[1].        */
+
+  volatile unsigned  clear_threshold_crossed[2]; /*!< W bitmask, 1 bit/FIFO:
+                                        Clear Threshold Crossed Status.
+                                        Bits are as above for
+                                        clear_threshold_crossed[0] and
+                                        clear_threshold_crossed[1].           */
+}
+DMA_RecFifoStatus_t;
+
+
+/*!
+ * \brief Returns the word number that the specified reception fifo is in
+ *
+ * \param[in]  global_rec_fifo_id  The global ID of the reception fifo
+ *                                 (0 to DMA_NUM_REC_FIFOS-1).
+ *
+ * \return The number of the word that the specified fifo is in (0 or 1).
+ *
+ * Used as an index in the "not_empty", "available", "threshold_crossed", and
+ * "clear_threshold_crossed" fields of the DMA_RecFifoStatus_t structure.
+ *
+ */
+#define DMA_REC_FIFO_GROUP_WORD_ID(global_rec_fifo_id) \
+                                      ( ((global_rec_fifo_id)>>5) & _BN(31) )
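+/* For example, global IDs 0-31 (the normal fifos) map to word 0, and
+ * global IDs 32-35 (the header fifos) map to word 1.
+ */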
+
+
+/*!
+ * \brief Reception DMA Fifo Structure
+ *
+ * This structure contains a software DMA fifo structure (defined in DMA_Fifo.h)
+ * and other fields that are specific to a reception fifo used by software.
+ */
+typedef struct DMA_RecFifo_t
+{
+  DMA_Fifo_t         dma_fifo;       /*!< Common software fifo structure      */
+
+  unsigned char      global_fifo_id; /*!< Global fifo ID:
+                                          - 0 to DMA_NUM_NORMAL_REC_FIFOS-1
+                                            for normal fifos,
+                                          - 32-35 for header fifos.           */
+  /*!
+   * \note The following field contains info about the fifo that reflects the
+   *       DCR values configuring the fifo.
+   */
+
+  unsigned char type;                /*!< 0 or 1 for type of fifo.            */
+
+  /*!
+   * \note The following field is used by the reception fifo polling functions.
+   *       It counts the number of packets processed since the fifo's hardware
+   *       head was last updated.  When DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD
+   *       is reached, the hardware head is moved and this counter is reset
+   *       to zero.  This helps to minimize the number of times the hardware
+   *       head is updated, which can be an expensive operation.
+   */
+  unsigned int  num_packets_processed_since_moving_fifo_head; /*!< Tracks when
+								to move the
+								fifo head.    */
+}
+DMA_RecFifo_t;
+
+
+/*!
+ * \brief DMA Reception Fifo Group Structure
+ *
+ * This structure defines a DMA Reception Fifo Group.  It points to a
+ * Reception Fifo Status structure, and contains DMA_NUM_REC_FIFOS_PER_GROUP
+ * Reception Fifo structures.
+ *
+ * It is returned from the DMA_RecFifoGetFifoGroup system call wrapper function
+ * defined in this header file.  This same structure must be used by all users
+ * of reception fifos in this group because the fifos will contain packets
+ * destined for these different users, and this structure contains shadows of
+ * the hardware fifos in the DMA SRAM that must be maintained as the fifos
+ * change.  This common structure is located in static storage
+ * declared in DMA_RecFifo.c.
+ *
+ */
+typedef struct DMA_RecFifoGroup_t
+{
+  int group_id;         /*!< Group ID (0 through DMA_NUM_REC_FIFO_GROUPS-1)   */
+
+  int num_normal_fifos; /*!< Number of normal fifos used in this group
+                             (0 through DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP)   */
+
+  int num_hdr_fifos;    /*!< Number of header fifos used in this group
+                             - 0 - headers not being saved,
+                             - 1 - headers being saved.                       */
+
+  unsigned  mask;       /*!< All reads to the status for this group are
+                             masked by this, so you only see results for
+                             this group.
+                             - Group 0: 0xFF000000
+                             - Group 1: 0x00FF0000
+                             - Group 2: 0x0000FF00
+                             - Group 3: 0x000000FF                            */
+
+  void *unused1;        /*!< Unused space                                     */
+
+  DMA_RecFifoStatus_t *status_ptr; /*!< Pointer to the status, in DMA SRAM.   */
+
+  DMA_RecFifo_t        fifos[DMA_NUM_REC_FIFOS_PER_GROUP]; /*!< Rec Fifos.
+                  Indexes 0 through DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1 are
+                  the normal fifos in the group.
+                  Index DMA_HEADER_REC_FIFO_ID is the header fifo in the
+                  group.
+                  Note:  fifos[0] may not be local fifo number 0 in the group.*/
+}
+ALIGN_L1D_CACHE  DMA_RecFifoGroup_t;
+
+
+/*!
+ * \brief DMA Reception Fifo Receive Function Prototype
+ *
+ * Receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface, which assigns a registration ID
+ * to the function.  When the polling functions process a packet in a
+ * reception fifo, the appropriate receive function for that packet is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage, in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.
+ *
+ * \param[in]  f_ptr           Pointer to the reception fifo.
+ * \param[in]  packet_ptr      Pointer to the packet header (== va_head).
+ *                             This is quad-aligned for optimized copying.
+ * \param[in]  recv_func_parm  Pointer to storage specific to this receive
+ *                             function.  This pointer was specified when the
+ *                             receive function was registered with the kernel,
+ *                             and is passed to the receive function
+ *                             unchanged.
+ * \param[in]  payload_ptr     Pointer to the beginning of the payload.
+ *                             This is quad-aligned for optimized copying.
+ * \param[in]  payload_bytes   Number of bytes in the payload.  Note that this
+ *                             may be larger than the number of valid bytes
+ *                             in the payload because it is rounded such that
+ *                             it (payload_bytes) + 16 (size of packet header)
+ *                             is a multiple of 32 bytes.  Thus, if the size
+ *                             of the message is 64, payload_bytes is 80 such
+ *                             that the total packet size is 96, a multiple of
+ *                             32.
+ *
+ * \retval   0                 No errors found while processing the payload.
+ * \retval   negative_number   Errors found while processing the payload.
+ */
+typedef int  (*DMA_RecFifoRecvFunction_t)(
+					  DMA_RecFifo_t      *f_ptr,
+					  DMA_PacketHeader_t *packet_ptr,
+					  void               *recv_func_parm,
+					  char               *payload_ptr,
+					  int                 payload_bytes
+					 );
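+
+
+/*!
+ * \brief Example: A Minimal Receive Function (illustrative)
+ *
+ * A minimal sketch, not part of the original interface, of a receive
+ * function matching the prototype above.  It copies the payload out of the
+ * fifo (or out of the poll function's temporary buffer), since that storage
+ * may be overwritten after return.  Here, recv_func_parm is assumed to
+ * point to a caller-provided buffer of at least payload_bytes bytes.
+ */
+__INLINE__ int DMA_RecFifoExampleRecvFunction(
+					      DMA_RecFifo_t      *f_ptr,
+					      DMA_PacketHeader_t *packet_ptr,
+					      void               *recv_func_parm,
+					      char               *payload_ptr,
+					      int                 payload_bytes
+					     )
+{
+  char *dst = (char *)recv_func_parm;
+  int   i;
+
+  /* payload_bytes may exceed the valid data; see the rounding note above */
+  for ( i = 0; i < payload_bytes; i++ )
+    dst[i] = payload_ptr[i];
+
+  return 0;  /* No errors while processing the payload */
+}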
+
+
+/*!
+ * \brief DMA Reception Fifo Default Error Receive Function
+ *
+ * This is the default function that will handle packets having an
+ * unregistered registration ID.
+ *
+ * Receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface, which assigns a registration ID
+ * to the function.  When the polling functions process a packet in a
+ * reception fifo that has a registration ID that does not have a corresponding
+ * receive function registered, this error receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage, in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.
+ *
+ * \param[in]  f_ptr           Pointer to the reception fifo.
+ * \param[in]  packet_ptr      Pointer to the packet header (== va_head).
+ *                             This is quad-aligned for optimized copying.
+ * \param[in]  recv_func_parm  Pointer to storage specific to this receive
+ *                             function.  This pointer was specified when the
+ *                             receive function was registered with the kernel,
+ *                             and is passed to the receive function
+ *                             unchanged.
+ * \param[in]  payload_ptr     Pointer to the beginning of the payload.
+ *                             This is quad-aligned for optimized copying.
+ * \param[in]  payload_bytes   Number of bytes in the payload
+ *
+ * \retval  -1  An unregistered packet was just processed.  This is considered
+ *              an error.
+ */
+int  DMA_RecFifoDefaultErrorRecvFunction(
+					 DMA_RecFifo_t      *f_ptr,
+					 DMA_PacketHeader_t *packet_ptr,
+					 void               *recv_func_parm,
+					 char               *payload_ptr,
+					 int                 payload_bytes
+					);
+
+
+/*!
+ * \brief Set DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that
+ * - Sets DCRs establishing the map between the hardware torus fifos and the
+ *   DMA reception fifos that are to receive the packets from those hardware
+ *   torus fifos.
+ * - Sets DCRs establishing the DMA reception fifos that are to receive
+ *   local transfer packets.
+ * - Sets the DCRs establishing the type (0 or 1) of each reception fifo.
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos.
+ * - Leaves all of the fifos that are used in a "disabled" state.
+ *   DMA_RecFifoInitById() initializes and enables the fifos.
+ *
+ * \param[in]  rec_map  Reception Fifo Map structure, defining the mapping.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h.
+ *                       _bgp_err_dma_rfifo_map_twice means the mapping has
+ *                       already been set.
+ *
+ * \note  This function should be called once per job, after DMA_ResetRelease().
+ *        It may be called by any core, but once a core has called it, other
+ *        calls by that same core or any other core will fail.
+ *
+ * \note  During job init, the kernel sets up the DCR clear masks for each
+ *        reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear
+ *        a fifo in group g only clears group g.
+ *
+ */
+__INLINE__ int DMA_RecFifoSetMap(
+				 DMA_RecFifoMap_t * rec_map
+				)
+{
+	int rc;
+	rc = Kernel_RecFifoSetMap((uint32_t*)rec_map);
+	return rc;
+}
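+
+/*
+ * Usage sketch (illustrative):  the DMA_RecFifoMap_t fields are defined
+ * elsewhere in this SPI and are zeroed here only as a placeholder for a
+ * real mapping.
+ *
+ *   DMA_RecFifoMap_t map;
+ *   memset( &map, 0, sizeof(map) );  // fill in a real mapping here
+ *   if ( DMA_RecFifoSetMap( &map ) != 0 )
+ *     {
+ *       // e.g. _bgp_err_dma_rfifo_map_twice: the map was already set
+ *     }
+ */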
+
+
+/*!
+ * \brief Get DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that returns a DMA
+ * reception fifo map structure, filled in according to the DCRs.
+ *
+ * \param[in,out]  rec_map  A pointer to a Reception Fifo Map structure
+ *                          that will be filled-in upon return.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int DMA_RecFifoGetMap(
+				 DMA_RecFifoMap_t  *rec_map
+				)
+{
+	int rc;
+	rc = Kernel_RecFifoGetMap((uint32_t*)rec_map);
+	return rc;
+}
+
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs.  If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed).  In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * i.e., only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in]  grp      The group number (0 through DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]  target   The core that will receive the interrupt when a
+ *                      fifo in this group reaches its threshold
+ *                      (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ *                      Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler  A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             normal fifo in this group reaches its threshold.
+ *                             This function must be coded to take 4 uint32_t
+ *                             parameters:
+ *                             - A pointer to storage specific to this
+ *                               handler.  This is the normal_handler_parm
+ *                               specified on this function call.
+ *                             - 3 uint32_t parameters that are not used.
+ *                             If normal_handler is NULL, threshold interrupts
+ *                             are not delivered for normal fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler_parm   A pointer to storage that should be passed
+ *                                   to the normal interrupt handling function
+ *                                   (see normal_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  header_handler  ** This parameter is deprecated.  Specify NULL.**
+ *                             A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             header fifo in this group reaches its threshold.
+ *                             This function must be coded to take 2 parameters:
+ *                               void* A pointer to storage specific to this
+ *                                     handler.  This is the header_handler_parm
+ *                                     specified on this function call.
+ *                               int   The global fifo ID of the fifo that hit
+ *                                     its threshold (0 through
+ *                                     NUM_DMA_REC_FIFOS-1).
+ *                             If header_handler is NULL, threshold interrupts
+ *                             are not delivered for header fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  header_handler_parm   ** This parameter is deprecated.  Specify
+ *                                      NULL. **
+ *                                   A pointer to storage that should be passed
+ *                                   to the header interrupt handling function
+ *                                   (see header_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  interruptGroup  An InterruptGroup_t that identifies the
+ *                             group of interrupts that the fifos in this group
+ *                             will become part of.
+ *                             Ignored on subsequent call with the same group.
+ *
+ * \return  RecFifoGroupStruct  Pointer to a DMA Reception Fifo Group structure
+ *                              that reflects the fifos that are being used in
+ *                              this group.  This same structure is shared by
+ *                              all users of this reception fifo group.
+ *                              NULL is returned if an error occurs.
+ */
+DMA_RecFifoGroup_t *
+DMA_RecFifoGetFifoGroup(
+			int                       grp,
+			int                       target,
+			Kernel_CommThreadHandler  normal_handler,
+			void                     *normal_handler_parm,
+			Kernel_CommThreadHandler  header_handler,
+			void                     *header_handler_parm,
+			Kernel_InterruptGroup_t   interruptGroup
+		       );
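+
+/*
+ * Call sketch (illustrative):  fetch the group structure once, passing
+ * NULL for the deprecated header-fifo parameters as recommended above.
+ * my_threshold_handler is a hypothetical Kernel_CommThreadHandler supplied
+ * by the application.
+ *
+ *   DMA_RecFifoGroup_t *fg_ptr =
+ *     DMA_RecFifoGetFifoGroup( 0,                     // group 0
+ *                              0,                     // interrupt target core
+ *                              my_threshold_handler,  // normal_handler
+ *                              NULL,                  // normal_handler_parm
+ *                              NULL,                  // deprecated
+ *                              NULL,                  // deprecated
+ *                              NULL );                // interruptGroup
+ *   if ( fg_ptr == NULL )
+ *     {
+ *       // error
+ *     }
+ */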
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a reception fifo structure
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Increment DMA Reception Fifo Head
+ *
+ * Increment a DMA reception fifo's "head", given a reception fifo structure
+ *
+ * \param[in]  f_ptr  Pointer to the reception fifo structure
+ * \param[in]  incr   The number of quads (16 byte units) to increment the
+ *                    head pointer by.
+ *
+ * \return  None
+ *
+ * \post va_head is set in both the hardware and software fifo structures,
+ *       and the fifo free space is recalculated.
+ *
+ */
+__INLINE__ void DMA_RecFifoIncrementHead(
+					 DMA_RecFifo_t *f_ptr,
+					 unsigned int   incr
+					)
+{
+  SPI_assert( f_ptr != NULL );
+
+  {
+  void *va_head = DMA_FifoGetHeadNoFreeSpaceUpdate( &f_ptr->dma_fifo );
+
+  void *va_end  = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo );
+
+  unsigned int incr_bytes = incr << 4;
+
+  unsigned int bytes_to_end = (unsigned)va_end - (unsigned)va_head;
+
+  /*
+   * Note:  The following check must be >= instead of just >.  We never want
+   *        the head to be equal to the end so we can always copy a quad
+   *        from the head, safely.
+   */
+  if ( incr_bytes >= bytes_to_end )
+    {
+      va_head = (char *)
+	          ( (unsigned)DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ) +
+		    ( incr_bytes - bytes_to_end ) );
+    }
+  else
+    {
+      va_head = (char *)( (unsigned)va_head + incr_bytes );
+    }
+
+  /* Set the head and update the fifo's free space */
+  DMA_FifoSetHead( &f_ptr->dma_fifo,
+		   va_head );
+  }
+}
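+
+/*
+ * Usage sketch:  advance the head past one just-processed 16-byte quad
+ * (the polling functions below normally do this on the caller's behalf);
+ * wrapping past the end of the fifo is handled internally.
+ *
+ *   DMA_RecFifoIncrementHead( f_ptr, 1 );
+ */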
+
+
+/*!
+ * \brief Get the "Not Empty" Status of a Reception Fifo Group
+ *
+ * Get the "Not Empty" status of the reception fifos that are being used in the
+ * specified "not empty" word.
+ *
+ * \param[in]  fg_ptr     Pointer to the reception fifo group structure
+ * \param[in]  word       The word (0 or 1) of the "not empty" status to be
+ *                        returned.
+ *
+ * \retval  notEmptyStatus  A 32-bit value:
+ *                          If "word" is 0, bit i is 1 if normal rec fifo i is
+ *                          in use and is not empty.
+ *                          If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ *                          rec fifos for groups 1, 2, 3, 4 respectively are in
+ *                          use and not empty.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetNotEmpty(
+					   DMA_RecFifoGroup_t *fg_ptr,
+					   int                 word
+					  )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+   /*   printf("RecFifoGetNotEmpty: group=%d, status addr=0x%08x, not_empty=0x%08x, mask=0x%08x, RecFifoHwAddr=0x%08x, RecFifo4PaTail=0x%08x, PaHead=0x%08x\n", */
+   /* 	 fg_ptr->group_id, (unsigned)(&(fg_ptr->status_ptr->not_empty[word])), */
+   /* 	 fg_ptr->status_ptr->not_empty[word], fg_ptr->mask, */
+   /* 	 (unsigned)(fg_ptr->fifos[4].dma_fifo.fifo_hw_ptr), */
+   /*  	 fg_ptr->fifos[4].dma_fifo.fifo_hw_ptr->pa_tail, */
+   /* 	 fg_ptr->fifos[4].dma_fifo.fifo_hw_ptr->pa_head); */
+
+  return ( fg_ptr->status_ptr->not_empty[word] & fg_ptr->mask );
+
+}
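+
+/*
+ * Usage sketch:  test whether any in-use normal fifo in the group has
+ * packets by reading word 0 of the "not empty" status.
+ *
+ *   if ( DMA_RecFifoGetNotEmpty( fg_ptr, 0 ) != 0 )
+ *     {
+ *       // at least one normal fifo in this group is not empty
+ *     }
+ */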
+
+
+/*!
+ * \brief Get the "Available" Status of a Reception Fifo Group
+ *
+ * Get the "available" status of the reception fifos that are being used in the
+ * specified "available" word.
+ *
+ * \param[in]  fg_ptr     Pointer to the reception fifo group structure
+ * \param[in]  word       The word (0 or 1) of the "available" status to be
+ *                        returned.
+ *
+ * \retval  availableStatus  A 32-bit value:
+ *                           If "word" is 0, bit i is 1 if normal rec fifo i is
+ *                           in use and is available.
+ *                           If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ *                           rec fifos for groups 1, 2, 3, 4 respectively are in
+ *                           use and available.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetAvailable(
+					    DMA_RecFifoGroup_t *fg_ptr,
+					    int                 word
+					   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  return ( fg_ptr->status_ptr->available[word] & fg_ptr->mask );
+}
+
+
+/*!
+ * \brief Get the "Threshold Crossed" Status of a Reception Fifo Group
+ *
+ * Get the "threshold crossed" status of the reception fifos that are being
+ * used in the specified "threshold crossed" word.
+ *
+ * \param[in]  fg_ptr     Pointer to the reception fifo group structure
+ * \param[in]  word       The word (0 or 1) of the "threshold crossed" status
+ *                        to be returned.
+ *
+ * \retval  thresholdCrossedStatus  A 32-bit value:
+ *                           If "word" is 0, bit i is 1 if normal rec fifo i is
+ *                           in use and its threshold has been crossed.
+ *                           If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ *                           rec fifos for groups 1, 2, 3, 4 respectively are in
+ *                           use and their threshold has been crossed.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetThresholdCrossed(
+					    DMA_RecFifoGroup_t *fg_ptr,
+					    int                 word
+					   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  return ( fg_ptr->status_ptr->threshold_crossed[word] & fg_ptr->mask );
+}
+
+
+/*!
+ * \brief Set the "Clear Threshold Crossed" Status of Specified Fifos in a
+ *        Reception Fifo Group
+ *
+ * Set the "clear threshold crossed" status of the specified reception fifos
+ * in the specified "clear threshold crossed" word.
+ *
+ * \param[in]  fg_ptr     Pointer to the reception fifo group structure
+ * \param[in]  clr        32-bit value, specifying which fifos are to have
+ *                        their "clear threshold crossed" status set.
+ *                        If "word" is 0, bit i is 1 if normal rec fifo i is
+ *                        to have its "clear threshold crossed" status set.
+ *                        If "word" is 1, one of bits 7, 15, 23, 31 is 1 if
+ *                        header fifo for group 1, 2, 3, 4 respectively is to
+ *                        have its "clear threshold crossed" status set.
+ *                        Fifos that are not in the group will not have their
+ *                        status set.
+ * \param[in]  word       The word (0 or 1) of the "clear threshold crossed"
+ *                        status to be set.
+ *
+ * \return  None
+ *
+ * \note This function does an MBAR after setting the status to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ */
+__INLINE__ void DMA_RecFifoSetClearThresholdCrossed(
+					    DMA_RecFifoGroup_t *fg_ptr,
+					    unsigned int        clr,
+					    int                 word
+					   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  fg_ptr->status_ptr->clear_threshold_crossed[word] = clr & fg_ptr->mask;
+
+  _bgp_mbar();
+
+}
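+
+/*
+ * Usage sketch:  a typical pattern is to read the "threshold crossed"
+ * status and write the same bits back to clear them:
+ *
+ *   unsigned crossed = DMA_RecFifoGetThresholdCrossed( fg_ptr, 0 );
+ *   if ( crossed )
+ *     DMA_RecFifoSetClearThresholdCrossed( fg_ptr, crossed, 0 );
+ */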
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a fifo group and a fifo ID
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief DMA RecFifo Initialization By Id
+ *
+ * - For a DMA reception fifo, initialize its start, head, tail, and end.
+ * - Compute fifo size and free space.
+ *
+ * \param[in]  fg_ptr    Pointer to fifo group structure.
+ * \param[in]  fifo_id   Id of the fifo to be initialized
+ *                       (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  va_start  Virtual address of the start of the fifo.
+ * \param[in]  va_head   Virtual address of the head of the fifo (typically
+ *                       equal to va_start).
+ * \param[in]  va_end    Virtual address of the end of the fifo.
+ *
+ * \retval  0            Successful.
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h.
+ *                       _bgp_err_dma_rfifo_map_twice means this fifo has
+ *                       already been initialized
+ *
+ */
+__INLINE__ int DMA_RecFifoInitById(
+				   DMA_RecFifoGroup_t *fg_ptr,
+				   int                 fifo_id,
+				   void               *va_start,
+				   void               *va_head,
+				   void               *va_end
+				  )
+{
+  int rc;
+
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( ( (uint32_t) va_end - (uint32_t)va_start ) >=
+	                                       DMA_MIN_REC_FIFO_SIZE_IN_BYTES );
+
+  /*
+   * Initialize the fifo by invoking a system call.
+   */
+
+  rc = Kernel_RecFifoInitById(
+			      (uint32_t*)fg_ptr,
+			      fifo_id,
+			      va_start,
+			      va_head,
+			      va_end);
+
+  return rc;
+}
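+
+/*
+ * Initialization sketch (assumes a suitably aligned, DMA-accessible
+ * buffer; the exact alignment and pinning requirements are defined
+ * elsewhere in this SPI):
+ *
+ *   static char fifo_buf[DMA_MIN_REC_FIFO_SIZE_IN_BYTES]
+ *                        __attribute__(( aligned(32) ));  // assumed alignment
+ *
+ *   int rc = DMA_RecFifoInitById( fg_ptr,
+ *                                 0,         // fifo_id
+ *                                 fifo_buf,  // va_start
+ *                                 fifo_buf,  // va_head == va_start
+ *                                 fifo_buf + sizeof(fifo_buf) );  // va_end
+ */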
+
+
+/*!
+ * \brief Get DMA RecFifo Start Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA reception fifo's start pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_start  The virtual address of the start of the fifo
+ *
+ */
+__INLINE__ void * DMA_RecFifoGetStartById(
+					  DMA_RecFifoGroup_t *fg_ptr,
+					  int                 fifo_id
+					 )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  return DMA_FifoGetStartFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Head Pointer Using a Fifo Id
+ *
+ * Get a DMA reception fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_head  The virtual address of the head of the fifo
+ *
+ */
+__INLINE__ void * DMA_RecFifoGetHeadById(
+					 DMA_RecFifoGroup_t *fg_ptr,
+					 int                 fifo_id
+					)
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  return DMA_FifoGetHead( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Tail Pointer Using a Fifo Id
+ *
+ * Get a DMA reception fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_tail  The virtual address of the tail of the fifo
+ *
+ */
+__INLINE__ void *DMA_RecFifoGetTailById(
+					DMA_RecFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  return   DMA_FifoGetTail( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo End Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA reception fifo's end pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  va_end  The virtual address of the end of the fifo
+ *
+ */
+__INLINE__ void *DMA_RecFifoGetEndById(
+				       DMA_RecFifoGroup_t *fg_ptr,
+				       int                 fifo_id
+				      )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  return   DMA_FifoGetEndFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Size Using a Fifo Id
+ *
+ * Get a DMA reception fifo's size, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  size   The size of the DMA fifo, in units of 16B quads.
+ *
+ */
+__INLINE__ unsigned int DMA_RecFifoGetSizeById(
+					       DMA_RecFifoGroup_t *fg_ptr,
+					       int                 fifo_id
+					      )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  return   DMA_FifoGetSize( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Free Space Using a Fifo Id
+ *
+ * Get a DMA reception fifo's free space, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  read_head  Indicates whether to read the head from the hardware
+ *                        fifo before calculating the free space.
+ *                          - 1 means to read the hardware head
+ *                          - 0 means to use the current head shadow
+ * \param[in]  read_tail  Indicates whether to read the tail from the hardware
+ *                        fifo before calculating the free space.
+ *                          - 1 means to read the hardware tail
+ *                          - 0 means to use the current tail shadow
+ *
+ * \retval  freeSpace  The amount of free space in the fifo, in units of
+ *                     16B quads.
+ *
+ */
+__INLINE__ unsigned int  DMA_RecFifoGetFreeSpaceById(
+					 DMA_RecFifoGroup_t *fg_ptr,
+					 int                 fifo_id,
+					 unsigned int        read_head,
+					 unsigned int        read_tail
+					)
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  return DMA_FifoGetFreeSpace( &fg_ptr->fifos[fifo_id].dma_fifo,
+			       read_head,
+			       read_tail );
+}
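+
+/*
+ * Usage sketch:  re-read the hardware tail (where the DMA is writing) but
+ * trust the head shadow maintained by this core:
+ *
+ *   unsigned int quads_free =
+ *     DMA_RecFifoGetFreeSpaceById( fg_ptr, 0, 0, 1 );
+ */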
+
+
+/*!
+ * \brief Set DMA RecFifo Head Pointer Using a Fifo Id
+ *
+ * Set a DMA reception fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  va_head  The virtual address of the head of the fifo.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_RecFifoSetHeadById(
+				       DMA_RecFifoGroup_t *fg_ptr,
+				       int                 fifo_id,
+				       void               *va_head
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  DMA_FifoSetHead( &fg_ptr->fifos[fifo_id].dma_fifo,
+		   va_head);
+}
+
+
+/*!
+ * \brief Set DMA RecFifo Tail Pointer Using a Fifo Id
+ *
+ * Set a DMA reception fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  va_tail  The virtual address of the tail of the fifo.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_RecFifoSetTailById(
+				       DMA_RecFifoGroup_t *fg_ptr,
+				       int                 fifo_id,
+				       void               *va_tail
+				      )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  DMA_FifoSetTail( &fg_ptr->fifos[fifo_id].dma_fifo,
+		   va_tail);
+}
+
+
+/*!
+ * \brief Increment DMA RecFifo Head Pointer Using a Fifo Id
+ *
+ * Increment a DMA reception fifo's head pointer, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  incr     The number of quads (16 byte units) to increment the
+ *                      head pointer by.
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_RecFifoIncrementHeadById(
+					     DMA_RecFifoGroup_t *fg_ptr,
+					     int                 fifo_id,
+					     unsigned int        incr
+					    )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  DMA_RecFifoIncrementHead( &fg_ptr->fifos[fifo_id],
+			    incr);
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Not Empty Status Using a Fifo Id
+ *
+ * Get a DMA reception fifo's not empty status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         The specified fifo is empty.
+ *          non-zero  The specified fifo is not empty.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetNotEmptyById(
+					DMA_RecFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  {
+  int word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id);
+
+  unsigned status;
+  status = DMA_RecFifoGetNotEmpty(fg_ptr,
+				  word);
+  if ( word == 0 )
+    {
+      /* If normal fifo, mask with the correct bit number. */
+      status = status & _BN(fg_ptr->fifos[fifo_id].global_fifo_id);
+    }
+  /* For header fifo, don't need additional mask because the status word was
+   * already masked by the 8 bits for this group, leaving only the 1 bit for
+   * the header fifo.
+   */
+
+  return status;
+  }
+
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Available Status Using a Fifo Id
+ *
+ * Get a DMA reception fifo's available status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         The specified fifo is not available.
+ *          non-zero  The specified fifo is available.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetAvailableById(
+					DMA_RecFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  {
+  int word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id);
+
+  unsigned status;
+  status = DMA_RecFifoGetAvailable(fg_ptr,
+				   word);
+  if ( word == 0 )
+    {
+      /* If normal fifo, mask with the correct bit number. */
+      status = status & _BN(fg_ptr->fifos[fifo_id].global_fifo_id);
+    }
+  /* For header fifo, don't need additional mask because the status word was
+   * already masked by the 8 bits for this group, leaving only the 1 bit for
+   * the header fifo.
+   */
+
+  return status;
+  }
+
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Threshold Crossed Status Using a Fifo Id
+ *
+ * Get a DMA reception fifo's threshold crossed status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         The specified fifo has not had its threshold crossed
+ *          non-zero  The specified fifo has had its threshold crossed
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetThresholdCrossedById(
+					DMA_RecFifoGroup_t *fg_ptr,
+					int                 fifo_id
+				       )
+{
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  {
+  int word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id);
+
+  unsigned status;
+  status = DMA_RecFifoGetThresholdCrossed(fg_ptr,
+					  word);
+  if ( word == 0 )
+    {
+      /* If normal fifo, mask with the correct bit number. */
+      status = status & _BN(fg_ptr->fifos[fifo_id].global_fifo_id);
+    }
+  /* For header fifo, don't need additional mask because the status word was
+   * already masked by the 8 bits for this group, leaving only the 1 bit for
+   * the header fifo.
+   */
+
+  return status;
+  }
+
+}
+
+
+/*!
+ * \brief Set DMA RecFifo Clear Threshold Crossed Status Using a Fifo Id
+ *
+ * Set a DMA reception fifo's "clear threshold crossed" status, given a fifo
+ * group and fifo id.
+ *
+ * \param[in]  fg_ptr   Pointer to the fifo group structure
+ * \param[in]  fifo_id  Id of the fifo within the group
+ *                      (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \return  None
+ *
+ */
+__INLINE__ void DMA_RecFifoSetClearThresholdCrossedById(
+					   DMA_RecFifoGroup_t *fg_ptr,
+					   int                 fifo_id
+					  )
+{
+  unsigned int clr;
+  int          word;
+
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( fg_ptr != NULL );
+
+  word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id);
+
+  if ( word == 0 )
+    {
+      /* If normal fifo, mask with the correct bit number so we only specify the
+       * bit corresponding to this normal fifo.
+       * Note:  The fg_ptr->mask shouldn't be necessary, but it is a bit safer.
+       */
+      clr = ( _BN(fg_ptr->fifos[fifo_id].global_fifo_id) & fg_ptr->mask);
+    }
+  else
+    {
+      /* If header fifo, it is ok to just clear all of the mask bits for this
+       * group, since only 1 bit is used inside the DMA.
+       */
+      clr = fg_ptr->mask;
+    }
+
+  DMA_RecFifoSetClearThresholdCrossed(fg_ptr,
+				      clr,
+				      word); /* Write to the DMA SRAM */
+}
+
+
+/*!
+ * \brief Register a Reception Fifo Receive Function
+ *
+ * Register a specified receive function to handle packets having a specific
+ * "registration ID".  It returns a registration ID (0-255) that is to be used
+ * in the packet header Func_Id field, such that packets that arrive in a
+ * reception fifo will result in the corresponding receive function being called
+ * when that fifo is processed by a polling or interrupt handler function.
+ *
+ * \param[in]  recv_func          Pointer to the receive function.
+ * \param[in]  recv_func_parm     Arbitrary pointer to be passed to the
+ *                                recv_func when it is called.
+ * \param[in]  is_error_function  1 means this is the receiver function
+ *                                to be called if a packet contains an invalid
+ *                                (unregistered) registration ID.  The return
+ *                                value from this function is zero, indicating
+ *                                success, not indicating a registration ID.
+ *                                A default function is provided if one is not
+ *                                registered.  If there is already a non-default
+ *                                error receive function registered, -EBUSY is
+ *                                returned.
+ *                                0 means this is not the error receiver
+ *                                function.
+ * \param[in]  is_header_fifo     Indicates whether the fifo is normal or
+ *                                header:
+ *                                - 0 is normal.  The return code is the
+ *                                  registration ID.
+ *                                - 1 is header.  The return code is 0,
+ *                                  indicating success, because packets in
+ *                                  header fifos are direct-put packets, and
+ *                                  hence have no registration ID.
+ *                                If there is already a header receive function
+ *                                registered, -EBUSY is returned.
+ *
+ * If both is_error_function and is_header_fifo are 1, -EINVAL is returned.
+ *
+ * \retval   0            This is a registration ID if is_error_function=0 and
+ *                        is_header_fifo=0.  Otherwise, it indicates success.
+ *           1-255        This is a registration ID.  Successful.
+ *           negative     Failure.  This is a negative errno value.
+ */
+int DMA_RecFifoRegisterRecvFunction(
+			        DMA_RecFifoRecvFunction_t  recv_func,
+				void                      *recv_func_parm,
+				int                        is_error_function,
+				int                        is_header_fifo
+			       );
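+
+/*
+ * Registration sketch, reusing the hypothetical my_recv_function and
+ * application buffer my_buffer sketched earlier in this header:
+ *
+ *   int reg_id = DMA_RecFifoRegisterRecvFunction( my_recv_function,
+ *                                                 my_buffer,  // recv_func_parm
+ *                                                 0,   // not the error function
+ *                                                 0 ); // normal fifo
+ *   if ( reg_id < 0 )
+ *     {
+ *       // negative errno value, e.g. -EBUSY
+ *     }
+ *   // reg_id (0-255) is placed in the packet header Func_Id field by the
+ *   // sender so that arriving packets dispatch to my_recv_function
+ */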
+
+
+/*!
+ * \brief De-Register a Reception Fifo Receive Function
+ *
+ * De-register a previously registered receive function.
+ *
+ * \param[in]  registrationId     Registration Id returned from
+ *                                DMA_RecFifoRegisterRecvFunction (0..255).
+ *                                A negative value means that no
+ *                                registration id is specified.
+ * \param[in]  is_error_function  1 means the error receive function is
+ *                                to be de-registered.
+ *                                0 otherwise.
+ * \param[in]  is_header_fifo     1 means the header fifo receive function is
+ *                                to be de-registered.
+ *                                0 otherwise.
+ *
+ * \retval   0            Success
+ *           negative     Error value
+ *
+ * \see DMA_RecFifoRegisterRecvFunction
+ */
+int DMA_RecFifoDeRegisterRecvFunction(
+				      int registrationId,
+				      int is_error_function,
+				      int is_header_fifo
+				     );
+
+
+/*!
+ * \brief Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1.  Total_packets packets are received
+ * 2.  All the fifos are empty
+ * 3.  A receive function returns a non-zero value
+ * 4.  The last packet removed from a fifo has an invalid registration id.  The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group, beginning with array index starting_index. If a fifo has a packet,
+ * the appropriate receive function is called.  Upon return, the packet is
+ * removed from the fifo (the fifo head is moved past the packet).
+ *
+ * After processing packets_per_fifo packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed.  When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  total_packets     The maximum number of packets that will be
+ *                               processed.
+ * \param[in]  packets_per_fifo  The maximum number of packets that will be
+ *                               processed in a given fifo before switching
+ *                               to the next fifo.
+ * \param[in]  starting_index    The fifos in the fifo group are maintained
+ *                               in an array.  This is the array index of the
+ *                               first fifo to be processed (0 through
+ *                               DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  num_empty_passes  The number of passes over the normal fifos
+ *                               while they are empty that this function
+ *                               should tolerate before giving up and
+ *                               returning.  This is an optimization
+ *                               to catch late arriving packets.
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ * \param[out] next_fifo_index   Pointer to an int where the recommended
+ *                               starting_index for the next call is returned.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ *                                next_fifo_index is set.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.  next_fifo_index is
+ *                                set.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ * \todo By setting fg_ptr->interrupt_lock? or by calling
+ *       the system call to disable a class of interrupts?
+ *
+ * \note next_fifo_index is set to the index of the fifo that had the last
+ *       packet received if all packets_per_fifo packets were not received from
+ *       that fifo.  However, if all packets_per_fifo packets were received
+ *       from that fifo, the index of the next fifo will be returned.
+ *
+ */
+int DMA_RecFifoPollNormalFifos(int                 total_packets,
+			       int                 packets_per_fifo,
+			       int                 starting_index,
+			       int                 num_empty_passes,
+			       int                 not_empty_poll_delay,
+			       DMA_RecFifoGroup_t *fg_ptr,
+			       int                *next_fifo_index
+			      );
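+
+/*
+ * Polling sketch (the parameter values are arbitrary examples):
+ *
+ *   int next_fifo = 0;
+ *   int n = DMA_RecFifoPollNormalFifos( 32,          // total_packets
+ *                                       8,           // packets_per_fifo
+ *                                       next_fifo,   // starting_index
+ *                                       2,           // num_empty_passes
+ *                                       100,         // not_empty_poll_delay
+ *                                       fg_ptr,
+ *                                       &next_fifo );
+ *   if ( n < 0 )
+ *     {
+ *       // return code from the receive function that ended polling
+ *     }
+ */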
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1.  All packets in all of the fifos have been received.
+ * 2.  A receive function returns a non-zero value.
+ * 3.  The last packet removed from a fifo has an invalid registration id.  The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ * 4.  There have been fruitfulPollLimit polls attempted (summed across all
+ *     fifos).
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group.  If a fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed.  When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur until all
+ * fifos are empty or fruitfulPollLimit polls have been completed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed.  This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  fg_ptr             Pointer to the fifo group.
+ * \param[in]  fruitfulPollLimit  The limit on the number of fruitful polls that
+ *                                will be attempted (summed across all fifos).
+ *                                If the limit is reached, this function
+ *                                returns.  A value of zero means there is no
+ *                                limit imposed.  A fruitful poll is one where
+ *                                at least one packet has arrived in the fifo
+ *                                since the last poll.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ *
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
+ */
+int DMA_RecFifoSimplePollNormalFifos( DMA_RecFifoGroup_t *fg_ptr,
+				      int                 fruitfulPollLimit);
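+
+/*
+ * Usage sketch:  drain the group with a bounded number of fruitful polls,
+ * avoiding the starvation scenario described above:
+ *
+ *   int n = DMA_RecFifoSimplePollNormalFifos( fg_ptr, 16 );
+ */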
+
+/*!
+ * \brief Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1.  num_packets packets are received
+ * 2.  The specified fifo is empty
+ * 3.  A receive function returns a non-zero value
+ * 4.  The last packet removed from the fifo has an invalid registration id. The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  num_packets       The maximum number of packets that will be
+ *                               processed.
+ * \param[in]  fifo_id           The ID of the fifo to be polled.
+ *                               (0 through
+ *                               DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  num_empty_passes  The number of passes over the fifo
+ *                               while it is empty that this function
+ *                               should tolerate before giving up and
+ *                               returning.  This is an optimization
+ *                               to catch late arriving packets.
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ *
+ * \param[in]  empty_callback    Function to call when spinning because the FIFO looks empty.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ * \todo By setting fg_ptr->interrupt_lock? or by calling
+ *       the system call to disable a class of interrupts?
+ *
+ */
+int DMA_RecFifoPollNormalFifoById( int                 num_packets,
+				   int                 fifo_id,
+				   int                 num_empty_passes,
+				   int                 not_empty_poll_delay,
+				   DMA_RecFifoGroup_t *fg_ptr,
+				   void 		(*empty_callback)(void)
+				 );
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1.  All packets in the fifo have been received.
+ * 2.  The specified fifo is empty.
+ * 3.  A receive function returns a non-zero value.
+ * 4.  The last packet removed from the fifo has an invalid registration id. The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ * 5.  There have been fruitfulPollLimit polls attempted.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in the fifo (emptying that fifo), or
+ * after the fruitfulPollLimit has been reached, the function returns the
+ * number of packets processed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed.  This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function has
+ * to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  fifo_id           The ID of the fifo to be polled.
+ *                               (0 through
+ *                               DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ * \param[in]  fruitfulPollLimit  The limit on the number of fruitful polls that
+ *                                will be attempted.
+ *                                If the limit is reached, this function
+ *                                returns.  A value of zero means there is no
+ *                                limit imposed.  A fruitful poll is one where
+ *                                at least one packet has arrived in the fifo
+ *                                since the last poll.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
+ */
+int DMA_RecFifoSimplePollNormalFifoById( int                 fifo_id,
+					 DMA_RecFifoGroup_t *fg_ptr,
+					 int                 fruitfulPollLimit
+				       );
+
+
+
+/*!
+ * \brief Poll Header Reception Fifo Given a Fifo Group
+ *
+ * Poll the "header" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1.  Num_packets packets are received
+ * 2.  The specified fifo is empty
+ * 3.  A receive function returns a non-zero value
+ *
+ * If the header fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive function must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header. The packet header is always
+ * 16 bytes of contiguous storage in the fifo.  When the
+ * receive function returns, user code cannot assume that the buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header is 16-byte aligned
+ * for optimized copying.
+ *
+ * \param[in]  num_packets       The maximum number of packets that will be
+ *                               processed.
+ * \param[in]  num_empty_passes  The number of passes over the fifo
+ *                               while it is empty that this function
+ *                               should tolerate before giving up and
+ *                               returning.  This is an optimization
+ *                               to catch late arriving packets.
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
+ */
+int DMA_RecFifoPollHeaderFifo( int                 num_packets,
+			       int                 num_empty_passes,
+			       int                 not_empty_poll_delay,
+			       DMA_RecFifoGroup_t *fg_ptr
+			     );
+
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/bpcore_interface.h b/arch/powerpc/include/spi/bpcore_interface.h
new file mode 100644
index 0000000..ce8fecb
--- /dev/null
+++ b/arch/powerpc/include/spi/bpcore_interface.h
@@ -0,0 +1,42 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file spi/bpcore_interface.h
+ */
+#ifndef _BGP_BPCORE_INT_H_  /*  Prevent multiple inclusion */
+#define _BGP_BPCORE_INT_H_
+
+#define _BGP_UA_SCRATCH      (0x4)           /*  eDRAM Scratch: 0 to 8MB */
+#define _BGP_PA_SCRATCH      (0x00000000)
+#define _BGP_PS_SCRATCH      (8 * 1024 * 1024)
+#define _BGP_PM_SCRATCH      (0x007FFFFF)
+
+/* ************************************************************************* */
+/* DMA Non-Fatal Interrupt Request: Group 3 bits 00:31                       */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_NFT_G3_HIER_POS   3
+#define _BGP_IC_DMA_NFT_G3_UNIT_NUM   3
+#define _BGP_IC_DMA_NFT_G3_UNIT_POS   0
+#define _BGP_IC_DMA_NFT_G3_UNIT_SIZE  32
+#define _BGP_IC_DMA_NFT_G3_UNIT_MASK  0xffffffff
+
+
+#endif  /*  Add nothing below this line */
+
diff --git a/arch/powerpc/include/spi/kernel_interface.h b/arch/powerpc/include/spi/kernel_interface.h
new file mode 100644
index 0000000..59bfbdf
--- /dev/null
+++ b/arch/powerpc/include/spi/kernel_interface.h
@@ -0,0 +1,1982 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file spi/kernel_interface.h
+ */
+
+#ifndef _BGP_VIRT2PHYS_H_  /*  Prevent multiple inclusion */
+#define _BGP_VIRT2PHYS_H_
+
+
+#if defined(__LINUX__) || defined(__LINUX_KERNEL__)
+
+#include <spi/linux_interface.h>
+
+/*  #warning Using LINUX kernel interface for SPI */
+
+#else
+
+#warning Using CNK kernel interface for SPI
+#error Should not be using CNK interface, this is in the Linux kernel tree
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#include <common/linkage.h>
+#include <bpcore/bgp_types.h>
+#include <bpcore/ppc450_core.h>
+#include <bpcore/ppc450_inlines.h>
+#include <spi/bpcore_interface.h>
+#include <spi/bgp_kernel_inlines.h>
+#include <common/bgp_ras.h>
+#include <cnk/VirtualMap.h>
+#include <cnk/vmm.h>
+#include <cnk/bgp_SPRG_Usage.h>
+#include <cnk/bgp_SysCall_Extensions.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+
+
+#if ((!defined(__CNK__)) && (!defined(__BL__)))
+#include <pthread.h>
+#endif
+
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+/*!
+ * \brief Communication Thread interrupt handler function prototype
+ *
+ * \param[in] arg1 1st argument to commthread
+ * \param[in] arg2 2nd argument to commthread
+ * \param[in] arg3 3rd argument to commthread
+ * \param[in] arg4 4th argument to commthread
+ */
+typedef void (*Kernel_CommThreadHandler)(uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4);
+
+/*!
+ * \brief Interrupt Group Prototype
+ *
+ * This data type is used to group interrupts of various devices together
+ * so they can be enabled or disabled simultaneously.  A given interrupt user
+ * (e.g., messaging, QCD, etc.) specifies a value of this data type when its
+ * interrupt resources are allocated.  The kernel associates those resources
+ * with the specified value so that when this value is specified on the enable
+ * or disable interrupts system call, all of the interrupts in the group are
+ * operated upon.  Examples of devices that can be grouped in this way include
+ * DMA fifos, torus, tree, etc.
+ *
+ * \todo The kernel should provide interfaces to allocate a
+ *       Kernel_InterruptGroup_t and deallocate it.
+ */
+typedef void * Kernel_InterruptGroup_t;
+
+
+
+
+
+/*! \brief Returns the number of Processes (Virtual Nodes) running on this Physical Node.
+ *
+ * \return Process Count
+ * \retval 1 Running in Single Process "SMP Mode"
+ * \retval 2 Running in "2 Virtual Node Mode"
+ * \retval 3 Running in "3 Virtual Node Mode"
+ * \retval 4 Running in "4 Virtual Node Mode"
+ */
+/* __INLINE__ int Kernel_ProcessCount( void )
+{
+  uint32_t shm  = _bgp_mfspr( _BGP_SPRGRO_SHMem  );
+
+  return( (shm & 0x3) + 1 );
+}
+*/
+/*! \brief Returns the number of Processors (cores) running in this Process (Virtual Node)
+ *
+ * \return Processor Count
+ * \retval 1 Single Processor in this Process (usually 4-VN Mode).
+ * \retval 2 Two Processors in this Process (usually 2-VN Mode).
+ * \retval 3 Three Processors in this Process.
+ * \retval 4 Four Processors in this Process (usually SMP Mode).
+ */
+/* __INLINE__ int Kernel_ProcessorCount( void )
+{
+  uint32_t shm  = _bgp_mfspr( _BGP_SPRGRO_SHMem  );
+
+  return( ((shm & 0xC) >> 2) + 1 );
+}
+*/
+__INLINE__ int Kernel_GetAppSegmentCount(uint32_t* count)
+{
+   _BGP_SprgShMem shm;
+
+   shm.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem);
+   if(shm.IsStaticMap)
+   {
+      if(Kernel_ProcessCount() == 1)
+      {
+	 *count = 3;  /* text/rodata, data, heap */
+      }
+      else
+      {
+	 *count = 4;  /* text/rodata, data, heap, shared (in dual/vn) */
+      }
+   }
+   else
+   {
+      if(Kernel_ProcessCount() == 1)
+      {
+	 *count = 2;  /* text/rodata, data/heap */
+      }
+      else
+      {
+	 *count = 3;  /* text/rodata, data/heap, shared (in dual/vn) */
+      }
+   }
+   return 0;
+}
+
+__INLINE__ int Kernel_GetAppSegmentMapping(uint32_t segmentID, uint32_t coreID, uint32_t* va, uint64_t* pa, uint32_t* length)
+{
+   int rc = 0;
+   _BGP_SprgShMem shm;
+   shm.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem);
+   if((!shm.IsStaticMap)&&(segmentID > 1))
+      segmentID++;
+
+   asm __volatile__ ("li 0,%1;"
+		     "mr 3,%2;"
+		     "mr 4,%3;"
+		     "mr 5,%4;"
+		     "mr 6,%5;"
+		     "mr 7,%6;"
+		     "sc;"
+		     "mr %0, 3;"
+		     : "=&r" (rc)   /*  early clobber */
+		     : "i" (_BGP_SYSCALL_NR_GETAPPSEGMENTMAPPING),
+		     "r" (segmentID),
+		     "r" (coreID),
+		     "r" (va),
+		     "r" (pa),
+		     "r" (length)
+		     : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+   return rc;
+}
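+
+/*
+ * Illustrative sketch (not part of the original interface): walking the
+ * application segment map of the calling core.  Kernel_PhysicalProcessorID()
+ * is available in this environment (it is used by Kernel_Virtual2Physical
+ * below).
+ *
+ *   uint32_t count, va, length, seg;
+ *   uint64_t pa;
+ *   Kernel_GetAppSegmentCount(&count);
+ *   for (seg = 0; seg < count; seg++) {
+ *       if (Kernel_GetAppSegmentMapping(seg, Kernel_PhysicalProcessorID(),
+ *                                       &va, &pa, &length) == 0) {
+ *           // segment spans the 36-bit physical range [pa, pa+length)
+ *       }
+ *   }
+ */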
+
+extern int KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized;
+extern uint32_t KERNEL_VIRTUAL2PHYSICAL_segcnt;
+
+#define KERNEL_V2P_MAXSEGMENTS 5
+extern uint32_t KERNEL_VIRTUAL2PHYSICAL_segva[KERNEL_V2P_MAXSEGMENTS];
+extern uint64_t KERNEL_VIRTUAL2PHYSICAL_segpa[KERNEL_V2P_MAXSEGMENTS];
+extern size_t   KERNEL_VIRTUAL2PHYSICAL_segsz[KERNEL_V2P_MAXSEGMENTS];
+#undef KERNEL_V2P_MAXSEGMENTS
+
+/*! \brief Translate a 32bit Virtual Address to a 36bit Physical Address, returning separated upper and lower parts.
+ *
+ * \param[in] pVA   32bit virtual address in the calling process
+ * \param[in] vsize size in bytes of the virtual range
+ * \param[out] ua_out upper 4 physical address bits
+ * \param[out] pa_out lower 32 physical address bits
+ * \return Error condition for translation
+ * \retval  0 Successful translation, with ua_out and pa_out filled in
+ * \retval -1 Invalid Virtual Address for this process, ua_out and pa_out unmodified.
+ * \retval -2 The range from vaddr to (vaddr+vsize) is not physically contiguous.
+ * \retval -3 vaddr in Scratch, but no Scratch, or not enough Scratch, is enabled.
+ * \retval -4 invalid parameter
+ *
+ *  \warning Supports only Text, Data, Stack, and (optional) eDRAM Scratch translation
+ *  \warning CNK "pagesize" is 1MB.
+ *  \warning Text and Data are virtually contiguous, but not necessarily physically contiguous.
+ *  \todo Does not (currently) support > 4GB DDR space.
+ *  \todo Does not (currently) support Shared Memory Area.
+ */
+__INLINE__ int Kernel_Virtual2Physical( void     *pVA,      /*  input: 32bit Virtual start address */
+			    size_t   vsize,     /*  input: size in bytes of virtual range */
+			    uint32_t *ua_out,   /*  output: upper  4 Physical Address bits */
+			    uint32_t *pa_out )  /*  output: lower 32 Physical Address bits */
+{
+    _BGP_SprgShMem shmem;
+
+    shmem.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem);
+    if(shmem.IsStaticMap)
+    {
+	uint32_t x;
+
+	if(KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized == 0)
+	{
+	    Kernel_GetAppSegmentCount(&KERNEL_VIRTUAL2PHYSICAL_segcnt);
+	    for(x=0; x<KERNEL_VIRTUAL2PHYSICAL_segcnt; x++)
+	    {
+		if(Kernel_GetAppSegmentMapping(x, Kernel_PhysicalProcessorID(), &KERNEL_VIRTUAL2PHYSICAL_segva[x], &KERNEL_VIRTUAL2PHYSICAL_segpa[x], &KERNEL_VIRTUAL2PHYSICAL_segsz[x]))
+		    return -1;
+	    }
+	    KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized = 1;
+	}
+	for(x=0; x<KERNEL_VIRTUAL2PHYSICAL_segcnt; x++)
+	{
+	    if(((uint32_t)pVA >= KERNEL_VIRTUAL2PHYSICAL_segva[x]) && (KERNEL_VIRTUAL2PHYSICAL_segsz[x] > (uint32_t)pVA - KERNEL_VIRTUAL2PHYSICAL_segva[x] + vsize) && ((uint32_t)pVA + vsize > (uint32_t)pVA))
+	    {
+		*ua_out = (uint32_t)((KERNEL_VIRTUAL2PHYSICAL_segpa[x] + ((uint32_t)pVA-KERNEL_VIRTUAL2PHYSICAL_segva[x])) >> 32);
+		*pa_out = (uint32_t)((KERNEL_VIRTUAL2PHYSICAL_segpa[x] + ((uint32_t)pVA-KERNEL_VIRTUAL2PHYSICAL_segva[x]))&0xffffffff);
+		return 0;
+	    }
+	}
+	return -1;
+    }
+
+    uint32_t vaddr = (uint32_t)pVA;
+    uint32_t texti = _bgp_mfspr( _BGP_SPRGRO_TextI );
+    uint32_t datai = _bgp_mfspr( _BGP_SPRGRO_DataI );
+    uint32_t dst2  = _bgp_mfspr( _BGP_SPRGRO_DST2  );
+    uint32_t shm   = (_bgp_mfspr( _BGP_SPRGRO_SHMem ) & 0xFFFFFFC0);
+    uint32_t text_v_start = (texti & 0xFFF00000);
+    uint32_t data_v_start = (datai & 0xFFF00000);  /*  text_v_limit is (data_v_start - 1) */
+    uint32_t text_ua      = ((texti & 0x000000C0) >> 6);
+    uint32_t text_p_start = ((texti & 0x000FFF00) << 12);
+    uint32_t data_ua      = ((datai & 0x000000C0) >> 6);
+    uint32_t data_p_start = ((datai & 0x000FFF00) << 12);
+    uint32_t data_v_size  = (dst2  & 0xFFF00000);
+    uint32_t data_v_limit = (data_v_start + data_v_size + _BGP_VMM_PAGE_MASK);
+    uint32_t vend    = (vaddr + vsize - 1);
+    uint32_t vpage   = (vaddr & ~_BGP_VMM_PAGE_MASK);   /*  which 1MB page? */
+    uint32_t voffset = (vaddr & _BGP_VMM_PAGE_MASK);    /*  offset within 1MB page */
+
+     /*  printf("V2P: texti=0x%08x, datai=0x%08x, dst2=0x%08x\n", texti, datai, dst2 ); */
+     /*  printf("V2P: vaddr=0x%08x, vend=0x%08x, text_v_start=0x%08x, data_v_limit=0x%08x\n", */
+     /*          vaddr, vend, text_v_start, data_v_limit ); */
+
+     /*  parm check */
+    if ( !vsize || !ua_out || !pa_out )
+	return(-4);
+
+     /*  range check: below text or off end of data, or in eDRAM Scratch */
+    if ( (vaddr < text_v_start) || (vend > data_v_limit) )
+    {
+	 /*  Scratch? */
+	if ( vaddr >= _BGP_VA_SCRATCH )
+	{
+	    uint32_t scratchMB   = ((dst2 & 0x00000078) << (20-3));
+	    uint32_t scratch_end = (_BGP_VA_SCRATCH + scratchMB);
+
+	    if ( !scratchMB || (vend > scratch_end) )
+		return(-3);
+
+	    *ua_out = (uint32_t)_BGP_UA_SCRATCH;
+	    *pa_out = (vaddr & _BGP_VM_SCRATCH);
+	    return(0);
+	}
+	else if ( shm )  /*  Shared Memory? If any, always mapped V=R. */
+	{
+	    uint32_t shm_v_start = (shm & 0xFFF00000);
+	    uint32_t shm_v_end   = (shm_v_start + ((shm & 0x000FFF00) << 12));
+	    uint32_t shm_ua      = ((shm & 0x000000C0) >> 6);
+
+	    if ( (vaddr >= shm_v_start) && (vend <= shm_v_end) )
+            {
+		*ua_out = shm_ua;
+		*pa_out = vaddr;
+		return(0);
+            }
+	}
+
+	return(-1);
+    }
+
+     /*  Text? (includes Read-Only Data) */
+    if ( vaddr < data_v_start )
+    {
+	 /*  if range starts in Text but ends in Data, then discontiguous */
+	if ( vend >= data_v_start )
+	    return(-2);
+
+	*ua_out = text_ua;
+	*pa_out = (text_p_start + (vpage - text_v_start) + voffset);
+
+	return(0);
+    }
+
+     /*  Data */
+    *ua_out = data_ua;
+    *pa_out = (data_p_start + (vpage - data_v_start) + voffset);
+
+    return(0);
+}
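+
+/*
+ * Illustrative sketch: translating a buffer's virtual address before
+ * handing it to the DMA.  Assumes the buffer is physically contiguous;
+ * a return of -2 would indicate the range crosses a mapping boundary and
+ * must be split into smaller pieces.
+ *
+ *   static char buf[4096];
+ *   uint32_t ua, pa;
+ *   if (Kernel_Virtual2Physical(buf, sizeof(buf), &ua, &pa) == 0) {
+ *       // 36-bit physical address is (ua:pa), i.e. (ua << 32) | pa
+ *   }
+ */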
+
+
+/*! \brief Returns a copy of the node's personality
+ *
+ * \param[out] personality Location of personality structure that will be filled in by Kernel_GetPersonality
+ * \param[in]  size Size, in bytes, that was allocated to hold the personality structure
+ * \return Error indication
+ * \retval  0 Success
+ * \retval -1 Invalid parameters
+ */
+__INLINE__ int Kernel_GetPersonality(_BGP_Personality_t* personality, size_t size)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+  asm __volatile__ ("li 0,%3;"
+		    "mr 3,%1;"
+		    "mr 4,%2;"
+                     "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "r" (personality),
+		      "r" (size),
+		      "i" (_BGP_SYSCALL_NR_GET_PERSONALITY)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
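+
+/*
+ * Illustrative sketch: fetching this node's personality.  The layout of
+ * _BGP_Personality_t is defined elsewhere; only the return code is
+ * interpreted here.
+ *
+ *   _BGP_Personality_t pers;
+ *   if (Kernel_GetPersonality(&pers, sizeof(pers)) != 0) {
+ *       // invalid parameters (e.g., size too small for the structure)
+ *   }
+ */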
+
+/*! \brief Starts to checkpoint/restore the kernel data structures for CNK
+ *
+ * \param[in]  component  The kernel component to be checkpointed/restored
+ * \param[in]  operation  The type of operation that the kernel needs to perform
+ *                        (e.g., CHECKPOINT_START, CHECKPOINT_RESTART, CHECKPOINT_COMPLETE)
+ * \param[in]  buffer     Buffer holding, or receiving, the checkpoint data
+ * \param[in]  size       Size, in bytes, of the buffer
+ * \param[out] actualSize Actual number of bytes of checkpoint data in the buffer
+ * \param[out] basePtr    Base pointer returned by the kernel
+ * \return Error indication
+ * \retval  0 Success
+ * \retval -1 Invalid parameters
+ */
+__INLINE__ int Kernel_checkpoint(int component, int operation, void *buffer, uint32_t size, uint32_t *actualSize, uint32_t *basePtr)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_CHECKPOINT),
+                    "r" (component),
+                    "r" (operation),
+                    "r" (buffer),
+                    "r" (size),
+                    "r" (actualSize),
+                    "r" (basePtr)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Returns the contents of the running PPC450's processor version register.
+ * \return Contents of PPC450 PVR register
+ */
+__INLINE__ int Kernel_GetProcessorVersion()
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                     "sc;"
+		    "mr %0, 3;"
+
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GET_PERSONALITY)
+                    : "r0", "r3", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief LockBox allocate syscall definition
+ * \param[in] lockid Indicates which counter ID is to be obtained.  Counter IDs range from 0 to 1023
+ * \param[in] numlocks The number of sequential counter IDs that will be obtained
+ * \param[out] ptr An array of pointers that will be filled in with the counter virtual addresses.
+ * \param[in] flags Optional flags
+ * \warning The storage indicated by ptr must be large enough to hold numlocks*sizeof(uint32_t) bytes
+ * \internal This is an internal syscall - do not use.
+ * \see LockBox_AllocateCounter
+ * \see LockBox_AllocateMutex
+ * \see LockBox_AllocateBarrier
+ */
+/* __INLINE__ int Kernel_AllocateLockBox(uint32_t lockid, uint32_t numlocks, uint32_t** ptr, uint32_t flags) */
+/* { */
+/*   int rc = 0; // this syscall returns RC in r3 and does not use errno */
+/*  */
+/*   asm __volatile__ ("li 0,%1;" */
+/* 		    "mr 3,%2;" */
+/* 		    "mr 4,%3;" */
+/* 		    "mr 5,%4;" */
+/* 		    "mr 6,%5;" */
+/*                     "sc;" */
+/* 		    "mr %0, 3;" */
+/*                     : "=&r" (rc)  // early clobber */
+/*                     : "i" (_BGP_SYSCALL_NR_ALLOC_COUNTER), */
+/* 		    "r" (lockid), */
+/* 		    "r" (numlocks), */
+/* 		    "r" (ptr), */
+/* 		    "r" (flags) */
+/*                    : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); */
+/*  */
+/*   return( rc ); */
+/* } */
+
+/*! \brief Converts a Rank into a XYZT Coordinate
+ *
+ * \param[in] rank Rank for the node
+ * \param[out] xcoord X Coordinate for the specified node
+ * \param[out] ycoord Y Coordinate for the specified node
+ * \param[out] zcoord Z Coordinate for the specified node
+ * \param[out] tcoord T Coordinate for the specified node
+ * \return Error status
+ * \retval 0 Success
+ * \retval non-zero Error
+ */
+__INLINE__ int Kernel_Rank2Coord(uint32_t rank, uint32_t* xcoord, uint32_t* ycoord, uint32_t* zcoord, uint32_t* tcoord)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+		    "mr 7,%6;"
+                     "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RANK2COORD),
+                    "r" (rank),
+                    "r" (xcoord),
+                    "r" (ycoord),
+                    "r" (zcoord),
+		    "r" (tcoord)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Converts all ranks into a XYZT Coordinate
+ *
+ * \param[out] coordinates XYZT coordinates of all nodes.  The array is in
+ *    rank order.  If a rank is not mapped, its coordinates will be
+ *    (255,255,255,255).
+ * \param[in] len specifies the length of the coordinates array.  That is,
+ *    the caller is guaranteeing that there is storage for coordinates[0],
+ *    coordinates[1], ..., coordinates[len-1].
+ * \return Error status
+ * \retval 0 Success
+ * \retval non-zero Error
+ */
+
+typedef struct _Kernel_Coordinates {
+    unsigned char x;
+    unsigned char y;
+    unsigned char z;
+    unsigned char t;
+} kernel_coords_t;
+
+__INLINE__ int Kernel_Ranks2Coords(kernel_coords_t* coordinates, uint32_t len)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RANKS2COORDS),
+                    "r" (coordinates),
+		    "r" (len)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Convert a XYZT Coordinate into a Rank.  Also returns number of nodes
+ * \param[in] xcoord X Coordinate used to specify the desired node
+ * \param[in] ycoord Y Coordinate used to specify the desired node
+ * \param[in] zcoord Z Coordinate used to specify the desired node
+ * \param[in] tcoord T Coordinate used to specify the desired node
+ * \param[out] rank Rank of the desired node
+ * \param[out] numnodes Number of Nodes in the partition
+ * \return Error indication
+ * \retval 0 Success
+ * \retval non-zero Error
+ */
+
+__INLINE__ int Kernel_Coord2Rank(uint32_t xcoord, uint32_t ycoord, uint32_t zcoord, uint32_t tcoord, uint32_t* rank, uint32_t* numnodes)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+		    "mr 8,%7;"
+                     "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_COORD2RANK),
+                    "r" (xcoord),
+                    "r" (ycoord),
+                    "r" (zcoord),
+                    "r" (tcoord),
+                    "r" (rank),
+		    "r" (numnodes)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "cc", "memory" );
+
+  return( rc );
+}
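+
+/*
+ * Illustrative sketch: round-tripping rank 0 through the two conversion
+ * calls; numnodes comes back as the partition size.
+ *
+ *   uint32_t x, y, z, t, rank, numnodes;
+ *   if (Kernel_Rank2Coord(0, &x, &y, &z, &t) == 0 &&
+ *       Kernel_Coord2Rank(x, y, z, t, &rank, &numnodes) == 0) {
+ *       // rank == 0 here; numnodes is the node count of the partition
+ *   }
+ */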
+
+/*! \brief Returns the Job ID
+ * \return Contains the control system JobID
+ */
+__INLINE__ uint32_t Kernel_GetJobID()
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                     "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GETJOBID)
+                    : "r0", "r3", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Read from a privileged DCR
+ * \param[in] dcrid Number of the DCR register
+ * \param[out] value Contents of DCR register
+ * \return Error indication
+ * \retval  0 Success
+ * \retval -1 Invalid DCR
+ * \note Only selected privileged DCRs will be accessible via this system call.
+ */
+__INLINE__ uint32_t Kernel_ReadDCR(uint32_t dcrid, uint32_t* value)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                     "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_READDCR),
+		    "r" (dcrid),
+		    "r" (value)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Write to a privileged DCR
+ * \param[in] dcrid Number of the DCR register
+ * \param[in] value Contents of DCR register
+ * \return Error indication
+ * \retval  0 Success
+ * \retval -1 Invalid DCR
+ * \note Only selected privileged DCRs will be accessible via this system call.
+ */
+__INLINE__ uint32_t Kernel_WriteDCR(uint32_t dcrid, uint32_t value)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                     "sc;"
+		    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_WRITEDCR),
+		    "r" (dcrid),
+		    "r" (value)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
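+
+/*
+ * Illustrative sketch: a read-modify-write of a kernel-whitelisted DCR.
+ * MY_DCR_ID is a placeholder; only the DCR numbers the kernel chooses to
+ * expose are accepted by these calls.
+ *
+ *   uint32_t val;
+ *   if (Kernel_ReadDCR(MY_DCR_ID, &val) == 0) {
+ *       val |= 0x1;
+ *       Kernel_WriteDCR(MY_DCR_ID, val);
+ *   }
+ */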
+
+/*!
+ * \brief Query Free DMA Counter Subgroups within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available) subgroups within the specified group.
+ *
+ * \param[in]   type           Specifies whether this is an injection or
+ *                             reception counter group (DMA_Type_Injection
+ *                             or DMA_Type_Reception)
+ * \param[in]   grp            Group number being queried (0 to
+ *                             DMA_NUM_COUNTER_GROUPS-1)
+ * \param[out]  num_subgroups  Pointer to an int where the number of free
+ *                             subgroups in the specified group is returned
+ * \param[out]  subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of num_subgroups subgroups is returned.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).  The
+ *                             caller must provide space for
+ *                             DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the
+ *                             entire counter group is free.
+ *
+ * \retval  0  Successful.  num_subgroups and subgroups array set as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupQueryFree()
+ * \note The kernel may need to synchronize with other cores performing
+ *       allocate or free syscalls.
+ *
+ */
+__INLINE__ uint32_t Kernel_CounterGroupQueryFree(uint32_t type, uint32_t group, uint32_t* num_subgroups, uint32_t* subgroups)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_COUNTERGRPQUERYFREE),
+                      "r" (type),
+                      "r" (group),
+                      "r" (num_subgroups),
+                      "r" (subgroups)
+                    : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief Allocate DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that allocates DMA counters
+ * from the specified group.  Counters may be allocated in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.  Parameters specify how interrupts,
+ * generated when a counter hits zero, are to be handled.  A
+ * DMA_CounterGroup_t structure is returned for use in other inline
+ * functions to operate on the allocated counters.
+ *
+ * \param[in]   type           Specifies whether this is an injection or
+ *                             reception counter group (DMA_Type_Injection
+ *                             or DMA_Type_Reception)
+ * \param[in]   grp            Group number whose counters are being allocated
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   num_subgroups  Number of subgroups to be allocated from the group
+ *                             (1 to DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in]   subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of subgroups to be allocated is provided.
+ *                             Each int is the subgroup number
+ *                             (0 to num_subgroups-1).
+ * \param[in]   target         The core that will receive the interrupt when a
+ *                             counter in this allocation hits zero
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   handler        A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             counter in this allocation hits zero.  This
+ *                             function must be coded to take 4 uint32_t
+ *                             parameters:
+ *                             - A pointer to storage specific to this
+ *                               handler.  This is the handler_parm
+ *                               specified on this allocation function.
+ *                             - Three uint32_t parameters that are not used.
+ *                             If handler is NULL, hit-zero interrupts will not
+ *                             be enabled for these counters.
+ * \param[in]   handler_parm   A pointer to storage that should be passed to the
+ *                             interrupt handling function (see handler
+ *                             parameter)
+ * \param[in]   interruptGroup A InterruptGroup_t that identifies the
+ *                             group of interrupts that the counters being
+ *                             allocated will become part of.
+ * \param[out]  cg_ptr         Pointer to a structure that is filled in upon
+ *                             successful return for use in other inline
+ *                             functions to operate on the allocated counters.
+ *                             \li counter -     Array of software counter
+ *                                               structures.  Each element
+ *                                               points to the corresponding
+ *                                               hardware counter in DMA SRAM.
+ *                                               Pointers are null if not
+ *                                               allocated).
+ *                                               Counters are initialized to
+ *                                               DMA_COUNTER_INIT_VAL,
+ *                                               disabled, their hit_zero bit
+ *                                               is off, base and max are NULL.
+ *                             \li status_ptr  - Points to status area within the
+ *                                               DMA memory map.
+ *                             \li permissions - Bits set for each allocated
+ *                                               counter
+ *                             \li grp_permissions - Permissions for each
+ *                                                   subgroup
+ *                             \li group_id    - The group number
+ *                             \li type        - The type of DMA (injection or
+ *                                               reception)
+ *
+ * \retval  0  Successful.  Counters allocated and cg_ptr structure filled in as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.  Nothing has been
+ *                            allocated.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupAllocate()
+ * \note The kernel may need to synchronize with other cores performing queries
+ *       or frees.
+ *
+ */
+__INLINE__ uint32_t Kernel_CounterGroupAllocate(uint32_t type, uint32_t group, uint32_t num_subgroups, uint32_t* subgroups, uint32_t target, uint32_t handler, uint32_t* handler_parm, uint32_t interruptGroup, uint32_t* cg_ptr)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "mr 9,%8;"
+                    "mr 10,%9;"
+                    "mr 11,%10;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_COUNTERGRPALLOCATE),
+                      "r" (type),
+                      "r" (group),
+                      "r" (num_subgroups),
+                      "r" (subgroups),
+                      "r" (target),
+                      "r" (handler),
+                      "r" (handler_parm),
+                      "r" (interruptGroup),
+                      "r" (cg_ptr)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief Free DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA counters
+ * from the specified group.  Counters may be freed in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.
+ *
+ * \param[in]   grp            Group number whose counters are being freed
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   num_subgroups  Number of subgroups to be freed from the group
+ *                             (1-DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in]   subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of subgroups to be freed is provided.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).
+ * \param[out]  cg_ptr         Pointer to the structure previously filled in when
+ *                             these counters were allocated.  Upon successful
+ *                             return, this structure is updated to reflect the
+ *                             freed counters:
+ *                             \li counter[]  -  Counter structures Pointers to
+ *                                               freed counters nulled.
+ *                             \li permissions - Bits cleared for each freed
+ *                                               counter.
+ *
+ * \retval  0  Successful.  Counters freed and cg_ptr structure updated as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupFree()
+ * \note The kernel may need to synchronize with other cores performing allocates
+ *       or queries.
+ */
+__INLINE__ uint32_t Kernel_CounterGroupFree(uint32_t group, uint32_t num_subgroups, uint32_t* subgroups, uint32_t* cg_ptr)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_COUNTERGRPFREE),
+                      "r" (group),
+                      "r" (num_subgroups),
+                      "r" (subgroups),
+                      "r" (cg_ptr)
+                    : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+  return( rc );
+}
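+
+/*
+ * Illustrative sketch of the counter-group lifecycle (query, allocate,
+ * free).  DMA_Type_Injection, DMA_NUM_COUNTERS_PER_SUBGROUP, and
+ * DMA_CounterGroup_t come from the DMA SPI headers named in the comments
+ * above.
+ *
+ *   uint32_t nsub, sub[DMA_NUM_COUNTERS_PER_SUBGROUP];
+ *   DMA_CounterGroup_t cg;
+ *   if (Kernel_CounterGroupQueryFree(DMA_Type_Injection, 0, &nsub, sub) == 0
+ *       && nsub > 0) {
+ *       // allocate one free subgroup; no hit-zero handler (handler args 0/NULL)
+ *       Kernel_CounterGroupAllocate(DMA_Type_Injection, 0, 1, sub, 0, 0,
+ *                                   NULL, 0, (uint32_t*)&cg);
+ *       // ... use the counters ...
+ *       Kernel_CounterGroupFree(0, 1, sub, (uint32_t*)&cg);
+ *   }
+ */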
+
+
+/*!
+ * \brief Query Free DMA InjFifos within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available to be allocated) fifos within the specified group.
+ *
+ * \param[in]   grp            Group number being queried
+ *                             (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1)
+ * \param[out]  num_fifos      Pointer to an int where the number of free
+ *                             fifos in the specified group is returned
+ * \param[out]  fifo_ids       Pointer to an array of num_fifos ints where
+ *                             the list of free fifos is returned.
+ *                             Each int is the fifo number
+ *                             (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *                             The caller must provide space for
+ *                             DMA_NUM_INJ_FIFOS_PER_GROUP ints,
+ *                             in case the entire fifo group is free.
+ *
+ * \retval  0  Successful.  num_fifos and fifo_ids array set as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupQueryFree()
+ */
+__INLINE__ uint32_t Kernel_InjFifoGroupQueryFree(uint32_t group, uint32_t* num_fifos, uint32_t* fifo_ids)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_INJFIFOGRPQUERYFREE),
+                      "r" (group),
+                      "r" (num_fifos),
+                      "r" (fifo_ids)
+                    : "r0", "r3", "r4", "r5", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief Allocate DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that allocates specified
+ * DMA injection fifos from the specified group.  Parameters specify whether
+ * each fifo is high or normal priority, local or non-local, and which torus
+ * fifos it maps to.  A DMA_InjFifoGroup_t structure is returned for
+ * use in other inline functions to operate on the allocated fifos.
+ *
+ * Refer to the interrupt discussion at the top of this include file to see why
+ * there are no interrupt-related parameters.
+ *
+ * \param[in]   grp          Group number whose DMA injection fifos are being
+ *                           allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in]   num_fifos    Number of fifos to be allocated from the group
+ *                           (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in]   fifo_ids     Pointer to an array of num_fifos ints where
+ *                           the list of fifos to be allocated is provided.
+ *                           Each int is the fifo number (0 to num_fifos-1).
+ * \param[in]   priorities   Pointer to an array of num_fifos short ints where
+ *                           the list of priorities to be assigned to the fifos
+ *                           is provided.  Each short int indicates the priority
+ *                           to be assigned to each of the fifos identified in
+ *                           the fifo_ids array (0 is normal, 1 is high priority).
+ * \param[in]   locals       Pointer to an array of num_fifos short ints where
+ *                           an indication is provided of whether each fifo will
+ *                           be used for local transfers (within the same node)
+ *                           or torus transfers.  Each short int indicates the
+ *                           local/non-local attribute to be assigned to each of
+ *                           the fifos identified in the fifo_ids array (0 is
+ *                           non-local, 1 is local).  If 0, the corresponding
+ *                           array element in ts_inj_maps indicates which torus
+ *                           fifos can be injected.
+ * \param[in]   ts_inj_maps  Pointer to an array of num_fifos short ints where
+ *                           the torus fifos that can be injected are specified
+ *                           for each fifo.  Each short int specifies which of
+ *                           the 8 torus injection fifos can be injected when a
+ *                           descriptor is injected into the DMA injection fifo.
+ *                           Must be non-zero when the corresponding "locals"
+ *                           is 0.
+ * \param[out]  fg_ptr       Pointer to a structure that is filled in upon
+ *                           successful return for use in other inline functions
+ *                           to operate on the allocated fifos.
+ *                           \li fifos - Array of fifo structures.  Structures
+ *                                       for allocated fifos are initialized as
+ *                                       documented below.  Structures for
+ *                                       fifos not allocated by this instance of
+ *                                       this syscall are initialized to binary
+ *                                       zeros.  Allocated fifos are enabled.
+ *                           \li status_ptr  - Points to status area within the
+ *                                             DMA memory map.
+ *                           \li permissions - Bits indicating which fifos were
+ *                                             allocated during this syscall.
+ *                           \li group_id    - The id of this group.
+ *
+ * \retval  0  Successful.  Fifos allocated and fg_ptr structure filled in as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupAllocate()
+ * \return The group fifo structure pointed to by fg_ptr is completely
+ *         initialized as follows:
+ *         - status_ptr points to the appropriate fifo group DMA memory map
+ *         - fifo structures array.  Fifo structures for fifos not allocated
+ *           during this syscall are initialized to binary zeros.  Fifo
+ *           structures for fifos allocated during this syscall are initialized:
+ *             - fifo_hw_ptr points to the DMA memory map for this fifo.  The
+ *               hardware start, end, head, and tail are set to zero by the
+ *               kernel.
+ *             - All other fields in the structure are set to zero by the kernel
+ *               except priority, local, and ts_inj_map are set to reflect what
+ *               was requested in the priorities, locals, and ts_inj_maps
+ *               syscall parameters.
+ *
+ */
+__INLINE__ uint32_t Kernel_InjFifoGroupAllocate(uint32_t group, uint32_t num_fifos, uint32_t* fifo_ids, uint16_t* priorities, uint16_t* locals, uint8_t* ts_inj_maps, uint32_t* fg_ptr)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "mr 9,%8;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_INJFIFOGRPALLOCATE),
+                      "r" (group),
+                      "r" (num_fifos),
+                      "r" (fifo_ids),
+                      "r" (priorities),
+                      "r" (locals),
+                      "r" (ts_inj_maps),
+                      "r" (fg_ptr)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief Free DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA injection
+ * counters from the specified group.
+ *
+ * \param[in]   grp          Group number whose DMA injection fifos are being
+ *                           freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in]   num_fifos    Number of fifos to be freed from the group
+ *                           (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in]   fifo_ids     Pointer to an array of num_fifos ints where
+ *                           the list of fifos to be freed is provided.
+ *                           Each int is the fifo number (0 to num_fifos-1).
+ * \param[in]   fg_ptr       Pointer to the structure previously filled in when
+ *                           these fifos were allocated.  Upon successful
+ *                           return, this structure is updated to reflect the
+ *                           freed fifos:
+ *                           \li fifos - Structures for freed fifos zero'd.
+ *                                       Freed fifos are disabled.
+ *                           \li permissions - Bits cleared for each freed fifo.
+ *
+ * \retval  0  Successful.  Fifos freed and fg_ptr structure updated as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupFree()
+ * \note  This is a fatal error if any of the fifos are non empty and activated
+ *
+ */
+__INLINE__ uint32_t Kernel_InjFifoGroupFree(uint32_t group, uint32_t num_fifos, uint32_t* fifo_ids, uint32_t* fg_ptr)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_INJFIFOGRPFREE),
+                      "r" (group),
+                      "r" (num_fifos),
+                      "r" (fifo_ids),
+                      "r" (fg_ptr)
+                    : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief DMA InjFifo Initialization By Id
+ *
+ * - For an allocated injection DMA fifo, initialize its start, head, tail, and
+ *   end.
+ * - Compute fifo size and free space.
+ * - Initialize wrap count.
+ * - Activate the fifo.
+ *
+ * \param[in]  fg_ptr    Pointer to fifo group structure.
+ * \param[in]  fifo_id   Id of the fifo to be initialized
+ *                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  va_start  Virtual address of the start of the fifo.
+ * \param[in]  va_head   Virtual address of the head of the fifo (typically
+ *                       equal to va_start).
+ * \param[in]  va_end    Virtual address of the end of the fifo.
+ *
+ * \retval   0  Successful.
+ * \retval  -1  Unsuccessful.  Error checks include
+ *              - va_start < va_end
+ *              - va_start <= va_head <=
+ *                  (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *              - va_start and va_end are 32-byte aligned
+ *              - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS +
+ *                                          DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *
+ */
+__INLINE__ uint32_t Kernel_InjFifoInitById(uint32_t* fg_ptr,
+				     int  fifo_id,
+				     uint32_t* va_start,
+				     uint32_t* va_head,
+				     uint32_t* va_end)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_INJFIFOINITID),
+                      "r" (fg_ptr),
+                      "r" (fifo_id),
+                      "r" (va_start),
+                      "r" (va_head),
+                      "r" (va_end)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+  return( rc );
+}
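+
+/*
+ * Illustrative sketch: allocating one normal-priority, non-local injection
+ * fifo and initializing it.  DMA_InjFifoGroup_t is from the DMA SPI
+ * headers; fifo_start/fifo_end bound a caller-provided, 32-byte-aligned
+ * buffer.
+ *
+ *   uint32_t ids[1] = { 0 };
+ *   uint16_t prio[1] = { 0 }, local[1] = { 0 };
+ *   uint8_t  maps[1] = { 0x01 };   // torus fifo map; non-zero when non-local
+ *   DMA_InjFifoGroup_t fg;
+ *   if (Kernel_InjFifoGroupAllocate(0, 1, ids, prio, local, maps,
+ *                                   (uint32_t*)&fg) == 0)
+ *       Kernel_InjFifoInitById((uint32_t*)&fg, 0, fifo_start, fifo_start,
+ *                              fifo_end);
+ */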
+
+
+/*!
+ * \brief Set DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that
+ * - Sets DCRs establishing the map between the hardware torus fifos and the
+ *   DMA reception fifos that are to receive the packets from those hardware
+ *   torus fifos.
+ * - Sets DCRs establishing the DMA reception fifos that are to receive
+ *   local transfer packets.
+ * - Sets the DCRs establishing the type (0 or 1) of each reception fifo.
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos.
+ * - Leaves all of the fifos that are used in a "disabled" state.
+ *   DMA_RecFifoInitById() initializes and enables the fifos.
+ *
+ * \param[in]  rec_map  Reception Fifo Map structure, defining the mapping.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ * \internal This is an internal syscall
+ * \see DMA_RecFifoSetMap
+ * \note  This function should be called once per job, after DMA_ResetRelease().
+ *        It may be called by any core, but once a core has called it, other
+ *        calls by that same core or any other core will fail.
+ *
+ * \note  During job init, the kernel sets up the DCR clear masks for each
+ *        reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear
+ *        a fifo in group g only clears group g.
+ *
+ */
+__INLINE__ int Kernel_RecFifoSetMap(uint32_t* rec_map)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RECFIFOSETMAP),
+                      "r" (rec_map)
+                    : "r0", "r3", "cc", "memory" );
+  return( rc );
+}
+
+/*!
+ * \brief Get DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that returns a DMA
+ * reception fifo map structure, filled in according to the DCRs.
+ *
+ * \param[in,out]  rec_map  A pointer to a Reception Fifo Map structure
+ *                          that will be filled-in upon return.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int Kernel_RecFifoGetMap(uint32_t* rec_map)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RECFIFOGETMAP),
+                      "r" (rec_map)
+                    : "r0", "r3", "cc", "memory" );
+  return( rc );
+}
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs.  If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed).  In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * ie, only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in]  grp      The group number (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]  target   The core that will receive the interrupt when a
+ *                      fifo in this group reaches its threshold
+ *                      (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ *                      Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler  A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             normal fifo in this group reaches its threshold.
+ *                             This function must be coded to take 4 uint32_t
+ *                             parameters:
+ *                             - A pointer to storage specific to this
+ *                               handler.  This is the normal_handler_parm
+ *                               specified on this function call.
+ *                             - 3 uint32_t parameters that are not used.
+ *                             If normal_handler is NULL, threshold interrupts
+ *                             are not delivered for normal fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler_parm   A pointer to storage that should be passed
+ *                                   to the normal interrupt handling function
+ *                                   (see normal_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  header_handler  ** This parameter is deprecated.  Specify NULL.**
+ *                             A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             header fifo in this group reaches its threshold.
+ *                             This function must be coded to take 2 parameters:
+ *                               void* A pointer to storage specific to this
+ *                                     handler.  This is the header_handler_parm
+ *                                     specified on this function call.
+ *                               int   The global fifo ID of the fifo that hit
+ *                                     its threshold (0 through
+ *                                     NUM_DMA_REC_FIFOS-1).
+ *                             If header_handler is NULL, threshold interrupts
+ *                             are not delivered for header fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  header_handler_parm   ** This parameter is deprecated.  Specify
+ *                                      NULL. **
+ *                                   A pointer to storage that should be passed
+ *                                   to the header interrupt handling function
+ *                                   (see header_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  interruptGroup  A InterruptGroup_t that identifies the
+ *                             group of interrupts that the fifos in this group
+ *                             will become part of.
+ *                             Ignored on subsequent call with the same group.
+ *
+ * \return  RecFifoGroupStruct  Pointer to a DMA Reception Fifo Group structure
+ *                              that reflects the fifos that are being used in
+ *                              this group.  This same structure is shared by
+ *                              all users of this reception fifo group.
+ *                              NULL is returned if an error occurs.
+ *
+ * \note  The following comments from Phil about the internals of the syscall:
+ *   - error checks
+ *     - 0 <= group_id < 4
+ *     - the start of the fifo group is a valid virtual address (tlb mapped)?
+ *   - disable the rDMA
+ *   - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information
+ *   - loop through the map to determine how many and which fifos in this group
+ *     are used, including headers
+ *   - filling in the addresses of used fifos
+ *     - In particular, any pointer to any fifo in the group that is not used
+ *       will have a null pointer
+ *   - furthermore,
+ *     - write starting values to all used fifos
+ *     - make sure all interrupts are cleared
+ *     - enable rDMA
+ *
+ */
+__INLINE__ int Kernel_RecFifoGetFifoGroup(
+			uint32_t*		      	  fifogroup,
+			int                               grp,
+			int                               target,
+			Kernel_CommThreadHandler          normal_handler,
+			void                             *normal_handler_parm,
+			Kernel_CommThreadHandler          header_handler,
+			void                             *header_handler_parm,
+			Kernel_InterruptGroup_t           interruptGroup
+		       )
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "mr 9,%8;"
+                    "mr 10,%9;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RECGETFIFOGROUP),
+                      "r" (fifogroup),
+                      "r" (grp),
+                      "r" (target),
+                      "r" (normal_handler),
+                      "r" (normal_handler_parm),
+                      "r" (header_handler),
+                      "r" (header_handler_parm),
+                      "r" (interruptGroup)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief DMA RecFifo Initialization By Id
+ *
+ * - For a DMA reception fifo, initialize its start, head, tail, and end.
+ * - Compute fifo size and free space.
+ *
+ * \param[in]  fg_ptr    Pointer to fifo group structure.
+ * \param[in]  fifo_id   Id of the fifo to be initialized
+ *                       (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  va_start  Virtual address of the start of the fifo.
+ * \param[in]  va_head   Virtual address of the head of the fifo (typically
+ *                       equal to va_start).
+ * \param[in]  va_end    Virtual address of the end of the fifo.
+ *
+ * \retval   0  Successful.
+ * \retval  -1  Unsuccessful.  Error checks include
+ *              - va_start <  va_end
+ *              - va_start <= va_head < va_end
+ *              - va_start and va_end are 32-byte aligned
+ *              - fifo_size >= DMA_MIN_REC_FIFO_SIZE_IN_BYTES
+ *
+ */
+__INLINE__ int Kernel_RecFifoInitById(
+				   uint32_t*		  fg_ptr,
+				   int                fifo_id,
+				   void               *va_start,
+				   void               *va_head,
+				   void               *va_end
+				  )
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RECFIFOINITID),
+                      "r" (fg_ptr),
+                      "r" (fifo_id),
+                      "r" (va_start),
+                      "r" (va_head),
+                      "r" (va_end)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+  return( rc );
+}
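+
+/*
+ * Illustrative sketch of the reception-fifo bring-up order described
+ * above: set the map once per job, fetch the group, then initialize each
+ * used fifo.  rec_map and fifogroup point to caller-provided storage.
+ *
+ *   Kernel_RecFifoSetMap(rec_map);        // once, after DMA reset release
+ *   Kernel_RecFifoGetFifoGroup(fifogroup, 0, 0, NULL, NULL, NULL, NULL, NULL);
+ *   Kernel_RecFifoInitById(fifogroup, 0, va_start, va_start, va_end);
+ */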
+
+ /*!
+  * \brief Injects a binary (RAW) RAS message to the control system
+  *
+  * Ships a RAS message of the given facility, unit, errcode, and packed data to the control system.  No checking is done on the
+  * correctness of the data.  Can be used to simulate a RAS message for testing purposes.
+  *
+  * \param[in] facility High level component detecting the condition. (e.g., _bgp_fac_kernel, _bgp_fac_application, _bgp_fac_diags)
+  * \param[in] unit Unit generating the RAS event.  (e.g., _bgp_unit_ppc450, _bgp_unit_snoop)
+  * \param[in] err_code Error code for RAS event (e.g., _bgp_err_ppc450_l1d_dpe0)
+  * \param[in] numwords Number of 32-bit integers in the packed binary array
+  * \param[in] array Pointer to the array of packed binary data.
+  *
+  * Restriction: there is currently a limit of eight 32-bit words of packed binary data.
+  *
+  * \internal This function is intended for testing purposes only.  It should not be used in a production system as it could introduce false RAS messages.
+ */
+
+__INLINE__ int Kernel_InjectRAWRAS(
+				   _BGP_Facility facility,
+				   _BGP_RAS_Units unit,
+				   uint16_t err_code,
+				   int numwords,
+				   const uint32_t* array)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+		    "mr 6,%5;"
+		    "mr 7,%6;"
+		    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RAWRASINJECT),
+                    "r" (facility),
+		    "r" (unit),
+		    "r" (err_code),
+		    "r" (numwords),
+		    "r" (array)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+  return( rc );
+}
+
+ /*!
+  * \brief Injects an ASCII (textual) RAS message to the control system
+  *
+  * Ships a RAS message of the given facility, unit, errcode, and an ASCII string to the control system.  No checking is done on the
+  * correctness of the facility or unit.  Can be used to simulate a RAS message for testing purposes.
+  *
+  * \param[in] facility High level component detecting the condition. (e.g., _bgp_fac_kernel, _bgp_fac_application, _bgp_fac_diags)
+  * \param[in] unit Unit generating the RAS event.  (e.g., _bgp_unit_ppc450, _bgp_unit_snoop)
+  * \param[in] err_code Error code for RAS event (e.g., _bgp_err_ppc450_l1d_dpe0)
+  * \param[in] text Pointer to a string of null-terminated ASCII characters
+  *
+  * \internal This function is intended for testing purposes only.  It should not be used in a production system as it could introduce false RAS messages.
+ */
+__INLINE__ int Kernel_InjectASCIIRAS(
+				   _BGP_Facility facility,
+				   _BGP_RAS_Units unit,
+				   uint16_t err_code,
+				   const uint8_t* text)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+		    "mr 6,%5;"
+		    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_ASCIIRASINJECT),
+                    "r" (facility),
+		    "r" (unit),
+		    "r" (err_code),
+		    "r" (text)
+                    : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+  return( rc );
+}
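+
+/*
+ * Illustrative sketch: injecting a test RAS event.  The facility/unit
+ * enumerators are those cited in the comments above; the error code here
+ * is a placeholder.
+ *
+ *   Kernel_InjectASCIIRAS(_bgp_fac_application, _bgp_unit_ppc450,
+ *                         0x1234, (const uint8_t*)"test RAS event");
+ */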
+
+
+
+/*!
+ * \brief Enables/Disables the counter overflow/underflow interrupts
+ *
+ * This function is a wrapper around a system call that can enable or disable the 4 counter overflow/underflow interrupts
+ *
+ * \param[in]  enable  Boolean; TRUE to enable, FALSE to disable the interrupts
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int Kernel_ChgCounterInterruptEnables(uint32_t enable)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_CHGDMACTRINTERRUPT),
+                    "r" (enable)
+                    : "r0", "r3", "cc", "memory" );
+  return( rc );
+}
+
+
+/*!
+ * \brief Clears the Full Reception FIFO (DD1 workaround)
+ *
+ * This function exists to reset the DMA reception fifos - it is a workaround for DD1 only.  It should not be needed in DD2.
+ *
+ * NOTE: the implementation has been removed.
+ */
+__INLINE__ int Kernel_ClearFullReceptionFifo(void)
+{
+  return 0;
+}
+
+#include <spi/lockbox_interface.h>
+
+#if ((!defined(__CNK__)) && (!defined(__BL__)))
+/*! \brief Creates a pthread with a commthread attribute
+ *
+ * \note CNK restriction:  1 CommThread per core is allowed
+ * \note In Dual or VNM, each process must allocate its own commthreads
+ * \note CommThreads are pinned per core.  (e.g., in SMP mode, this SPI must be called 4 times to create enough CommThreads for each processor)
+ * \warning non-portable pthread API
+ * \param[in] thread pthread_t structure
+ * \param[in] attr   pthread_attr_t structure
+ * \param[in] start_routine function pointer of the thread's main()
+ * \param[in] arg    1st argument to the pthread
+ * \return Error condition from pthread_create
+ * \retval 0 success
+ * \retval -1 error, check errno
+ */
+__INLINE__ int pthread_create_CommThread_np( pthread_t *thread,
+                                          pthread_attr_t *attr,
+                                          void *(*start_routine)(void *),
+                                          void *arg )
+{
+  uint32_t usprg0 = _bgp_mfspr( SPRN_USPRG0 );  /*  save orig usprg0 */
+
+  _bgp_mtspr( SPRN_USPRG0, _BGP_COMMTHREAD_MAGIC );
+
+  int rc = pthread_create( thread, attr, start_routine, arg );
+  _bgp_mtspr( SPRN_USPRG0, usprg0 );   /*  restore orig usprg0 */
+
+  return( rc );
+}
+#endif
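+
+/*
+ * Usage sketch (illustrative only): creating one CommThread per core in SMP
+ * mode, per the note above that this SPI must be called once per processor.
+ * worker() is a hypothetical thread main.
+ *
+ *   void *worker(void *arg);
+ *   pthread_t tid[4];
+ *   for (int i = 0; i < 4; i++)
+ *       pthread_create_CommThread_np(&tid[i], NULL, worker, NULL);
+ */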
+
+/*! \brief Causes a commthread to disappear from the runqueue
+ *
+ *  \note Kernel does not guarantee that the instruction pointer, stack pointer, and register state are preserved across a poof.
+ *  \note TLS data is preserved across a poof
+ *  \note This SPI is only executable on a comm. thread.
+ *  \warning non-portable pthread API
+ *  \return error indication
+ *  \retval success Does not return.  Thread has "poofed"
+ *  \retval -1 Calling thread is not a CommThread, so cannot poof
+ */
+__INLINE__ int pthread_poof_np( void )
+{
+    int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                     "sc;"
+		    "mr %0, 3;"
+		    : "=&r" (rc)   /*  early clobber */
+		    : "i" (_BGP_SYSCALL_NR_PTHREAD_POOF)
+		    : "r0", "r3", "cc", "memory" );
+
+  return( rc );
+}
+
+
+
+/*! \defgroup COMMTHRD_OPCODES CommThread Opcodes
+ *  \{
+ * \note Only 1 interrupt route can be specified per opcode
+ * \note CallFunc, DisableIntOnEntry, EnableIntOnPoof can be specified in any combination
+ * \note Current support requires that DisableIntOnEntry and EnableIntOnPoof be specified
+ */
+#define COMMTHRD_OPCODE_DISABLE            0x00  /*!< Interrupt route - Not routed / interrupt disabled */
+#define COMMTHRD_OPCODE_CORE0              0x01  /*!< Interrupt route - Dispatched on core0 */
+#define COMMTHRD_OPCODE_CORE1              0x02  /*!< Interrupt route - Dispatched on core1 */
+#define COMMTHRD_OPCODE_CORE2              0x03  /*!< Interrupt route - Dispatched on core2 */
+#define COMMTHRD_OPCODE_CORE3              0x04  /*!< Interrupt route - Dispatched on core3 */
+#define COMMTHRD_OPCODE_BCAST              0x05  /*!< Interrupt route - Dispatched on all cores */
+#define COMMTHRD_OPCODE_ROUTEMASK          0x0F  /*!< Interrupt route mask */
+#define COMMTHRD_OPCODE_CALLFUNC           0x10  /*!< The provided function will be called on the comm. thread */
+#define COMMTHRD_OPCODE_DISABLEINTONENTRY  0x20  /*!< Interrupts using cntrid will be disabled when comm. thread is invoked */
+#define COMMTHRD_OPCODE_ENABLEINTONPOOF    0x40  /*!< Interrupts using cntrid will be enabled when comm. thread poofs */
+/*!
+ * \}
+ */
+
+/*! \brief Generates an InterruptID value
+ * \param[in] group group of the interrupt.  range 0-9.
+ * \param[in] irq_in_group irq within the group.  range 0-31.
+ * \return Composite value able to be passed to Kernel_SetCommThreadConfig
+ * \see Kernel_SetCommThreadConfig
+ */
+#define Kernel_MkInterruptID(group, irq_in_group) (((group)<<5)|((irq_in_group)&0x1f))
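+
+/*
+ * For example, Kernel_MkInterruptID(2, 7) yields (2<<5)|7 == 71, i.e., IRQ 7
+ * in group 2.
+ */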
+
+/*!
+ * \brief Sets kernel data structures needed to dispatch a communications thread
+ *
+ * Each interrupt on BGP can be used to launch a communications thread.  Since access to the
+ * interrupt controller is privileged, the function exposes some interrupt control to the
+ * user application.
+ * \pre Counter must have been allocated via the LockBox_AllocateCounter() routine.
+ * \pre It is recommended that Kernel_DisableInterruptClass() be called twice on the counter
+ *      to ensure that the interrupt is disabled until all interrupts for the counter
+ *      have been configured.
+ * \pre All
+ * \post After the last call to Kernel_SetCommThreadConfig for the counter, invoke
+ *       Kernel_EnableInterruptClass() and Kernel_HardEnableInterruptClass() on
+ *       that counter to enable the interrupts for that class.
+ * \see LockBox_AllocateCounter
+ * \see Kernel_DisableInterruptClass
+ * \see Kernel_EnableInterruptClass
+ * \see Kernel_HardEnableInterruptClass
+ * \note An interrupt can only belong to 1 interrupt class (a.k.a., lockbox counter)
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Kernel may prevent changing interrupt settings for certain InterruptID values.
+ * \note If an interrupt fires on a core without a comm. thread, results are not guaranteed.
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ * \retval EINVAL invalid parameter
+ * \param[in] InterruptID  Identifies a unique interrupt line.  \see Kernel_MkInterruptID
+ * \param[in] opcode       Specifies what operation to perform when the interrupt occurs. Valid \ref COMMTHRD_OPCODES
+ * \param[in] cntrid       ID of the lockbox counter used for interrupt enable/disable control
+ * \param[in] funcptr      Function pointer that will be invoked when the interrupt fires
+ * \param[in] arg1         1st argument to the funcptr when the interrupt fires
+ * \param[in] arg2         2nd argument to the funcptr when the interrupt fires
+ * \param[in] arg3         3rd argument to the funcptr when the interrupt fires
+ * \param[in] arg4         4th argument to the funcptr when the interrupt fires
+ *
+ */
+__INLINE__ int Kernel_SetCommThreadConfig(int InterruptID, int opcode, LockBox_Counter_t cntrid,
+					  Kernel_CommThreadHandler funcptr,
+					  uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4)
+{
+   int rc = 0;
+   asm __volatile__ ("li 0,%1;"
+		     "mr 3, %2;"
+		     "mr 4, %3;"
+		     "mr 5, %4;"
+		     "mr 6, %5;"
+		     "mr 7, %6;"
+		     "mr 8, %7;"
+		     "mr 9, %8;"
+		     "mr 10, %9;"
+		     "sc;"
+		     "mr %0, 3;"
+		     : "=&r" (rc)  /*  early clobber */
+		     : "i" (_BGP_SYSCALL_NR_SETCOMMTHREADCONFIG),
+		     "r" (InterruptID),
+		     "r" (opcode),
+		     "r" (cntrid),
+		     "r" (funcptr),
+		     "r" (arg1),
+		     "r" (arg2),
+		     "r" (arg3),
+		     "r" (arg4)
+		    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+  return rc;
+}
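+
+/*
+ * Usage sketch of the sequence described in the \pre and \post notes above
+ * (illustrative only; handler is hypothetical and cntr is assumed to have
+ * been obtained from LockBox_AllocateCounter()):
+ *
+ *   void handler(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4);
+ *   LockBox_Counter_t cntr;               // previously allocated
+ *   int id = Kernel_MkInterruptID(2, 7);
+ *   Kernel_DisableInterruptClass(cntr);   // disable twice, per the \pre note
+ *   Kernel_DisableInterruptClass(cntr);
+ *   Kernel_SetCommThreadConfig(id,
+ *       COMMTHRD_OPCODE_CORE0 | COMMTHRD_OPCODE_CALLFUNC |
+ *       COMMTHRD_OPCODE_DISABLEINTONENTRY | COMMTHRD_OPCODE_ENABLEINTONPOOF,
+ *       cntr, handler, 0, 0, 0, 0);
+ *   Kernel_EnableInterruptClass(cntr);
+ *   Kernel_HardEnableInterruptClass(cntr);
+ */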
+
+/*!
+ * \brief Returns the kernel data structures that were specified to dispatch a communications thread
+ *
+ * Each interrupt on BGP can be used to launch a communications thread.  Since access to the
+ * interrupt controller is privileged, the function exposes some interrupt control to the
+ * user application.
+ *
+ * \param[in] InterruptID  Identifies a unique interrupt line.
+ * \param[out] opcode    Storage for opcode value.  Specifies which core receives the interrupt.  It also controls whether the interrupt disables a class of interrupts.  Valid \ref COMMTHRD_OPCODES
+ * \param[out] cntrid       Storage for ID of the lockbox counter used for interrupt enable/disable control
+ * \param[out] funcptr      Storage for Function pointer that will be invoked when the interrupt fires
+ * \param[out] arg1         Storage for 1st argument to the funcptr when the interrupt fires
+ * \param[out] arg2         Storage for 2nd argument to the funcptr when the interrupt fires
+ * \param[out] arg3         Storage for 3rd argument to the funcptr when the interrupt fires
+ * \param[out] arg4         Storage for 4th argument to the funcptr when the interrupt fires
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ * \retval EINVAL invalid parameter
+ *
+ */
+__INLINE__ int Kernel_GetCommThreadConfig(int InterruptID, int* opcode, LockBox_Counter_t* cntrid,
+					  Kernel_CommThreadHandler* funcptr,
+					  uint32_t* arg1, uint32_t* arg2, uint32_t* arg3, uint32_t* arg4)
+{
+  int rc = 0;
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3, %2;"
+                    "mr 4, %3;"
+                    "mr 5, %4;"
+                    "mr 6, %5;"
+                    "mr 7, %6;"
+                    "mr 8, %7;"
+                    "mr 9, %8;"
+		    "mr 10, %9;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)  /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GETCOMMTHREADCONFIG),
+		    "r" (InterruptID),
+		    "r" (opcode),
+		    "r" (cntrid),
+		    "r" (funcptr),
+		    "r" (arg1),
+		    "r" (arg2),
+		    "r" (arg3),
+		    "r" (arg4)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+  return rc;
+}
+
+/*! \brief Flush interrupt enable/disable state
+ *
+ * For each interrupt that has a lockbox counter associated with it, this SPI will
+ * update the interrupt controller to match the state specified by the lockbox counter.
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Kernel is responsible for updating the interrupt controller to match all lockbox counters
+ *
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ */
+__INLINE__ int Kernel_FlushInterruptState(void)
+{
+   int rc;
+   asm __volatile__ ("li 0,%1;"
+		     "sc;"
+		     "mr %0, 3;"
+		     : "=&r" (rc)   /*  early clobber */
+		     : "i" (_BGP_SYSCALL_NR_FLUSHINTSTATE)
+		     : "r0", "r3", "cc", "memory" );
+   return rc;
+}
+
+/*! \brief Indicates that the kernel should disable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel should disable the interrupt.
+ * Kernel will disable the interrupt at its leisure, but it should ensure that no communications thread
+ * is invoked for that interrupt class.
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ *    If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled.  The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Do not disable an already disabled interrupt class.
+ * \note A disabled interrupt class is disabled for all 4 cores, regardless of mode.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+__INLINE__ uint32_t Kernel_DisableInterruptClass(LockBox_Counter_t classid)
+{
+  return ( LockBox_FetchAndInc(classid) );
+}
+
+/*! \brief Indicates that the kernel should enable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel should leave this interrupt enabled.
+ * This does not hard-enable the interrupts for this classid (see Kernel_HardEnableInterruptClass).
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ *    If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled.  The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note The kernel is responsible for incrementing the lockbox counter when the interrupt is hard-disabled.
+ * \note There is a potential race condition that must be avoided in the kernel.  The kernel will need to query the lockbox when an interrupt occurs, and if it is non-zero, then increment it (another core could enable the interrupt class between those 2 events).  One solution is to always FetchAndInc, but that may lead to an extraneous (but rare) FlushInterruptState() call, followed by a FetchAndDec if zero.  There are fancier solutions as well.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+__INLINE__ uint32_t Kernel_EnableInterruptClass(LockBox_Counter_t classid)
+{
+  return ( LockBox_FetchAndDec(classid) );
+}
+
+/*! \brief Indicates that the kernel should hard enable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel has hard-enabled this interrupt.
+ * If the kernel has actually disabled the interrupt, this SPI will enable the interrupt by using the
+ * Kernel_FlushInterruptState() SPI.
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ *    If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled.  The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note The kernel is responsible for incrementing the lockbox counter when the interrupt is disabled.
+ * \note There is a potential race condition that must be avoided in the kernel.  The kernel will need to query the lockbox when an interrupt occurs, and if it is non-zero, then increment it (another core could enable the interrupt class between those 2 events).  One solution is to always FetchAndInc, but that may lead to an extraneous (but rare) FlushInterruptState() call, followed by a FetchAndDec if zero.  There are fancier solutions as well.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+__INLINE__ void Kernel_HardEnableInterruptClass(LockBox_Counter_t classid)
+{
+  LockBox_FetchAndDec(classid);
+  Kernel_FlushInterruptState();
+}
+
+/*! \brief Delivers an interrupt to the cores specified in the mask
+ * \param[in] ipiset   IPI set used to deliver the signal
+ * \param[in] coremask Bitmask describing which processor cores will receive the interrupt.  Processor 0 is the least significant bit (1<<0 in C parlance).  Processor 3 is 1<<3.  Any combination of processors can be interrupted.
+ * \note It is possible to interrupt yourself.
+ */
+__INLINE__ int Kernel_DeliverCommSignal(uint32_t ipiset, uint32_t coremask)
+{
+   int rc = 0;
+   asm __volatile__ ("li 0,%1;"
+		     "mr 3, %2;"
+		     "mr 4, %3;"
+		     "sc;"
+                     "mr %0, 3;"
+		     : "=&r" (rc)   /*  early clobber */
+		     : "i" (_BGP_SYSCALL_NR_DELIVERCOMMSIGNAL),
+		     "r" (ipiset),
+		     "r" (coremask)
+		     : "r0", "r3", "r4", "cc", "memory" );
+  return rc;
+}
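+
+/*
+ * For example, interrupting cores 0 and 2 (the ipiset value here is a
+ * hypothetical placeholder):
+ *
+ *   Kernel_DeliverCommSignal(0, (1<<0) | (1<<2));
+ */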
+
+/*!
+ * \brief Suspends/Resumes a core
+ *
+ * \param[in]  target_core  ID of the core to be suspended/resumed
+ * \param[in]  suspend      Boolean.  TRUE if the core is to be suspended
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ * \note In a threaded application, take care not to suspend a core that holds a lock needed by the active thread.  (e.g., if the other core is performing a printf, it may have the glibc io subsystem locked with a mutex; the main thread may then deadlock if it also calls printf)
+ *
+ */
+__INLINE__ int Kernel_ChangeCoreEnables(uint32_t target_core, uint32_t suspend)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_CHGCOREENABLES),
+                    "r" (target_core),
+		    "r" (suspend)
+                    : "r0", "r3", "r4", "cc", "memory" );
+  return( rc );
+}
+
+/*! \brief Persistent Shared Memory interface to the application.  Currently simply a wrapper
+ *         around open(2), prefixing the name with /dev/persist/
+ */
+__INLINE__ int persist_open( char *name, int oflag, mode_t mode )
+{
+   char pathName[PATH_MAX];
+   strcpy(pathName, "/dev/persist/");
+   strncat(pathName, name, PATH_MAX - strlen("/dev/persist/") - 1);
+   return open(pathName, oflag, mode);
+}
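+
+/*
+ * For example, opening (or creating) a persistent file named "state":
+ *
+ *   int fd = persist_open("state", O_CREAT | O_RDWR, 0600);
+ *   // ...actually opens /dev/persist/state
+ */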
+
+/*! \brief Memory region types that can be used for Kernel_GetMemorySize.
+*/
+enum KERNEL_MEMSIZETYPE
+{
+   KERNEL_MEMSIZE_SHARED = 200,       /*!< Size in bytes of shared memory */
+   KERNEL_MEMSIZE_PERSIST,            /*!< Size in bytes of persistent memory */
+   KERNEL_MEMSIZE_HEAPAVAIL,          /*!< Size in bytes of available heap (must be process leader (a.k.a. main) thread) */
+   KERNEL_MEMSIZE_ESTHEAPAVAIL,       /*!< Estimated size in bytes of the available heap */
+   KERNEL_MEMSIZE_STACKAVAIL,         /*!< Size in bytes available to the process leader's stack. (must be process leader (a.k.a. main) thread) */
+   KERNEL_MEMSIZE_ESTSTACKAVAIL,      /*!< Estimated size in bytes available to the process leader's stack */
+   KERNEL_MEMSIZE_STACK,              /*!< Size in bytes of the process leader's stack (must be process leader (a.k.a. main) thread) */
+   KERNEL_MEMSIZE_ESTSTACK,           /*!< Estimated size in bytes of the process leader's stack */
+   KERNEL_MEMSIZE_HEAP,               /*!< Size in bytes of the heap */
+   KERNEL_MEMSIZE_GUARD               /*!< Size in bytes of the heap guardpage */
+};
+
+/*! \brief Returns size of the specified memory region.
+ */
+
+__INLINE__ int Kernel_GetMemorySize(enum KERNEL_MEMSIZETYPE type, uint32_t* size)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GETMEMSIZE),
+                    "r" (type),
+		    "r" (size)
+                    : "r0", "r3", "r4", "cc", "memory" );
+  return( rc );
+}
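+
+/*
+ * For example, querying the shared-memory size:
+ *
+ *   uint32_t shmem_bytes = 0;
+ *   if (Kernel_GetMemorySize(KERNEL_MEMSIZE_SHARED, &shmem_bytes) == 0)
+ *       printf("shared memory: %u bytes\n", shmem_bytes);
+ */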
+
+/*! \brief Sets a virtual memory window for the process, based on a user-supplied physical address and TLB slot
+ */
+
+__INLINE__ int Kernel_SetProcessWindow(int tlbslot, uint64_t window_paddr, size_t window_reqsize, uint32_t window_permissions,
+                                 uint32_t* window_actualvaddr, uint64_t* window_actualpaddr, size_t* window_actualsize)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+   /*  split the uint64 so we can set up the uint32 registers */
+  uint32_t window_paddr_h = window_paddr >> 32;
+  uint32_t window_paddr_l = (uint32_t)window_paddr;
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "mr 9,%8;"
+                    "mr 10,%9;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_SETPRWINDOW),
+                    "r" (tlbslot),
+                    "r" (window_paddr_h),
+                    "r" (window_paddr_l),
+                    "r" (window_reqsize),
+                    "r" (window_permissions),
+                    "r" (window_actualvaddr),
+                    "r" (window_actualpaddr),
+		    "r" (window_actualsize)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+  return( rc );
+}
+
+/*! \brief Returns the address and size of the process memory window that was set by Kernel_SetProcessWindow.
+ */
+
+__INLINE__ int Kernel_GetProcessWindow(int tlbslot,
+                        uint32_t* window_actualvaddr, uint64_t* window_actualpaddr, size_t* window_actualsize)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GETPRWINDOW),
+                    "r" (tlbslot),
+                    "r" (window_actualvaddr),
+                    "r" (window_actualpaddr),
+		    "r" (window_actualsize)
+                    : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+  return( rc );
+}
+
+/*! \brief Returns the range of available TLB slots for use by Kernel_SetProcessWindow
+ */
+
+__INLINE__ int Kernel_GetProcessWindowSlotRange(int *minslot, int *maxslot)
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc)   /*  early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GETPRWINDOWSLOTS),
+                    "r" (minslot),
+		    "r" (maxslot)
+                    : "r0", "r3", "r4", "cc", "memory" );
+  return( rc );
+}
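+
+/*
+ * Usage sketch combining the two calls above (illustrative only; the
+ * physical address, size, and permissions values are placeholders):
+ *
+ *   int minslot, maxslot;
+ *   uint32_t vaddr, perms = 0;
+ *   uint64_t paddr;
+ *   size_t size;
+ *   if (Kernel_GetProcessWindowSlotRange(&minslot, &maxslot) == 0)
+ *       Kernel_SetProcessWindow(minslot, 0x100000000ULL, 1 << 20, perms,
+ *                               &vaddr, &paddr, &size);
+ */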
+
+/*! \brief Returns the number of Active Processes in the node (np adjusted)
+ *
+ * \return Processor Count
+ * \retval 1 one process is active in the node. (SMP, or DUAL/VN with -np restrictions)
+ * \retval 2 two processes active in this node  (DUAL or VN with -np restrictions)
+ * \retval 3 three processes active in this node (VN with -np restrictions)
+ * \retval 4 four processes active in this node (VN)
+ */
+__INLINE__ int Kernel_ActiveProcessCount( void )
+{
+  int rc = 0;  /*  this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                     "sc;"
+		    "mr %0, 3;"
+		    : "=&r" (rc)   /*  early clobber */
+		    : "i" (_BGP_SYSCALL_NR_ACTIVEPROCESSCOUNT)
+		    : "r0", "r3", "cc", "memory" );
+  return( rc );
+}
+
+
+#if SPI_DEPRECATED
+
+/*! \see Kernel_PhysicalProcessorID */
+#define BGP_PhysicalProcessorID  Kernel_PhysicalProcessorID
+
+/*! \see Kernel_Virtual2Physical */
+#define _bgp_Virtual2Physical    Kernel_Virtual2Physical
+
+/*! \see Kernel_GetPersonality */
+#define rts_get_personality(p,s)    Kernel_GetPersonality(p,s)
+
+/*! \see Kernel_PhysicalProcessorID */
+#define rts_get_processor_id()      Kernel_PhysicalProcessorID()
+
+/*! \see Kernel_GetProcessorVersion */
+#define rts_get_processor_version() Kernel_GetProcessorVersion()
+#endif
+
+__END_DECLS
+
+
+#endif /* ! __LINUX__ */
+
+
+#endif  /*  Add nothing below this line */
diff --git a/arch/powerpc/include/spi/linux_interface.h b/arch/powerpc/include/spi/linux_interface.h
new file mode 100644
index 0000000..bb49e17
--- /dev/null
+++ b/arch/powerpc/include/spi/linux_interface.h
@@ -0,0 +1,777 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _BGP_SPI_LINUX_INTERFACE_H_  /*  Prevent multiple inclusion */
+#define _BGP_SPI_LINUX_INTERFACE_H_
+
+
+/*! \brief Returns the physical processor ID of the running PPC450 core.
+ *
+ * \return Physical processor ID
+ * \retval 0 Running on processor 0
+ * \retval 1 Running on processor 1
+ * \retval 2 Running on processor 2
+ * \retval 3 Running on processor 3
+ */
+extern inline uint32_t Kernel_PhysicalProcessorID( void ) { return 0; }  /*  FIXME: stub - always reports core 0 under Linux */
+
+
+/*! \brief Causes a commthread to disappear from the runqueue
+ *
+ *  \note Kernel does not guarantee that the instruction pointer, stack pointer, and register state are preserved across a poof.
+ *  \note TLS data is preserved across a poof
+ *  \note This SPI is only executable on a comm. thread.
+ *  \warning non-portable pthread API
+ *  \return error indication
+ *  \retval success Does not return.  Thread has "poofed"
+ *  \retval -1 Calling thread is not a CommThread, so cannot poof
+ */
+int pthread_poof_np( void );
+
+
+
+
+/*!
+ * \brief Clears the Full Reception FIFO (DD1 workaround)
+ *
+ * This function exists to reset the DMA reception fifos - it is a workaround for DD1 only.  It should not be needed in DD2.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+int Kernel_ClearFullReceptionFifo(void);
+
+
+/*! \brief Generates an InterruptID value
+ * \param[in] group group of the interrupt.  range 0-9.
+ * \param[in] irq_in_group irq within the group.  range 0-31.
+ * \return Composite value able to be passed to Kernel_SetCommThreadConfig
+ * \see Kernel_SetCommThreadConfig
+ */
+#define Kernel_MkInterruptID(group, irq_in_group) ((group<<5)|(irq_in_group&0x1f))
+
+
+/*!
+ * \brief Communication Thread interrupt handler function prototype
+ *
+ * \param[in] arg1 1st argument to commthread
+ * \param[in] arg2 2nd argument to commthread
+ * \param[in] arg3 3rd argument to commthread
+ * \param[in] arg4 4th argument to commthread
+ */
+typedef void (*Kernel_CommThreadHandler)(uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4);
+
+/*!
+ * \brief Interrupt Group Prototype
+ *
+ * This data type is used to group interrupts of various devices together
+ * so they can be enabled or disabled simultaneously.  A given interrupt user
+ * (e.g., messaging, QCD, etc.) specifies a value of this data type when its
+ * interrupt resources are allocated.  The kernel associates those resources
+ * with the specified value so when this value is specified on the enable or
+ * disable interrupts system call, all of the interrupts in the group are
+ * operated upon.  Examples of devices that can be grouped in this way include
+ * DMA fifos, torus, tree, etc.
+ *
+ * \todo The kernel should provide interfaces to allocate a
+ *       Kernel_InterruptGroup_t and deallocate it.
+ */
+typedef void * Kernel_InterruptGroup_t;
+
+
+/*! \defgroup COMMTHRD_OPCODES CommThread Opcodes
+ *  \{
+ * \note Only 1 interrupt route can be specified per opcode
+ * \note CallFunc, DisableIntOnEntry, EnableIntOnPoof can be specified in any combination
+ * \note Current support requires that DisableIntOnEntry and EnableIntOnPoof be specified
+ */
+#define COMMTHRD_OPCODE_DISABLE            0x00  /*!< Interrupt route - Not routed / interrupt disabled */
+#define COMMTHRD_OPCODE_CORE0              0x01  /*!< Interrupt route - Dispatched on core0 */
+#define COMMTHRD_OPCODE_CORE1              0x02  /*!< Interrupt route - Dispatched on core1 */
+#define COMMTHRD_OPCODE_CORE2              0x03  /*!< Interrupt route - Dispatched on core2 */
+#define COMMTHRD_OPCODE_CORE3              0x04  /*!< Interrupt route - Dispatched on core3 */
+#define COMMTHRD_OPCODE_BCAST              0x05  /*!< Interrupt route - Dispatched on all cores */
+#define COMMTHRD_OPCODE_ROUTEMASK          0x0F  /*!< Interrupt route mask */
+#define COMMTHRD_OPCODE_CALLFUNC           0x10  /*!< The provided function will be called on the comm. thread */
+#define COMMTHRD_OPCODE_DISABLEINTONENTRY  0x20  /*!< Interrupts using cntrid will be disabled when comm. thread is invoked */
+#define COMMTHRD_OPCODE_ENABLEINTONPOOF    0x40  /*!< Interrupts using cntrid will be enabled when comm. thread poofs */
+/*!
+ * \}
+ */
+
+
+/*!
+ * \brief Sets kernel data structures needed to dispatch a communications thread
+ *
+ * Each interrupt on BGP can be used to launch a communications thread.  Since access to the
+ * interrupt controller is privileged, the function exposes some interrupt control to the
+ * user application.
+ * \pre Counter must have been allocated via the LockBox_AllocateCounter() routine.
+ * \pre It is recommended that Kernel_DisableInterruptClass() be called twice on the counter
+ *      to ensure that the interrupt is disabled until all interrupts for the counter
+ *      have been configured.
+ * \pre All
+ * \post After the last call to Kernel_SetCommThreadConfig for the counter, invoke
+ *       Kernel_EnableInterruptClass() and Kernel_HardEnableInterruptClass() on
+ *       that counter to enable the interrupts for that class.
+ * \see LockBox_AllocateCounter
+ * \see Kernel_DisableInterruptClass
+ * \see Kernel_EnableInterruptClass
+ * \see Kernel_HardEnableInterruptClass
+ * \note An interrupt can only belong to 1 interrupt class (a.k.a., lockbox counter)
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Kernel may prevent changing interrupt settings for certain InterruptID values.
+ * \note If an interrupt fires on a core without a comm. thread, results are not guaranteed.
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ * \retval EINVAL invalid parameter
+ * \param[in] InterruptID  Identifies a unique interrupt line.  \see Kernel_MkInterruptID
+ * \param[in] opcode       Specifies what operation to perform when the interrupt occurs. Valid \ref COMMTHRD_OPCODES
+ * \param[in] cntrid       ID of the lockbox counter used for interrupt enable/disable control
+ * \param[in] funcptr      Function pointer that will be invoked when the interrupt fires
+ * \param[in] arg1         1st argument to the funcptr when the interrupt fires
+ * \param[in] arg2         2nd argument to the funcptr when the interrupt fires
+ * \param[in] arg3         3rd argument to the funcptr when the interrupt fires
+ * \param[in] arg4         4th argument to the funcptr when the interrupt fires
+ *
+ */
+typedef uint32_t* LockBox_Counter_t;  /*!< Counter ID definition */
+int Kernel_SetCommThreadConfig(int InterruptID,
+			       int opcode,
+			       LockBox_Counter_t cntrid,
+			       Kernel_CommThreadHandler funcptr,
+			       uint32_t arg1,
+			       uint32_t arg2,
+			       uint32_t arg3,
+			       uint32_t arg4);
+
+
+
+/*! \brief Indicates that the kernel should disable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel should disable the interrupt.
+ * Kernel will disable the interrupt at its leisure, but it should ensure that no communications thread
+ * is invoked for that interrupt class.
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ *    If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled.  The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Do not disable an already disabled interrupt class.
+ * \note A disabled interrupt class is disabled for all 4 cores, regardless of mode.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+uint32_t Kernel_DisableInterruptClass(LockBox_Counter_t classid);
+
+
+
+/*!
+ * \brief Enables/Disables the counter overflow/underflow interrupts
+ *
+ * This function is a wrapper around a system call that can enable or disable the 4 counter overflow/underflow interrupts
+ *
+ * \param[in]  enable  Boolean; TRUE to enable, FALSE to disable the interrupts
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+int Kernel_ChgCounterInterruptEnables(uint32_t enable);
+
+
+/* int rts_get_personality( void * pers, size_t size ); */
+
+
+/*!
+ * \brief Update mapping info about physically contiguous application memory regions
+ *        (used only in HPC mode)
+ */
+int Kernel_UpdateAppSegmentInfo(void);
+
+
+
+/*!
+ * \brief Internal helper function for virtual to physical address translation
+ *
+ */
+
+int User_Virtual2Physical(unsigned long vaddr,      /*  32bit Virtual start address */
+			  size_t   vsize,           /*  size in bytes of virtual range */
+			  uint32_t *ua_out,         /*  upper 4 Physical Address bits */
+			  uint32_t *pa_out );
+
+
+/*! \brief Translate a 32bit Virtual Address to a 36bit Physical Address, returning separated upper and lower parts.
+ *
+ * \param[in] pVA   32bit virtual address in the calling process
+ * \param[in] vsize size in bytes of the virtual range
+ * \param[out] ua_out upper 4 physical address bits
+ * \param[out] pa_out lower 32 physical address bits
+ * \return Error condition for translation
+ * \retval  0 Successful translation, with ua_out and pa_out filled in
+ * \retval -1 Invalid Virtual Address for this process, ua_out and pa_out unmodified.
+ * \retval -2 The range from vaddr to (vaddr+vsize) is not physically contiguous.
+ * \retval -3 vaddr in Scratch, but no Scratch, or not enough Scratch, is enabled.
+ * \retval -4 invalid parameter
+ *
+ *  \warning Supports only Text, Data, Stack, and (optional) eDRAM Scratch translation
+ *  \warning CNK "pagesize" is 1MB.
+ *  \warning Text and Data are virtually contiguous, but not necessarily physically contiguous.
+ *  \todo Does not (currently) support > 4GB DDR space.
+ *  \todo Does not (currently) support Shared Memory Area.
+ */
+int Kernel_Virtual2Physical( void     *pVA,      /*  input: 32bit Virtual start address */
+			     size_t   vsize,     /*  input: size in bytes of virtual range */
+			     uint32_t *ua_out,   /*  output: upper  4 Physical Address bits */
+			     uint32_t *pa_out );  /*  output: lower 32 Physical Address bits */
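+
+/*
+ * A successful translation can be recombined into a full 36-bit physical
+ * address (buf and len are hypothetical):
+ *
+ *   uint32_t ua, pa;
+ *   if (Kernel_Virtual2Physical(buf, len, &ua, &pa) == 0) {
+ *       uint64_t phys = ((uint64_t)ua << 32) | pa;
+ *   }
+ */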
+
+
+/*!
+ * \brief Query Free DMA Counter Subgroups within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available) subgroups within the specified group.
+ *
+ * \param[in]   type           Specifies whether this is an injection or
+ *                             reception counter group (DMA_Type_Injection
+ *                             or DMA_Type_Reception)
+ * \param[in]   group          Group number being queried (0 to
+ *                             DMA_NUM_COUNTER_GROUPS-1)
+ * \param[out]  num_subgroups  Pointer to an int where the number of free
+ *                             subgroups in the specified group is returned
+ * \param[out]  subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of num_subgroups subgroups is returned.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).  The
+ *                             caller must provide space for
+ *                             DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the
+ *                             entire counter group is free.
+ *
+ * \retval  0  Successful.  num_subgroups and subgroups array set as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupQueryFree()
+ * \note The kernel may need to synchronize with other cores performing
+ *       allocate or free syscalls.
+ *
+ */
+uint32_t Kernel_CounterGroupQueryFree(uint32_t   type,
+				      uint32_t   group,
+				      uint32_t * num_subgroups,
+				      uint32_t * subgroups);
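+
+/*
+ * Usage sketch (illustrative only), using the constants named in the
+ * parameter documentation above:
+ *
+ *   uint32_t n, sub[DMA_NUM_COUNTERS_PER_SUBGROUP];
+ *   if (Kernel_CounterGroupQueryFree(DMA_Type_Injection, 0, &n, sub) == 0)
+ *       ;  // sub[0..n-1] now lists the free subgroups of group 0
+ */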
+
+
+/*!
+ * \brief Allocate DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that allocates DMA counters
+ * from the specified group.  Counters may be allocated in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.  Parameters specify how interrupts,
+ * generated when a counter hits zero, are to be handled.  A
+ * DMA_CounterGroup_t structure is returned for use in other inline
+ * functions to operate on the allocated counters.
+ *
+ * \param[in]   type           Specifies whether this is an injection or
+ *                             reception counter group (DMA_Type_Injection
+ *                             or DMA_Type_Reception)
+ * \param[in]   group          Group number whose counters are being allocated
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   num_subgroups  Number of subgroups to be allocated from the group
+ *                             (1 to DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in]   subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of subgroups to be allocated is provided.
+ *                             Each int is the subgroup number
+ *                             (0 to num_subgroups-1).
+ * \param[in]   target         The core that will receive the interrupt when a
+ *                             counter in this allocation hits zero
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   handler        A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             counter in this allocation hits zero.  This
+ *                             function must be coded to take the following parameters:
+ *                               void*  A pointer to storage specific to this
+ *                                      handler.  This is the handler_parm
+ *                                      specified on this allocation function.
+ *                               int    The counter's subgroup number (0 to
+ *                                      DMA_NUM_COUNTER_SUBGROUPS-1).
+ *                                      Note this number spans all groups.
+ *                             If handler is NULL, hit-zero interrupts will not
+ *                             be enabled for these counters.
+ * \param[in]   handler_parm   A pointer to storage that should be passed to the
+ *                             interrupt handling function (see handler
+ *                             parameter)
+ * \param[in]   interruptGroup A InterruptGroup_t that identifies the
+ *                             group of interrupts that the counters being
+ *                             allocated will become part of.
+ * \param[out]  cg_ptr         Pointer to a structure that is filled in upon
+ *                             successful return for use in other inline
+ *                             functions to operate on the allocated counters.
+ *                             \li counter -     Array of software counter
+ *                                               structures.  Each element
+ *                                               points to the corresponding
+ *                                               hardware counter in DMA SRAM.
+ *                                               Pointers are null if not
+ *                                               allocated.
+ *                                               Counters are initialized to
+ *                                               DMA_COUNTER_INIT_VAL,
+ *                                               disabled, their hit_zero bit
+ *                                               is off, base and max are NULL.
+ *                             \li status_ptr  - Points to status area within the
+ *                                               DMA memory map.
+ *                             \li permissions - Bits set for each allocated
+ *                                               counter
+ *                             \li grp_permissions - Permissions for each
+ *                                                   subgroup
+ *                             \li group_id    - The group number
+ *                             \li type        - The type of DMA (injection or
+ *                                               reception)
+ *
+ * \retval  0  Successful.  Counters allocated and cg_ptr structure filled in as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.  Nothing has been
+ *                            allocated.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupAllocate()
+ * \note The kernel may need to synchronize with other cores performing queries
+ *       or frees.
+ *
+ */
+uint32_t Kernel_CounterGroupAllocate(uint32_t   type,
+				     uint32_t   group,
+				     uint32_t   num_subgroups,
+				     uint32_t * subgroups,
+				     uint32_t   target,
+				     uint32_t   handler,
+				     uint32_t * handler_parm,
+				     uint32_t   interruptGroup,
+				     uint32_t * cg_ptr);
+
+
+/*!
+ * \brief Free DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA counters
+ * from the specified group.  Counters may be freed in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.
+ *
+ * \param[in]   group          Group number whose counters are being freed
+ *                             (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]   num_subgroups  Number of subgroups to be freed from the group
+ *                             (1-DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in]   subgroups      Pointer to an array of num_subgroups ints where
+ *                             the list of subgroups to be freed is provided.
+ *                             Each int is the subgroup number
+ *                             (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).
+ * \param[out]  cg_ptr         Pointer to the structure previously filled in when
+ *                             these counters were allocated.  Upon successful
+ *                             return, this structure is updated to reflect the
+ *                             freed counters:
+ *                             \li counter[]  -  Counter structures.  Pointers to
+ *                                               freed counters are nulled.
+ *                             \li permissions - Bits cleared for each freed
+ *                                               counter.
+ *
+ * \retval  0  Successful.  Counters freed and cg_ptr structure updated as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupFree()
+ * \note The kernel may need to synchronize with other cores performing allocates
+ *       or queries.
+ */
+uint32_t Kernel_CounterGroupFree( uint32_t   group,
+				  uint32_t   num_subgroups,
+				  uint32_t * subgroups,
+				  uint32_t * cg_ptr );
+
+
+/*!
+ * \brief Query Free DMA InjFifos within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available to be allocated) fifos within the specified group.
+ *
+ * \param[in]   group          Group number being queried
+ *                             (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[out]  num_fifos      Pointer to an int where the number of free
+ *                             fifos in the specified group is returned
+ * \param[out]  fifo_ids       Pointer to an array of num_fifos ints where
+ *                             the list of free fifos is returned.
+ *                             Each int is the fifo number
+ *                             (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *                             The caller must provide space for
+ *                             DMA_NUM_INJ_FIFOS_PER_GROUP ints,
+ *                             in case the entire fifo group is free.
+ *
+ * \retval  0  Successful.  num_fifos and fifo_ids array set as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupQueryFree()
+ */
+
+uint32_t Kernel_InjFifoGroupQueryFree( uint32_t   group,
+				       uint32_t * num_fifos,
+				       uint32_t * fifo_ids);
+
+
+/*!
+ * \brief Allocate DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that allocates specified
+ * DMA injection fifos from the specified group.  Parameters specify whether
+ * each fifo is high or normal priority, local or non-local, and which torus
+ * fifos it maps to.  A DMA_InjFifoGroup_t structure is returned for
+ * use in other inline functions to operate on the allocated fifos.
+ *
+ * Refer to the interrupt discussion at the top of this include file to see why
+ * there are no interrupt-related parameters.
+ *
+ * \param[in]   group        Group number whose DMA injection fifos are being
+ *                           allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in]   num_fifos    Number of fifos to be allocated from the group
+ *                           (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in]   fifo_ids     Pointer to an array of num_fifos ints where
+ *                           the list of fifos to be allocated is provided.
+ *                           Each int is the fifo number (0 to num_fifos-1).
+ * \param[in]   priorities   Pointer to an array of num_fifos short ints where
+ *                           the list of priorities to be assigned to the fifos
+ *                           is provided.  Each short int indicates the priority
+ *                           to be assigned to each of the fifos identified in
+ *                           the fifo_ids array (0 is normal, 1 is high priority).
+ * \param[in]   locals       Pointer to an array of num_fifos short ints where
+ *                           an indication is provided of whether each fifo will
+ *                           be used for local transfers (within the same node)
+ *                           or torus transfers.  Each short int indicates the
+ *                           local/non-local attribute to be assigned to each of
+ *                           the fifos identified in the fifo_ids array (0 is
+ *                           non-local, 1 is local).  If 0, the corresponding
+ *                           array element in ts_inj_maps indicates which torus
+ *                           fifos can be injected.
+ * \param[in]   ts_inj_maps  Pointer to an array of num_fifos short ints where
+ *                           the torus fifos that can be injected are specified
+ *                           for each fifo.  Each short int specifies which of
+ *                           the 8 torus injection fifos can be injected when a
+ *                           descriptor is injected into the DMA injection fifo.
+ *                           Must be non-zero when the corresponding "locals"
+ *                           is 0.
+ * \param[out]  fg_ptr       Pointer to a structure that is filled in upon
+ *                           successful return for use in other inline functions
+ *                           to operate on the allocated fifos.
+ *                           \li fifos - Array of fifo structures.  Structures
+ *                                       for allocated fifos are initialized as
+ *                                       documented below.  Structures for
+ *                                       fifos not allocated by this instance of
+ *                                       this syscall are initialized to binary
+ *                                       zeros.  Allocated fifos are enabled.
+ *                           \li status_ptr  - Points to status area within the
+ *                                             DMA memory map.
+ *                           \li permissions - Bits indicating which fifos were
+ *                                             allocated during this syscall.
+ *                           \li group_id    - The id of this group.
+ *
+ * \retval  0  Successful.  Fifos allocated and fg_ptr structure filled in as
+ *                          described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupAllocate()
+ * \return The group fifo structure pointed to by fg_ptr is completely
+ *         initialized as follows:
+ *         - status_ptr points to the appropriate fifo group DMA memory map
+ *         - fifo structures array.  Fifo structures for fifos not allocated
+ *           during this syscall are initialized to binary zeros.  Fifo
+ *           structures for fifos allocated during this syscall are initialized:
+ *             - fifo_hw_ptr points to the DMA memory map for this fifo.  The
+ *               hardware start, end, head, and tail are set to zero by the
+ *               kernel.
+ *             - All other fields in the structure are set to zero by the kernel
+ *               except priority, local, and ts_inj_map are set to reflect what
+ *               was requested in the priorities, locals, and ts_inj_maps
+ *               syscall parameters.
+ *
+ */
+uint32_t Kernel_InjFifoGroupAllocate( uint32_t   group,
+				      uint32_t   num_fifos,
+				      uint32_t * fifo_ids,
+				      uint16_t * priorities,
+				      uint16_t * locals,
+				      uint8_t  * ts_inj_maps,
+				      uint32_t * fg_ptr );
+
+
+
+/*!
+ * \brief Free DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA injection
+ * fifos from the specified group.
+ *
+ * \param[in]   group        Group number whose DMA injection fifos are being
+ *                           freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in]   num_fifos    Number of fifos to be freed from the group
+ *                           (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in]   fifo_ids     Pointer to an array of num_fifos ints where
+ *                           the list of fifos to be freed is provided.
+ *                           Each int is the fifo number (0 to num_fifos-1).
+ * \param[in]   fg_ptr       Pointer to the structure previously filled in when
+ *                           these fifos were allocated.  Upon successful
+ *                           return, this structure is updated to reflect the
+ *                           freed fifos:
+ *                           \li fifos - Structures for freed fifos zero'd.
+ *                                       Freed fifos are disabled.
+ *                           \li permissions - Bits cleared for each freed fifo.
+ *
+ * \retval  0  Successful.  Fifos freed and fg_ptr structure updated as described.
+ * \retval  -1 Unsuccessful.  errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupFree()
+ * \note  It is a fatal error if any of the fifos is non-empty and activated
+ *
+ */
+uint32_t Kernel_InjFifoGroupFree(uint32_t   group,
+				 uint32_t   num_fifos,
+				 uint32_t * fifo_ids,
+				 uint32_t * fg_ptr);
+
+
+
+/*!
+ * \brief DMA InjFifo Initialization By Id
+ *
+ * - For an allocated injection DMA fifo, initialize its start, head, tail, and
+ *   end.
+ * - Compute fifo size and free space.
+ * - Initialize wrap count.
+ * - Activate the fifo.
+ *
+ * \param[in]  fg_ptr    Pointer to fifo group structure.
+ * \param[in]  fifo_id   Id of the fifo to be initialized
+ *                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in]  va_start  Virtual address of the start of the fifo.
+ * \param[in]  va_head   Virtual address of the head of the fifo (typically
+ *                       equal to va_start).
+ * \param[in]  va_end    Virtual address of the end of the fifo.
+ *
+ * \retval   0  Successful.
+ * \retval  -1  Unsuccessful.  Error checks include
+ *              - va_start < va_end
+ *              - va_start <= va_head <=
+ *                  (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *              - va_start and va_end are 32-byte aligned
+ *              - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS +
+ *                                          DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *
+ */
+uint32_t Kernel_InjFifoInitById(uint32_t * fg_ptr,
+				int        fifo_id,
+				uint32_t * va_start,
+				uint32_t * va_head,
+				uint32_t * va_end);
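+
+/*
+ * Usage sketch (illustrative only; FIFO_BYTES is a hypothetical fifo size
+ * that is a multiple of 32, and fg_ptr is assumed to have come from
+ * Kernel_InjFifoGroupAllocate).  Note the 32-byte alignment required above:
+ *
+ *   static uint32_t fifo[FIFO_BYTES / 4] __attribute__((aligned(32)));
+ *   Kernel_InjFifoInitById(fg_ptr, 0, fifo, fifo, fifo + FIFO_BYTES / 4);
+ */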
+
+
+
+/*!
+ * \brief Set DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that
+ * - Sets DCRs establishing the map between the hardware torus fifos and the
+ *   DMA reception fifos that are to receive the packets from those hardware
+ *   torus fifos.
+ * - Sets DCRs establishing the DMA reception fifos that are to receive
+ *   local transfer packets.
+ * - Sets the DCRs establishing the type (0 or 1) of each reception fifo.
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos.
+ * - Leaves all of the fifos that are used in a "disabled" state.
+ *   DMA_RecFifoInitById() initializes and enables the fifos.
+ *
+ * \param[in]  rec_map  Reception Fifo Map structure, defining the mapping.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ * \internal This is an internal syscall
+ * \see DMA_RecFifoSetMap
+ * \note  This function should be called once per job, after DMA_ResetRelease().
+ *        It may be called by any core, but once a core has called it, other
+ *        calls by that same core or any other core will fail.
+ *
+ * \note  During job init, the kernel sets up the DCR clear masks for each
+ *        reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear
+ *        a fifo in group g only clears group g.
+ *
+ */
+int Kernel_RecFifoSetMap(uint32_t* rec_map);
+
+
+/*!
+ * \brief Get DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that returns a DMA
+ * reception fifo map structure, filled in according to the DCRs.
+ *
+ * \param[in,out]  rec_map  A pointer to a Reception Fifo Map structure
+ *                          that will be filled-in upon return.
+ *
+ * \retval  0            Successful
+ * \retval  error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                       enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+int Kernel_RecFifoGetMap(uint32_t* rec_map);
+
+
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs.  If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed).  In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * i.e., only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in]  grp      The group number (0 through DMA_NUM_REC_FIFO_GROUPS).
+ * \param[in]  target   The core that will receive the interrupt when a
+ *                      fifo in this group reaches its threshold
+ *                      (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ *                      Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler  A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             normal fifo in this group reaches its threshold.
+ *                             This function must be coded to take 2 parameters:
+ *                               void* A pointer to storage specific to this
+ *                                     handler.  This is the normal_handler_parm
+ *                                     specified on this function call.
+ *                               int   The global fifo ID of the fifo that hit
+ *                                     its threshold (0 through
+ *                                     NUM_DMA_REC_FIFOS-1).
+ *                             If normal_handler is NULL, threshold interrupts
+ *                             are not delivered for normal fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler_parm   A pointer to storage that should be passed
+ *                                   to the normal interrupt handling function
+ *                                   (see normal_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  header_handler  A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             header fifo in this group reaches its threshold.
+ *                             This function must be coded to take 2 parameters:
+ *                               void* A pointer to storage specific to this
+ *                                     handler.  This is the header_handler_parm
+ *                                     specified on this function call.
+ *                               int   The global fifo ID of the fifo that hit
+ *                                     its threshold (0 through
+ *                                     NUM_DMA_REC_FIFOS-1).
+ *                             If header_handler is NULL, threshold interrupts
+ *                             are not delivered for header fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  header_handler_parm   A pointer to storage that should be passed
+ *                                   to the header interrupt handling function
+ *                                   (see header_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  interruptGroup  An InterruptGroup_t that identifies the
+ *                             group of interrupts that the fifos in this group
+ *                             will become part of.
+ *                             Ignored on subsequent call with the same group.
+ *
+ * \return  RecFifoGroupStruct  Pointer to a DMA Reception Fifo Group structure
+ *                              that reflects the fifos that are being used in
+ *                              this group.  This same structure is shared by
+ *                              all users of this reception fifo group.
+ *                              NULL is returned if an error occurs.
+ *
+ * \note  The following comments are from Phil about the internals of the syscall:
+ *   - error checks
+ *     - 0 <= group_id < 4
+ *     - the start of the fifo group is a valid virtual address (tlb mapped)?
+ *   - disable the rDMA
+ *   - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information
+ *   - loop through the map to determine how many and which fifos in this group
+ *     are used, including headers
+ *   - fill in the addresses of used fifos
+ *     - In particular, any pointer to any fifo in the group that is not used
+ *       will have a null pointer
+ *   - furthermore,
+ *     - write starting values to all used fifos
+ *     - make sure all interrupts are cleared
+ *     - enable rDMA
+ *
+ */
+int Kernel_RecFifoGetFifoGroup(
+			       uint32_t * fifogroup,
+			       int        grp,
+			       int        target,
+			       void     * normal_handler,
+			       void     * normal_handler_parm,
+			       void     * header_handler,
+			       void     * header_handler_parm,
+			       void     * interruptGroup
+			       );
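
[Editor's note: a hedged sketch of the handler signature described above; all names are illustrative. The \return text describes a group pointer while the wrapper is declared to return int, so the sketch follows the declaration.]

    static void my_normal_handler(void *parm, int global_fifo_id)
    {
            /* parm is the normal_handler_parm given at registration;
             * drain the reception fifo identified by global_fifo_id here */
    }

    static uint32_t fifogroup_storage[256];  /* size is a placeholder */
    static int my_state;

    static int get_group0(void)
    {
            return Kernel_RecFifoGetFifoGroup(fifogroup_storage,
                            0,                  /* grp */
                            0,                  /* target core */
                            my_normal_handler, &my_state,
                            NULL, NULL,         /* no header-fifo interrupts */
                            NULL);              /* interruptGroup */
    }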
+
+
+
+/*!
+ * \brief DMA RecFifo Initialization By Id
+ *
+ * - For a DMA reception fifo, initialize its start, head, tail, and end.
+ * - Compute fifo size and free space.
+ *
+ * \param[in]  fg_ptr    Pointer to fifo group structure.
+ * \param[in]  fifo_id   Id of the fifo to be initialized
+ *                       (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  va_start  Virtual address of the start of the fifo.
+ * \param[in]  va_head   Virtual address of the head of the fifo (typically
+ *                       equal to va_start).
+ * \param[in]  va_end    Virtual address of the end of the fifo.
+ *
+ * \retval   0  Successful.
+ * \retval  -1  Unsuccessful.  Error checks include
+ *              - va_start <  va_end
+ *              - va_start <= va_head < va_end
+ *              - va_start and va_end are 32-byte aligned
+ *              - fifo_size >= DMA_MIN_REC_FIFO_SIZE_IN_BYTES
+ *
+ */
+int Kernel_RecFifoInitById( uint32_t * fg_ptr,
+			    int        fifo_id,
+			    void     * va_start,
+			    void     * va_head,
+			    void     * va_end );
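
[Editor's note: as with the injection variant, a minimal hedged sketch; the buffer is hypothetical and its size must satisfy DMA_MIN_REC_FIFO_SIZE_IN_BYTES.]

    static int init_rec_fifo0(uint32_t *fg_ptr)
    {
            static char rec_fifo_mem[64 * 1024] __attribute__((aligned(32)));

            return Kernel_RecFifoInitById(fg_ptr, 0 /* fifo_id */,
                            rec_fifo_mem,
                            rec_fifo_mem,       /* head starts at va_start */
                            rec_fifo_mem + sizeof(rec_fifo_mem));
    }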
+
+
+
+
+#endif  /*  Add nothing below this line */
diff --git a/arch/powerpc/include/spi/linux_kernel_spi.h b/arch/powerpc/include/spi/linux_kernel_spi.h
new file mode 100644
index 0000000..05d32f8
--- /dev/null
+++ b/arch/powerpc/include/spi/linux_kernel_spi.h
@@ -0,0 +1,113 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _LINUX_KERNEL_SPI_H_  /*  Prevent multiple inclusion */
+#define _LINUX_KERNEL_SPI_H_
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include <common/bgp_personality.h>
+
+#ifndef __LINUX_KERNEL__
+#define __LINUX_KERNEL__
+#endif
+
+#ifndef __BGP_HIDE_STANDARD_TYPES__
+#define __BGP_HIDE_STANDARD_TYPES__
+#endif
+
+
+/*  this comes from src/arch/ppc/platforms/4xx/bluegene.c */
+extern int bluegene_getPersonality(void *buf, int bufsize);
+#define rts_get_personality(p,s)  bluegene_getPersonality(p,s)
+
+
+/*   Lockbox used by DMA_InjFifoRgetFifoFullInit ... */
+#define  LockBox_FetchAndClear(x)
+
+
+/*  asm inlines used by dma spi */
+
+#define _bgp_msync(void) asm volatile ("msync" : : : "memory")
+#define _bgp_mbar(void)  asm volatile ("mbar"  : : : "memory")
+#define _bgp_isync(void) asm volatile ("isync" : : : "memory")
+extern inline void _bgp_msync_nonspeculative( void )
+{
+    do {
+       asm volatile ("   b 1f;"
+                     "   nop;"
+                     "1: msync;"
+                     : : : "memory");
+       }
+       while(0);
+}
+
+#define _bgp_QuadLoad(v,f)  asm volatile( "lfpdx  " #f ",0,%0" :: "r" (v) : "fr" #f )
+#define _bgp_QuadStore(v,f) asm volatile( "stfpdx " #f ",0,%0" :: "r" (v) : "memory" )
+
+#define _bgp_dcache_touch_line(v) do { asm volatile ("dcbt  0,%0" : : "r" (v)); } while(0)
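
[Editor's note: a hedged sketch of how the quad primitives combine into a 16-byte move; it is only valid while the kernel owns the FPU, e.g. after enable_kernel_fp().]

    static inline void quad_move16(void *dst, const void *src)
    {
            _bgp_QuadLoad(src, 0);    /* lfpdx: 16 bytes into FPR pair 0 */
            _bgp_QuadStore(dst, 0);   /* stfpdx: the same 16 bytes back out */
    }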
+
+/*  in ppc450_inlines.h */
+/* #define _bgp_msync_nonspeculative(x) */
+/* { */
+/*     do { */
+/*        asm volatile ("   b 1f;" */
+/*                      "   nop;" */
+/*                      "1: msync;" */
+/*                      : : : "memory"); */
+/*        } */
+/*        while(0); */
+/* } */
+
+/*  assert and printf variants for kernel use */
+
+#define assert(x) if ( !(x)) printk( KERN_ALERT "(E) bgpdma assertion at %s:%d\n",__FILE__,__LINE__);
+
+#define SPI_assert(x) assert(x)
+
+#define printf(...) printk(KERN_INFO __VA_ARGS__)
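
[Editor's note: assert() above expands to a bare if statement with a trailing semicolon, so it interacts badly with surrounding if/else (dangling-else). The conventional do/while(0) form avoids that; a sketch, not part of the patch:]

    #define assert(x) do { \
            if (!(x)) \
                    printk(KERN_ALERT "(E) bgpdma assertion at %s:%d\n", \
                           __FILE__, __LINE__); \
    } while (0)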
+
+/*  we need a dummy errno for linking */
+static int errno;
+
+/*  general bgp quad struct */
+/*  (better one in bgp_types.h , use that in preference) */
+/* typedef struct { u32 w[4]; } __attribute__ ((aligned(16))) _bgp_QuadWord_t; */
+
+
+/*  virtual base address of the DMA (see bgp_dma_memmap.h) */
+#define _BGP_VA_DMA  bgpdma_kaddr
+
+#include <asm/bgp_personality.h>
+#include <common/alignment.h>
+#include <bpcore/bgp_dma_memmap.h>
+#include <bpcore/ic_memmap.h>
+
+#include <spi/DMA_Counter.h>
+#include <spi/DMA_Fifo.h>
+#include <spi/DMA_InjFifo.h>
+#include <spi/DMA_RecFifo.h>
+
+
+
+#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 8d1a419..14037ca 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,8 @@
 
 CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
+EXTRA_CFLAGS	=
+
 ifeq ($(CONFIG_PPC64),y)
 CFLAGS_prom_init.o	+= -mno-minimal-toc
 endif
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 73cb6a3..230567e 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -230,6 +230,7 @@
  * case when we are running with the cache disabled
  * for debugging.
  */
+static int align_dcbz_count;
 static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
 {
 	long __user *p;
@@ -246,6 +247,7 @@
 	for (i = 0; i < size / sizeof(long); ++i)
 		if (__put_user_inatomic(0, p+i))
 			return -EFAULT;
+	align_dcbz_count += 1;
 	return 1;
 }
 
@@ -930,3 +932,32 @@
 
 	return 1;
 }
+
+static struct ctl_table align_table[] = {
+    {
+            .ctl_name       = CTL_UNNUMBERED,
+            .procname       = "align_dcbz_count" ,
+            .data           = &align_dcbz_count,
+            .maxlen         = sizeof(int),
+            .mode           = 0644,
+            .proc_handler   = &proc_dointvec
+    } ,
+        {}
+};
+
+static struct ctl_path align_path[] = {
+
+            { .procname = "bgp", .ctl_name = 0, },
+            { .procname = "kernel", .ctl_name = 0, },
+            { },
+};
+
+static int __init
+align_sysctl_init(void)
+{
+    struct ctl_table_header * sysctl_table_header = register_sysctl_paths(align_path,align_table);
+    printk(KERN_INFO "align_sysctl_init sysctl_table_header=%p\n",sysctl_table_header) ;
+    return 0;
+}
+
+__initcall(align_sysctl_init);
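
[Editor's note: assuming the registration above succeeds, the counter should appear in userspace as /proc/sys/bgp/kernel/align_dcbz_count, since the ctl_path entries become the directory components in the order listed, so the dcbz-emulation rate can be watched with an ordinary read of that file.]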
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 923f87a..65c67c4 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1634,6 +1634,19 @@
 		.machine_check		= machine_check_440A,
 		.platform		= "ppc440",
 	},
+	{ /* Blue Gene/P */
+		.pvr_mask               = 0xfffffff0,
+		.pvr_value              = 0x52131880,
+		.cpu_name               = "450 Blue Gene/P",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460gt,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index a088c06..7979cfa 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -12,6 +12,39 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  *
+ *    Revised 2011 Chris Ward <tjcw@uk.ibm.com>
+ *  To improve the ability to use floating-point registers in the kernel,
+ *  I have added code to hold the interrupt mask around some instructions
+ *  in 'giveup_fpu()'. The specific case needing this is
+ *  'Zepto' linux for BlueGene/P, where floating-point registers are
+ *  needed in a 'tasklet' as part of the bgp_collective network driver,
+ *  and floating-point registers are wanted as part of an optimisation
+ *  to 'copy_tofrom_user'. The observed problem (without the interrupt mask)
+ *  seemed to be that the code could be interrupted after the "mffs fr0"
+ *  and before the ownership of the fp regs had been disclaimed; then
+ *  'giveup_fpu()' could be called again as a consequence of the interrupt.
+ *  When fr0 then got saved in the task struct, it would overwrite the
+ *  proper fr0 value; and when the task was subsequently dispatched it
+ *  would find an unexpected value in fr0.
+ *
+ *  There is some discussion (Benjamin Herrenschmidt <benh@kernel.crashing.org>)
+ *  as to whether the code here is sufficient to achieve the intent, and
+ *  as to how future-proof it is; current state of discussion is
+ *
+ * > > Reentering giveup_fpu() is only a problem if you can get a different
+ * > > answer the second time from the first time; and the only way you can
+ * > > get a different answer is if you reenter it while fr0 has the contents
+ * > > of fpscr in it, instead of the application's fr0.
+ * >
+ * > That's very fishy ... in any case, just don't do it. FPU in the kernel
+ * > is already close to the limit of what's acceptable, doing it in
+ * > interrupt context is way beyond that limit, at least if using the
+ * > standard FPU primitives.
+ * >
+ * > Do as I said earlier, that is save/restore what you use, if you really
+ * > want to use it there.
+ * >
+ *
  */
 
 #include <asm/reg.h>
@@ -24,12 +57,15 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 
+/* #define DISABLE_INTERRUPTS_DURING_LOAD_UP_FPU */
+#define DISABLE_INTERRUPTS_DURING_GIVEUP_FPU
+
 #ifdef CONFIG_VSX
 #define REST_32FPVSRS(n,c,base)						\
 BEGIN_FTR_SECTION							\
 	b	2f;							\
 END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
-	REST_32FPRS(n,base);						\
+	REST_32FPRS(n,c,base);						\
 	b	3f;							\
 2:	REST_32VSRS(n,c,base);						\
 3:
@@ -38,13 +74,13 @@
 BEGIN_FTR_SECTION							\
 	b	2f;							\
 END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
-	SAVE_32FPRS(n,base);						\
+	SAVE_32FPRS(n,c,base);						\
 	b	3f;							\
 2:	SAVE_32VSRS(n,c,base);						\
 3:
 #else
-#define REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
-#define SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
+#define REST_32FPVSRS(n,b,base)	REST_32FPRS(n,b,base)
+#define SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n,b,base)
 #endif
 
 /*
@@ -55,16 +91,19 @@
  * enable the FPU for the current task and return to the task.
  */
 _GLOBAL(load_up_fpu)
-	mfmsr	r5
-	ori	r5,r5,MSR_FP
+	mfmsr	r6
+	ori	r6,r6,MSR_FP
 #ifdef CONFIG_VSX
 BEGIN_FTR_SECTION
-	oris	r5,r5,MSR_VSX@h
+	oris	r6,r6,MSR_VSX@h
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	SYNC
-	MTMSRD(r5)			/* enable use of fpu now */
+	MTMSRD(r6)			/* enable use of fpu now */
 	isync
+#if defined(DISABLE_INTERRUPTS_DURING_LOAD_UP_FPU)
+	wrteei 0
+#endif
 /*
  * For SMP, we don't do lazy FPU switching because it just gets too
  * horrendously complex, especially when a task switches from one CPU
@@ -111,6 +150,9 @@
 	fromreal(r4)
 	PPC_STL	r4,ADDROFF(last_task_used_math)(r3)
 #endif /* CONFIG_SMP */
+#if defined(DISABLE_INTERRUPTS_DURING_LOAD_UP_FPU)
+	wrtee r6			/* restore the interrupt mask */
+#endif
 	/* restore registers and return */
 	/* we haven't used ctr or xer or lr */
 	blr
@@ -122,16 +164,16 @@
  * Enables the FPU for use in the kernel on return.
  */
 _GLOBAL(giveup_fpu)
-	mfmsr	r5
-	ori	r5,r5,MSR_FP
+	mfmsr	r6
+	ori	r6,r6,MSR_FP
 #ifdef CONFIG_VSX
 BEGIN_FTR_SECTION
-	oris	r5,r5,MSR_VSX@h
+	oris	r6,r6,MSR_VSX@h
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	SYNC_601
 	ISYNC_601
-	MTMSRD(r5)			/* enable use of fpu now */
+	MTMSRD(r6)			/* enable use of fpu now */
 	SYNC_601
 	isync
 	PPC_LCMPI	0,r3,0
@@ -140,6 +182,9 @@
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
 	SAVE_32FPVSRS(0, r4 ,r3)
+#if defined(DISABLE_INTERRUPTS_DURING_GIVEUP_FPU)
+	wrteei 0	/* Disable interrupts while fr0 is inconsistent with what the application thinks should be there */
+#endif
 	mffs	fr0
 	stfd	fr0,THREAD_FPSCR(r3)
 	beq	1f
@@ -153,6 +198,9 @@
 	LOAD_REG_ADDRBASE(r4,last_task_used_math)
 	PPC_STL	r5,ADDROFF(last_task_used_math)(r4)
 #endif /* CONFIG_SMP */
+#if defined(DISABLE_INTERRUPTS_DURING_GIVEUP_FPU)
+	wrtee r6	/* Restore the interrupt mask */
+#endif
 	blr
 
 /*
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index b56fecc..f057f33 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -38,6 +38,9 @@
 #include <asm/asm-offsets.h>
 #include "head_booke.h"
 
+#ifdef CONFIG_ZEPTO
+#include <asm/zepto_tlb.h>
+#endif
 
 /* As with the other PowerPC ports, it is expected that when code
  * execution begins here, the following registers contain valid, yet
@@ -143,7 +146,11 @@
 	sync
 
 	/* Initialize MMUCR */
+#ifdef CONFIG_L1_WRITETHROUGH
+        lis     r5,PPC44x_MMUCR_U2SWOAE@h
+#else
 	li	r5,0
+#endif
 	mtspr	SPRN_MMUCR,r5
 	sync
 
@@ -158,7 +165,12 @@
 	/* attrib fields */
 	/* Added guarded bit to protect against speculative loads/stores */
 	li	r5,0
+#ifdef CONFIG_L1_WRITETHROUGH
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M | PPC44x_TLB_U2)
+        oris    r5,r5,PPC44x_TLB_WL1@h
+#else
 	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+#endif
 
         li      r0,63                    /* TLB slot 63 */
 
@@ -228,6 +240,14 @@
 	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
 	mtspr	SPRN_IVPR,r4
 
+#ifdef CONFIG_SMP
+	/* are we an additional CPU? */
+	li	r0, 0
+	mfspr	r4, SPRN_PIR
+	cmpw	r4, r0
+	bgt	secondary_entry
+#endif
+
 	/*
 	 * This is where the main kernel code starts.
 	 */
@@ -278,6 +298,75 @@
 	mtspr	SPRN_SRR1,r3
 	rfi			/* change context and jump to start_kernel */
 
+
+#ifdef CONFIG_SMP
+/* Extra cpus will come here. */
+//#define _GLOBAL_DEVINIT(n)      \
+//	    .section	.text.devinit; \
+//	    .text;          \
+//       .stabs __stringify(n:F-1),N_FUN,0,0,n;\
+//        .globl n;       \
+//n:
+//
+//_GLOBAL_DEVINIT(secondary_entry)
+secondary_entry:
+        /* Enable U2SWOA.  U2 will be enabled in TLBs. */
+        lis     r7,PPC44x_MMUCR_U2SWOAE@h
+	mtspr	SPRN_MMUCR,r7
+	li	r7,0
+	mtspr	SPRN_PID,r7
+	sync
+	lis	r8,KERNELBASE@h
+
+        /* The tlb_44x_hwater global var (set up by cpu#0) reveals how many
+         * 256M TLBs we need to map.
+         */
+        lis     r9, tlb_44x_hwater@ha
+        lwz     r9, tlb_44x_hwater@l(r9)
+
+	li	r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M|PPC44x_TLB_U2)
+        oris    r5, r5, PPC44x_TLB_WL1@h
+
+	/* tlb_44x_hwater is the biggest TLB slot number for regular TLBs.
+	   TLB 63 covers the kernel base mapping (256MB) and TLB 62 covers CNS.
+	   With 768MB lowmem, it is set to 59. 
+	*/
+2:
+        addi    r9, r9, 1
+        cmpwi   r9,62                  /* Stop at entry 62 which is the firmware */
+        beq     3f
+        addis   r7,r7,0x1000           /* add 256M */
+        addis   r8,r8,0x1000
+	ori	r6,r8,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+	tlbwe	r6,r9,PPC44x_TLB_PAGEID	/* Load the pageid fields */
+	tlbwe	r7,r9,PPC44x_TLB_XLAT   /* Load the translation fields */
+	tlbwe	r5,r9,PPC44x_TLB_ATTRIB	/* Load the attrib/access fields */
+        b       2b
+
+3:      isync
+
+        /* Setup context from global var secondary_ti */
+        lis     r1, secondary_ti@ha
+        lwz     r1, secondary_ti@l(r1)
+        lwz     r2, TI_TASK(r1)         /*  r2 = task_info */
+
+	addi	r3,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r3
+
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+	/* Let's move on */
+	lis	r4,start_secondary@h
+	ori	r4,r4,start_secondary@l
+	lis	r3,MSR_KERNEL@h
+	ori	r3,r3,MSR_KERNEL@l
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi			/* change context and jump to start_secondary */
+#endif
+
 /*
  * Interrupt vector entry code
  *
@@ -297,7 +386,11 @@
 
 interrupt_base:
 	/* Critical Input Interrupt */
+#ifdef CONFIG_BGP
+	CRITICAL_EXCEPTION(0x0100, CriticalInput, critical_exception)
+#else
 	CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+#endif
 
 	/* Machine Check Interrupt */
 	CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
@@ -430,7 +523,11 @@
 tlb_44x_patch_hwater_D:
 	cmpwi	0,r13,1			/* reserve entries */
 	ble	5f
+#ifdef CONFIG_ZEPTO
+	li      r13, REGULAR_TLB_START_SLOT  /* slot after device and bigmem */
+#else
 	li	r13,0
+#endif
 5:
 	/* Store the next available TLB index */
 	stw	r13,tlb_44x_index@l(r10)
@@ -526,7 +623,11 @@
 tlb_44x_patch_hwater_I:
 	cmpwi	0,r13,1			/* reserve entries */
 	ble	5f
+#ifdef CONFIG_ZEPTO
+	li      r13, REGULAR_TLB_START_SLOT   /* slot after device and bigmem */
+#else
 	li	r13,0
+#endif
 5:
 	/* Store the next available TLB index */
 	stw	r13,tlb_44x_index@l(r10)
@@ -588,7 +689,15 @@
 	andi.	r10,r12,_PAGE_USER		/* User page ? */
 	beq	1f				/* nope, leave U bits empty */
 	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
-1:	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
+1:
+#ifdef CONFIG_L1_WRITETHROUGH
+        andi.   r10, r11, PPC44x_TLB_I
+        bne     2f
+        oris    r11,r11,PPC44x_TLB_WL1@h        /* Add coherency for non-inhibited */
+        ori     r11,r11,PPC44x_TLB_SWOA|PPC44x_TLB_M
+2:
+#endif
+	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
 
 	/* Done...restore registers and get out of here.
 	*/
@@ -670,3 +779,5 @@
  */
 abatron_pteptrs:
 	.space	8
+
+
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 15f28e0..d775827 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -491,6 +491,7 @@
  *
  * void clear_pages(void *page, int order) ;
  */
+#if !defined(CONFIG_WRAP_COPY_TOFROM_USER) || !defined(CONFIG_BGP_TORUS)
 _GLOBAL(clear_pages)
 	li	r0,PAGE_SIZE/L1_CACHE_BYTES
 	slw	r0,r0,r4
@@ -502,12 +503,25 @@
 	stw	r4, 8(r3)
 	stw	r4, 12(r3)
 #else
+#ifdef CONFIG_L1_WRITETHROUGH
+        /* assuming 32 byte cacheline */
+	li	r4, 0
+1:	stw	r4, 0(r3)
+	stw	r4, 4(r3)
+	stw	r4, 8(r3)
+	stw	r4, 12(r3)
+	stw	r4, 16(r3)
+	stw	r4, 20(r3)
+	stw	r4, 24(r3)
+	stw	r4, 28(r3)
+#else
 1:	dcbz	0,r3
 #endif
+#endif
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	1b
 	blr
-
+#endif
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
@@ -524,6 +538,7 @@
 	stw	r8,12(r3);	\
 	stwu	r9,16(r3)
 
+#if !defined(CONFIG_WRAP_COPY_TOFROM_USER) || !defined(CONFIG_BGP_TORUS)
 _GLOBAL(copy_page)
 	addi	r3,r3,-4
 	addi	r4,r4,-4
@@ -556,7 +571,9 @@
 	mtctr	r0
 1:
 	dcbt	r11,r4
+#ifndef CONFIG_L1_WRITETHROUGH
 	dcbz	r5,r3
+#endif
 	COPY_16_BYTES
 #if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
@@ -578,6 +595,7 @@
 	li	r11,4
 	b	2b
 #endif	/* CONFIG_8xx */
+#endif /* CONFIG_WRAP_COPY_TOFROM_USER */
 
 /*
  * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index fb7049c..8ba2cc7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -50,6 +50,11 @@
 #endif
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <mm/mmu_decl.h>
+
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+#endif
 
 extern unsigned long _get_SP(void);
 
@@ -64,8 +69,14 @@
  * Make sure the floating-point register state in the
  * the thread_struct is up to date for task tsk.
  */
+/* Does not need FP_SAVE_UNDER_MASK in this file, it is done more finely in fpu.S */
+/* #define FP_SAVE_UNDER_MASK */
 void flush_fp_to_thread(struct task_struct *tsk)
 {
+#if defined(FP_SAVE_UNDER_MASK)
+    unsigned long flags ;
+    local_irq_save(flags) ;
+#endif
 	if (tsk->thread.regs) {
 		/*
 		 * We need to disable preemption here because if we didn't,
@@ -91,10 +102,17 @@
 		}
 		preempt_enable();
 	}
+#if defined(FP_SAVE_UNDER_MASK)
+    local_irq_restore(flags) ;
+#endif
 }
 
 void enable_kernel_fp(void)
 {
+#if defined(FP_SAVE_UNDER_MASK)
+    unsigned long flags ;
+    local_irq_save(flags) ;
+#endif
 	WARN_ON(preemptible());
 
 #ifdef CONFIG_SMP
@@ -105,6 +123,9 @@
 #else
 	giveup_fpu(last_task_used_math);
 #endif /* CONFIG_SMP */
+#if defined(FP_SAVE_UNDER_MASK)
+    local_irq_restore(flags) ;
+#endif
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
@@ -303,6 +324,9 @@
 	unsigned long flags;
 	struct task_struct *last;
 
+#if defined(FP_SAVE_UNDER_MASK)
+    local_irq_save(flags) ;
+#endif
 #ifdef CONFIG_SMP
 	/* avoid complexity of lazy save/restore of fpu
 	 * by just saving it every time we switch out if
@@ -395,7 +419,9 @@
 	}
 #endif
 
-	local_irq_save(flags);
+#if !defined(FP_SAVE_UNDER_MASK)
+    local_irq_save(flags) ;
+#endif
 
 	account_system_vtime(current);
 	account_process_vtime(current);
@@ -407,6 +433,10 @@
 	 * of sync. Hard disable here.
 	 */
 	hard_irq_disable();
+	
+	if( IS_ZEPTO_TASK(current) )  {
+	    _tlbil_all();
+	}
 	last = _switch(old_thread, new_thread);
 
 	local_irq_restore(flags);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 9e1ca74..87fd7ab 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -44,6 +44,13 @@
 
 #define DBG(fmt...)
 
+#ifdef CONFIG_ZEPTO
+#include <linux/utsname.h>
+int zepto_debug_level = 1;
+
+#endif
+
+
 extern void bootx_init(unsigned long r4, unsigned long phys);
 
 int boot_cpuid;
@@ -69,6 +76,15 @@
 int icache_bsize;
 int ucache_bsize;
 
+#ifdef CONFIG_ZEPTO
+/* XXX: this might not be an appropriate location to add this code. */
+int zepto_kparam_noPRE;
+int zepto_kparam_noU3;
+int zepto_kparam_globaltick;
+int zepto_kparam_tickdesync;
+#endif
+
+
 /*
  * We're called here very early in the boot.  We determine the machine
  * type and call the appropriate low-level setup functions.
@@ -285,6 +301,36 @@
 	if (ppc_md.init_early)
 		ppc_md.init_early();
 
+#ifdef CONFIG_ZEPTO
+	{
+	    char* optstr = "zepto_debug=";
+	    if(strstr(cmd_line, optstr) ) {
+		char* p;
+		p = strstr( cmd_line, optstr );
+		if( p && (strlen(p)-strlen(optstr))>0 ) { 
+		    p=p+strlen(optstr);
+		    zepto_debug_level=simple_strtoul(p,&p,0);
+		}
+	    }
+	}
+
+	if(strstr(cmd_line,"noPRE")) zepto_kparam_noPRE = 1;	
+	else  		zepto_kparam_noPRE = 0;
+
+	if(strstr(cmd_line,"noU3")) zepto_kparam_noU3 = 1;	
+	else  		zepto_kparam_noU3 = 0;
+
+#ifdef  CONFIG_ZEPTO_COMPUTENODE
+	if(strstr(cmd_line,"globaltick")) zepto_kparam_globaltick = 1;
+	else  		zepto_kparam_globaltick = 0;
+
+	if(strstr(cmd_line,"tickdesync")) zepto_kparam_tickdesync = 1;
+	else  		zepto_kparam_tickdesync = 0;
+#endif
+
+#endif
+
+
 	find_legacy_serial_ports();
 
 	smp_setup_cpu_maps();
@@ -330,8 +376,33 @@
 
 	if (ppc_md.setup_arch)
 		ppc_md.setup_arch();
+
+#ifdef CONFIG_ZEPTO
+
+	/* CNS is initialized in setup_arch(). We can start using print functions from here. */
+	{
+	    extern unsigned long __bigmem_size; /* defined in  arch/powerpc/mm/init_32.c */
+
+	    printk("Z: Zepto patched BGP %s %s %s\n", 
+		   utsname()->sysname,
+		   utsname()->release,
+		   utsname()->version);
+	    printk("Z: zepto_debug_level=%d\n",zepto_debug_level);
+	    printk("Z: __bigmem_size=%lu\n", __bigmem_size);
+	    printk("Z: options: %s%s%s%s\n",
+		   zepto_kparam_noPRE?"noPRE ":"", 
+		   zepto_kparam_noU3?"noU3 ":"", 
+		   zepto_kparam_globaltick?"globaltick ":"",
+		   zepto_kparam_tickdesync?"tickdesync ":"");
+	    lmb_dump_all();
+	    printk("Z: lmb_phys_mem_size()=>%08x  lmb_end_of_DRAM()=>%08x\n",
+		   (unsigned)lmb_phys_mem_size(), (unsigned)lmb_end_of_DRAM() );
+	}
+#endif
+
 	if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
 
+
 	paging_init();
 
 	/* Initialize the MMU context management stuff */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 65484b2..ef8b595 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -489,6 +489,14 @@
 	current->active_mm = &init_mm;
 
 	smp_store_cpu_info(cpu);
+
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+	/* Clear any pending timer interrupts */
+	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+
+	/* Enable decrementer interrupt */
+	mtspr(SPRN_TCR, TCR_DIE);
+#endif
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
 	cpu_callin_map[cpu] = 1;
@@ -549,6 +557,13 @@
 {
 	cpumask_t old_mask;
 
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+	extern void erase_CNS_orig(void);  /* defined in platforms/44x/bgp_cns.c */
+
+	printk("Z: smp_cpus_done  max_cpus=%d\n", max_cpus);
+	erase_CNS_orig();
+#endif
+
 	/* We want the setup_cpu() here to be called from CPU 0, but our
 	 * init thread may have been "borrowed" by another CPU in the meantime
 	 * se we pin us down to CPU 0 for a short while
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 93219c3..139cb47 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -45,3 +45,30 @@
 
 _GLOBAL(sys_call_table)
 #include <asm/systbl.h>
+
+
+#ifdef CONFIG_BLUEGENE
+/* See also arch/powerpc/include/asm/unistd.h */
+		
+/* Generate ni_syscall until #1047.
+ * Each entry on ppc32 is 4 bytes so we do the math to pad the table.
+ */
+        .rept 1048 - (. - sys_call_table)/4
+SYSCALL(ni_syscall)
+        .endr
+/* Blue Gene hack syscalls start at 1048 */
+
+/* Define the zepto syscalls */
+#ifdef CONFIG_ZEPTO
+
+SYSCALL(zepto_generic)  /* 1048 */
+SYSCALL(zepto_bigmem)   /* 1049 */
+SYSCALL(zepto_lockbox)  /* 1050 */
+#endif /* CONFIG_ZEPTO */
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+SYSCALL(zepto_dma)      /* 1051 */
+#endif
+
+#endif /* CONFIG_BLUEGENE */
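
[Editor's note: to make the padding arithmetic concrete: (. - sys_call_table)/4 is the number of 4-byte entries already emitted by <asm/systbl.h>, so the .rept directive fills the table with ni_syscall up to and including entry 1047, and the zepto syscalls land at exactly 1048-1051, the numbers noted in the comments above.]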
+
+
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
index 19415e7..cfef7d0 100644
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ b/arch/powerpc/kernel/systbl_chk.sh
@@ -15,11 +15,13 @@
 	/^[ \t]*$/ { next; }
 	/^START_TABLE/ { num = 0; next; }
 	/^END_TABLE/ {
-		if (num != $2) {
-			printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n",
-				$2, num - 1;
-			exit(1);
-		}
+# The zepto syscalls start at 1048, leaving a gap after the last regular
+# syscall, so the following check would fail.
+#		if (num != $2) {
+#			printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n",
+#				$2, num - 1;
+#			exit(1);
+#		}
 		num = -1;	# Ignore the rest of the file
 	}
 	{
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5457e95..e978865 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -71,6 +71,11 @@
 EXPORT_SYMBOL(__debugger_fault_handler);
 #endif
 
+#ifdef CONFIG_BGP
+#include <asm/bgcns.h>
+extern BGCNS_Descriptor bgcnsd;
+#endif
+
 /*
  * Trap & Exception support
  */
@@ -546,6 +551,28 @@
 	_exception(SIGTRAP, regs, 0, 0);
 }
 
+#ifdef CONFIG_BGP
+void critical_exception(struct pt_regs *regs)
+{
+	/* On Blue Gene we do not use criticals, but firmware does.
+	 * So call firmware to see what happened.  If firmware doesn't
+	 * handle it, then we panic.
+	 */
+	unsigned group, irq;
+	int ret;
+
+	/* Note: CNS by design stomps on a TLB.  We really can't do that here
+	 * because we might be in the middle of a TLB miss.
+	 * In the future change bgcns() to a bgcns_map()/bgcns_unmap() combination
+	 * and fully restore the TLB (even if it is invalid). Is this still true?
+	 */
+	ret = bgcnsd.services->getInterrupt(BGCNS_Critical, &group, &irq);
+	if (ret != -1) {
+		panic("Unhandled critical exception, rc=%d group=0x%x irq=0x%x", ret, group, irq);
+	}
+}
+#endif
+
 void instruction_breakpoint_exception(struct pt_regs *regs)
 {
 	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 7d6c9bb..c51bac2 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -17,6 +17,10 @@
 #include <asm/processor.h>
 #include <asm/udbg.h>
 
+#ifdef CONFIG_PPC_EARLY_DEBUG_BGP
+#include <asm/bluegene.h>
+#endif
+
 void (*udbg_putc)(char c);
 int (*udbg_getc)(void);
 int (*udbg_getc_poll)(void);
@@ -59,9 +63,18 @@
 	udbg_init_40x_realmode();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_CPM)
 	udbg_init_cpm();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_BGP)
+        /* XXX: can't call bgp_init_cns() from here. 
+           need to investigate. */
+        if(0) {
+	    extern void bgp_udbg_putc(char c);
+	    bgp_init_cns();
+	    udbg_putc = bgp_udbg_putc;
+	}
 #endif
 
-#ifdef CONFIG_PPC_EARLY_DEBUG
+// #ifdef CONFIG_PPC_EARLY_DEBUG
+#ifdef CONFIG_BLUEGENE_NOISY_BOOT
 	console_loglevel = 10;
 #endif
 }
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 161b9b9..fadbdf5 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -263,6 +263,12 @@
 		*(.data.init_task)
 	}
 
+#ifdef CONFIG_ZEPTO
+	. = ALIGN(262144);
+	.cns.256KB_aligned : AT(ADDR(.cns.256KB_aligned) - LOAD_OFFSET) {
+		*(.cns.256KB_aligned)
+	}
+#endif
 	. = ALIGN(PAGE_SIZE);
 	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
 		*(.data.page_aligned)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8db3527..63a1192 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -29,3 +29,5 @@
 obj-y			+= code-patching.o
 obj-y			+= feature-fixups.o
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+obj-$(CONFIG_WRAP_COPY_TOFROM_USER) += copy_tofrom_user.o
+
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index c657de5..a774f03 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -98,7 +98,7 @@
 	bdnz	4b
 3:	mtctr	r9
 	li	r7,4
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) && !defined(CONFIG_L1_WRITETHROUGH)
 10:	dcbz	r7,r6
 #else
 10:	stw	r4, 4(r6)
@@ -200,7 +200,7 @@
 	mtctr	r0
 	beq	63f
 53:
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) && !defined(CONFIG_L1_WRITETHROUGH)
 	dcbz	r11,r6
 #endif
 	COPY_16_BYTES
@@ -318,7 +318,11 @@
 	mtctr	r7
 	b	1b
 
+#if defined(CONFIG_WRAP_COPY_TOFROM_USER)
+_GLOBAL(__real__copy_tofrom_user)
+#else
 _GLOBAL(__copy_tofrom_user)
+#endif
 	addi	r4,r4,-4
 	addi	r6,r3,-4
 	neg	r0,r3
@@ -391,7 +395,11 @@
 	mtctr	r8
 
 53:	dcbt	r3,r4
+#ifdef CONFIG_L1_WRITETHROUGH
+54:
+#else
 54:	dcbz	r11,r6
+#endif
 	.section __ex_table,"a"
 	.align	2
 	.long	54b,105f
diff --git a/arch/powerpc/lib/copy_tofrom_user.c b/arch/powerpc/lib/copy_tofrom_user.c
new file mode 100644
index 0000000..525da59
--- /dev/null
+++ b/arch/powerpc/lib/copy_tofrom_user.c
@@ -0,0 +1,19 @@
+#include <linux/kernel.h>
+
+extern unsigned long __real__copy_tofrom_user(void  *to,
+		const void __user *from, unsigned long size) ;
+
+#if defined(CONFIG_BGP_TORUS)
+extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void  *to,
+		const void __user *from, unsigned long size) ;
+#endif
+
+unsigned long __copy_tofrom_user(void  *to,
+		const void __user *from, unsigned long size)
+{
+#if defined(CONFIG_BGP_TORUS)
+	int rc=bgp_fpu_instrument_copy_tofrom_user(to, from, size) ;
+	if( 0 == rc) return 0 ;
+#endif
+	return __real__copy_tofrom_user(to, from, size) ;
+}
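
[Editor's note: a hedged sketch of the caller-side convention this wrapper preserves; on powerpc, __copy_tofrom_user() returns the number of bytes it could NOT copy. The code also implies the instrumented path is all-or-nothing: any nonzero return falls back to the original copy loop over the full size.]

    long example_read(void *kbuf, const void __user *ubuf, unsigned long len)
    {
            unsigned long left = __copy_tofrom_user(kbuf, ubuf, len);
            if (left)
                    return -EFAULT;   /* only len - left bytes were transferred */
            return 0;
    }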
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 98052ac..758771c 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -31,10 +31,18 @@
 
 #include "mmu_decl.h"
 
+#ifdef CONFIG_ZEPTO
+#include <asm/zepto_tlb.h>
+#endif
+
 /* Used by the 44x TLB replacement exception handler.
  * Just needed it declared someplace.
  */
+#ifdef CONFIG_ZEPTO
+unsigned int tlb_44x_index = REGULAR_TLB_START_SLOT;
+#else
 unsigned int tlb_44x_index; /* = 0 */
+#endif
 unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
 int icache_44x_need_flush;
 
@@ -64,6 +72,12 @@
 static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
 {
 	unsigned int entry = tlb_44x_hwater--;
+	unsigned attrs = PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX;
+#ifdef CONFIG_L1_WRITETHROUGH
+	attrs |= PPC44x_TLB_WL1 | PPC44x_TLB_SWOA | PPC44x_TLB_M;
+#else
+	attrs |= PPC44x_TLB_G;
+#endif
 
 	ppc44x_update_tlb_hwater();
 
@@ -72,7 +86,7 @@
 		"tlbwe	%1,%3,%5\n"
 		"tlbwe	%0,%3,%6\n"
 	:
-	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+	: "r" (attrs),
 	  "r" (phys),
 	  "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
 	  "r" (entry),
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 953cc4a..2a4ddc9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,6 +6,8 @@
 EXTRA_CFLAGS	+= -mno-minimal-toc
 endif
 
+ZSPI_INC=-Iarch/powerpc/include/zspi
+CFLAGS_zepto_bigmem.o=$(ZSPI_INC)
 obj-y				:= fault.o mem.o pgtable.o \
 				   init_$(CONFIG_WORD_SIZE).o \
 				   pgtable_$(CONFIG_WORD_SIZE).o
@@ -15,6 +17,8 @@
 obj-$(CONFIG_PPC64)		+= hash_utils_64.o \
 				   slb_low.o slb.o stab.o \
 				   gup.o mmap.o $(hash-y)
+
+obj-$(CONFIG_BGP) += mmap.o	   
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
 				   tlb_hash$(CONFIG_WORD_SIZE).o \
@@ -26,3 +30,4 @@
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage-prot.o
+obj-$(CONFIG_ZEPTO)    	        += zepto_bigmem.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b86..0ed6723 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,6 +40,10 @@
 #include <asm/tlbflush.h>
 #include <asm/siginfo.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+#endif
+
 
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
@@ -101,6 +105,73 @@
 	return 0;
 }
 
+#ifdef CONFIG_BGP
+/* The icbi instruction does not broadcast to all cpus in the ppc450 processor used
+ * by Blue Gene/P.  It is unlikely this problem will be exhibited in other processors,
+ * so this remains ifdef'ed for BGP specifically.
+ *
+ * We deal with this by marking executable pages either writable, or executable, but
+ * never both.  The permissions will fault back and forth if the thread is actively
+ * writing to executable sections.  Each time we fault to become executable we flush
+ * the dcache into icache on all cpus.
+ */
+struct bgp_fixup_parm {
+	struct page		*page;
+	unsigned long		address;
+	struct vm_area_struct	*vma;
+};
+static void bgp_fixup_cache_tlb(void *parm)
+{
+	struct bgp_fixup_parm	*p = parm;
+
+	if (!PageHighMem(p->page))
+		flush_dcache_icache_page(p->page);
+	local_flush_tlb_page(p->vma, p->address);
+}
+
+static void bgp_fixup_access_perms(struct vm_area_struct *vma,
+				  unsigned long address,
+				  int is_write, int is_exec)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t *ptep = NULL;
+	pmd_t *pmdp;
+
+	if (get_pteptr(mm, address, &ptep, &pmdp)) {
+		spinlock_t *ptl = pte_lockptr(mm, pmdp);
+		pte_t old;
+
+		spin_lock(ptl);
+		old = *ptep;
+		if (pte_present(old)) {
+			struct page *page = pte_page(old);
+
+			if (is_exec) {
+				struct bgp_fixup_parm param = {
+					.page		= page,
+					.address	= address,
+					.vma		= vma,
+				};
+				pte_update(ptep, _PAGE_HWWRITE, 0);
+				on_each_cpu(bgp_fixup_cache_tlb, &param, 1);
+				pte_update(ptep, 0, _PAGE_HWEXEC);
+				pte_unmap_unlock(ptep, ptl);
+				return;
+			}
+			if (is_write &&
+			    (pte_val(old) & _PAGE_RW) &&
+			    (pte_val(old) & _PAGE_DIRTY) &&
+			    !(pte_val(old) & _PAGE_HWWRITE)) {
+				pte_update(ptep, _PAGE_HWEXEC, _PAGE_HWWRITE);
+			}
+		}
+		if (!pte_same(old, *ptep))
+			flush_tlb_page(vma, address);
+		pte_unmap_unlock(ptep, ptl);
+	}
+}
+#endif
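
[Editor's note: a descriptive summary of the fault path above, not new behavior:]

    /*
     * exec fault : clear _PAGE_HWWRITE, flush dcache into icache on every
     *              cpu, then set _PAGE_HWEXEC
     * write fault: on a dirty, writable page lacking _PAGE_HWWRITE,
     *              clear _PAGE_HWEXEC and set _PAGE_HWWRITE
     * so no page is ever _PAGE_HWWRITE and _PAGE_HWEXEC at the same time.
     */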
+
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -170,6 +241,19 @@
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
+#ifdef CONFIG_ZEPTO_MEMORY
+       if( enable_bigmem &&  IS_ZEPTO_TASK(current) ) {
+	   if( in_bigmem(address) ) {
+	       // printk("Attempted to access bigmem region addr=%08lx pid=%d\n",   address,current->pid);
+	       if( install_bigmem_tlb()==-1 ) {
+		   printk(KERN_ERR "bigmem is not available\n");
+		   return SIGSEGV;
+	       }
+	       return 0;
+	   }
+	}
+#endif
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -266,6 +350,7 @@
 		     !(vma->vm_flags & (VM_READ | VM_WRITE))))
 			goto bad_area;
 #else
+#ifndef CONFIG_BGP
 		pte_t *ptep;
 		pmd_t *pmdp;
 
@@ -292,6 +377,7 @@
 			pte_unmap_unlock(ptep, ptl);
 		}
 #endif
+#endif
 	/* a write */
 	} else if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
@@ -330,6 +416,12 @@
 #endif
 	} else
 		current->min_flt++;
+
+#ifdef CONFIG_BGP
+	/* Fixup _PAGE_HWEXEC and _PAGE_HWWRITE if necessary */
+	bgp_fixup_access_perms(vma, address, is_write, is_exec);
+#endif
+
 	up_read(&mm->mmap_sem);
 	return 0;
 
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 666a5e8..694c8a1 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -46,6 +46,16 @@
 
 #include "mmu_decl.h"
 
+#ifdef CONFIG_ZEPTO
+#include <linux/zepto_debug.h>
+#endif
+
+#ifdef CONFIG_ZEPTO_MEMORY
+/* Use bigmemsize command line parameter to change it  */
+unsigned long __bigmem_size=1024*1024*1024;
+int bgp4GB; /* =1 if BGP has 4GB of memory, otherwise we assume BGP memory size is 2GB */
+#endif
+
 #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
 /* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */
 #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET))
@@ -71,6 +81,7 @@
 EXPORT_SYMBOL(agp_special_page);
 #endif
 
+
 void MMU_init(void);
 
 /* XXX should be in current.h  -- paulus */
@@ -106,6 +117,21 @@
 	if (strstr(cmd_line, "noltlbs")) {
 		__map_without_ltlbs = 1;
 	}
+
+#ifdef CONFIG_ZEPTO_MEMORY
+	{
+	    char* p;
+	    char* s;
+
+	    if( (s=strstr(cmd_line, "bigmemsize=")) ) {
+		p = s+strlen("bigmemsize=");
+		/* align __bigmem_size to 16 MB. zepto_bigmem.c uses
+		   TLB sizes from 1GB down to 16MB */
+		__bigmem_size = _ALIGN(memparse(p, &p), 0x01000000);
+	    }
+	}
+#endif
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	__map_without_bats = 1;
 	__map_without_ltlbs = 1;
@@ -138,6 +164,33 @@
 		printk(KERN_WARNING "Only using first contiguous memory region");
 	}
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	{ 
+	    u64 oldDRAMsize = lmb_end_of_DRAM(); /* realDRAMsize - cns_size. check  boot/bgp.c */
+	    u64 newDRAMsize;
+	    
+	    if( oldDRAMsize > 0x80000000 ) bgp4GB=1;
+	    else bgp4GB=0;
+
+	    /* sanity check, make sure we leave at least  256 MB for the kernel 
+	       ZXXX: fix this when we support 1MB, 16MB TLBs  */
+	    
+	    if (__bigmem_size < 0x10000000) __bigmem_size = 0x10000000;  
+
+	    if( bgp4GB && (__bigmem_size > 0xd0000000) ) 
+		__bigmem_size = 0xd0000000;  /* 4 GB BG/P */
+	    else if (__bigmem_size > 0x70000000)
+		__bigmem_size = 0x70000000;  /* 2 GB BG/P*/
+
+	    if( bgp4GB)   newDRAMsize = (u64)0x100000000ULL - __bigmem_size;
+	    else          newDRAMsize = (u64) 0x80000000ULL - __bigmem_size;
+
+
+	    lmb_enforce_memory_limit(newDRAMsize);
+	    lmb_analyze();
+	}
+#endif
+
 	total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr;
 	lowmem_end_addr = memstart_addr + total_lowmem;
 
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 86010fc..2db8129 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -48,11 +48,13 @@
 
 static inline int mmap_is_legacy(void)
 {
+#if defined(CONFIG_64BIT)
 	/*
 	 * Force standard allocation for 64 bit programs.
 	 */
 	if (!test_thread_flag(TIF_32BIT))
 		return 1;
+#endif
 
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 52a0cfc..ba043c4c 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -46,7 +46,7 @@
 static unsigned long *context_map;
 static unsigned long *stale_map[NR_CPUS];
 static struct mm_struct **context_mm;
-static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(context_lock);
 
 #define CTX_MAP_SIZE	\
 	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
@@ -73,7 +73,6 @@
 	struct mm_struct *mm;
 	unsigned int cpu, max;
 
- again:
 	max = last_context - first_context;
 
 	/* Attempt to free next_context first and then loop until we manage */
@@ -108,7 +107,9 @@
 	spin_unlock(&context_lock);
 	cpu_relax();
 	spin_lock(&context_lock);
-	goto again;
+
+       /* This will cause the caller to try again */
+       return MMU_NO_CONTEXT;
 }
 #endif  /* CONFIG_SMP */
 
@@ -127,12 +128,12 @@
 
 	pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
 
-	/* Mark this mm has having no context anymore */
-	mm->context.id = MMU_NO_CONTEXT;
-
 	/* Flush the TLB for that context */
 	local_flush_tlb_mm(mm);
 
+	/* Mark this mm as having no context anymore */
+	mm->context.id = MMU_NO_CONTEXT;
+
 	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
 	__clear_bit(id, stale_map[cpu]);
 
@@ -194,6 +195,8 @@
 		WARN_ON(prev->context.active < 1);
 		prev->context.active--;
 	}
+
+ again:
 #endif /* CONFIG_SMP */
 
 	/* If we already have a valid assigned context, skip all that */
@@ -212,6 +215,8 @@
 #ifdef CONFIG_SMP
 		if (num_online_cpus() > 1) {
 			id = steal_context_smp(id);
+                       if (id == MMU_NO_CONTEXT)
+                               goto again;
 			goto stolen;
 		}
 #endif /* CONFIG_SMP */
@@ -272,6 +277,7 @@
  */
 void destroy_context(struct mm_struct *mm)
 {
+        unsigned long flags;
 	unsigned int id;
 
 	if (mm->context.id == MMU_NO_CONTEXT)
@@ -279,18 +285,18 @@
 
 	WARN_ON(mm->context.active != 0);
 
-	spin_lock(&context_lock);
+        spin_lock_irqsave(&context_lock, flags);
 	id = mm->context.id;
 	if (id != MMU_NO_CONTEXT) {
 		__clear_bit(id, context_map);
 		mm->context.id = MMU_NO_CONTEXT;
 #ifdef DEBUG_MAP_CONSISTENCY
 		mm->context.active = 0;
-		context_mm[id] = NULL;
 #endif
+               context_mm[id] = NULL;
 		nr_free_contexts++;
 	}
-	spin_unlock(&context_lock);
+	spin_unlock_irqrestore(&context_lock, flags);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index f900a39..0bee5b4 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -34,8 +34,13 @@
 #include <asm/asm-offsets.h>
 #include <asm/processor.h>
 
+#ifdef CONFIG_ZEPTO
+#include <asm/zepto_tlb.h>
+#endif
+
 #if defined(CONFIG_40x)
 
+	
 /*
  * 40x implementation needs only tlbil_va
  */
@@ -101,7 +106,15 @@
 
 _GLOBAL(_tlbil_all)
 _GLOBAL(_tlbil_pid)
+#if defined(CONFIG_ZEPTO)
+	/*
+	 BigMem TLBs are flushed along with the regular page TLBs, but
+	 we have to keep the TLBs for devices such as the UPC, torus, etc.,
+	 if any are present.
+	*/
+	li	r3, TLB_SLOT_AFTERDEV
+#else	
 	li	r3,0
+#endif
 	sync
 
 	/* Load high watermark */
diff --git a/arch/powerpc/mm/zepto_bigmem.c b/arch/powerpc/mm/zepto_bigmem.c
new file mode 100644
index 0000000..bbb9397
--- /dev/null
+++ b/arch/powerpc/mm/zepto_bigmem.c
@@ -0,0 +1,1016 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+/*
+  low-level implementation of zepto big memory
+*/
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/lmb.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include <asm/time.h>
+
+#ifdef CONFIG_ZEPTO
+#include <asm/zepto_tlb.h>
+#endif
+
+#ifdef CONFIG_ZEPTO
+extern int zepto_kparam_noU3; /* currently this variable is defined in arch/powerpc/kernel/setup_32.c */
+#endif
+
+#include "mmu_decl.h"
+
+#define __ZCL_KERNEL__
+
+#include <zepto/zepto_syscall.h>
+
+#define __ZCL__    /* this is to avoid cnk/VirtualMap.h from ppc450_inlines.h */
+
+#include <linux/zepto_task.h>
+
+extern unsigned int tlb_44x_index;  /* defined in arch/powerpc/mm/44x_mmu.c */
+
+
+#define _bgp_dcache_zero_line_index(v,i) do { asm volatile ("dcbz %1,%0" : : "r" (v), "Ob" (i) : "memory"); } while(0)
+#define _bgp_dcache_zero_line(v)         do { asm volatile ("dcbz  0,%0" : : "r" (v) : "memory"); } while(0)
+#define _bgp_dcache_flush_line(v)        do { asm volatile ("dcbf  0,%0" : : "r" (v) : "memory"); } while(0)
+#define _bgp_dcache_store_line(v)        do { asm volatile ("dcbst 0,%0" : : "r" (v) : "memory"); } while(0)
+#define _bgp_dcache_touch_line(v)        do { asm volatile ("dcbt  0,%0" : : "r" (v)           ); } while(0)
+#define _bgp_dcache_invalidate_line(v)   do { asm volatile ("dcbi  0,%0" : : "r" (v) : "memory"); } while(0)
+#define _bgp_icache_invalidate_line(v)   do { asm volatile ("icbi  0,%0" : : "r" (v) : "memory"); } while(0)
+#define _bgp_icache_touch_line(v)        do { asm volatile ("icbt  0,%0" : : "r" (v)           ); } while(0)
+#define _bgp_dcache_invalidate_all(void) do { asm volatile ("dccci 0,0"  : : : "memory"); } while(0)
+#define _bgp_icache_invalidate_all(void) do { asm volatile ("iccci 0,0"  : : : "memory"); } while(0)
+#define _bgp_isync(void)       do { asm volatile ("isync" : : : "memory"); } while(0)
+#define _bgp_msync(void)       do { asm volatile ("msync" : : : "memory"); } while(0)
+
+
+
+unsigned get_bigmem_size(void)
+{
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+    /* preliminary DUAL */
+    switch( bigmem_nprocs_per_node  ) {
+	case 4:
+	    return __bigmem_size/4;  /* __bigmem_size is declared in include/linux/zepto_bigmem.h */
+	    break;
+	case 2:
+	    return __bigmem_size/2;   /* not supported */
+	case 1:
+	    return __bigmem_size;
+	default:
+	    panic( "Bigmem: wrong running mode");
+	    return 0;
+    }
+#else
+#warning "faking bigmem size since CNS is not relocated"
+    return __bigmem_size - 0x10000000U; /* XXXX: ad-hoc solution, which wastes (256MB-256KB).
+					   Fix the physical memory allocation code. */
+#endif
+}
+
+
+
+/* 
+   If zepto task is running (i.e. MPI task is running), bigmem_process_count
+   is equal to bigmem_nprocs_per_node.
+*/
+
+static atomic_t    bigmem_process_count = ATOMIC_INIT(0);
+static unsigned    bigmem_cpucoremask = 0;
+
+void bigmem_process_reset(void)
+{
+    atomic_set( &bigmem_process_count,0);
+    bigmem_cpucoremask = 0;
+}
+
+/* 
+   return the processor id on success.
+   otherwise, -1 is returned. 
+*/
+int bigmem_process_new(void)
+{
+    int ret;
+    int coreid;
+
+    ret = atomic_inc_return(&bigmem_process_count);
+    if( ret > bigmem_nprocs_per_node ) {
+	return -1;
+    }
+
+    if( bigmem_nprocs_per_node == 2 ) {
+	coreid = (ret-1)*2;
+    } else {
+	coreid = (ret-1);
+    }
+
+    bigmem_cpucoremask |= 1<<coreid;
+
+    printk("bigmem_process_new: coreid=%d bigmem=%08x\n", coreid, bigmem_cpucoremask);
+
+    return coreid;
+}
+
+/*
+  On success, the number of active cores is returned.
+  Otherwise, -1 is returned.
+*/
+int bigmem_process_release(void)
+{
+    int ret;
+    int coreid=0;
+    int coreidbit;
+
+    coreid = bigmem_process_cid();
+
+    coreidbit = (1<<coreid);
+
+    if( bigmem_cpucoremask & coreidbit ) {
+	ret = atomic_dec_return(&bigmem_process_count);
+	bigmem_cpucoremask &= (~coreidbit);
+    } else {
+	return -1; /* this core is already free'ed */
+    }
+    printk("bigmem_process_release: ret=%d bigmem_cpucoremask=%08x\n", ret, bigmem_cpucoremask);
+
+    return ret;
+}
+
+/*
+  return the number of active bigmem processes.
+*/
+int bigmem_process_active_count(void)
+{
+    return atomic_read( &bigmem_process_count );
+}
+
+/* 
+   return 1 if the number of zepto tasks is equal to bigmem_nprocs_per_node
+*/
+int bigmem_process_all_active(void)
+{
+    return (atomic_read(&bigmem_process_count)==bigmem_nprocs_per_node);
+}
+
+
+
+/* ---------------------------------------------------------------------- */
+
+typedef struct _ppc450tlbentry_ {
+  /* see ppc440 user guide (ppc440x6_um_v7_pub_2008.pdf) for definition */
+  unsigned w0;
+  unsigned w1;
+  unsigned w2;
+} ppc450tlbentry;
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+static ppc450tlbentry  bigmem_tlbs[4][BIGMEM_N_TLBS] __attribute__((aligned(16)));
+static int             n_bigmem_tlbs[4]={0,0,0,0};
+#else
+static ppc450tlbentry  bigmem_tlbs[BIGMEM_N_TLBS] __attribute__((aligned(16)));
+static int             n_bigmem_tlbs=0;
+#endif
+
+ppc450tlbentry  mktlb(unsigned va, unsigned pa_hi, unsigned pa_lo,
+		      unsigned tlbsizeflag, unsigned flag)
+{
+  ppc450tlbentry  ret;
+
+  ret.w0 = (va    & 0xfffff000) | PPC44x_TLB_VALID | tlbsizeflag;
+  ret.w1 = (pa_lo & 0xfffff000) | (pa_hi & 0xf);
+  ret.w2 = flag;
+
+  return ret;
+}
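
[Editor's note: a hedged example of the encoding: a 256MB supervisor mapping of virtual 0x80000000 onto physical 0x10000000; the flag bits are chosen for illustration only.]

    static void example_entry(void)
    {
            ppc450tlbentry e = mktlb(0x80000000, 0 /* pa bits 32..35 */, 0x10000000,
                                     PPC44x_TLB_256M,
                                     PPC44x_TLB_SR | PPC44x_TLB_SW | PPC44x_TLB_SX);
            /* e.w0 = 0x80000000 | PPC44x_TLB_VALID | PPC44x_TLB_256M,
             * e.w1 = 0x10000000, e.w2 = SR|SW|SX */
            (void)e;
    }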
+
+#define _1G    (0x40000000)
+#define _256M  (0x10000000)
+#define _16M   (0x01000000)
+
+
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+
+/*
+  NOTE: this routine is incomplete since it doesn't check alignment on va.
+  It only works with a 4 * 256MB region.
+ */
+static int create_bigmem_vn_tlbs(int coreid,
+				 unsigned va_start,
+				 unsigned pa_start,
+				 unsigned bigmemsize,
+				 unsigned tlb_flags )
+{
+  /* start is 16M aligned at least. size is a multiple of 16M */
+  int n_256M_tlbs = 0;
+  unsigned pa_addr = pa_start;
+  unsigned va_addr = va_start;
+  unsigned start_256M_region, end_256M_region;
+  int idx=0;
+
+  if( !IS_ALIGNED(va_start,_16M) || !IS_ALIGNED(pa_start,_16M) ) {
+    printk(KERN_ERR "Error: start address is not 16M aligned: va:%08x pa:%08x\n",
+	   va_start, pa_start);
+    return 0;
+  }
+
+  if( !IS_ALIGNED(bigmemsize,_16M) ) {
+    printk(KERN_ERR "Error: bigmemsize is not 16M aligned: %08x\n",
+	   bigmemsize);
+    return 0;
+  }
+
+  if( IS_ALIGNED(pa_start, _256M) ) {
+    start_256M_region = pa_start;
+  } else {
+    start_256M_region = (pa_start&(~(_256M-1))) + _256M;
+  }
+  end_256M_region =  (pa_start+bigmemsize)&(~(_256M-1));
+
+  if( end_256M_region-start_256M_region >= _256M ) {
+    n_256M_tlbs = (end_256M_region-start_256M_region)/_256M;
+  } else {
+    n_256M_tlbs=0;
+  }
+
+  if( pa_addr < start_256M_region ) {
+    unsigned border_addr = start_256M_region;
+    if( border_addr > (pa_start+bigmemsize) )
+      border_addr = pa_start+bigmemsize;
+
+    for(;pa_addr<border_addr; pa_addr+=_16M, va_addr+=_16M,idx++) {
+      bigmem_tlbs[coreid][idx] = 
+	mktlb(va_addr,0,pa_addr,PPC44x_TLB_16M,tlb_flags);
+    }
+  }
+
+  if( pa_addr == start_256M_region  ) {
+    for(;pa_addr<end_256M_region; pa_addr+=_256M, va_addr+=_256M,idx++) {
+      bigmem_tlbs[coreid][idx] = 
+	mktlb(va_addr,0,pa_addr,PPC44x_TLB_256M,tlb_flags);
+    }
+  }
+
+  if( pa_addr < pa_start+bigmemsize ) {
+    for(;pa_addr<pa_start+bigmemsize; pa_addr+=_16M,va_addr+=_16M,idx++) {
+      bigmem_tlbs[coreid][idx] = 
+	mktlb(va_addr,0,pa_addr,PPC44x_TLB_16M,tlb_flags);
+    }
+  }
+  return idx;
+}
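
[Editor's note: to see the three loops at work, hedged arithmetic: with pa_start = 0x71000000 and bigmemsize = 0x20000000, start_256M_region is 0x80000000 and end_256M_region is 0x90000000, so the routine emits fifteen 16MB TLBs for 0x71000000-0x80000000, one 256MB TLB for 0x80000000-0x90000000, and one trailing 16MB TLB for 0x90000000-0x91000000, returning idx = 17.]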
+
+
+
+
+/*
+  This function fills bigmem_tlbs[cid][], based on bigmem virtual, physical
+  start address and bigmem size. 
+
+  Return the number of TLBs on success; otherwise return a negative
+  number.
+*/
+/* CN */
+int  create_bigmem_tlbs_CN(int cid,unsigned va_start, unsigned pa_start, 
+			unsigned bigmemsize)
+{
+  int i, n;
+  unsigned flags = 
+    PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_SX|
+    PPC44x_TLB_UW|PPC44x_TLB_UR|PPC44x_TLB_UX|
+    PPC44x_TLB_U2|PPC44x_TLB_U3|PPC44x_TLB_WL1|PPC44x_TLB_M;
+  unsigned va, pa, size;
+
+  /* check whether the bigmem start addresses and size are 16MB aligned */
+  if( ! (IS_ALIGNED(va_start,_16M) && IS_ALIGNED(pa_start,_16M) &&
+	 IS_ALIGNED(bigmemsize,_16M))  )     {
+    return -1;
+  }
+
+  /* Disable L2 Optimistic prefetch */
+  if( zepto_kparam_noU3 ) flags = flags & (~PPC44x_TLB_U3);
+
+  n_bigmem_tlbs[cid] = 0;
+
+  va = va_start;
+  pa = pa_start;
+
+  if( bigmem_nprocs_per_node==4 ) {
+      n_bigmem_tlbs[cid] = create_bigmem_vn_tlbs(cid, va, pa, bigmemsize, flags);
+  } else if( bigmem_nprocs_per_node==2 ) {
+      printk(KERN_ERR "DUAL mode is not supported\n");
+  } else if( bigmem_nprocs_per_node==1 ) {
+      if( IS_ALIGNED(va,_1G) && IS_ALIGNED(pa,_1G) &&
+	  IS_ALIGNED(bigmemsize,_1G) ) {
+    
+	  n = bigmemsize / _1G;
+	  for(i=0; i<n; i++) {
+	      bigmem_tlbs[cid][i] = mktlb(va, 0, pa, PPC44x_TLB_1G, flags);
+	      va += _1G;
+	      pa += _1G;
+	      n_bigmem_tlbs[cid] ++;
+	  }
+      } else {
+	  /* fill 16M tlbs first */
+	  size = _16M;
+	  n = (bigmemsize/size)%16;
+	  for(i=0;i<n;i++) {
+	      bigmem_tlbs[cid][n_bigmem_tlbs[cid]] = mktlb(va,0,pa,PPC44x_TLB_16M, flags);
+	      va += size;
+	      pa += size;
+	      bigmemsize -= size;
+	      n_bigmem_tlbs[cid]++;
+	      if( n_bigmem_tlbs[cid] >= BIGMEM_N_TLBS ) return -2;
+	  }
+
+	  /* then fill 256M tlbs */
+	  size = _256M;
+	  n = (bigmemsize/size)%16;
+	  for(i=0;i<n;i++) {
+	      bigmem_tlbs[cid][n_bigmem_tlbs[cid]] = mktlb(va,0,pa,PPC44x_TLB_256M, flags);
+	      va += size;
+	      pa += size;
+	      bigmemsize -= size;
+	      n_bigmem_tlbs[cid]++;
+	      if( n_bigmem_tlbs[cid] >= BIGMEM_N_TLBS ) return -3;
+	  }
+      }
+  } else {
+      printk(KERN_ERR "Error: bigmem_nprocs_per_node=%d\n", bigmem_nprocs_per_node);
+  }
+
+  return n_bigmem_tlbs[cid];
+}
+
+#else  /* create_bigmem_tlbs for ION */
+
+int  create_bigmem_tlbs_ION(unsigned va_start, unsigned pa_end, 
+			unsigned bigmemsize)
+{
+    int i, n, sz;
+    unsigned flags = 
+	PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_SX|
+	PPC44x_TLB_UW|PPC44x_TLB_UR|PPC44x_TLB_UX|
+	PPC44x_TLB_U2|PPC44x_TLB_U3|PPC44x_TLB_WL1|PPC44x_TLB_M;
+    /*
+      PPC44x_TLB_U2:  L1 Store WithOut Allocate
+      PPC44x_TLB_U3:  L2 Optimistic Prefetch ("Automatic" when 0)
+      PPC44x_TLB_WL1: Write-Thru L1
+    */
+    unsigned va, pa, size;
+
+    /* check whether the bigmem addresses and size are 16MB aligned */
+    if( ! (IS_ALIGNED(va_start,_16M) && IS_ALIGNED(pa_end,_16M) &&
+	   IS_ALIGNED(bigmemsize,_16M))  )     {
+	return -1;
+    }
+
+    n_bigmem_tlbs = 0;
+
+    va = va_start;
+    pa = pa_end;
+    sz = bigmemsize;
+
+    if( IS_ALIGNED(va,_1G) && IS_ALIGNED(pa, _1G) ) {
+	n = sz / _1G;
+	for(i=0; i<n; i++) {
+	    pa -= _1G;
+	    bigmem_tlbs[i] = mktlb(va, 0, pa, PPC44x_TLB_1G, flags);
+	    zepto_debug(2, "installed 1G TLB #%d at va=0x%08x pa=0x%08x\n", i, va, pa);
+	    va += _1G;
+	    n_bigmem_tlbs ++;
+	    sz -= _1G;
+	}
+    }
+    if ( IS_ALIGNED(va, _256M) && IS_ALIGNED(pa, _256M) ) {
+	/* then fill 256M tlbs */
+	size = _256M;
+	n = (sz/size)%16;
+	for(i=0;i<n;i++) {
+	    pa -= size;
+	    bigmem_tlbs[n_bigmem_tlbs] = mktlb(va,0,pa,PPC44x_TLB_256M, flags);
+	    zepto_debug(2, "Configured 256M TLB #%d at va=0x%08x pa=0x%08x\n", i, va, pa);
+	    va += size;
+	    bigmemsize -= size;
+	    n_bigmem_tlbs++;
+	    sz -= _256M;
+	    if( n_bigmem_tlbs >= BIGMEM_N_TLBS ) return -3;
+	}
+    }
+    if ( IS_ALIGNED(va, _16M) && IS_ALIGNED(pa, _16M) ) {
+	/* fill the remaining 16M tlbs last */
+	size = _16M;
+	n = (sz/size)%16;
+	for(i=0;i<n;i++) {
+	    pa -= size;
+	    bigmem_tlbs[n_bigmem_tlbs] = mktlb(va,0,pa,PPC44x_TLB_16M, flags);
+	    zepto_debug(2, "Configured 16M TLB #%d at va=0x%08x pa=0x%08x\n", i, va, pa);
+	    va += size;
+	    bigmemsize -= size;
+	    n_bigmem_tlbs++;
+	    sz -= _16M;
+	    if( n_bigmem_tlbs >= BIGMEM_N_TLBS ) return -2;
+	}
+    }
+    return n_bigmem_tlbs;
+}
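
A worked example of the grow-down placement (hypothetical numbers): with
va_start = 0x40000000, pa_end = 0x80000000 and bigmemsize = 0x50000000, the 1GB
loop emits one entry at pa = 0x40000000 and leaves sz = 0x10000000 with va
advanced to 0x80000000; the 256MB loop then emits one entry at pa = 0x30000000,
sz reaches 0, the 16MB loop emits nothing, and the function returns 2. Growing
pa downward from an aligned end is what keeps the larger pages naturally
aligned.
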
+
+#endif
+
+static void tlbwrite(int slot, ppc450tlbentry e)
+{
+    __asm__ __volatile__ (
+	"tlbwe	%1,%0,0\n"
+	"tlbwe	%2,%0,1\n"
+	"tlbwe	%3,%0,2\n"
+	"isync\n"                 /* to invalidate shadow TLBs */
+	:
+	: "r" (slot), "r" (e.w0), "r" (e.w1), "r" (e.w2)  );
+}
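
The three tlbwe instructions write word-selects 0, 1 and 2 of the chosen slot
(PPC44x_TLB_PAGEID, PPC44x_TLB_XLAT and PPC44x_TLB_ATTRIB, the same word-selects
the disabled tlbsearch() below reads back with tlbre), and the trailing isync
makes the 450 discard its shadow TLBs so the new entry takes effect immediately.
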
+
+#if 0
+void tlbsearch(unsigned look_addr)
+{
+    int look_slot = -1;  /* signed, so the >= 0 match test below works */
+
+    __asm__ __volatile__ (
+	"tlbsx %0,0,%1\n"
+	: "=r"(look_slot)
+	: "r"(look_addr) 
+	);
+    if( look_slot>=0 ) {
+	register unsigned w0,w1,w2;
+	zepto_debug(2,"tlb matched slot=%d\n", look_slot);
+	__asm__
+	    __volatile__
+	    ( "tlbre   %0,%3,%4 \n"
+	      "tlbre   %1,%3,%5 \n"
+	      "tlbre   %2,%3,%6 \n"
+	      : "=r"(w0), "=r"(w1), "=r"(w2)
+	      : "r"(look_slot),
+	      /* %4 */  "i"(PPC44x_TLB_PAGEID),  /* ws=0 */
+	      /* %5 */  "i"(PPC44x_TLB_XLAT),    /* ws=1 */
+	      /* %6 */  "i"(PPC44x_TLB_ATTRIB)   /* ws=2 */
+		);
+	zepto_debug(2,"tlb matched w0=%08x w1=%08x w2=%08x\n", w0,w1,w2);
+    } else {
+	zepto_debug(2,"no tlb matched for %08x\n", look_addr);
+    }
+}
+#endif
+
+/*
+  Install TLBs for compute node process special address space. 
+*/
+
+#define BIGMEM_VA_UNINITIALIZED  (0xffffffff)
+#define BIGMEM_PA_UNINITIALIZED  (0xffffffff)
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+/* bigmem va start (per-core resource)*/ 
+static unsigned bigmem_va_start[4] = { BIGMEM_VA_UNINITIALIZED,
+				       BIGMEM_VA_UNINITIALIZED,
+				       BIGMEM_VA_UNINITIALIZED,
+				       BIGMEM_VA_UNINITIALIZED };
+
+/* bigmem pa start (per-core resource). initialized by init_bigmem_pa() */
+static unsigned long bigmem_pa_start[4] = { BIGMEM_PA_UNINITIALIZED,
+					    BIGMEM_PA_UNINITIALIZED,
+					    BIGMEM_PA_UNINITIALIZED,
+					    BIGMEM_PA_UNINITIALIZED };
+#else 
+static unsigned bigmem_va_start = BIGMEM_VA_UNINITIALIZED;
+static unsigned bigmem_pa_start = BIGMEM_PA_UNINITIALIZED;
+#endif
+
+/* scratchpad addresses (shared resources) */
+
+static unsigned scratchpad_va;
+static unsigned scratchpad_pa;
+static unsigned scratchpad_len;
+
+/* the following two functions are used for DMA'able region */
+unsigned long long get_entire_bigmem_pa_start(void)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    return (unsigned long long)bigmem_pa_start[0];
+#else
+    return (unsigned long long)bigmem_pa_start;
+#endif
+}
+
+unsigned long long get_entire_bigmem_pa_end(void)
+{
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+    if( bgp4GB )  return 0x100000000ULL;
+    else          return  0x80000000ULL;
+#else
+    if( bgp4GB )  return 0x100000000ULL - 0x01000000ULL;
+    else          return  0x80000000ULL - 0x01000000ULL;
+#endif
+}
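
The arithmetic here encodes the CNS reservation: without
CONFIG_ZEPTO_CNS_RELOCATION the top 16MB (0x01000000) of the 2GB or 4GB
physical space is left to CNS, so on a 2GB node the DMA'able region ends at
0x80000000 - 0x01000000 = 0x7f000000; with relocation the full top of memory
is usable.
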
+
+
+
+/* 
+   init_bigmem_pa() is called from 
+   zeptorc_init() @ arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c
+
+   With CN config, init_bigmem_pa() is called when /proc/setup_treeroute is
+   written (by the zoid control process).
+
+   With ION config, init_bigmem_pa() is called only once from zeptorc_init() @
+   arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c, which sets
+   bigmem_nprocs_per_node to 1.
+*/
+void init_bigmem_pa(void)
+{
+    unsigned bigmem_entire_pa_start;
+
+    if( bgp4GB )   bigmem_entire_pa_start = 0          - __bigmem_size;
+    else           bigmem_entire_pa_start = 0x80000000 - __bigmem_size;
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    if(bigmem_nprocs_per_node==4) {
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+	int i;
+	/* preliminary VN mode */
+	if( __bigmem_size != 0x40000000 ) { 
+	    printk(KERN_ERR "VN mode requires 1024MB\n");
+	    return;
+	}
+	for(i=0;i<4;i++)    bigmem_pa_start[i] = (0x40000000/4)*i + bigmem_entire_pa_start;
+#else
+	printk(KERN_ERR "VN mode is not available. Recompile w/ ZEPTO_CNS_RELOCATION\n");
+#endif
+    } else if(bigmem_nprocs_per_node==2) {
+	printk(KERN_ERR "DUAL mode is not implemented!\n");
+	return ;
+    } else {
+	/* SMP mode */
+	bigmem_pa_start[0] = bigmem_pa_start[1] = bigmem_pa_start[2] = bigmem_pa_start[3] = 
+	    bigmem_entire_pa_start;
+    }
+#else
+    bigmem_pa_start = bigmem_entire_pa_start;
+#endif
+
+    zepto_debug(2,"BIGMEM_TLB_START_SLOT=%d BIGMEM_TLB_END_SLOT=%d BIGMEM_N_TLBS=%d\n",
+		BIGMEM_TLB_START_SLOT, BIGMEM_TLB_END_SLOT, BIGMEM_N_TLBS );
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    {
+	int i;
+	for(i=0;i<4;i++) {
+	    zepto_debug(2,"bigmem_pa_start[%d]=0x%08lx\n",i, bigmem_pa_start[i]);
+	}
+    }
+#else
+    zepto_debug(2,"bigmem_pa_start=0x%08x\n",bigmem_pa_start);
+#endif
+}
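
A worked example of the split above (non-bgp4GB case): with __bigmem_size =
0x40000000, bigmem_entire_pa_start = 0x80000000 - 0x40000000 = 0x40000000, and
in VN mode each core receives a 256MB slice at bigmem_pa_start[i] = 0x40000000
+ i * 0x10000000, i.e. 0x40000000, 0x50000000, 0x60000000 and 0x70000000; in
SMP mode all four entries alias the same region.
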
+
+/*
+  install_bigmem_tlb() is called from :
+
+    1. do_page_fault() @ arch/powerpc/mm/fault.c 
+    2. load_elf_binary() @ fs/binfmt_elf.c
+
+  return 0 if succeeded, otherwise return -1
+*/
+int install_bigmem_tlb(void)
+{
+    int i;
+    unsigned va, pa;
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    int cid = 0;
+#endif
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    cid = bigmem_process_cid();
+
+    if( bigmem_pa_start[cid] == BIGMEM_PA_UNINITIALIZED )
+	return -1;
+
+    if( bigmem_va_start[cid] == BIGMEM_VA_UNINITIALIZED )
+        return -1;
+
+    va = bigmem_va_start[cid];
+    pa = bigmem_pa_start[cid];
+
+    if( n_bigmem_tlbs[cid] < 1 ) {
+	int rc;
+	zepto_debug(2, "cid=%d va=%08x pa=%08x size=%08x\n",
+		    cid,va,pa,get_bigmem_size() );
+	
+	rc = create_bigmem_tlbs_CN(cid,va,pa,get_bigmem_size());
+
+	if( rc < 0 ) {
+	    printk(KERN_ERR "[Z] create_bigmem_tlbs(cid=%d) failed. rc=%d\n",cid,rc);
+	    return -1;
+	}
+	
+	for(i=0; i<n_bigmem_tlbs[cid];i++ ) {
+	    zepto_debug(2, "slot=%d cid=%d w0:%08x w1:%08x w2=%08x\n",
+			BIGMEM_TLB_START_SLOT + i,
+			cid,
+			bigmem_tlbs[cid][i].w0, 
+			bigmem_tlbs[cid][i].w1, 
+			bigmem_tlbs[cid][i].w2 ); 
+	}
+    }
+
+    _tlbil_all(); /* invalidate normal tlbs and computenode tlbs */
+    
+    for(i=0; i<n_bigmem_tlbs[cid];i++ )   tlbwrite( BIGMEM_TLB_START_SLOT + i, bigmem_tlbs[cid][i] );
+
+#else
+    /* ION */
+    if ( bigmem_va_start == BIGMEM_VA_UNINITIALIZED )
+	return -1;
+    va = bigmem_va_start;
+    pa = get_bigmem_pa_end();  /* grow down from pa_end to preserve
+				  alignment */
+
+    if( n_bigmem_tlbs < 1 ) {
+	int rc;
+	rc = create_bigmem_tlbs_ION(va,pa,get_bigmem_size());
+
+	if( rc < 0 ) {
+	    printk(KERN_ERR "[Z] create_bigmem_tlbs() failed. rc=%d\n",rc);
+	    return -1;
+	}
+	
+	for(i=0; i<n_bigmem_tlbs;i++ ) {
+	    zepto_debug(2, "Z: slot=%d  w0:%08x w1:%08x w2=%08x\n",
+			BIGMEM_TLB_START_SLOT + i,
+			bigmem_tlbs[i].w0, 
+			bigmem_tlbs[i].w1, 
+			bigmem_tlbs[i].w2 ); 
+	}
+    }
+
+    _tlbil_all(); /* invalidate normal tlbs and computenode tlbs */
+    
+    for(i=0; i<n_bigmem_tlbs;i++ )   tlbwrite( BIGMEM_TLB_START_SLOT + i, bigmem_tlbs[i] );
+
+#endif
+
+    /* XXX: this might be a performance issue. do we need all of them? */
+    _bgp_dcache_invalidate_all();
+    _bgp_icache_invalidate_all();
+    _bgp_msync();
+    _bgp_isync();
+
+    return 0;
+}
+
+void free_bigmem_tlb(void)
+{
+    int cid = 0;
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    extern void force_clear_dma_usage(void);    /* arch/ppc/syslib/bgdd/bluegene_dma.c */
+    extern void bgplockbox_reset(void);         /* arch/ppc/syslib/bgdd/zepto_flatmem.c */
+#endif
+
+    cid = bigmem_process_cid();
+
+    _tlbil_all();
+
+    _bgp_dcache_invalidate_all();
+    _bgp_icache_invalidate_all();
+    _bgp_msync();
+    _bgp_isync();
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    bigmem_va_start[cid] = BIGMEM_VA_UNINITIALIZED;
+#else
+    bigmem_va_start = BIGMEM_VA_UNINITIALIZED;
+#endif
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    force_clear_dma_usage();
+    bgplockbox_reset();
+#endif
+
+    zepto_debug(2,"free_bigmem_tlb() cid=%d\n",cid);
+}
+
+int init_bigmem_tlb(unsigned entry)
+{
+    int cid = 0;
+
+    cid = bigmem_process_cid();
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    if( bigmem_va_start[cid] != 0xffffffff ) {
+	printk(KERN_ERR "[Z] bigmem is in use. cid=%d\n",cid);
+	return -1;
+    }
+
+    bigmem_va_start[cid] = entry & 0xf0000000;
+#else
+    if( bigmem_va_start != 0xffffffff ) {
+	printk(KERN_ERR "[Z] bigmem is in use. cid=%d\n",cid);
+	return -1;
+    }
+
+    bigmem_va_start = entry & 0xf0000000;
+#endif
+
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    zepto_debug(2,"init_bigmem_tlb  bigmem_va_start[%d]=0x%08x\n", cid, bigmem_va_start[cid]);
+#else
+    zepto_debug(2,"init_bigmem_tlb  bigmem_va_start=0x%08x (cid=%d)\n", bigmem_va_start, cid);
+#endif
+    return 0;
+}
+
+
+void fill_zero_bigmem(void)
+{
+    int cid = 0;
+    unsigned va;
+
+    cid = bigmem_process_cid();
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    va = bigmem_va_start[cid];
+    if( bigmem_va_start[cid]==0xffffffff ) {
+	printk(KERN_ERR "[Z] invalid bigmem_va_start[%d]\n", cid);
+	return;
+    }
+#else
+    va = bigmem_va_start;
+    if( bigmem_va_start==0xffffffff ) {
+	printk(KERN_ERR "[Z] invalid bigmem_va_start (cid=%d)\n", cid);
+	return;
+    }
+#endif
+    zepto_debug(2,"fill_zero_bigmem() cid=%d va=%08x\n",cid, va);
+
+    memset((void*)va, 0, get_bigmem_size()); 
+    zepto_debug(2,"fill_zero_bigmem() out cid=%d\n", cid);
+}
+
+unsigned get_bigmem_region_start(void) 
+{ 
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    int cid=0;
+
+    cid = bigmem_process_cid();
+
+    return bigmem_va_start[cid];
+#else
+    return bigmem_va_start;
+#endif
+}
+
+
+unsigned get_bigmem_region_end(void)   
+{ 
+#ifdef CONFIG_ZEPTO_COMPUTENODE    
+    int cid=0;
+
+    cid = bigmem_process_cid();
+
+    return bigmem_va_start[cid]+get_bigmem_size();
+#else
+    return bigmem_va_start+get_bigmem_size();
+#endif
+}
+
+unsigned get_bigmem_pa_start(void)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    int cid=0;
+
+    cid = bigmem_process_cid();
+
+    return bigmem_pa_start[cid];
+#else
+    return bigmem_pa_start;
+#endif
+}
+
+unsigned get_bigmem_pa_end(void)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    int cid=0;
+
+    cid = bigmem_process_cid();
+
+    /* ZXXX: fix this for VN/Dual */
+    return bigmem_pa_start[cid] + get_bigmem_size();
+#else
+    return bigmem_pa_start + get_bigmem_size();
+#endif
+}
+
+unsigned  bigmem_virt2phy_cid(unsigned long va,int cid)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    return ( va - bigmem_va_start[cid] ) + bigmem_pa_start[cid];
+#else
+    return ( va - bigmem_va_start ) + bigmem_pa_start;
+#endif
+
+}
+
+unsigned  bigmem_virt2phy(unsigned long va)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    int cid= bigmem_process_cid();
+
+    return ( va - bigmem_va_start[cid] ) + bigmem_pa_start[cid];
+#else
+    return ( va - bigmem_va_start ) + bigmem_pa_start;
+#endif
+}
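
Since bigmem is mapped physically contiguous, the translation is a fixed
linear offset: for example, with bigmem_va_start = 0x50000000 and
bigmem_pa_start = 0x40000000, va 0x50001234 translates to pa 0x40001234.
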
+
+
+static int bigmem_n_segs = 1;
+
+/* XXX: find a better place for this function */
+asmlinkage  long sys_zepto_generic(unsigned key, unsigned val)
+{
+    long ret = -EINVAL;
+
+    switch(key) {
+        case ZEPTOSC_NULL:
+	    ret = 0;
+	    break;
+	case ZEPTOSC_FLIP:
+	    ret = (~val);
+	    break;
+	case ZEPTOSC_COREID:
+	    ret = smp_processor_id();
+	    break;
+	case ZEPTOSC_ZEPTO_TASK:
+	    ret = IS_ZEPTO_TASK(current);
+	    break;
+	case ZEPTOSC_GETDEC:
+	    ret = get_dec(); /* returns a 32-bit value */
+	    break;
+	default:
+	    ret = -EINVAL;
+	    break;
+    }
+    return ret;
+}
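
A user-space usage sketch (not part of the patch): the syscall number is not
visible in this hunk, so __NR_zepto_generic below is an assumption taken from
the Kconfig help further down (which mentions syscall# 1050 for the decrementer
query), and the ZEPTOSC_* keys are assumed to be exported by some uapi header.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <zepto/zepto_syscall.h>  /* hypothetical header providing ZEPTOSC_* */

    #define __NR_zepto_generic 1050   /* assumed; see the Kconfig help text below */

    int main(void)
    {
        /* ask the kernel which core this Zepto task is running on */
        long core = syscall(__NR_zepto_generic, ZEPTOSC_COREID, 0);
        /* read the 32-bit decrementer, as used by the tick-synchronicity benchmark */
        long dec = syscall(__NR_zepto_generic, ZEPTOSC_GETDEC, 0);
        printf("core=%ld dec=%ld\n", core, dec);
        return 0;
    }
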
+
+/*
+  XXX: currently only 1 segment
+ */
+asmlinkage  long sys_zepto_bigmem(unsigned key, unsigned val)
+{
+    int cid=0;
+    long ret = -EINVAL;
+
+
+    if( !(enable_bigmem&&IS_ZEPTO_TASK(current)) ) return -EINVAL;
+
+    cid = bigmem_process_cid();
+
+    switch(key) {
+	case ZEPTOSC_BIGMEM_N_SEGS:
+	    ret = bigmem_n_segs;
+	    break;
+	case ZEPTOSC_BIGMEM_VA_START:
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+	    ret = bigmem_va_start[cid];
+#else
+	    ret = bigmem_va_start;
+#endif
+	    break;
+	case ZEPTOSC_BIGMEM_PA_START:
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+	    ret = bigmem_pa_start[cid];
+#else
+	    ret = bigmem_pa_start;
+#endif
+	    break;
+	case ZEPTOSC_BIGMEM_LEN:
+	    ret = get_bigmem_size();
+	    break;
+        case ZEPTOSC_SCRATCHPAD_VA_START:
+	    ret = scratchpad_va;
+	    break;
+	case ZEPTOSC_SCRATCHPAD_PA_START:
+	    ret = scratchpad_pa;
+	    break;
+	case ZEPTOSC_SCRATCHPAD_LEN:
+	    ret = scratchpad_len;
+	    break;
+	default:
+	    ret = -EINVAL;
+	    break;
+    }
+    return ret;
+}
+
+
+int  in_bigmem(unsigned address)
+{
+    if( (address >= get_bigmem_region_start() 	&& 
+	 address < get_bigmem_region_end() ) ) {
+	return 1;
+    }
+    return 0;
+}
+
+
+void zepto_init_tlbs_for_devices(void)
+{
+#ifdef CONFIG_ZEPTO_LOCKBOX_UPC_TLB 
+    const unsigned f_SRSW   = PPC44x_TLB_SW|PPC44x_TLB_SR;
+    const unsigned f_URUW   = PPC44x_TLB_UW|PPC44x_TLB_UR;
+    const unsigned f_IALL   = PPC44x_TLB_I|PPC44x_TLB_G|PPC44x_TLB_IL1I|PPC44x_TLB_IL1D|PPC44x_TLB_IL2I|PPC44x_TLB_IL2D;
+
+    ppc450tlbentry tlbe;
+
+    tlbe = mktlb(0xffff0000, 0x7, 0xffff0000, PPC44x_TLB_16K,  f_SRSW|f_IALL );        tlbwrite(0,tlbe);  /* lockbox sup  */
+    tlbe = mktlb(0xffff4000, 0x7, 0xffff4000, PPC44x_TLB_16K,  f_SRSW|f_URUW|f_IALL ); tlbwrite(1,tlbe);  /* lockbox user */
+    tlbe = mktlb(0xfffda000, 0x7, 0x10000000, PPC44x_TLB_4K,   f_SRSW|f_URUW|f_IALL ); tlbwrite(2,tlbe);  /* UPC */
+
+#ifdef CONFIG_ZEPTO_TREE_TORUS_TLB
+    tlbe = mktlb(0xfffdc000, 0x6, 0x10000000, PPC44x_TLB_1K,   f_SRSW|f_URUW|f_IALL);  tlbwrite(3,tlbe);  /* TREE0 (CIO) */
+    tlbe = mktlb(0xfffdd000, 0x6, 0x11000000, PPC44x_TLB_1K,   f_SRSW|f_URUW|f_IALL);  tlbwrite(4,tlbe);  /* TREE1 (MPI) */
+    tlbe = mktlb(0xfffd0000, 0x6, 0x00000000, PPC44x_TLB_16K,  f_SRSW|f_URUW|f_IALL ); tlbwrite(5,tlbe);  /* DMA */
+#endif
+
+#endif // CONFIG_ZEPTO_LOCKBOX_UPC_TLB
+    
+    tlb_44x_index = REGULAR_TLB_START_SLOT;    /* set to paged TLB start slot. 
+						  tlb_44x_index is reset to REGULAR_TLB_START_SLOT when rollover  
+						  See InstructionTLBError() and DataTLBError() in head_44x.S */
+}
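
For reference, the pinned slot layout this leaves behind (with both options
enabled): slot 0 lockbox supervisor, slot 1 lockbox user, slot 2 UPC, slots
3-5 TREE0/TREE1/DMA; the bigmem entries occupy BIGMEM_TLB_START_SLOT through
BIGMEM_TLB_END_SLOT, and pageable entries are allocated from
REGULAR_TLB_START_SLOT onward via tlb_44x_index, which wraps back to
REGULAR_TLB_START_SLOT on rollover.
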
+
+/* ---------------------------------------------------------------------- */
+
+void devicetlbs_init_smp(void* unused)
+{
+    zepto_init_tlbs_for_devices();
+}
+
+int __init zepto_tlb_init(void)
+{
+    zepto_init_tlbs_for_devices(); /* for core 0 */
+    zepto_debug(1,"zepto_tlb_init() at core0\n");
+
+    smp_call_function(devicetlbs_init_smp, NULL, 1); /* for other cores. */
+    zepto_debug(1,"zepto_tlb_init() at other cores\n");
+    return 0;
+}
+__initcall(zepto_tlb_init);
+
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 3496bc0..091a301 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -2,7 +2,6 @@
 	bool "Bamboo"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 440EP
 	select PCI
 	help
@@ -31,7 +30,6 @@
 	bool "Sequoia"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 440EPX
 	help
 	  This option enables support for the AMCC PPC440EPX evaluation board.
@@ -40,7 +38,6 @@
 	bool "Taishan"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 440GX
 	select PCI
 	help
@@ -51,7 +48,6 @@
 	bool "Katmai"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 440SPe
 	select PCI
 	select PPC4xx_PCI_EXPRESS
@@ -62,7 +58,6 @@
 	bool "Rainier"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 440GRX
 	select PCI
 	help
@@ -81,53 +76,34 @@
 	  See http://www.pikatechnologies.com/ and follow the "PIKA for Computer
 	  Telephony Developers" link for more information.
 
-config ARCHES
-	bool "Arches"
-	depends on 44x
-	default n
-	select PPC44x_SIMPLE
-	select 460EX # Odd since it uses 460GT but the effects are the same
-	select PCI
-	select PPC4xx_PCI_EXPRESS
-	help
-	  This option enables support for the AMCC Dual PPC460GT evaluation board.
-
 config CANYONLANDS
 	bool "Canyonlands"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 460EX
 	select PCI
 	select PPC4xx_PCI_EXPRESS
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
 	help
 	  This option enables support for the AMCC PPC460EX evaluation board.
 
-config GLACIER
-	bool "Glacier"
-	depends on 44x
-	default n
-	select PPC44x_SIMPLE
-	select 460EX # Odd since it uses 460GT but the effects are the same
-	select PCI
-	select PPC4xx_PCI_EXPRESS
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
-	help
-	  This option enables support for the AMCC PPC460GT evaluation board.
-
 config YOSEMITE
 	bool "Yosemite"
 	depends on 44x
 	default n
-	select PPC44x_SIMPLE
 	select 440EP
 	select PCI
 	help
 	  This option enables support for the AMCC PPC440EP evaluation board.
 
+config BGP
+	bool "Blue Gene/P"
+	depends on 44x
+	default y
+	select BLUEGENE
+	help
+	  This option enables support for the IBM Blue Gene/P supercomputer.
+
 #config LUAN
 #	bool "Luan"
 #	depends on 44x
@@ -160,21 +136,6 @@
 	  Most Virtex 5 designs should use this unless it needs to do some
 	  special configuration at board probe time.
 
-config PPC44x_SIMPLE
-	bool "Simple PowerPC 44x board support"
-	depends on 44x
-	default n
-	help
-	  This option enables the simple PowerPC 44x platform support.
-
-config PPC4xx_GPIO
-	bool "PPC4xx GPIO support"
-	depends on 44x
-	select ARCH_REQUIRE_GPIOLIB
-	select GENERIC_GPIO
-	help
-	  Enable gpiolib support for ppc440 based boards
-
 # 44x specific CPU modules, selected based on the board above.
 config 440EP
 	bool
@@ -218,6 +179,8 @@
 	bool
 	select PPC_FPU
 	select IBM_NEW_EMAC_EMAC4
+	select IBM_NEW_EMAC_RGMII
+	select IBM_NEW_EMAC_ZMII
 	select IBM_NEW_EMAC_TAH
 
 # 44x errata/workaround config symbols, selected by the CPU models above
@@ -233,3 +196,271 @@
 	bool
 	select XILINX_VIRTEX
 
+config BLUEGENE
+	bool
+	select PPC_FPU
+	select PPC_DOUBLE_FPU
+
+config BLUEGENE_NOISY_BOOT
+	bool "Send Blue Gene boot messages to the control system"
+	depends on BLUEGENE
+	default n
+	help
+	  Select this if you need to diagnose faults with the IO or Compute
+	  node kernel boot.
+
+config BLUEGENE_MAMBO
+	bool "Run on Blue Gene/P Mambo Simulator"
+	depends on BGP
+
+config L1_WRITETHROUGH
+	bool "Blue Gene enable writethrough mode"
+	depends on BLUEGENE
+	default n
+
+config BGP_DD1
+	bool "Blue Gene enable workarounds for BG/P DD1"
+	default n
+
+config BLUEGENE_TCP
+	bool "Blue Gene/P TCP on Torus"
+	default y if BGP
+
+config BLUEGENE_DMA_MEMCPY
+	bool "Blue Gene copy_tofrom_user optimisation with the torus DMA unit"
+	depends on BLUEGENE
+	default n
+	help
+	  'copyin/out' via the BGP DMA is believed functional, but seems not
+	  useful since copying via the parallel FP regs seems to run faster,
+	  even in cases where that wipes out the L1 cache. Code is left here in
+	  case someone wants to try improving it, and to indicate which sections
+	  of the BGP DMA unit (injection fifo and reception counters) are needed
+	  to make it work.
+
+config BLUEGENE_COLLECTIVE_TRACE
+	bool "Activate diagnostic trace in BlueGene/P collective network"
+	default y if BGP
+
+config BLUEGENE_TORUS_TRACE
+	bool "Activate diagnostic trace in BlueGene/P torus network"
+	default y if BGP
+
+config BLUEGENE_TCP_WITHOUT_NAPI
+	bool "Blue Gene/P TCP interrupt every packet (no NAPI) for debugging"
+	default n
+
+config BLUEGENE_UNIPROCESSOR
+	bool "Force BlueGene to run uniprocessor (450 debugging, or vrnic)"
+	depends on BLUEGENE
+	default n
+
+config BLUEGENE_STATISTICS
+	bool "Maintain statistics related to BlueGene networking"
+	depends on BLUEGENE
+	default y
+
+config BLUEGENE_SHARE_WITH_VRNIC
+	bool "Allow vRNIC to map all of Linux memory"
+	depends on BLUEGENE
+	default n
+
+config BGP_NFS_FIX
+	bool "Attempt a fix for disappearing files and directories under NFS"
+	default n
+
+config HUGE_KMALLOC
+	bool "Allow for 32MB kmalloc blocks"
+	default y if BGP
+
+config TASK_UNMAPPED_BASE
+	hex "Base virtual address for mmap"
+	depends on BGP
+	default "0x20000000"
+	help
+	  processor.h will set this to (TASK_SIZE / 8 * 3) if you do not set it here.
+
+config DEBUG_ALIGNMENT_HISTOGRAM
+	bool "copy_tofrom_user alignment histograms"
+	default y
+	help
+	  Enables maintenance of alignment histograms for copy_tofrom_user and
+	  similar functions, to explore whether alternative implementations
+	  might be useful for performance.
+
+config DEBUG_STACK_USAGE
+	bool "Stack utilization instrumentation"
+	default y if BGP
+	help
+	  Enables the display of the minimum amount of free stack which each
+	  task has ever had available in the sysrq-T and sysrq-P debug output.
+
+	  This option will slow down process creation somewhat.
+
+config BOOKE
+	bool
+	depends on 44x
+	default y
+
+config IBM_OCP
+	bool
+	depends on ASH || BAMBOO || BLUEGENE || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || YUCCA || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT
+	default y
+
+config IBM_EMAC4
+	bool
+	depends on 440GX || 440SP || 440SPE || BLUEGENE
+	default y
+
+config 405EP
+	bool
+	depends on BUBINGA
+	default y
+
+# Some of the items below might not be quite right; I'm putting part of the 2.6.19 Kconfig in here, enough
+# to get a BGP build working. tjcw.
+config 405GP
+	bool
+	depends on CPCI405 || EP405 || WALNUT
+	default y
+
+config 405GPR
+	bool
+	depends on SYCAMORE
+	default y
+
+config STB03xxx
+	bool
+	depends on REDWOOD_5 || REDWOOD_6
+	default y
+
+config EMBEDDEDBOOT
+	bool
+	depends on EP405 || XILINX_ML300 || XILINX_ML403
+	default y
+
+config IBM_OPENBIOS
+	bool
+	depends on ASH || REDWOOD_5 || REDWOOD_6
+	default y
+
+config PPC4xx_DMA
+	bool "PPC4xx DMA controller support"
+	depends on 4xx
+
+config PPC4xx_EDMA
+	bool
+	depends on !STB03xxx && PPC4xx_DMA
+	default y
+
+config PPC_GEN550
+	bool
+	depends on 4xx
+	default y
+
+choice
+	prompt "TTYS0 device and default console"
+	depends on 40x
+	default UART0_TTYS0
+
+config UART0_TTYS0
+	bool "UART0"
+
+config UART0_TTYS1
+	bool "UART1"
+
+endchoice
+
+config SERIAL_SICC
+	bool "SICC Serial port support"
+	depends on STB03xxx
+
+config UART1_DFLT_CONSOLE
+	bool
+	depends on SERIAL_SICC && UART0_TTYS1
+	default y
+
+config SERIAL_SICC_CONSOLE
+	bool
+	depends on SERIAL_SICC && UART0_TTYS1
+	default y
+
+
+# Kernel options for Zepto
+#
+# XXX: Where is the good place to put the zepto options?
+#
+menu "Zepto setup"
+
+config ZEPTO
+       bool "Enable BGP Base Zepto Features (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && BLUEGENE
+       help
+          Code that works on both CN and ION is categorized under Base Zepto
+          Features; mostly miscellaneous code such as kernel parameters.
+
+config ZEPTO_DEBUG
+       bool "Enable Zepto Debug Messages"
+       default y
+       depends on ZEPTO
+       help
+          Enable Zepto debug messages. You can control the debug output level
+          with the zepto_debug kernel parameter.
+
+config ZEPTO_MEMORY
+       bool "Enable BGP Zepto Memory Feature"
+       depends on ZEPTO
+       help
+          The kernel automatically maps all application segments to physically
+          contiguous memory (virtually contiguous too, of course) if the zepto
+          attribute in the ELF header is set. This is a per-cpu resource right
+          now and will eventually be a per-core resource.
+
+config ZEPTO_CNS_RELOCATION
+       bool "Relocate CNS physical memory location"
+       depends on ZEPTO_MEMORY
+       help
+          CNS is located at the end of physical memory (0x7ffc0000 on a 2GB
+          main memory machine), which prevents us from using the last 256MB
+          physical memory region. With this option, CNS is relocated to a
+          pre-allocated buffer in the kernel data section so that the region
+          can be freed up.
+
+config ZEPTO_LOCKBOX_UPC_TLB
+       bool "Statically install 3 TLBs for LOCKBOX and UPC"
+       help
+          This option is helpful when we need LOCKBOX and UPC on ION without
+          the other COMPUTENODE features. This option is on for COMPUTENODE
+          usage. It can be used without enabling ZEPTO_MEMORY.
+
+config ZEPTO_TREE_TORUS_TLB
+	bool "Statically install additional 3 TLBs for TREE and TORUS"
+	depends on ZEPTO_LOCKBOX_UPC_TLB
+	help
+	   This is basically for the compute node.
+
+config ZEPTO_COMPUTENODE
+       bool "Enable BGP Zepto ComputeNode Features"
+       depends on ZEPTO_MEMORY && ZEPTO_TREE_TORUS_TLB
+       help
+          Reserve TLBs for bigmem and pin down TLBs for compute-node-specific
+          devices, etc.
+
+config ZEPTO_EXPERIMENTAL
+       bool "Enable Zepto experimental codes"
+       depends on ZEPTO_COMPUTENODE
+       default n
+       help
+       - Synchronize OS tick globally: disabled by default. Add "globaltick"
+         to the kernel params to enable it.
+
+       - Disable the L1 parity recoverability: enabled by default. Add "noPRE"
+         to the kernel params. No guarantee on data integrity when an L1
+         parity error happens.
+
+       - Disable the L2 optimistic prefetch: it is enabled by default. Add
+         "noU3" to the kernel params to disable it.
+
+       - System call to get the decrementer value: syscall# 1050. This is used
+         by benchmark code that measures OS tick synchronicity.
+
+
+endmenu  # Zepto setup
+
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 01f51da..3596f55 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -4,3 +4,4 @@
 obj-$(CONFIG_SAM440EP) 	+= sam440ep.o
 obj-$(CONFIG_WARP)	+= warp.o
 obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o
+obj-$(CONFIG_BGP)	+= bgp_cns.o bgp_bic.o bgp.o bgp_pers.o
diff --git a/arch/powerpc/platforms/44x/bgp.c b/arch/powerpc/platforms/44x/bgp.c
new file mode 100644
index 0000000..17e5d4c
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp.c
@@ -0,0 +1,217 @@
+/*
+ * Blue Gene/P board specific routines
+ *
+ * Todd Inglett <tinglett@us.ibm.com>
+ * Copyright 2003-2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/root_dev.h>
+#include <linux/delay.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/ppc4xx.h>
+#include <asm/mmu-44x.h>
+#include <asm/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/bluegene.h>
+#include <asm/udbg.h>
+#include <asm/bluegene_ras.h>
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+#include <asm/bgcns.h>
+#include <asm/bgp_personality.h>
+#endif
+
+
+extern int bgWriteRasStr(unsigned int component, 
+                          unsigned int subcomponent,
+                          unsigned int errCode,
+                          char*        str,
+                          unsigned int strLen);
+extern int bgFlushOutboxMsgs(void);
+
+/*
+ * bgp_probe() is called very early; cpu 0 only
+ *	one pinned TLB
+ *	device-tree isn't unflattened
+ * Look to see if the boot wrapper says we are a Blue Gene/P.
+ * Set up udbg_putc, but it will do nothing until the CNS interface is initialized.
+ */
+static int __init bgp_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,bluegenep"))
+		return 0;
+
+#ifdef CONFIG_BLUEGENE_NOISY_BOOT 
+	udbg_putc = bgp_udbg_putc;
+#endif
+	return 1;
+}
+
+/*
+ * There isn't a concept of a kernel asking to be rebooted on Blue Gene.
+ * The restart, power_off and halt functions should produce RAS to tell the control
+ * system this node is no longer functional.
+ */
+static void bgp_halt(void)
+{
+	bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_halted, "System Halted", 0);	
+
+        // Flush halt RAS and any other buffered outbox messages.
+        while (bgFlushOutboxMsgs()); 
+}
+
+static void bgp_panic(char *str)
+{
+	bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_panic, str, 0);
+
+        // Flush halt RAS and any other buffered outbox messages.
+        while (bgFlushOutboxMsgs());
+}
+
+/* Blue Gene is given the decrementer frequency via the device tree (personality). */
+static void __init bgp_calibrate_decr(void)
+{
+	struct device_node *pernode = of_find_node_by_path("/ibm,bluegene/personality");
+
+	ppc_tb_freq = 0;
+	if (pernode) {
+		int len;
+		const unsigned *reg = of_get_property(pernode, "frequency", &len);
+		if (reg)
+			ppc_tb_freq = *reg;
+	}
+	if (ppc_tb_freq == 0) {
+		udbg_printf("personality/frequency device-tree field not found!\n");
+		ppc_tb_freq = 850000000;	/* A very good default */
+	}
+
+	ppc_proc_freq = ppc_tb_freq;
+	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+	mtspr(SPRN_TCR, TCR_DIE);
+}
+
+/* Generic 44x init disables icache prefetch which can be enabled. */
+static void __init bgp_enable_icache_prefetch(void)
+{
+	mtspr(SPRN_CCR0, mfspr(SPRN_CCR0)|2);
+	isync();
+	mb();
+}
+
+#ifdef CONFIG_SMP
+/*
+ * The Blue Gene interrupt controller (in bgp_bic.c) can implement
+ * sending IPIs with a cpumask.   Consider changing this interface.
+ */
+static void smp_bluegene_message_pass(int target, int msg)
+{
+	unsigned int i;
+
+	if (target < NR_CPUS) {
+		bgp_send_ipi(target, msg);
+	} else {
+		for_each_online_cpu(i) {
+			if (target == MSG_ALL_BUT_SELF
+			    && i == smp_processor_id())
+				continue;
+			bgp_send_ipi(i, msg);
+		}
+	}
+}
+
+
+/* Return the number of cpus possible in the system: the weight of
+ * cpu_possible_map (normally 4), which may disagree with NR_CPUS.
+ *
+ * The IPI interrupt handlers are registered later, per cpu, in
+ * smp_bluegene_setup_cpu().  The cpu_present_map was already set up
+ * via setup_arch, so we use it.
+ */
+static int smp_bluegene_probe(void)
+{
+	return  cpus_weight(cpu_possible_map);
+}
+
+/*
+ * Start a cpu by calling firmware.
+ */
+static void smp_bluegene_kick_cpu(int cpu)
+{
+	int ret = bluegene_takeCPU(cpu, 0, (void (*)(unsigned, void *))4);
+	if (ret == 0) {
+		cpu_set(cpu, cpu_present_map);
+	} else {
+		udbg_printf("CPU %d is not available (firmware returns %d)\n", cpu, ret);
+	}
+}
+
+/*
+ * Each secondary cpu needs some initialization.
+ */
+static void __init smp_bluegene_setup_cpu(int nr)
+{
+	int cpu = smp_processor_id();
+
+	flush_instruction_cache();
+	bgp_enable_icache_prefetch();
+	bgp_init_cns();		/* map CNS for this cpu */
+
+	bgp_init_IPI(cpu, PPC_MSG_CALL_FUNCTION);
+	bgp_init_IPI(cpu, PPC_MSG_RESCHEDULE);
+	bgp_init_IPI(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+	bgp_init_IPI(cpu, PPC_MSG_DEBUGGER_BREAK);
+}
+
+static struct smp_ops_t bluegene_smp_ops = {
+	.message_pass = smp_bluegene_message_pass,
+	.probe = smp_bluegene_probe,
+	.kick_cpu = smp_bluegene_kick_cpu,
+	.setup_cpu = smp_bluegene_setup_cpu,
+};
+#endif
+
+extern void bgp_setup_arch_IRQ(void);
+/*
+ * Initialize CNS (Common Node Services) in bgp_cns.c.
+ * Once we have initialized CNS, we can crudely print messages with
+ * udbg_printf().
+ */
+static void __init bgp_setup_arch(void)
+{
+	ROOT_DEV = Root_RAM0;
+
+	bgp_enable_icache_prefetch();
+	bgp_init_cns();
+	bgp_setup_arch_IRQ() ;
+
+
+#ifdef CONFIG_SMP
+	smp_ops = &bluegene_smp_ops;
+#endif
+}
+
+define_machine(bgp) {
+	.name			= "bgp",
+	.probe			= bgp_probe,
+	.setup_arch		= bgp_setup_arch,
+	.init_IRQ		= bgp_init_IRQ,
+	.get_irq		= bgp_get_irq,
+	.restart		= (void (*)(char *))bgp_halt,
+	.power_off		= bgp_halt,
+	.halt			= bgp_halt,
+	.panic			= bgp_panic,
+	.calibrate_decr		= bgp_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/44x/bgp_bic.c b/arch/powerpc/platforms/44x/bgp_bic.c
new file mode 100644
index 0000000..b7b10c3
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp_bic.c
@@ -0,0 +1,685 @@
+/*
+ * Blue Gene/P interrupt controller
+ *
+ * Linux wants IRQs mapped to a small integer space.
+ *
+ * The bic defines 15 groups and 32 group interrupts in each group.
+ * We encode an IRQ number like this (which requires NR_IRQS=512):
+ *    GGGGIIIII
+ *   where GGGG is the 4-bit group number+1 (i.e. GGGG=0000 is not used),
+ *   and IIIII is the 5-bit interrupt index within the 32-bit word.
+ * The interrupt indexes are numbered from the left bit (powerpc-style).
+ * We avoid encoding GGGG=0000 so we never end up with an IRQ=0 which is a
+ * flag for "no interrupt" in arch/powerpc.
+ *
+ * The IPIs subdivide the group 0 interrupt word as follows:
+ *
+ *  CRSD CRSD CRSD CRSD .... .... .... ....
+ *  0    4    8    12   16   20   24   28
+ *  cpu0 cpu1 cpu2 cpu3
+ *
+ * where C=call, R=resched, S=call-single, D=debug, and .=unused
+ *
+ * We encode IPI IRQ numbers specially.   By the above encoding they would be
+ * 32..47 for these 16 bits.
+ *
+ * The other 16 bits in group 0 are treated normally.   These will translate to
+ * IRQ = 48..63 and can be used by software to simulate hardware interrupts for
+ * other purposes.
+ *
+ *
+ * Todd Inglett <tinglett@us.ibm.com>
+ * Copyright 2003-2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <asm/bluegene.h>
+
+/* #define TJCW_USE_BYTEWISE */
+/* #define BIC_DIAGNOSE 1 */
+
+#if defined(BIC_DIAGNOSE)
+extern int bgp_dma_tcp_tracemask ;
+static int bic_diagnose_count  ;
+enum {
+	k_bic_diagnose_limit = 100
+};
+static unsigned int bic_diagnosing(void)
+{
+	if( 0 == (bgp_dma_tcp_tracemask & 0x80000000) )
+		{
+		if( bic_diagnose_count < k_bic_diagnose_limit)
+			{
+				bic_diagnose_count += 1 ;
+				return 1 ;
+			}
+		}
+	else
+		{
+			bic_diagnose_count = 0 ;
+		}
+	return 0 ;
+}
+#define BIC_DIAG(X) if(bic_diagnosing()) { X ; }
+#else
+#define BIC_DIAG(X)
+#endif
+
+void bic_unmask_irq(unsigned int irq);
+EXPORT_SYMBOL(bic_unmask_irq) ;
+static void bic_mask_irq(unsigned int irq);
+static void bic_eoi_irq(unsigned int irq);
+
+static void bic_unmask_irq_bytewise(unsigned int irq) __attribute__((unused)) ;
+
+static void bic_mask_irq_bytewise(unsigned int irq) __attribute__((unused)) ;
+#if defined(TJCW_USE_BYTEWISE)
+static struct irq_chip bgp_irq_chip = {
+	.name		= "BIC",
+	.unmask		= bic_unmask_irq_bytewise,
+	.mask		= bic_mask_irq_bytewise,
+	.eoi		= bic_eoi_irq,
+};
+#else
+static struct irq_chip bgp_irq_chip = {
+	.name		= "BIC",
+	.unmask		= bic_unmask_irq,
+	.mask		= bic_mask_irq,
+	.eoi		= bic_eoi_irq,
+};
+#endif
+
+
+/* Note that the BIC (and other devices) are at phys addresses > 4GB */
+#define BIC_PHYS 0x730000000LL
+
+/* These are defined by the hardware. */
+#define NR_BIC_GROUPS 15
+#define NR_BIC_GINTS 32
+#define NR_BIC_CPUS 4
+
+/* 4-bit target value for target register */
+#define BIC_TARGET_MASK (0xf)
+#define BIC_TARGET_TYPE_NORMAL (1<<2)
+#define BIC_TARGET_NORMAL(cpu) (BIC_TARGET_TYPE_NORMAL|(cpu))
+#define BIC_DEFAULT_CPU 0
+#define BIC_IPI_GROUP 0
+
+/* Define the layout of each group's registers.
+ * This layout should be 0x80 bytes long (including pad).
+ */
+struct bic_group_regs {
+	uint32_t status;			/* 0x00  RW */
+	uint32_t rd_clr_status;			/* 0x04  RO */
+	uint32_t status_clr;			/* 0x08  WO */
+	uint32_t status_set;			/* 0x0c  WO */
+	uint32_t target[4];			/* 0x10  RW */
+	uint32_t normal[NR_BIC_CPUS];		/* 0x20  RW */
+	uint32_t critical[NR_BIC_CPUS];		/* 0x30  RW */
+	uint32_t mcheck[NR_BIC_CPUS];		/* 0x40  RW */
+	uint32_t _pad[12];			/* 0x50     */
+};
+
+/* Define the layout of the interrupt controller mem mapped regs. */
+struct bic_regs {
+	struct bic_group_regs group[NR_BIC_GROUPS];		/* 0x000 */
+	uint32_t hier_normal[NR_BIC_CPUS];			/* 0x780 */
+	uint32_t hier_critical[NR_BIC_CPUS];			/* 0x790 */
+	uint32_t hier_mcheck[NR_BIC_CPUS];			/* 0x7a0 */
+};
+
+/*  This table is indexed by 'real' IRQ, i.e. BIC values. Linux 'virtual' IRQs are +32 */
+static volatile unsigned char intended_cpu_for_irq[NR_BIC_GROUPS*NR_BIC_GINTS] =
+  {
+/*  0 */
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),
+      BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),
+      BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),
+
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/*  32 */
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/*  64 */
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/*  128 */
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/*  256 */
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+      BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0)
+/*  480 */
+
+  };
+
+static inline void out_be8(unsigned char * target, unsigned int val)
+{
+	*target = val ;
+}
+
+static inline unsigned int in_be8(unsigned char * target)
+{
+	return *target ;
+}
+
+/* Group is encoded in the upper 4 bits.   We account for group+1. */
+static inline unsigned bic_irq_to_hwgroup(unsigned irq)
+{
+	return ((irq >> 5) & 0xf) - 1;
+}
+/* Gint is encoded in the bottom 5 bits. */
+static inline unsigned bic_irq_to_hwgint(unsigned irq)
+{
+	return irq & 0x1f;
+}
+
+static inline unsigned bic_irq_to_hwirq(unsigned irq)
+{
+	return irq - (1 << 5);
+}
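
A round-trip check of the encoding described in the file header: group 3,
gint 7 encodes to irq = ((3+1) << 5) | 7 = 135 (0x87); the helpers above
recover group = ((135 >> 5) & 0xf) - 1 = 3 and gint = 135 & 0x1f = 7, and the
index into the per-IRQ tables is hwirq = 135 - 32 = 103.
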
+
+/* bic_hw_to_irq(unsigned group, unsigned gint) is in bluegene.h */
+/* Need to keep a track in memory of where each interrupt is pointed at
+ * so we can reassemble the right hardware register contents even with SMP behaviour
+ */
+static volatile unsigned char cpu_for_irq[NR_BIC_GROUPS*NR_BIC_GINTS] ;
+static void set_cpu_for_hwirq(unsigned int hwirq, unsigned int tcpu)
+  {
+    cpu_for_irq[hwirq] = tcpu ;
+  }
+
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu)
+  {
+	  unsigned int hwirq=bic_irq_to_hwirq(irq) ;
+    if( hwirq < NR_BIC_GROUPS*NR_BIC_GINTS )
+      {
+        intended_cpu_for_irq[hwirq] = BIC_TARGET_NORMAL(cpu) ;
+      }
+	BIC_DIAG(printk(KERN_INFO "bic_set_cpu_for_irq irq=0x%02x cpu=%d hwirq=0x%02x\n",
+			irq,cpu,hwirq)) ;
+  }
+
+/*  Stop the BIC from passing an interrupt to the CPU. The idea is to */
+/*  call this in a FLIH if you don't want a 'reinterrupt', and call */
+/*  'bic_set_cpu_for_irq' later on (e.g. from a NAPI 'poll') */
+void bic_disable_irq(unsigned int irq)
+  {
+    if( bic_irq_to_hwirq(irq) < NR_BIC_GROUPS*NR_BIC_GINTS )
+      {
+        intended_cpu_for_irq[bic_irq_to_hwirq(irq)] = 0 ;
+      }
+  }
+
+EXPORT_SYMBOL(bic_disable_irq) ;
+
+int bic_get_cpu_for_irq(unsigned int irq)
+  {
+    return intended_cpu_for_irq[bic_irq_to_hwirq(irq)] ;
+  }
+
+
+struct bic {
+	spinlock_t mask_lock;	/* could be finer grained if necessary */
+	struct bic_regs *regs;
+	uint32_t enabled_mask[NR_BIC_GROUPS] ; /* Hardware can report status even if a bit doesn't cause an interrupt; this is used to mask those off */
+} bic;
+
+
+/* ipi_to_irq(cpu, msg)
+ * Produce a Linux IRQ number given a cpu+func.
+ * The caller ensures cpu in 0..3 and func in 0..3.
+ */
+static inline unsigned ipi_to_irq(unsigned cpu, unsigned func)
+{
+	return bic_hw_to_irq(BIC_IPI_GROUP, (cpu<<2)+func);
+}
+/* Generate a 4-bit IPI range mask for this cpu retaining the unused bits. */
+static inline unsigned ipi_mask(unsigned cpu)
+{
+	return 0xf0000000U >> (cpu << 2) | 0x0000ffffU;
+}
+/* Given an gint we know is an IPI (0..15), return the cpu that
+ * should be targeted.  Remember these bits are numbered from the left.
+ */
+static inline unsigned ipi_gint_cpu(unsigned gint)
+{
+	return (gint >> 2) & 0x3;
+}
+static inline int is_ipi(unsigned group, unsigned gint)
+{
+	return (group == 0) && (gint < 16);
+}
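
For example, a reschedule IPI (func = 1 in the CRSD layout from the file
header) aimed at cpu 2 uses gint = (2 << 2) + 1 = 9, hence irq =
bic_hw_to_irq(0, 9) = ((0+1) << 5) | 9 = 41, and ipi_gint_cpu(9) =
(9 >> 2) & 3 = 2 recovers the target cpu.
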
+
+#define GINT_TO_IRQ(group, gint) (((group) << 5) | (gint))
+static unsigned int get_tcpu_for_tnum(unsigned int group, unsigned int tnum)
+  {
+    unsigned int rbase = GINT_TO_IRQ(group,(tnum<<3)) ;
+    unsigned int t0 = cpu_for_irq[rbase+0] ;
+    unsigned int t1 = cpu_for_irq[rbase+1] ;
+    unsigned int t2 = cpu_for_irq[rbase+2] ;
+    unsigned int t3 = cpu_for_irq[rbase+3] ;
+    unsigned int t4 = cpu_for_irq[rbase+4] ;
+    unsigned int t5 = cpu_for_irq[rbase+5] ;
+    unsigned int t6 = cpu_for_irq[rbase+6] ;
+    unsigned int t7 = cpu_for_irq[rbase+7] ;
+    return ((t0 & 0x0f) << 28) |
+           ((t1 & 0x0f) << 24) |
+           ((t2 & 0x0f) << 20) |
+           ((t3 & 0x0f) << 16) |
+           ((t4 & 0x0f) << 12) |
+           ((t5 & 0x0f) << 8) |
+           ((t6 & 0x0f) << 4) |
+           ((t7 & 0x0f)) ;
+
+  }
+static unsigned int get_tcpu_for_tnum_byte(unsigned int group, unsigned int tnum)
+  {
+    unsigned int rbase = GINT_TO_IRQ(group,(tnum<<1)) ;
+    unsigned int t0 = cpu_for_irq[rbase+0] ;
+    unsigned int t1 = cpu_for_irq[rbase+1] ;
+    return ((t0 & 0x0f) << 4) |
+           ((t1 & 0x0f)) ;
+
+  }
+/*
+ * Unmasking an IRQ will enable it.
+ * We reach into the bic to set the target core of the interrupt appropriately.
+ * For now, interrupts are wired to a default core, although IPIs (of course)
+ * must be directed appropriately.
+ */
+void bic_unmask_irq(unsigned int irq)
+{
+	unsigned group = bic_irq_to_hwgroup(irq);
+	unsigned gint = bic_irq_to_hwgint(irq);
+	unsigned tnum = gint >> 3;
+	unsigned tidx = gint & 7;
+/* 	unsigned orig, tmask, tcpu; */
+	unsigned tmask, tcpu;
+	uint32_t *targetp = &bic.regs->group[group].target[tnum];
+	unsigned cpu;
+	unsigned int request_tcpu ;
+	unsigned int verify_tcpu ;
+
+	spin_lock(&bic.mask_lock);
+	bic.enabled_mask[group] |= 0x80000000 >> gint ;  /*  Note that this interrupt is enabled */
+	spin_unlock(&bic.mask_lock);
+
+	tmask= ~(0xf << (7-tidx)*4);
+
+	if (group == 0 /*is_ipi(group, gint)*/) {
+		/* These bits are magic.  We know they are for IPIs
+		 * and must direct them to the correct core.
+		 */
+		cpu = ipi_gint_cpu(gint);
+		tcpu = BIC_TARGET_NORMAL(cpu) << (7-tidx)*4;
+	} else {
+		cpu = BIC_DEFAULT_CPU;
+		tcpu = BIC_TARGET_NORMAL(cpu) << (7-tidx)*4;
+	}
+
+
+	{
+		unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+		unsigned int tgtcpu=intended_cpu_for_irq[hwirq] ;  /*  Note .. 'cpu' has the b'0100' bit set already if appropriate */
+		set_cpu_for_hwirq(hwirq,tgtcpu) ;
+		request_tcpu=get_tcpu_for_tnum(group,tnum) ;
+/* 		BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d targtval=0x%08x request_tcpy=0x%08x\n", */
+/* 				irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu,(orig & tmask)|tcpu, request_tcpu)) ; */
+		BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d request_tcpu=0x%08x\n",
+				irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu, request_tcpu)) ;
+
+		out_be32(targetp, request_tcpu) ;
+		verify_tcpu=get_tcpu_for_tnum(group,tnum) ;
+		while(request_tcpu != verify_tcpu)
+		{
+			 /*  If another CPU changed the target for an interrupt while we were writing, pick up the change */
+			 /*  and set the hw register appropriately. Eventually the last writer should reflect what */
+			 /*  everyone wants. */
+			request_tcpu = verify_tcpu ;
+			printk(KERN_NOTICE "irq=0x%02x set=%x redo request_tcpu=%08x\n", irq,BIC_TARGET_NORMAL(cpu),request_tcpu) ;
+			out_be32(targetp, request_tcpu) ;
+			verify_tcpu=get_tcpu_for_tnum(group,tnum) ;
+		}
+
+	}
+
+}
+static void bic_unmask_irq_bytewise(unsigned int irq)
+{
+	unsigned group = bic_irq_to_hwgroup(irq);
+	unsigned gint = bic_irq_to_hwgint(irq);
+	unsigned tnum = gint >> 1;
+	unsigned tidx = gint & 1;
+/* 	unsigned orig, tmask, tcpu; */
+	unsigned tmask;
+	unsigned char *basep = (unsigned char *)(bic.regs->group[group].target) ;
+	unsigned char *targetp = basep+tnum ;
+	unsigned cpu;
+	unsigned int request_tcpu ;
+	unsigned int verify_tcpu ;
+
+	spin_lock(&bic.mask_lock);
+	bic.enabled_mask[group] |= 0x80000000 >> gint ;  /*  Note that this interrupt is enabled */
+	spin_unlock(&bic.mask_lock);
+
+	tmask= ~(0xf << (1-tidx)*4);
+
+	if (group == 0 /*is_ipi(group, gint)*/) {
+		/* These bits are magic.  We know they are for IPIs
+		 * and must direct them to the correct core.
+		 */
+		cpu = ipi_gint_cpu(gint);
+	} else {
+		cpu = BIC_DEFAULT_CPU;
+	}
+
+
+	{
+		unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+		unsigned int tgtcpu=intended_cpu_for_irq[hwirq] ;  /*  Note .. 'cpu' has the b'0100' bit set already if appropriate */
+		set_cpu_for_hwirq(hwirq,tgtcpu) ;
+		request_tcpu=get_tcpu_for_tnum_byte(group,tnum) ;
+/* 		BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d targtval=0x%08x request_tcpy=0x%08x\n", */
+/* 			irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu,(orig & tmask)|tcpu, request_tcpu)) ; */
+		BIC_DIAG(printk(KERN_INFO "bic_unmask_irq_bytewise irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d request_tcpu=0x%08x\n",
+			irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu, request_tcpu)) ;
+
+		out_be8(targetp, request_tcpu) ;
+		verify_tcpu=get_tcpu_for_tnum_byte(group,tnum) ;
+		while(request_tcpu != verify_tcpu)
+		{
+			 /*  If another CPU changed the target for an interrupt while we were writing, pick up the change */
+			 /*  and set the hw register appropriately. Eventually the last writer should reflect what */
+			 /*  everyone wants. */
+			request_tcpu = verify_tcpu ;
+			printk(KERN_NOTICE "irq=0x%02x set=%x redo request_tcpu=%08x\n", irq,BIC_TARGET_NORMAL(cpu),request_tcpu) ;
+			out_be8(targetp, request_tcpu) ;
+			verify_tcpu=get_tcpu_for_tnum_byte(group,tnum) ;
+		}
+
+	}
+
+}
+
+/*
+ * Masking an IRQ will disable it.
+ * We do this by changing the target to disable.   This works for IPI bits,
+ */
+static void bic_mask_irq(unsigned int irq)
+{
+	unsigned group = bic_irq_to_hwgroup(irq);
+	unsigned gint = bic_irq_to_hwgint(irq);
+	unsigned tnum = gint >> 3;
+	unsigned tidx = gint & 7;
+	unsigned orig, tmask;
+	uint32_t *targetp = &bic.regs->group[group].target[tnum];
+
+	tmask = BIC_TARGET_MASK << (7-tidx)*4;
+	BIC_DIAG(printk(KERN_INFO "bic_mask_irq irq=0x%02x group=0x%02x gint=0x%02x tmask=0x%08x\n",
+			irq,group,gint,tmask)) ;
+	spin_lock(&bic.mask_lock);
+	bic.enabled_mask[group] &= 0xffffffff ^ (0x80000000 >> gint) ;  /*  Note that this interrupt is disabled */
+	orig = in_be32(targetp);
+	out_be32(targetp, orig & ~tmask);
+	spin_unlock(&bic.mask_lock);
+}
+
+static void bic_mask_irq_bytewise(unsigned int irq)
+{
+	unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+	unsigned group = bic_irq_to_hwgroup(irq);
+	unsigned gint = bic_irq_to_hwgint(irq);
+	unsigned tnum = gint >> 1;
+	unsigned tidx = gint & 1;
+	unsigned orig, tmask;
+	unsigned char *basep = (unsigned char *)(bic.regs->group[group].target) ;
+	unsigned char *targetp = basep+tnum ;
+
+	set_cpu_for_hwirq(hwirq,0) ;
+	tmask = BIC_TARGET_MASK << ((1-tidx)*4);
+	BIC_DIAG(printk(KERN_INFO "bic_mask_irq_bytewise irq=0x%02x group=0x%02x gint=0x%02x tmask=0x%02x\n",
+			irq,group,gint,tmask)) ;
+	spin_lock(&bic.mask_lock);
+	bic.enabled_mask[group] &= 0xffffffff ^ (0x80000000 >> gint) ;  /*  Note that this interrupt is disabled */
+	orig = in_be8(targetp);
+	out_be8(targetp, orig & ~tmask);
+	spin_unlock(&bic.mask_lock);
+}
+
+/*
+ * End an interrupt.   We just need to write the bit to be cleared
+ * and the hardware handles it.   No locking needed.
+ */
+static void bic_eoi_irq(unsigned int irq)
+{
+	unsigned group = bic_irq_to_hwgroup(irq);
+	unsigned gint = bic_irq_to_hwgint(irq);
+	uint32_t gintbits = 1 << (31 - gint);
+/* 	BIC_DIAG(printk(KERN_INFO "bic_eoi_irq irq=0x%02x group=0x%02x gint=0x%02x \n",irq,group,gint)) ; */
+
+	out_be32(&bic.regs->group[group].status_clr, gintbits);
+	mb();
+}
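+/* Bit numbering follows the BIC's convention of counting from the MSB:
+ * acknowledging gint 3, for example, writes 1 << (31 - 3) = 0x10000000
+ * to the group's status_clr register.
+ */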
+
+/* Return the hardware cpu index as needed by the bic.
+ * Currently this matches smp_processor_id(), but we do this explicitly
+ * in case we ever want to virtualize the processor id.
+ */
+static inline unsigned this_cpu(void)
+{
+	unsigned cpu;
+	asm volatile("mfspr %0, 0x11e" : "=r" (cpu));
+	return cpu;
+}
+
+/* Return 0..32 counting from the left (same as bic).  32=> no bit set.
+ * Could use bitops.h as long as it always matches the bic.
+ */
+static inline unsigned bic_find_first_bit(unsigned x)
+{
+    unsigned lz;
+    asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+    return lz;
+}
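+/* For example, bic_find_first_bit(0x00010000) is 15 (15 leading zero
+ * bits), and bic_find_first_bit(0) is 32, the "no bit set" sentinel
+ * tested against NR_BIC_GROUPS and NR_BIC_GINTS below.
+ */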
+
+/*
+ * Get an IRQ from the BIC.
+ * We analyze the normal hierarchy register to find which group has caused an
+ * interrupt.   Similarly, we find the first bit within a group to find the first
+ * source of interrupt.   This artificially prioritizes interrupts.
+ *
+ * We handle IPIs specially.   This core can see IPI bits which did not actually
+ * interrupt this core.   We mask off those bits and otherwise process normally.
+ */
+unsigned int bgp_get_irq(void)
+{
+	unsigned thiscpu = this_cpu();
+	unsigned nhier, group, gint;
+	uint32_t gintbits;
+	int irq = NO_IRQ;
+
+	nhier = in_be32(&(bic.regs->hier_normal[thiscpu]));
+	group = bic_find_first_bit(nhier);
+	if (group >= NR_BIC_GROUPS)
+		goto out;
+	gintbits = in_be32(&bic.regs->group[group].status) & bic.enabled_mask[group] ;
+	if (group == BIC_IPI_GROUP) {
+		/* This may be an IPI.  Mask out other cpu IPI bits so we don't try
+		 * to handle it on this core!   We don't mask the other 16 bits.
+		 */
+		unsigned mask = ipi_mask(thiscpu);
+		gintbits &= mask;
+	}
+	gint = bic_find_first_bit(gintbits);
+	if (gint >= NR_BIC_GINTS)
+		goto out;
+	irq = bic_hw_to_irq(group, gint);
+out:
+/* 	BIC_DIAG(printk(KERN_INFO "bgp_get_irq nhier=0x%02x group=0x%02x gintbits=0x%08x gint=0x%02x irq=0x%02x\n", */
+/* 			nhier,group,gintbits,gint,irq)) ; */
+	return irq;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Send an IPI to another cpu.
+ * This could be coded to send to a cpu mask.
+ */
+enum {
+	k_spinlimit = 1000000 ,
+	k_reportlimit = 100
+};
+static unsigned int reportcount  ;
+void bgp_send_ipi(int cpu, int msg)
+{
+    unsigned group = BIC_IPI_GROUP;
+    unsigned gint = ipi_to_irq(cpu, msg) & 0x1f;
+    uint32_t gintbits = 1 << (31 - gint);
+    uint32_t ngintbits;
+    unsigned int spincount = 0 ;
+
+    /* If this interrupt is already raised we must wait for it to complete else
+     * we might race with the ack by the other waiting cpu.
+     * Once it is clear there is no guarantee another cpu won't take it in tandem
+     * with this cpu.  Currently that is ok, because a reschedule race is harmless
+     * as the goal of rescheduling is met, and the others hold a lock while the
+     * operation is in progress.  Why doesn't the lock protect us?  There is a window
+     * between the lock release and the IPI interrupt ack where we will race.
+     * This plugs the race.  It may be better to reallocate the IPI bits for unique
+     * core-to-core combinations.
+     */
+    do {
+	    spincount += 1 ;
+	    ngintbits = in_be32(&bic.regs->group[group].status);
+    } while ( (ngintbits & gintbits) && (spincount < k_spinlimit) ) ;
+
+    /* Pull the interrupt. */
+    if( spincount < k_spinlimit)
+	    {
+		    out_be32(&bic.regs->group[group].status_set, gintbits);
+	    }
+    else
+	    {
+		    if(reportcount < k_reportlimit)
+			    {
+				    printk(KERN_WARNING "bgp_send_ipi cpu=%d msg=%d stuck\n", cpu, msg) ;
+				    reportcount += 1;
+			    }
+	    }
+}
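+/* The raise itself is a write-one-to-set on the group status register,
+ * the mirror image of the write-one-to-clear ack in bic_eoi_irq().  If
+ * a previous IPI never clears within k_spinlimit polls, the send is
+ * dropped and a warning is logged at most k_reportlimit times.
+ */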
+
+/* Initialize an IPI handler.   This is only here to use ipi_to_irq(), which
+ * could be exposed in bluegene.h.
+ */
+void bgp_init_IPI(int cpu, int msg)
+{
+	smp_request_message_ipi(ipi_to_irq(cpu, msg), msg);
+}
+#endif
+
+/* Initialize the bic.
+ * We set the handlers as percpu because bic interrupts are wired
+ * to specific cores (we never broadcast to all cores).
+ */
+static void __init
+  disable_all_bic_interrupts(void)
+{
+	int group ;
+	struct bic_regs * regs = bic.regs ;
+	for(group=0; group<NR_BIC_GROUPS; group += 1)
+		{
+			struct bic_group_regs *group_regs = regs->group+group ;
+			group_regs->target[0] = 0 ;
+			group_regs->target[1] = 0 ;
+			group_regs->target[2] = 0 ;
+			group_regs->target[3] = 0 ;
+			bic.enabled_mask[group] = 0 ;
+		}
+}
+
+void __init
+  bgp_setup_arch_IRQ(void)
+  {
+	bic.regs = ioremap(BIC_PHYS, sizeof(*bic.regs));
+	disable_all_bic_interrupts() ;
+
+  }
+void __init
+  bgp_init_IRQ(void)
+{
+        int irq;
+
+	/* bic.regs mapping and interrupt disabling now happen earlier, in bgp_setup_arch_IRQ() */
+	spin_lock_init(&bic.mask_lock);
+	for_each_irq(irq) {
+		/* Interrupts from the BIC are percpu (we don't use broadcast)
+		 * so we may as well take the cycle advantage and declare it.
+		 */
+		set_irq_chip_and_handler(irq, &bgp_irq_chip, handle_percpu_irq);
+	}
+}
+
+EXPORT_SYMBOL(bic) ;
+EXPORT_SYMBOL(bic_set_cpu_for_irq) ;
diff --git a/arch/powerpc/platforms/44x/bgp_cns.c b/arch/powerpc/platforms/44x/bgp_cns.c
new file mode 100644
index 0000000..1ea06ae
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp_cns.c
@@ -0,0 +1,398 @@
+/*
+ * Blue Gene/P Common Node Services (CNS) wrappers
+ *
+ * These are declared in asm/bluegene.h but implemented here.
+ *
+ * Copyright 2003-2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * Author: Todd Inglett <tinglett@us.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <asm/pgtable.h>
+#include <asm/bluegene.h>
+#include <asm/bgcns.h>
+#ifdef CONFIG_ZEPTO
+#include <asm/udbg.h>
+#include <asm/bgp_personality.h>
+#include <mm/mmu_decl.h>
+#include <linux/zepto_debug.h>
+#endif
+
+/* The descriptor for CNS identifies location and entry point of firmware.
+ * We re-build it from data passed through the ibm,bluegene-cns device tree entry.
+ */
+BGCNS_Descriptor bgcnsd;
+BGCNS_Descriptor bgcnsd_orig;
+
+/* These functions spin on specific errors when we can't print messages.
+ * They make it easy to find the cause of the error by finding the iar in the
+ * kernel System.map.
+ */
+static void noinline __init bgp_fatal_no_ibm_bluegene_cns(void) { for (;;); }
+static void noinline __init bgp_fatal_no_base_va(void) { for (;;); }
+static void noinline __init bgp_fatal_no_base_pa(void) { for (;;); }
+static void noinline __init bgp_fatal_no_services(void) { for (;;); }
+static void noinline __init bgp_fatal_no_size(void) { for (;;); }
+static void noinline __init bgp_fatal_no_version(void) { for (;;); }
+
+/* Get the descriptor for CNS from the device tree.
+ * Don't inline so we can make out the stack trace easier when it isn't working.
+ */
+static void noinline __init get_cns_descriptor(BGCNS_Descriptor *bgcnsd)
+{
+	int len;
+	const unsigned *reg;
+	struct device_node *devcns = of_find_node_by_path("/ibm,bluegene/cns");
+
+	if (!devcns) bgp_fatal_no_ibm_bluegene_cns();
+
+	reg = of_get_property(devcns, "base-va", &len);
+	if (!reg) bgp_fatal_no_base_va();
+	bgcnsd->baseVirtualAddress = *reg;
+	reg = of_get_property(devcns, "base-pa", &len);
+	if (!reg) bgp_fatal_no_base_pa();
+	bgcnsd->basePhysicalAddress = *reg;
+	bgcnsd->basePhysicalAddressERPN = 0;	/* assumes DDR <= 4G */
+	reg = of_get_property(devcns, "services", &len);
+	if (!reg) bgp_fatal_no_services();
+	bgcnsd->services = (void *)(*reg);
+	reg = of_get_property(devcns, "size", &len);
+	if (!reg) bgp_fatal_no_size();
+	bgcnsd->size = *reg;
+	reg = of_get_property(devcns, "version", &len);
+	if (!reg) bgp_fatal_no_version();
+	bgcnsd->version = *reg;
+}
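+/* Illustrative shape of the device tree node consumed above (property
+ * values here are examples only, not real firmware values):
+ *
+ *	cns {
+ *		base-va  = <0xfffc0000>;
+ *		base-pa  = <0x00700000>;
+ *		services = <0xfffc0100>;
+ *		size     = <0x00040000>;
+ *		version  = <1>;
+ *	};
+ */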
+
+void __init ppc44x_update_tlb_hwater(void);	/* from mm/44x_mmu.c */
+
+/* Not __init: map_cns() is also called after boot (e.g. from erase_CNS_orig()). */
+static void noinline map_cns(BGCNS_Descriptor *bgcnsd)
+{
+	unsigned word0, word1, word2;
+	int entry = 62;	/* We reserve one of the PPC44x_EARLY_TLBS in asm/mmu-44x.h */
+
+	word0 = (bgcnsd->baseVirtualAddress & 0xfffff000) | PPC44x_TLB_VALID | PPC44x_TLB_256K;
+	word1 = (bgcnsd->basePhysicalAddress & 0xfffff000) | (bgcnsd->basePhysicalAddressERPN & 0xf);
+	word2 = PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M | PPC44x_TLB_WL1 | PPC44x_TLB_U2;
+	__asm__ __volatile__(
+		"tlbwe	%1,%0,0\n"
+		"tlbwe	%2,%0,1\n"
+		"tlbwe	%3,%0,2\n"
+		"isync\n" : : "r" (entry), "r" (word0), "r" (word1), "r" (word2));
+}
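+/* The three tlbwe writes fill one 440 TLB entry: word 0 holds the
+ * effective page number plus the valid bit and 256KB page size, word 1
+ * the real page number and ERPN bits, and word 2 the storage-attribute
+ * and supervisor access-permission bits (read/write/execute on a
+ * coherent, L1 write-through mapping).
+ */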
+
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+unsigned bgpers_rank;
+#endif
+
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+char cns_buf[256*1024]
+__attribute__((__section__(".cns.256KB_aligned")));
+
+void erase_CNS_orig(void)
+{
+    unsigned flags;
+
+    local_save_flags(flags);
+    local_irq_disable();
+
+    map_cns(&bgcnsd_orig);
+    memset( (void*)bgcnsd.baseVirtualAddress, 0, bgcnsd.size );
+    map_cns(&bgcnsd);
+
+    local_irq_restore(flags);
+
+    printk("Erased orig CNS pa:%08x\n",  bgcnsd_orig.basePhysicalAddress);
+}
+#endif
+
+extern int map_page(unsigned long va, phys_addr_t pa, int flags);
+
+/* bgp_init_cns() may be called more than once.
+ * No interrupts should occur at this point.
+ */
+void __init bgp_init_cns(void)
+{
+	unsigned long v_start, v_end, v, p;
+
+	if (bgcnsd.size == 0) {
+		/* Get the descriptor, map CNS, and tell Linux about the mapping. */
+		get_cns_descriptor(&bgcnsd);
+		v_start = bgcnsd.baseVirtualAddress;
+		v_end = v_start + bgcnsd.size;
+		v_start -= PAGE_SIZE;		/* hack: reserve 1 extra page */
+		v = v_start;
+		p = bgcnsd.basePhysicalAddress;	/* always < 4G */
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+		{
+		    BGP_Personality_t bgpers;
+		    map_cns(&bgcnsd); /* to access the CNS region, udbg_printf, personality */
+		    bluegene_getPersonality(&bgpers, sizeof(BGP_Personality_t));
+		    bgpers_rank = bgpers.Network_Config.Rank;
+		}
+#endif
+
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+		{
+		    unsigned flags;
+
+		    map_cns(&bgcnsd); /* to access the CNS region, udbg_printf, personality */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BGP
+		    udbg_printf("Relocating CNS... pa:%08x to pa:%08x\n", 
+				bgcnsd.basePhysicalAddress, (unsigned)cns_buf-(unsigned)PAGE_OFFSET);
+#endif
+		    local_save_flags(flags);
+		    local_irq_disable();
+
+		    bgcnsd_orig = bgcnsd;
+		    bgcnsd.basePhysicalAddress = (unsigned)cns_buf - (unsigned)PAGE_OFFSET;
+
+		    /* simply copy CNS to pre-allocated space which is covered by kernel TLB */
+		    memcpy( (void*)cns_buf, (void*)bgcnsd_orig.baseVirtualAddress, bgcnsd_orig.size);
+
+		    asm volatile ("dccci 0,0"  : : : "memory");  /* dcache all */
+		    asm volatile ("iccci 0,0"  : : : "memory");  /* icache all */
+		    asm volatile ("isync");
+
+		    map_cns(&bgcnsd); /* reload new tlb */
+		    
+		    local_irq_restore(flags);
+#ifdef CONFIG_PPC_EARLY_DEBUG_BGP
+		    udbg_printf("CNS relocated\n");
+#endif
+		}
+#else
+		/* We must be careful because we could hit 4G and wrap to v == 0.
+		 * Hence the v > v_start check.
+		 */
+		for (; v < v_end && v > v_start; v += PAGE_SIZE, p += PAGE_SIZE)
+			map_page(v, p, _PAGE_RAM_TEXT);
+#endif
+	}
+
+	map_cns(&bgcnsd);
+}
+
+/* Simple udbg_putc.   We perform rudimentary buffering so it is readable. */
+static int bgp_udbg_cur = 0;
+static char bgp_udbg_buf[256];
+void bgp_udbg_putc(char c)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    /* ZXXX: support command line later */
+    if( bgpers_rank != 0 ) return;
+#endif
+
+	bgp_udbg_buf[bgp_udbg_cur++] = c;
+	if (c == '\n' || bgp_udbg_cur >= sizeof(bgp_udbg_buf)) {
+		if (bgcnsd.size)
+			bluegene_writeToMailboxConsole(bgp_udbg_buf, bgp_udbg_cur);
+		bgp_udbg_cur = 0;
+	}
+}
+
+
+#define CALLCNS(service) \
+	({ unsigned flags; \
+	   typeof(bgcnsd.services->service) ret; \
+	   local_save_flags(flags); \
+	   local_irq_disable(); \
+	   ret = bgcnsd.services->service; \
+	   local_irq_restore(flags); \
+	   ret; \
+	 })
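+/* CALLCNS is a GCC statement expression: typeof captures the service's
+ * return type, the call runs with interrupts disabled (the CNS firmware
+ * is presumably not reentrant), and the whole expression yields ret.
+ * A sketch of what e.g. CALLCNS(isIONode()) amounts to:
+ *
+ *	unsigned flags;
+ *	typeof(bgcnsd.services->isIONode()) ret;
+ *	local_save_flags(flags);
+ *	local_irq_disable();
+ *	ret = bgcnsd.services->isIONode();
+ *	local_irq_restore(flags);
+ *	(the expression's value is ret)
+ */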
+
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+#define CALLCNS_ORIG(service) \
+	({ unsigned flags; \
+	   typeof(bgcnsd.services->service) ret; \
+	   local_save_flags(flags); \
+	   local_irq_disable(); \
+  	   map_cns(&bgcnsd_orig); \
+	   ret = bgcnsd_orig.services->service; \
+  	   map_cns(&bgcnsd); \
+	   local_irq_restore(flags); \
+	   ret; \
+	 })
+#endif
+
+
+/* This returns non-zero if there is something in an input mailbox. */
+int bluegene_testInboxAttention(void)
+{
+	/* ToDo: this should be fast.  Read the DCR directly. */
+	return CALLCNS(testInboxAttention());
+}
+
+int bluegene_testForOutboxCompletion(void)
+{
+	return CALLCNS(testForOutboxCompletion());
+}
+
+int bluegene_writeRASEvent_nonBlocking(unsigned facility,
+				       unsigned unit,
+				       unsigned short err_code,
+				       unsigned numDetails,
+				       unsigned details[])
+{
+	return CALLCNS(writeRASEvent_nonBlocking(facility, unit, err_code, numDetails, details));
+}
+
+int bluegene_writeRASString(unsigned facility,
+			    unsigned unit,
+			    unsigned short err_code,
+			    char* str)
+{
+	return CALLCNS(writeRASString(facility, unit, err_code, str));
+}
+
+int bluegene_writeRASString_nonBlocking(unsigned facility,
+					unsigned unit,
+					unsigned short err_code,
+					char* str)
+{
+	return CALLCNS(writeRASString_nonBlocking(facility, unit, err_code, str));
+}
+
+int bluegene_writeToMailboxConsole(char *msg, unsigned msglen)
+{
+	return CALLCNS(writeToMailboxConsole(msg, msglen));
+}
+
+int bluegene_writeToMailboxConsole_nonBlocking(char *msg, unsigned msglen)
+{
+	return CALLCNS(writeToMailboxConsole_nonBlocking(msg, msglen));
+}
+
+unsigned bluegene_readFromMailboxConsole(char *buf, unsigned bufsize)
+{
+	return CALLCNS(readFromMailboxConsole(buf, bufsize));
+}
+
+int bluegene_macResetPHY(void)
+{
+	return CALLCNS(macResetPHY());
+}
+
+int bluegene_macTestRxLink(void)
+{
+	return CALLCNS(macTestLink(BGCNS_Receiver));
+}
+
+
+int bluegene_macTestTxLink(void)
+{
+	return CALLCNS(macTestLink(BGCNS_Transmitter));
+}
+
+int bluegene_takeCPU(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg))
+{
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+	return CALLCNS_ORIG(takeCPU(cpu, arg, entry));
+#else
+	return CALLCNS(takeCPU(cpu, arg, entry));
+#endif
+}
+
+
+#ifdef CONFIG_ZEPTO
+
+/*
+  nprocs   mode
+  4        VN
+  2        DUAL
+  1        SMP
+
+  NOTE: this function is only called from arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c
+*/
+#define _BGP_PERS_PROCESSCONFIG_SMP   (0x0F000000)
+#define _BGP_PERS_PROCESSCONFIG_VNM   (0x08040201)
+#define _BGP_PERS_PROCESSCONFIG_2x2   (0x0C030000)
+
+void  bluegene_set_Kernel_Config_ProcessConfig(int nprocs)
+{
+    BGP_Personality_t* pers = bgcnsd.services->getPersonalityData();
+    switch(nprocs) {
+        case 4:
+            pers->Kernel_Config.ProcessConfig = _BGP_PERS_PROCESSCONFIG_VNM;
+            break;
+        case 2:
+            pers->Kernel_Config.ProcessConfig = _BGP_PERS_PROCESSCONFIG_2x2;
+            break;
+        default:
+            pers->Kernel_Config.ProcessConfig = _BGP_PERS_PROCESSCONFIG_SMP;
+    }
+
+    zepto_debug(1,"bluegene_set_Kernel_Config_ProcessConfig  nprocs=%d   ProcessConfig=%08x\n",
+                nprocs, pers->Kernel_Config.ProcessConfig );
+}
+#endif
+
+int bluegene_getPersonality(void *buff, unsigned buffSize)
+{
+	int sz;
+	unsigned flags;
+
+	local_save_flags(flags);
+	local_irq_disable();
+	sz = bgcnsd.services->getPersonalitySize();
+	if (sz > buffSize)
+		sz = buffSize;
+	memcpy(buff, bgcnsd.services->getPersonalityData(), sz);
+	local_irq_restore(flags);
+
+	return sz;
+}
+
+int bluegene_isIONode(void)
+{
+	int ret;
+	unsigned flags;
+	
+	local_save_flags(flags);
+	local_irq_disable();
+	ret = bgcnsd.services->isIONode();	
+	local_irq_restore(flags);
+	return ret;
+}
+
+int bluegene_mapXEMAC(void* baseAddr)
+{
+	return CALLCNS(mapDevice(BGCNS_XEMAC, baseAddr));
+}
+
+int bluegene_globalBarrier_nonBlocking(unsigned int channel, int reset, unsigned int timeoutInMillis)
+  {
+    return CALLCNS(globalBarrier_nonBlocking(channel,reset,timeoutInMillis)) ;
+  }
+
+EXPORT_SYMBOL(bluegene_getPersonality) ;
+EXPORT_SYMBOL(bluegene_globalBarrier_nonBlocking) ;
+EXPORT_SYMBOL(bgcnsd) ;
diff --git a/arch/powerpc/platforms/44x/bgp_pers.c b/arch/powerpc/platforms/44x/bgp_pers.c
new file mode 100644
index 0000000..1151b8e
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp_pers.c
@@ -0,0 +1,345 @@
+/*
+ *
+ * Blue Gene personality /proc interface with the control system
+ *
+ * Copyright 2003,2005 International Business Machines
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * User apps can mmap /proc/personality to directly access the binary
+ * personality in SRAM (see bglpersonality.h), or they can read
+ * /proc/personality.sh which expands to shell commands (so it can be sourced)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <asm/bluegene.h>
+#include <asm/bgp_personality.h>
+
+
+static struct proc_dir_entry *personality_proc_entry = NULL;
+static struct proc_dir_entry *personality_sh_proc_entry = NULL;
+
+
+static BGP_Personality_t bgpers;
+
+/* Binary personality interface.  Doesn't need to be fast. */
+static int bgpersonality_read(char *page, char **start, off_t offset,
+			      int count, int *eof, void *data)
+{
+	if (count > sizeof(bgpers))
+		count = sizeof(bgpers);
+	bluegene_getPersonality(&bgpers, count);
+	memcpy(page, &bgpers, count);
+	*eof = 1;
+
+	return count;
+}
+
+
+static void* bgpers_sh_seq_start(struct seq_file* f,
+				loff_t* pos)
+{
+	return *pos <= 32 ? (void*) pos : (void*) NULL;
+}
+
+
+static void* bgpers_sh_seq_next(struct seq_file* f,
+				void* v,
+				loff_t* pos)
+{
+	return  ++(*pos) <= 32 ? (void*) pos : (void*) NULL;
+}
+
+
+static void bgpers_sh_seq_stop(struct seq_file* f,
+			       void* v)
+{
+	return;
+}
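+/* The iterator walks positions 0..32; each position makes
+ * bgpers_sh_seq_show() below emit one BG_*=value shell assignment, so
+ * /proc/personality.sh can be sourced directly by a shell script.
+ */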
+
+
+/*  Produce a personality in a form parsable by a shell. */
+static int bgpers_sh_seq_show(struct seq_file* f,
+				void* v)
+{
+	loff_t offset = *((loff_t*) v);
+	BGP_UCI_ComputeCard_t* uci;
+
+	bluegene_getPersonality(&bgpers, sizeof(bgpers));
+	uci = (BGP_UCI_ComputeCard_t*) &bgpers.Kernel_Config.UniversalComponentIdentifier;
+
+	switch((unsigned long) offset) {
+		case 0:
+			seq_printf(f, "BG_UCI=%08x\n",
+				   bgpers.Kernel_Config.UniversalComponentIdentifier);
+			break;
+		case 1:
+			seq_printf(f, "BG_LOCATION=R%1x%1x-M%c-N%02d-J%02d\n",
+				   uci->RackRow, uci->RackColumn, (uci->Midplane ? '1' : '0'),
+				   uci->NodeCard, uci->ComputeCard);
+			break;
+		case 2:
+			seq_printf(f, "BG_MAC=%02x:%02x:%02x:%02x:%02x:%02x\n",
+				   bgpers.Ethernet_Config.EmacID[0],
+				   bgpers.Ethernet_Config.EmacID[1],
+				   bgpers.Ethernet_Config.EmacID[2],
+				   bgpers.Ethernet_Config.EmacID[3],
+				   bgpers.Ethernet_Config.EmacID[4],
+				   bgpers.Ethernet_Config.EmacID[5]);
+			break;
+		case 3:
+			seq_printf(f, "BG_IP=%d.%d.%d.%d\n",
+				   bgpers.Ethernet_Config.IPAddress.octet[12],
+				   bgpers.Ethernet_Config.IPAddress.octet[13],
+				   bgpers.Ethernet_Config.IPAddress.octet[14],
+				   bgpers.Ethernet_Config.IPAddress.octet[15]);
+			break;
+		case 4:
+			seq_printf(f, "BG_NETMASK=%d.%d.%d.%d\n",
+				   bgpers.Ethernet_Config.IPNetmask.octet[12],
+				   bgpers.Ethernet_Config.IPNetmask.octet[13],
+				   bgpers.Ethernet_Config.IPNetmask.octet[14],
+				   bgpers.Ethernet_Config.IPNetmask.octet[15]);
+			break;
+		case 5:
+			seq_printf(f, "BG_BROADCAST=%d.%d.%d.%d\n",
+				   bgpers.Ethernet_Config.IPBroadcast.octet[12],
+				   bgpers.Ethernet_Config.IPBroadcast.octet[13],
+				   bgpers.Ethernet_Config.IPBroadcast.octet[14],
+				   bgpers.Ethernet_Config.IPBroadcast.octet[15]);
+			break;
+		case 6:
+			seq_printf(f, "BG_GATEWAY=%d.%d.%d.%d\n",
+				   bgpers.Ethernet_Config.IPGateway.octet[12],
+				   bgpers.Ethernet_Config.IPGateway.octet[13],
+				   bgpers.Ethernet_Config.IPGateway.octet[14],
+				   bgpers.Ethernet_Config.IPGateway.octet[15]);
+			break;
+		case 7:
+        		seq_printf(f, "BG_MTU=%d\n", bgpers.Ethernet_Config.MTU);
+			break;
+		case 8:
+			seq_printf(f, "BG_FS=%d.%d.%d.%d\n",
+				   bgpers.Ethernet_Config.NFSServer.octet[12],
+				   bgpers.Ethernet_Config.NFSServer.octet[13],
+				   bgpers.Ethernet_Config.NFSServer.octet[14],
+				   bgpers.Ethernet_Config.NFSServer.octet[15]);
+			break;
+		case 9:
+			seq_printf(f, "BG_EXPORTDIR=\"%s\"\n", bgpers.Ethernet_Config.NFSExportDir);
+			break;
+		case 10:
+			seq_printf(f, "BG_SIMULATION=%d\n",
+			(bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_Simulation ? 1 : 0));
+			break;
+		case 11:
+			seq_printf(f, "BG_PSETNUM=%d\n", bgpers.Network_Config.PSetNum);
+			break;
+		case 12:
+			seq_printf(f, "BG_NUMPSETS=%d\n", bgpers.Network_Config.IOnodes);
+			break;
+		case 13:
+			seq_printf(f, "BG_NODESINPSET=%d\n", bgpers.Network_Config.PSetSize);
+			break;
+		case 14:
+			seq_printf(f, "BG_XSIZE=%d\n", bgpers.Network_Config.Xnodes);
+			break;
+		case 15:
+			seq_printf(f, "BG_YSIZE=%d\n", bgpers.Network_Config.Ynodes);
+			break;
+		case 16:
+			seq_printf(f, "BG_ZSIZE=%d\n", bgpers.Network_Config.Znodes);
+			break;
+		case 17:
+			seq_printf(f, "BG_VERBOSE=%d\n", (bgpers.Kernel_Config.TraceConfig & BGP_TRACE_VERBOSE) ? 1 : 0);
+			break;
+		case 18:
+			switch (bgpers.Network_Config.PSetSize) {
+				case 16:
+					seq_printf(f, "BG_PSETSIZE=\"4 2 2\"\n");
+					break;
+				case 32:
+					seq_printf(f, "BG_PSETSIZE=\"4 4 2\"\n");
+					break;
+				case 64:
+					seq_printf(f, "BG_PSETSIZE=\"4 4 4\"\n");
+					break;
+				case 128:
+					seq_printf(f, "BG_PSETSIZE=\"4 4 8\"\n");
+					break;
+				case 256:
+					seq_printf(f, "BG_PSETSIZE=\"8 4 8\"\n");
+					break;
+				case 512:
+					seq_printf(f, "BG_PSETSIZE=\"8 8 8\"\n");
+					break;
+				default:
+					seq_printf(f, "BG_PSETSIZE=\"? ? ?\"\n");
+			}
+			break;
+		case 19:
+/* 			if (bgpers.Network_Config.RankInPSet) */
+/* 				// Not an IO node so display pset origin. */
+			seq_printf(f, "BG_PSETORG=\"%d %d %d\"\n",
+				   bgpers.Network_Config.Xcoord,
+				   bgpers.Network_Config.Ycoord,
+				   bgpers.Network_Config.Zcoord);
+			break;
+		case 20:
+			seq_printf(f, "BG_CLOCKHZ=%d\n", bgpers.Kernel_Config.FreqMHz);
+			break;
+		case 21:
+			seq_printf(f, "BG_GLINTS=%d\n",
+				   (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_GlobalInts) ? 1 : 0);
+			break;
+		case 22:
+			seq_printf(f, "BG_ISTORUS=\"%s%s%s\"\n",
+				   (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshX) ? "X" : "",
+                        	   (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshY) ? "Y" : "",
+                        	   (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshZ) ? "Z" : "");
+			 break;
+		case 23: {
+			char blockID[BGP_PERSONALITY_LEN_NFSDIR+1];
+
+			strncpy(blockID, bgpers.Ethernet_Config.NFSMountDir, sizeof(blockID));
+			blockID[sizeof(blockID)-1] = '\0';
+			seq_printf(f, "BG_BLOCKID=\"%s\"\n", blockID);
+			break;
+		}
+		case 24:
+			seq_printf(f, "BG_SN=%d.%d.%d.%d\n",
+				   bgpers.Ethernet_Config.serviceNode.octet[12],
+				   bgpers.Ethernet_Config.serviceNode.octet[13],
+				   bgpers.Ethernet_Config.serviceNode.octet[14],
+				   bgpers.Ethernet_Config.serviceNode.octet[15]);
+			break;
+		case 25:
+			seq_printf(f, "BG_IS_IO_NODE=%d\n", (bgpers.Network_Config.RankInPSet ? 0 : 1));
+			break;
+		case 26:
+			seq_printf(f, "BG_RANK_IN_PSET=%d\nBG_RANK=%d\n",
+					bgpers.Network_Config.RankInPSet,
+					bgpers.Network_Config.Rank);
+			break;
+		case 27:
+			seq_printf(f, "BG_IP_OVER_COL=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverCollective) ? 1 : 0);
+			break;
+		case 28:
+			seq_printf(f, "BG_IP_OVER_TOR=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverTorus) ? 1 : 0);
+			break;
+		case 29:
+			seq_printf(f, "BG_IP_OVER_COL_VC=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 1 : 0);
+			break;
+		case 30:
+			if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_MuxOnly))
+				seq_printf(f, "BG_CIO_MODE=MUX_ONLY\n");
+			else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_None))
+				seq_printf(f, "BG_CIO_MODE=NONE\n");
+			else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full))
+				seq_printf(f, "BG_CIO_MODE=FULL\n");
+			else
+				seq_printf(f, "BG_CIO_MODE=UNKNOWN\n");
+			break;
+		case 31:
+			if ((bgpers.Block_Config & BGP_PERS_BLKCFG_bgsysFSSel(3)) == BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+				seq_printf(f, "BG_BGSYS_FS_TYPE=NFSv3\n");
+			else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_bgsysFSSel(3)) == BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv4))
+				seq_printf(f, "BG_BGSYS_FS_TYPE=NFSv4\n");
+			else
+				seq_printf(f, "BG_BGSYS_FS_TYPE=UNKNOWN\n");
+			break;
+		case 32:
+			seq_printf(f, "BG_HTC_MODE=%d\n",
+                                   (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_HighThroughput) ? 1 : 0);
+			break;
+		default:
+			seq_printf(f, "Illegal offset %u\n", (unsigned int) offset);
+	}
+
+	return 0;
+}
+
+void bgpersonality_cleanup_module(void)
+{
+	if (personality_proc_entry) {
+		remove_proc_entry(personality_proc_entry->name, NULL);
+	}
+
+	if (personality_sh_proc_entry) {
+		remove_proc_entry(personality_sh_proc_entry->name, NULL);
+	}
+}
+
+
+
+static struct seq_operations bgpers_sh_seq_ops = {
+	.start = bgpers_sh_seq_start,
+	.next = bgpers_sh_seq_next,
+	.stop = bgpers_sh_seq_stop,
+	.show = bgpers_sh_seq_show
+};
+
+
+
+static int bgpers_sh_proc_open(struct inode* inode,
+			       struct file* f)
+{
+	return seq_open(f, &bgpers_sh_seq_ops);
+}
+
+
+static struct file_operations bgpers_sh_fops = {
+	.owner = THIS_MODULE,
+	.open = bgpers_sh_proc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release
+};
+
+
+int bgpersonality_init_module(void)
+{
+	personality_proc_entry = create_proc_read_entry("personality", 0644, NULL,
+							 bgpersonality_read, (void *) 0);
+	if (!personality_proc_entry)
+		goto out;
+
+	personality_sh_proc_entry = create_proc_entry("personality.sh", 0, NULL);
+	if (!personality_sh_proc_entry)
+		goto out;
+	else
+		personality_sh_proc_entry->proc_fops = &bgpers_sh_fops;
+
+	return 0;
+
+out:
+	bgpersonality_cleanup_module();
+
+	return -ENOMEM;
+}
+
+
+module_init(bgpersonality_init_module);
+module_exit(bgpersonality_cleanup_module);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e868b5c..928d46f 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -231,7 +231,7 @@
 	  If in doubt, say Y here.
 
 config SMP
-	depends on PPC_STD_MMU || FSL_BOOKE
+	depends on PPC_STD_MMU || BOOKE
 	bool "Symmetric multi-processing support"
 	---help---
 	  This enables support for systems with more than one CPU. If you have
@@ -259,9 +259,13 @@
 config NOT_COHERENT_CACHE
 	bool
 	depends on 4xx || 8xx || E200 || PPC_MPC512x
+	default n if BGP
 	default y
 
 config CHECK_CACHE_COHERENCY
 	bool
 
+config L1_WRITETHROUGH
+	bool
+
 endmenu
diff --git a/arch/powerpc/syslib/bgdd/Makefile b/arch/powerpc/syslib/bgdd/Makefile
new file mode 100644
index 0000000..7dc535c
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/Makefile
@@ -0,0 +1,22 @@
+#CFLAGS += -Wa,-m450
+ 
+EXTRA_CFLAGS := -D__LINUX_KERNEL__ -Wno-declaration-after-statement
+
+obj-$(CONFIG_ZEPTO)             += zepto_bluegene_lockbox.o
+obj-$(CONFIG_ZEPTO_MEMORY)      += zepto_bigmem_explicit_mmap.o zepto_task.o zepto_setup_treeroute.o 
+obj-$(CONFIG_ZEPTO_COMPUTENODE) += zepto_bluegene_dma.o 
+obj-$(CONFIG_BGP_DMA)	        += bgp_dma_spi.o
+
+ZSPI_INC=-Iarch/powerpc/include/zspi/
+CFLAGS_zepto_bigmem_explicit_mmap.o=$(ZSPI_INC)
+CFLAGS_zepto_task.o=$(ZSPI_INC)
+CFLAGS_zepto_bluegene_dma.o=$(ZSPI_INC)
+CFLAGS_zepto_bluegene_lockbox.o=$(ZSPI_INC)
+CFLAGS_zepto_setup_treeroute.o=$(ZSPI_INC)
+
+
+
+bgp_dma_spi-y := bgp_dma_base.o 
+bgp_dma_spi-y += spi/DMA_InjFifo.o
+bgp_dma_spi-y += spi/DMA_RecFifo.o  
+bgp_dma_spi-y += spi/DMA_Descriptors.o 
diff --git a/arch/powerpc/syslib/bgdd/bgp_dma_base.c b/arch/powerpc/syslib/bgdd/bgp_dma_base.c
new file mode 100644
index 0000000..9608027
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/bgp_dma_base.c
@@ -0,0 +1,1292 @@
+/**********************************************************************
+ *
+ * Copyright (c) 2007, 2009 International Business Machines
+ * Chris Ward <tjcw@uk.ibm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ **********************************************************************/
+
+
+/* ************************************************************************* */
+/*                includes                                                   */
+/* ************************************************************************* */
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <linux/vmalloc.h>
+
+#include <linux/hugetlb.h>
+/*  #include <asm/bluegene.h> */
+
+#include <asm/bgcns.h>
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_DIAGNOSE_BROKEN_SMP)
+#define TORNIC_TORUS_AFFINITY
+#endif
+
+/* int bgp_dma_irq ; */
+#if defined(TORNIC_TORUS_AFFINITY)
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;
+enum {
+  k_TorusAffinityCPU = 2
+};
+#endif
+
+#define TRACE(x) printk x
+
+#define CHECK_RET(x) if (x) { TRACE((KERN_INFO                                  \
+					"bgpdma: returning due to error at line %d\n",\
+                                     __LINE__)); \
+                              return ret; }
+
+#undef CHECK_PARAM
+#define CHECK_PARAM(x) if (!(x)) { printk( KERN_INFO                             \
+				      "(E) bgpdma: Assertion failed in %s:%d\n", \
+				      __FILE__,__LINE__);                    \
+                                      return -EINVAL; }
+#undef  HPC_MODE
+/* #define HPC_MODE */
+
+
+/* ************************************************************************* */
+/*                                                          Include firmware */
+/* ************************************************************************* */
+
+/* ************************************************************************* */
+/* Defines and friends required by DMA SPI in kernel mode                    */
+/* ************************************************************************* */
+
+#include <spi/linux_kernel_spi.h>
+
+/* #include "bgp_bic_diagnosis.h" */
+/* ************************************************************************* */
+/*                             IOCTL commands                                */
+/* ************************************************************************* */
+
+/*  size of mmap'ed IO memory */
+#define BGP_DMA_MMAP_SIZE        (4096 * 4)
+/* ************************************************************************* */
+/*                  network device structures                                */
+/* ************************************************************************* */
+struct bgpdma_state_t
+{
+  uint32_t inj_counters[4];   /*  for each group, a bit mask of which injection counter subgroups allocated */
+                              /*  bits 0 - 7 are valid, 8 subgroups of 8 counters/subgroup */
+  uint32_t rec_counters[4];   /*  for each group, a bit mask of which reception counter subgroups allocated */
+                              /*  bits 0 - 7 are valid, 8 subgroups of 8 counters/subgroup */
+  uint32_t inj_fifos[4];      /*  for each group, a bit mask of which injection fifos have been allocated */
+                              /*  bits 0 - 31 are valid */
+
+  uint32_t rec_fifo_set_map;        /*  if 1, _bgp_DMA_RecFifoSetMap has already been called */
+
+  uint32_t rec_fifo_init[2];      /*  set bit to 1 if receive fifo has already been initialized, */
+                                  /*  bits 0-31 of rec_fifo_init[0] for normal fifos */
+                                  /*  bits 0-3  of rec_fifo_init[1] for header fifos */
+};
+
+/* max number of registered interrupt handlers */
+#define MAX_NUM_IRQ 4
+
+
+/* interrupt info structure */
+struct dma_irq
+{
+  int                        irq;         /*  irq number for this group */
+                                          /*  (fixed at module init time) */
+  Kernel_CommThreadHandler   func;
+  u32                        arg1;
+};
+
+
+
+struct bgpdma_dev_t
+{
+  unsigned long long    pa_addr;                  /* physical address */
+  struct bgpdma_state_t state;                    /* dma resource state */
+  struct dma_irq        irqInfo[ MAX_NUM_IRQ  ];  /* dma interrupts */
+};
+/*  */
+static struct bgpdma_dev_t bgpdma_dev;
+
+/* ************************************************************************* */
+/*                       Linux module header                                 */
+/* ************************************************************************* */
+
+MODULE_DESCRIPTION("BG/P DMA driver");
+MODULE_LICENSE("GPL");
+
+#define BGP_DMA_NAME  "bgpdma"
+
+/*  Threshold crossed irq number for rec fifo groups */
+#define DMA_RECFIFO_THRESHOLD_IRQ(group)  ((_BGP_IC_DMA_NFT_G2_HIER_POS<<5)|(28+group))
+#define DMA_RECFIFO_THRESHOLD_IRQ_GINT(group)  (28+group)
+
+/*  Threshold crossed irq number for rec fifo groups */
+#define TORUS_RECFIFO_WATERMARK_IRQ(fifo)  ((_BGP_IC_DMA_NFT_G2_HIER_POS<<5)|(8+fifo))
+#define TORUS_RECFIFO_WATERMARK_IRQ_GINT(fifo)  (8+fifo)
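+/* Assuming _BGP_IC_DMA_NFT_G2_HIER_POS names the BIC group carrying the
+ * DMA non-fatal interrupts, these macros rebuild the global IRQ number
+ * with the same (group << 5) | gint encoding used by the BIC driver;
+ * rec fifo threshold group 0, for example, sits on gint 28 of that group.
+ */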
+
+/* ************************************************************************* */
+/*                 module initialization/cleanup                             */
+/* ************************************************************************* */
+
+static int  __init
+  bgpdma_module_init    (void);
+static void __exit
+  bgpdma_module_cleanup (void);
+
+extern BGCNS_Descriptor bgcnsd;
+
+module_init(bgpdma_module_init);
+module_exit(bgpdma_module_cleanup);
+
+/* ************************************************************************* */
+/*                       BG/P DMA initialization                             */
+/* ************************************************************************* */
+
+/*  dma physical address */
+#define _BGP_UA_DMA          (0x6)
+#define _BGP_PA_DMA          (0x00000000)
+
+/*  virtual kernel based address of DMA */
+void * bgpdma_kaddr;
+EXPORT_SYMBOL(bgpdma_kaddr);
+
+
+/*  check if DMA is mapped by the kernel */
+#define CHECK_DMA_ACCESS  if ( ! bgpdma_kaddr ) { printk( KERN_INFO "(E) DMA is not mapped\n"); return -ENODEV; }
+
+
+
+/* dma interrupt handler */
+/* static unsigned int dmaHandlerCount ; */
+irqreturn_t dmaIrqHandler(int irq, void * arg)
+{
+  struct dma_irq * irqInfo = ( struct dma_irq * )arg;
+
+
+/*   dmaHandlerCount += 1 ; */
+/*   if( irq != 92 || dmaHandlerCount < 20 ) */
+/*     { */
+/*   printk( KERN_INFO "(I) bgpdma: rec fifo irq dmaIrqHandler called irq:%d arg:%08x\n", */
+/* 	  irq, (int)arg); */
+/* //  show_bic_regs() ; */
+/*     } */
+  (*irqInfo->func)(irqInfo->arg1,0,0,0);
+  return IRQ_HANDLED;
+}
+
+/* irqreturn_t watermarkIrqHandler(int irq, void * arg) */
+/* { */
+/*   struct dma_irq * irqInfo = ( struct dma_irq * )arg; */
+/*  */
+/*  */
+/*   dmaHandlerCount += 1 ; */
+/*   if( irq != 92 || dmaHandlerCount < 20 ) */
+/*     { */
+/*   printk( KERN_INFO "(I) bgpdma: rec fifo irq watermarkIrqHandler called irq:%d arg:%08x\n", */
+/*           irq, (int)arg); */
+/* //  show_bic_regs() ; */
+/*     } */
+/*   (*irqInfo->func)(irqInfo->arg1,0,0,0); */
+/*   return IRQ_HANDLED; */
+/* } */
+
+irqreturn_t dummyIrqHandler(int irq, void * arg)
+{
+  printk( KERN_INFO "(I) bgpdma: dummy irq handler called irq:%d arg:%08x\n",
+	  irq, (int)arg);
+  return IRQ_HANDLED;
+}
+
+
+static int /*__init*/ bgpdma_module_init (void)
+{
+/*  int ret = -1; */
+/*  dev_t devno; */
+
+  TRACE((
+		  KERN_INFO "bgpdma: module initialization\n"
+		  ));
+
+  bgpdma_dev.pa_addr = ((unsigned long long)_BGP_UA_DMA << 32) | _BGP_PA_DMA;
+
+     /*  map DMA into kernel space */
+
+  if (  bgcnsd.services->isIONode()  )
+    {
+      TRACE((
+		      KERN_INFO "(I) DMA is not mapped on IO node\n"
+		      ));
+      bgpdma_kaddr = NULL;
+      return 0;
+    }
+
+  bgpdma_kaddr = ioremap( bgpdma_dev.pa_addr, BGP_DMA_MMAP_SIZE );
+
+  if ( bgpdma_kaddr == NULL )
+    {
+       printk( KERN_INFO "(E) bgpdma: ioremap() failed\n" );
+       return -ENOMEM;
+    }
+
+   /*  Let bgcnsd know about the new address of the dma */
+  unsigned long flags;
+  local_irq_save(flags);
+  bgcnsd.services->mapDevice(BGCNS_DMA,  bgpdma_kaddr );
+  local_irq_restore(flags);
+
+
+  TRACE((
+		  KERN_INFO "bgpdma: module initialization finished, dma kaddr:%08x\n",
+	 (unsigned)bgpdma_kaddr));
+
+  return 0;
+}
+
+/* ************************************************************************* */
+/*                       BG/P net module cleanup                             */
+/* ************************************************************************* */
+
+static void __exit
+	bgpdma_module_cleanup(void)
+{
+
+   /*  release kernel mapping of dma */
+  iounmap ( bgpdma_kaddr );
+}
+
+
+
+/*
+ *   Query free counter subgroups
+ */
+u32 Kernel_CounterGroupQueryFree( u32   type,
+				  u32   grp,
+				  u32 * num_subgrps,
+				  u32 * subgrps )
+{
+  CHECK_DMA_ACCESS;
+
+  int ret = 0;
+  uint32_t counters;
+  int i;
+
+  if ( grp < 0 || grp >= 4 || type < 0 || type > 1 ) return -EINVAL;
+  if ( num_subgrps == NULL || subgrps == NULL  )     return -EINVAL;
+
+  if ( type == 0 )
+   counters = bgpdma_dev.state.inj_counters[grp];
+  else
+   counters = bgpdma_dev.state.rec_counters[grp];
+
+  (*num_subgrps) = 0;
+  for(i=0; i < DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP; i++ )
+    {
+      if ( ( counters & _BN(i) ) == 0)
+	{
+	  subgrps[*num_subgrps] = i;
+	  (*num_subgrps)++;
+	}
+    }
+
+  TRACE((
+		  KERN_INFO "Allocated counters:%08x  num_free:%d\n",counters,(int)(*num_subgrps)));
+
+  return ret;
+}
+EXPORT_SYMBOL(Kernel_CounterGroupQueryFree);
+
+
+/*
+ *   Allocate counter subgroups
+ */
+u32 Kernel_CounterGroupAllocate( u32   type,
+				 u32   grp,
+				 u32   num_subgrps,
+				 u32 * subgrps,
+				 u32   target,         /* not used */
+				 u32   handler,        /* not used */
+				 u32 * handler_parm,   /* not used */
+				 u32   interruptGroup, /* not used */
+				 u32 * cg )
+{
+  CHECK_DMA_ACCESS;
+
+  unsigned i,j;
+  u32 *counters;
+  u32 c_bits;
+  int min_id, max_id, word_id, bit_id, global_subgrp;
+  DMA_CounterGroup_t * cg_ptr = (DMA_CounterGroup_t *)cg;
+  if ( type > 1 )                                           return -EINVAL;
+  if ( grp >= 4 )                                           return -EINVAL;
+  if ( subgrps == NULL )                                    return -EINVAL;
+  if ( num_subgrps <= 0 )                                   return -EINVAL;
+  if ( num_subgrps >  DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) return -EINVAL;
+  if ( cg_ptr == NULL )                                     return -EINVAL;
+
+  if ( type == DMA_Type_Injection )
+    counters = &bgpdma_dev.state.inj_counters[grp];
+  else
+    counters = &bgpdma_dev.state.rec_counters[grp];
+
+  c_bits = 0;
+  for(i=0;i< num_subgrps;i++)
+    {
+      if ( subgrps[i] < 0 )                                   return -EINVAL;
+      if (subgrps[i] >= DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) return -EINVAL;
+      if ( *counters & _BN(subgrps[i]) )
+	{
+	  printk( KERN_WARNING
+		  "bgpdma: tried to allocate busy counters grp:%d subgrps:%d\n",
+		  grp, subgrps[i]);
+	  return -EBUSY;
+	}
+      c_bits |= _BN(subgrps[i]);
+    }
+
+  memset( cg_ptr, 0, sizeof(DMA_CounterGroup_t));
+  cg_ptr->type     = type;
+  cg_ptr->group_id = grp;
+
+  if ( type == DMA_Type_Injection )
+    cg_ptr->status_ptr = (DMA_CounterStatus_t *) _BGP_VA_iDMA_COUNTER_ENABLED(grp,0);
+  else
+    cg_ptr->status_ptr = (DMA_CounterStatus_t *) _BGP_VA_rDMA_COUNTER_ENABLED(grp,0);
+
+  for(i=0;i< num_subgrps;i++)
+    {
+      min_id = subgrps[i] * DMA_NUM_COUNTERS_PER_SUBGROUP;
+      max_id = min_id + DMA_NUM_COUNTERS_PER_SUBGROUP;
+      global_subgrp = (grp * DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) + subgrps[i];
+
+      cg_ptr->grp_permissions |= _BN( global_subgrp );
+      for ( j = min_id; j < max_id; j++ )
+	{
+	  word_id =  DMA_COUNTER_GROUP_WORD_ID(j);
+	  bit_id  =  DMA_COUNTER_GROUP_WORD_BIT_ID(j);
+	  cg_ptr->permissions[ word_id ] |= _BN(bit_id);
+
+	  if ( type == DMA_Type_Injection )
+	    {
+	      cg_ptr->counter[j].counter_hw_ptr =
+		( DMA_CounterHw_t *)  _BGP_VA_iDMA_COUNTER(grp,j);
+	      DMA_CounterSetValueBaseHw(cg_ptr->counter[j].counter_hw_ptr, 0, 0);
+	       /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->counter); */
+	       /* CHECK_RET(ret); */
+	       /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_base); */
+	       /* CHECK_RET(ret); */
+              if(0) 
+	      TRACE((
+			      KERN_INFO "DMA Injection cntr allocated: %d(%08x)\n",
+		     j,(unsigned)cg_ptr->counter[j].counter_hw_ptr));
+
+	    }
+	  else
+	    {
+	      cg_ptr->counter[j].counter_hw_ptr =
+		( DMA_CounterHw_t *)  _BGP_VA_rDMA_COUNTER(grp,j);
+	      DMA_CounterSetValueBaseMaxHw(cg_ptr->counter[j].counter_hw_ptr, 0, 0, 0);
+	       /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->counter); */
+	       /* CHECK_RET(ret); */
+	       /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_base); */
+	       /* CHECK_RET(ret); */
+	       /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_max); */
+	       /* CHECK_RET(ret); */
+	      if(0) 
+	      TRACE((
+			      KERN_INFO "DMA Reception cntr allocated: %d(%08x)\n",
+		     j,(unsigned)cg_ptr->counter[j].counter_hw_ptr));
+
+	    }
+	   /*  disable the counter, clear its hit-zero */
+	   /* DMA_CounterSetDisableById  ( cg_ptr,j ); */
+	  cg_ptr->status_ptr->disable[word_id] = _BN(bit_id);
+	   /* ret = put_user( _BN(bit_id), &cg_ptr->status_ptr->disable[word_id] ); */
+	   /* CHECK_RET(ret); */
+	   /* DMA_CounterClearHitZeroById( &cg,j ); */
+	  cg_ptr->status_ptr->clear_hit_zero[word_id] = _BN(bit_id);
+	   /* ret = put_user( _BN(bit_id), &cg_ptr->status_ptr->clear_hit_zero[word_id] ); */
+	   /* CHECK_RET(ret); */
+	}
+    }
+
+  _bgp_msync();
+
+   /*  mark counters allocated in the global state */
+  *counters |= c_bits;
+
+  TRACE((
+		  KERN_INFO "Allocated counters:%08x\n",*counters));
+
+  return 0;
+}
+EXPORT_SYMBOL(Kernel_CounterGroupAllocate);
+
+
+/*
+ *   Query free inj fifos
+ */
+u32 Kernel_InjFifoGroupQueryFree( u32 grp, u32 * num_fifos, u32 * fifo_ids )
+{
+  CHECK_DMA_ACCESS;
+
+  int ret = 0;
+  u32 state;
+  int i;
+
+  if ( grp  >= DMA_NUM_INJ_FIFO_GROUPS )        return  -EINVAL;
+  if ( num_fifos == NULL || fifo_ids == NULL )  return  -EINVAL;
+
+  state = bgpdma_dev.state.inj_fifos[grp];
+
+  (*num_fifos) = 0;
+  for(i=0;i< DMA_NUM_INJ_FIFOS_PER_GROUP;i++)
+    {
+      if ( ( state & _BN(i) ) == 0 )
+	{
+	  fifo_ids[(*num_fifos)] = i;
+	  (*num_fifos)++;
+	  TRACE((
+			  KERN_INFO "Free inj fifo: %d\n",i));
+	}
+    }
+
+  return ret;
+}
+EXPORT_SYMBOL(Kernel_InjFifoGroupQueryFree);
+
+
+/*
+ *   Allocate inj fifos from a group
+ */
+u32 Kernel_InjFifoGroupAllocate( u32   grp,
+				 u32   num_fifos,
+				 u32 * ids,
+				 u16 * pri,
+				 u16 * loc,
+				 u8  * map,
+				 u32 * fg )
+{
+  CHECK_DMA_ACCESS;
+
+   /*  MUST be called when the DMA is inactive, prior to any DMA activity */
+  int i;
+  u32 f_bits =0;
+  u32 p_bits =0;
+  u32 l_bits =0;
+  DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg;
+
+  if ( fg_ptr == NULL )                                                return -EINVAL;
+  if ( grp < 0 || grp >= DMA_NUM_FIFO_GROUPS )                         return -EINVAL;
+  if ( num_fifos <= 0 || num_fifos > DMA_NUM_INJ_FIFOS_PER_GROUP )     return -EINVAL;
+  if ( ids == NULL || pri == NULL || map == NULL )                     return -EINVAL;
+
+  f_bits = 0;  /*  holds a bit vector of all fifos used in this allocation */
+  for ( i = 0; i < num_fifos; i++ )
+    {
+      if ( ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+      if ( pri[i] > 1 || loc[i] > 1 )              return -EINVAL;
+      if ( loc[i] == 0 && map[i] == 0 )            return -EINVAL;
+      if ( loc[i] == 1 && map[i] != 0 )            return -EINVAL;
+
+      if ( bgpdma_dev.state.inj_fifos[grp] & _BN(ids[i]) )
+	{
+	  printk( KERN_WARNING
+		  "bgpdma: tried to allocate busy inj fifos grp:%d fifo_id:%d\n",
+		  grp, ids[i]);
+	  return -EBUSY;
+	}
+
+      f_bits |= _BN(ids[i]);
+      if ( loc[i] == 1 ) l_bits |= _BN(i);
+      if ( pri[i] == 1 ) p_bits |= _BN(i);
+    }
+
+
+  memset( fg_ptr, 0, sizeof(DMA_InjFifoGroup_t));
+  fg_ptr->status_ptr   = (DMA_InjFifoStatus_t *) _BGP_VA_iDMA_NOT_EMPTY(grp);
+  fg_ptr->group_id     = grp;
+  fg_ptr->permissions |= f_bits;
+
+   /*  Disable interrupts and the injection FIFOs */
+  unsigned long flags;
+  local_irq_save(flags);
+   bgcnsd.services->
+     setDmaFifoControls( BGCNS_Disable,BGCNS_InjectionFifoInterrupt, grp,f_bits,NULL );
+   bgcnsd.services->
+     setDmaFifoControls( BGCNS_Disable,BGCNS_InjectionFifo,          grp,f_bits,NULL );
+   local_irq_restore(flags);
+
+    /*  deactivate all these fifos */
+   fg_ptr->status_ptr->deactivate = f_bits;
+    /* ret = put_user( f_bits, &fg.status_ptr->deactivate ); */
+    /* CHECK_RET(ret); */
+
+   _bgp_mbar();  /*  make sure write is in the DMA */
+
+   local_irq_save(flags);
+   bgcnsd.services->setDmaInjectionMap( grp, (unsigned*)ids, map, num_fifos );
+   local_irq_restore(flags);
+
+   for ( i=0;i< num_fifos; i++)
+      {
+	fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr =
+	  ( DMA_FifoHW_t *) _BGP_VA_iDMA_START(grp, ids[i]);
+	fg_ptr->fifos[ids[i]].fifo_id      = ids[i];
+	fg_ptr->fifos[ids[i]].desc_count   = 0;
+	fg_ptr->fifos[ids[i]].occupiedSize = 0;
+	fg_ptr->fifos[ids[i]].priority     = pri[i] ;
+	fg_ptr->fifos[ids[i]].local        = loc[i];
+	fg_ptr->fifos[ids[i]].ts_inj_map   = map[i];
+
+	 /*  write 0's to the hw fifo */
+	fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_start = 0;
+	 /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_start ); */
+	 /* CHECK_RET(ret); */
+	fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_head = 0;
+	 /* ret = put_user ( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_head ); */
+	 /* CHECK_RET(ret); */
+	fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_tail = 0;
+	 /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_tail ); */
+	 /* CHECK_RET(ret); */
+	fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_end = 0;
+	 /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_end ); */
+	 /* CHECK_RET(ret); */
+
+/* 	TRACE((KERN_INFO "Allocate inj fifo: %d",ids[i])); */
+      }
+
+    /*  clear the threshold crossed */
+   _bgp_mbar();    /*  no previous write will pass this one */
+   fg_ptr->status_ptr->clear_threshold_crossed = f_bits;
+    /* ret = put_user( f_bits, &fg.status_ptr->clear_threshold_crossed ); */
+    /* CHECK_RET(ret); */
+
+   local_irq_save(flags);
+    /*  set the local copy bits */
+   bgcnsd.services->setDmaLocalCopies(BGCNS_Enable, grp, l_bits);
+    /*  set the priority bits */
+   bgcnsd.services->setDmaPriority(BGCNS_Enable, grp, p_bits);
+
+    /*  Enable interrupts for these fifos. */
+    /*  NOTE: enablement of the injection FIFO will take place during FIFO init. */
+    /*  _bgp_cns()->setDmaFifoControls( BGCNS_Enable, BGCNS_InjectionFifoInterrupt, grp, f_ids, NULL ); */
+   local_irq_restore(flags);
+
+    /*  mark fifos allocated in the global state */
+   bgpdma_dev.state.inj_fifos[grp] |= f_bits;
+
+   return 0;
+}
+EXPORT_SYMBOL(Kernel_InjFifoGroupAllocate);
+
+/*
+ *   General fifo init
+ */
+static inline int FifoInit( DMA_Fifo_t * f_ptr,
+			    void       * va_start,
+			    void       * va_head,
+			    void       * va_end )
+{
+  int ret = 0;
+  uint32_t pa_start, pa_head, pa_end;
+  unsigned bytes;
+/*
+  TRACE((
+		  KERN_INFO "FifoInit va_start:%08x va_head:%08x va_end:%08x\n",
+	 (u32)va_start,(u32)va_head,(u32)va_end));
+*/
+  if ( f_ptr == NULL )                    return -EINVAL;
+  if ( f_ptr->fifo_hw_ptr == NULL )       return -EINVAL;
+  if ( ((uint32_t)va_start & 0x1F) != 0 ) return -EINVAL;
+  if ( ((uint32_t)va_end   & 0x1F) != 0 ) return -EINVAL;
+  if ( ((uint32_t)va_head  & 0xF ) != 0 ) return -EINVAL;
+
+  bytes = (uint32_t)va_end - (uint32_t)va_start;
+
+   /*  translate the start address (the physical region is assumed contiguous) */
+  pa_start = virt_to_phys ( va_start );
+/*   TRACE((KERN_INFO "bgpdma: FifoInit() va_start:%08x pa_start:%08x shifted:%08x", */
+/* 	 (u32)va_start, pa_start, pa_start>>4 )); */
+  pa_start >>= 4;  /*  we need 16-byte aligned address */
+
+   /* ret = VaTo4bitShiftedPa( va_start, bytes, &pa_start ); */
+   /* CHECK_RET(ret); */
+
+   /*  the physical region is contiguous, so we can compute pa_end and pa_head */
+  pa_end  = pa_start + ( bytes >> 4 );
+  pa_head = pa_start + ( ((uint32_t)va_head - (uint32_t)va_start ) >> 4 );
+
+ /* Write the start, end , head and tail(= head) */
+  f_ptr->fifo_hw_ptr->pa_start = pa_start;
+   /* ret = put_user ( pa_start, &f_ptr->fifo_hw_ptr->pa_start ); */
+   /* CHECK_RET(ret); */
+  f_ptr->fifo_hw_ptr->pa_head  = pa_head;
+   /* ret = put_user( pa_head, &f_ptr->fifo_hw_ptr->pa_head ); */
+   /* CHECK_RET(ret); */
+  f_ptr->fifo_hw_ptr->pa_tail  = pa_head;
+   /* ret = put_user( pa_head, &f_ptr->fifo_hw_ptr->pa_tail ); */
+   /* CHECK_RET(ret); */
+  f_ptr->fifo_hw_ptr->pa_end   = pa_end;
+   /* ret = put_user( pa_end, &f_ptr->fifo_hw_ptr->pa_end ); */
+   /* CHECK_RET(ret); */
+
+  _bgp_mbar();
+
+  /* Save the shadows in the structure */
+  f_ptr->pa_start = pa_start;
+  f_ptr->va_start = va_start;
+  f_ptr->va_end   = va_end;
+  f_ptr->va_head  = va_head;
+  f_ptr->va_tail  = va_head;
+
+  /* Compute the free space */
+  f_ptr->fifo_size  = bytes >> 4; /* Number of 16B quads */
+  f_ptr->free_space = f_ptr->fifo_size;
+
+  return ret;
+}
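+
+/*
+ * Illustrative sketch: the fifo hardware registers hold physical addresses
+ * shifted right by 4 (16-byte quads), so FifoInit() turns a 32-byte-aligned,
+ * physically contiguous virtual range into pa_start = phys >> 4 and
+ * pa_end = pa_start + (bytes >> 4).  The caller below is hypothetical and
+ * assumes f->fifo_hw_ptr already points at the fifo's hardware registers.
+ */
+static inline int FifoInitExample( DMA_Fifo_t * f )
+{
+  /* 512-byte fifo: fifo_size becomes 512 >> 4 = 32 quads, all free */
+  static char buf[512] __attribute__((aligned(32)));
+
+  /* head == start: the fifo starts out empty (tail is set to head) */
+  return FifoInit( f, buf, buf, buf + sizeof(buf) );
+}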
+
+
+/*
+ *   Initialize an injection fifo
+ */
+u32 Kernel_InjFifoInitById( u32 * fg,
+			    int    fifo_id,
+			    u32 * va_start,
+			    u32 * va_head,
+			    u32 * va_end )
+{
+  CHECK_DMA_ACCESS;
+
+  int ret = 0;
+  int grp;
+  uint32_t x_phead, x_vstart, x_pstart, x_vtail;
+  DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg;
+
+  if ( fg_ptr == NULL )                                               return -EINVAL;
+  if ( fifo_id < 0 || fifo_id >= DMA_NUM_INJ_FIFOS_PER_GROUP )        return -EINVAL;
+  if ( va_start >= va_end || va_start > va_head || va_head > va_end ) return -EINVAL;
+  if ( (u32)va_head+DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES > (u32)va_end ) return -EINVAL;
+  if ( (u32)va_end - (u32)va_start < DMA_MIN_INJ_FIFO_SIZE_IN_BYTES ) return -EINVAL;
+  if ( ((u32)va_start & 0x1F) != 0 )                                  return -EINVAL;
+  if ( ((u32)va_end   & 0x1F) != 0 )                                  return -EINVAL;
+  if ( ((u32)va_head  & 0xF)  != 0 )                                  return -EINVAL;
+
+  if (( fg_ptr->permissions & _BN(fifo_id)) == 0 ) return -EBUSY;
+
+  grp = fg_ptr->group_id;
+
+
+   /*  Disable the injection FIFO and its interrupt: */
+  unsigned long flags;
+  local_irq_save(flags);
+  bgcnsd.services->
+    setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifo, grp, _BN(fifo_id), NULL);
+  bgcnsd.services->
+    setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifoInterrupt, grp, _BN(fifo_id), NULL );
+  local_irq_restore(flags);
+
+
+  /* Deactivate the fifo */
+  fg_ptr->status_ptr->deactivate = _BN(fifo_id);
+   /* ret = put_user ( _BN(fifo_id), &fg.status_ptr->deactivate ); */
+   /* CHECK_RET(ret); */
+
+  /* Initialize the fifo */
+  ret = FifoInit( &fg_ptr->fifos[fifo_id].dma_fifo, va_start, va_head, va_end );
+  CHECK_RET(ret);
+
+  /* Initialize the descriptor count and occupied size */
+  fg_ptr->fifos[fifo_id].desc_count   = 0;
+  fg_ptr->fifos[fifo_id].occupiedSize = 0;
+
+   /*  clear the threshold crossed */
+  fg_ptr->status_ptr->clear_threshold_crossed = _BN(fifo_id);
+   /* ret = put_user( _BN(fifo_id), &fg.status_ptr->clear_threshold_crossed ); */
+   /* CHECK_RET(ret); */
+
+   /*  read back something from the dma to ensure all writes have occurred */
+   /*  head should equal tail */
+  x_phead  = fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head;
+   /* ret = get_user( x_phead, &fg.fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head ); */
+   /* CHECK_RET(ret); */
+  x_vstart = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.va_start);
+  x_pstart = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.pa_start);
+  x_vtail  = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.va_tail);
+  if ( x_vstart + ( (x_phead - x_pstart)  << 4 ) != x_vtail ) return -EIO;
+
+   /*  Enable the FIFO and its interrupt: */
+  local_irq_save(flags);
+  bgcnsd.services->
+    setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifo, grp, _BN(fifo_id), NULL);
+   /* bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifoInterrupt, grp, _BN(fifo_id), NULL); */
+  local_irq_restore(flags);
+
+   /*  Activate the fifo */
+  fg_ptr->status_ptr->activate = _BN(fifo_id);
+   /* ret = put_user( _BN(fifo_id), &fg.status_ptr->activate ); */
+   /* CHECK_RET(ret); */
+
+  return 0;
+}
+EXPORT_SYMBOL(Kernel_InjFifoInitById);
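+
+/*
+ * Usage sketch (hypothetical values): initializing fifo 0 of a group
+ * previously set up by Kernel_InjFifoGroupAllocate(), so that the
+ * permissions bit for this fifo is already set.  The buffer must be at
+ * least DMA_MIN_INJ_FIFO_SIZE_IN_BYTES and 32-byte aligned; 1024 bytes
+ * is an assumption for illustration.
+ */
+static inline int InjFifoInitExample( DMA_InjFifoGroup_t * fg )
+{
+  static char fifo_buf[1024] __attribute__((aligned(32)));
+
+  /* start == head: the fifo begins empty */
+  return Kernel_InjFifoInitById( (u32 *)fg, 0 /* fifo_id */,
+				 (u32 *)fifo_buf,
+				 (u32 *)fifo_buf,
+				 (u32 *)(fifo_buf + sizeof(fifo_buf)) );
+}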
+
+
+/*
+ * Free inj fifos
+ */
+uint32_t Kernel_InjFifoGroupFree(uint32_t   grp,
+				 uint32_t   num_fifos,
+				 uint32_t * fifo_ids,
+				 uint32_t * fg)
+{
+  int ret = 0;
+  u32 f_bits = 0;
+  int i;
+  DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg;
+
+  if ( fg_ptr == NULL )                                            return -EINVAL;
+  if ( grp < 0 || grp >= DMA_NUM_FIFO_GROUPS )                     return -EINVAL;
+  if ( num_fifos <= 0 || num_fifos > DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+  if ( fifo_ids == NULL )                                          return -EINVAL;
+
+  f_bits = 0;  /*  holds a bit vector of all fifos used in this allocation */
+  for ( i = 0; i < num_fifos; i++ )
+    {
+      if ( fifo_ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+
+      if ( ! (bgpdma_dev.state.inj_fifos[grp] & _BN(fifo_ids[i])) )
+	{
+	  printk( KERN_WARNING
+		  "bgpdma: tried to free a non-allocated inj fifo grp:%d fifo_id:%d\n",
+		  grp, fifo_ids[i]);
+	  return -EBUSY;
+	}
+
+      f_bits |= _BN(fifo_ids[i]);
+    }
+
+  for ( i = 0; i < num_fifos; i++ )
+    fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr = NULL;
+
+  fg_ptr->permissions ^= f_bits;
+  fg_ptr->status_ptr->deactivate = f_bits;
+
+  /* Record that the injection FIFOs are free */
+  bgpdma_dev.state.inj_fifos[grp] &= ~ f_bits;
+
+  return ret;
+}
+
+
+
+/*
+ * Set the reception fifos map
+ */
+int Kernel_RecFifoSetMap( u32 * map )
+{
+  CHECK_DMA_ACCESS;
+
+  int i, g;
+  DMA_RecFifoMap_t * map_ptr = (DMA_RecFifoMap_t *)map;
+
+   /*   NOTE: this needs a lock around it; we assume either the syscall */
+   /*   mechanism provides one or it has to be added here */
+
+   /*   MUST BE CALLED ONCE, prior to any DMA activity.  Specifically, it */
+   /*   must be called after _bgp_DMA_Reset_Release and prior to any */
+   /*   _BGP_rDMA_Fifo_Get_Fifo_Group calls */
+
+  if ( map_ptr == NULL )            return -EINVAL;
+  if ( map_ptr->save_headers > 1 )  return -EINVAL;
+
+  for (i=0; i< DMA_NUM_NORMAL_REC_FIFOS; i++)
+    if ( ( map_ptr->fifo_types[i] < 0 ) || ( map_ptr->fifo_types[i] > 1)) return -EINVAL;
+
+   /*  rec fifo map can be set only once */
+  if ( bgpdma_dev.state.rec_fifo_set_map != 0 ) return -EBUSY;
+
+  if ( map_ptr->save_headers == 1)
+    for (i=0; i< DMA_NUM_HEADER_REC_FIFOS; i++)
+      if ( ( map_ptr->hdr_fifo_types[i] <0 ) ||  ( map_ptr->hdr_fifo_types[i] > 1 ))
+	return  -EINVAL;
+
+  for (g=0; g< DMA_NUM_REC_FIFO_GROUPS;g++)
+    for (i=0; i<  DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP; i++)
+      if ( map_ptr->ts_rec_map[g][i] >= DMA_NUM_NORMAL_REC_FIFOS)
+	return  -EINVAL;
+
+  TRACE(( KERN_INFO "bgpdma: Kernel_RecFifoSetMap() disabling reception FIFO interrupts\n" ));
+
+  unsigned long flags;
+  local_irq_save(flags);
+   /*  Disable the reception FIFOs */
+  bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifo, 0 /* group not used */, 0xFFFFFFFF, NULL );
+  bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo, BGCNS_DMA_ALL_GROUPS, 0 /* mask not used */, NULL );
+
+   /*  Set the map: */
+  bgcnsd.services->setDmaReceptionMap(map_ptr->ts_rec_map,
+			      map_ptr->fifo_types,
+			      map_ptr->save_headers ? map_ptr->hdr_fifo_types : NULL,
+			      map_ptr->threshold );
+
+  local_irq_restore(flags);
+
+   /*  Don't enable the fifos here,  the fifo init will do that */
+  bgpdma_dev.state.rec_fifo_set_map = 1;
+
+  return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoSetMap);
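+
+/*
+ * Usage sketch (illustrative values): a minimal reception map, set once
+ * before any DMA activity as required above.  A zero-filled map steers
+ * every torus group/fifo slot to global fifo 0, uses type 0 for all
+ * fifos, saves no headers, and leaves the thresholds at zero.
+ */
+static inline int RecFifoSetMapExample( void )
+{
+  static DMA_RecFifoMap_t map;  /* static, hence zero-filled */
+
+  return Kernel_RecFifoSetMap( (u32 *)&map );
+}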
+
+
+/*
+ * Get the reception fifos map
+ */
+int Kernel_RecFifoGetMap( u32 * map )
+{
+  CHECK_DMA_ACCESS;
+
+  int ret;
+  DMA_RecFifoMap_t * map_ptr = (DMA_RecFifoMap_t *)map;
+
+  if ( map_ptr == NULL ) return -EINVAL;
+
+  memset( map_ptr, 0, sizeof(DMA_RecFifoMap_t) );
+
+  unsigned long flags;
+  local_irq_save(flags);
+
+  ret = bgcnsd.services->getDmaReceptionMap( map_ptr->ts_rec_map,
+				     map_ptr->fifo_types,
+				     &(map_ptr->save_headers),
+				     map_ptr->hdr_fifo_types,
+				     map_ptr->threshold);
+
+  local_irq_restore(flags);
+
+  CHECK_RET(ret);
+
+  return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoGetMap);
+
+/*
+ *   Initialize a receiver fifo group
+ */
+int Kernel_RecFifoGetFifoGroup( u32  * fg,
+				int    grp,                /* group number */
+				int    target,             /* not used */
+				void * normal_handler,     /* not used */
+				void * normal_handler_parm,/* not used */
+				void * header_handler,     /* not used */
+				void * header_handler_parm,/* not used */
+				void * interruptGroup )    /* not used */
+{
+  CHECK_DMA_ACCESS;
+
+  int ret;
+  DMA_RecFifoMap_t   map;
+
+  uint32_t used_fifos;
+  int g,i,j,min_id,max_id,idx;
+  uint32_t x;
+  DMA_RecFifoGroup_t * fg_ptr = (DMA_RecFifoGroup_t *)fg;
+
+  if ( fg_ptr == NULL )                           return -EINVAL;
+  if ( grp < 0 || grp >= DMA_NUM_REC_FIFO_GROUPS ) return -EINVAL;
+   /*  if ( target < 0 || target > 4 )                 return -EINVAL; */
+
+  memset( fg_ptr, 0, sizeof(DMA_RecFifoGroup_t) );
+
+
+   /*  get the map */
+  unsigned long flags;
+  local_irq_save(flags);
+  ret = bgcnsd.services->getDmaReceptionMap( map.ts_rec_map,
+				     map.fifo_types,
+				     &(map.save_headers),
+				     map.hdr_fifo_types,
+				     map.threshold);
+  local_irq_restore(flags);
+
+  CHECK_RET(ret);
+
+   /*  set the mask */
+  fg_ptr->group_id = grp;
+  switch(grp)
+    {
+    case 0: fg_ptr->mask   = 0xFF000000; break;
+    case 1: fg_ptr->mask   = 0x00FF0000; break;
+    case 2: fg_ptr->mask   = 0x0000FF00; break;
+    case 3: fg_ptr->mask   = 0x000000FF; break;
+    }
+
+   /*  set the status pointer */
+  fg_ptr->status_ptr = ( DMA_RecFifoStatus_t *) _BGP_VA_rDMA_NOT_EMPTY(grp,0);
+
+   /*  figure out which normal fifos are being used */
+  min_id = (grp*DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP);
+  max_id = min_id + DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP - 1;
+
+  used_fifos = 0;
+  for (g=0;g< DMA_NUM_REC_FIFO_GROUPS;g++)
+    for(i=0;i<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;i++)
+      if (  ( map.ts_rec_map[g][i] >=  min_id ) && (map.ts_rec_map[g][i] <=  max_id) )
+	used_fifos |= _BN(map.ts_rec_map[g][i]);
+
+  idx = 0;
+  for(j= 0;j<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;j++)
+    {
+      i = min_id + j;
+      if ( ( _BN(i) & used_fifos) != 0 )
+	{
+	  fg_ptr->fifos[idx].type           =  map.fifo_types[i];
+	  fg_ptr->fifos[idx].global_fifo_id = i;
+	  fg_ptr->fifos[idx].num_packets_processed_since_moving_fifo_head = 0;
+	  fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr = ( DMA_FifoHW_t *) _BGP_VA_rDMA_START(grp,j);
+	   /*  Make sure this fifo is disabled */
+	  fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_start = 0;
+	   /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_start ); */
+	   /* CHECK_RET(ret); */
+	  fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_head = 0;
+	   /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_head ); */
+	   /* CHECK_RET(ret); */
+	  fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_tail = 0;
+	   /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_tail ); */
+	   /* CHECK_RET(ret); */
+	  fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_end = 0;
+	   /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_end ); */
+	   /* CHECK_RET(ret); */
+
+	  idx++;
+	}
+    }   /*  j loop */
+
+   /*  are we saving headers? */
+  if ( map.save_headers == 1 )
+    {
+      fg_ptr->num_hdr_fifos = 1;
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].type = map.hdr_fifo_types[grp];
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].global_fifo_id = DMA_NUM_NORMAL_REC_FIFOS+grp;
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].num_packets_processed_since_moving_fifo_head = 0;
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr =
+	( DMA_FifoHW_t *) _BGP_VA_rDMA_START(grp, DMA_HEADER_REC_FIFO_ID);
+
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_start = 0;
+       /* ret = */
+       /* 	put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_start ); */
+       /* CHECK_RET(ret); */
+
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_head = 0;
+       /* ret = */
+       /* 	put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_head ); */
+       /* CHECK_RET(ret); */
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_tail = 0;
+       /* ret = */
+       /* 	put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_tail ); */
+       /* CHECK_RET(ret); */
+      fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_end = 0;
+       /* ret = */
+       /* 	put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_end ); */
+       /* CHECK_RET(ret); */
+    }
+
+  fg_ptr->num_normal_fifos = idx;
+  fg_ptr->status_ptr->clear_threshold_crossed[0] = fg_ptr->mask;
+   /* ret = put_user( fg_ptr->mask, &fg_ptr->status_ptr->clear_threshold_crossed[0] ); */
+   /* CHECK_RET(ret); */
+  fg_ptr->status_ptr->clear_threshold_crossed[1] = fg_ptr->mask;
+   /* ret = put_user( fg_ptr->mask, &fg_ptr->status_ptr->clear_threshold_crossed[1] ); */
+   /* CHECK_RET(ret); */
+
+   /*  read back from the dma to ensure all writes have occurred */
+  _bgp_mbar();
+  x = fg_ptr->status_ptr->threshold_crossed[0];
+   /* ret = get_user( x, &fg_ptr->status_ptr->threshold_crossed[0] ); */
+   /* if ( ret )                 return ret; */
+  if ( (x & fg_ptr->mask) != 0  ) return -EIO;
+
+   /*  reenable interrupts, if necessary */
+   /*  */
+   /*  DCRs 0xD71, 0xD72, 0xD73, and 0xD74 contain bits indicating which */
+   /*  reception fifos will be enabled for interrupt 0, 1, 2, and 3, respectively. */
+   /*  These interrupts correspond to BIC interrupt group 2, IRQs 28, 29, 30, and */
+   /*  31, respectively.  Thus, if bit i is on in DCR 0xD7z, and rec fifo i's */
+   /*  free space drops below the threshold for that fifo, then IRQ 28 + (z-1) */
+   /*  will fire. */
+   /*  */
+   /*  For each reception fifo in this group, turn on bit i in DCR 0xD7z, where */
+   /*  z-1 is the group number. */
+   /*  */
+
+  used_fifos = 0;
+  for (i = 0; i < fg_ptr->num_normal_fifos; i++)
+    used_fifos |= _BN(fg_ptr->fifos[i].global_fifo_id);
+
+  TRACE(( KERN_INFO "bgpdma: Kernel_RecFifoGetFifoGroup() enabling reception FIFO interrupts\n" ));
+  local_irq_save(flags);
+
+  bgcnsd.services->setDmaFifoControls(BGCNS_Enable,
+			       BGCNS_ReceptionFifoInterrupt,
+			       fg_ptr->group_id,
+			       used_fifos,
+			       NULL);
+
+   local_irq_restore(flags);
+
+
+  _bgp_msync();
+  _bgp_isync();
+
+  return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoGetFifoGroup);
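+
+/*
+ * A minimal restatement of the interrupt routing described in the comments
+ * above, assuming the DCR and IRQ numbering given there: reception fifo
+ * group g is controlled by DCR 0xD71 + g, and a threshold crossing on a
+ * fifo enabled there raises BIC group 2, IRQ 28 + g.
+ */
+static inline unsigned int RecFifoGroupDcr( unsigned int grp )
+{
+  return 0xD71 + grp;  /* grp 0..3 -> DCR 0xD71..0xD74 */
+}
+
+static inline unsigned int RecFifoGroupIrq( unsigned int grp )
+{
+  return 28 + grp;     /* grp 0..3 -> IRQ 28..31       */
+}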
+
+/*
+ *   Initialize a reception fifo
+ */
+int Kernel_RecFifoInitById( u32  * fg,
+			    int    fifo_id,
+			    void * va_start,
+			    void * va_head,
+			    void * va_end )
+{
+  CHECK_DMA_ACCESS;
+
+  int ret;
+  uint32_t st_word, st_mask;
+  uint32_t x_phead, x_vtail, x_vstart, x_pstart;
+  int i, grp, g_fifo_id;
+  DMA_RecFifoGroup_t * fg_ptr = (DMA_RecFifoGroup_t *)fg;
+  uint32_t xint[4] = {0,0,0,0};
+
+  if ( fg_ptr == NULL )                                                return -EINVAL;
+  if ( fifo_id < 0 || fifo_id >= DMA_NUM_REC_FIFOS_PER_GROUP )         return -EINVAL;
+  if ( va_start >= va_end || va_start > va_head || va_head > va_end )  return -EINVAL;
+  if ( ((u32)va_start & 0x1F) != 0 )                                   return -EINVAL;
+  if ( ((u32)va_end   & 0x1F) != 0 )                                   return -EINVAL;
+  if ( ((u32)va_head  & 0xF)  != 0 )                                   return -EINVAL;
+   /* if ( (u32)va_end - (u32)va_start <  DMA_MIN_REC_FIFO_SIZE_IN_BYTES ) return -EINVAL; */
+
+  /*
+   * Note:  The reception fifos are in a disabled state upon return from
+   *        DMA_RecFifoSetMap(), so we assume they are disabled at this point,
+   *        making it safe to set the start, head, etc.
+   */
+
+   /*  NOTE:  This assumes the interrupt enables have been previously set as desired, */
+   /*   in _bgp_DMA_RecFifoGetFifoGroup, so we simply read those dcrs, disable all fifos, */
+   /*   and write them back at the end */
+
+  grp       = fg_ptr->group_id;
+  g_fifo_id = fg_ptr->fifos[fifo_id].global_fifo_id;
+
+  if ( g_fifo_id <  DMA_NUM_NORMAL_REC_FIFOS)  /*  normal fifo */
+     {
+       st_word = 0;                         /*  status word for this fifo */
+       st_mask = _BN(g_fifo_id) & fg_ptr->mask;  /*  status mask for this fifo */
+
+        /*  see if this fifo has already been initialized */
+       if ((bgpdma_dev.state.rec_fifo_init[st_word] & _BN(g_fifo_id)) !=0 ) return -EBUSY;
+        /*  Disable the FIFO and all interrupts (interrupts will be restored below) */
+       TRACE(( KERN_INFO "bgpdma: Kernel_RecFifoInitById() disabling reception FIFO interrupts\n" ));
+       unsigned long flags;
+       local_irq_save(flags);
+       bgcnsd.services->setDmaFifoControls( BGCNS_Disable, BGCNS_ReceptionFifo, 0 /* group not used */, _BN(g_fifo_id), NULL );
+
+       for (i=0; i<4; i++)
+	 bgcnsd.services->setDmaFifoControls( BGCNS_Disable, BGCNS_ReceptionFifoInterrupt, i, 0xFFFFFFFF, &(xint[i]) );  /*  save for re-enablement below */
+       local_irq_restore(flags);
+     }
+  else  /*  header fifo */
+     {
+       st_word = 1;        /*  status word for this fifo */
+       st_mask = fg_ptr->mask;  /*  status mask for this fifo (only one bit is used by the HW) */
+
+        /*  see if this fifo has already been initialized */
+       if ( (bgpdma_dev.state.rec_fifo_init[st_word] & _BN(g_fifo_id-32)) != 0 )
+	 return -EBUSY;
+
+        /*  remember that this fifo has been initialized */
+       bgpdma_dev.state.rec_fifo_init[st_word] |= _BN(g_fifo_id-32);
+
+        /*  Disable the reception header FIFO and its interrupts */
+       TRACE(( KERN_INFO "bgpdma: Kernel_RecFifoInitById() disabling reception header FIFO interrupts\n" ));
+       unsigned long flags;
+       local_irq_save(flags);
+       bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo, grp, 0 /* mask not used */, NULL );
+       bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifoInterrupt, 0, 0xFFFFFFFF, xint );
+       local_irq_restore(flags);
+
+     }
+
+   /*  Initialize the fifo */
+  ret = FifoInit( &fg_ptr->fifos[fifo_id].dma_fifo, va_start, va_head, va_end );
+  CHECK_RET(ret);
+
+
+   /*  remember that this fifo has been initialized */
+  if ( g_fifo_id <  DMA_NUM_NORMAL_REC_FIFOS )                 /*  normal fifo */
+    bgpdma_dev.state.rec_fifo_init[0] |= _BN(g_fifo_id);
+  else                                                         /*  header fifo */
+    bgpdma_dev.state.rec_fifo_init[1] |= _BN(g_fifo_id-32);
+
+   /*  clear the threshold crossed */
+  fg_ptr->status_ptr->clear_threshold_crossed[st_word] = st_mask;
+   /* ret = put_user( st_mask, &fg_ptr->status_ptr->clear_threshold_crossed[st_word] ); */
+   /* CHECK_RET(ret); */
+
+   /*  read back something from the dma to ensure all writes have occurred */
+   /*  head should equal tail */
+  x_phead  = fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head;
+   /* ret = get_user( x_phead, &fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head ); */
+   /* CHECK_RET(ret); */
+  x_vstart = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.va_start;
+  x_pstart = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.pa_start;
+  x_vtail  = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.va_tail;
+  if ( x_vstart + ( (x_phead - x_pstart)  << 4 ) != x_vtail ) return -EIO;
+
+   /*  Enable the FIFO and re-enable interrupts */
+  unsigned long flags;
+  local_irq_save(flags);
+
+  if ( g_fifo_id <  DMA_NUM_NORMAL_REC_FIFOS) {  /*  Normal fifo */
+    TRACE(( KERN_INFO "bgpdma: Kernel_RecFifoInitById() enabling reception FIFO interrupts\n" ));
+    bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_ReceptionFifo, 0 /* group not used */, _BN(g_fifo_id), NULL);
+
+    for (i=0; i<4; i++)
+      bgcnsd.services->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionFifoInterrupt, i, 0 /* mask not used */, &(xint[i]) );  /*  Restore saved state */
+  }
+  else {  /*  Header FIFO */
+    TRACE(( KERN_INFO "bgpdma: Kernel_RecFifoInitById() enabling reception header FIFO interrupts\n" ));
+      bgcnsd.services->setDmaFifoControls(BGCNS_Enable,   BGCNS_ReceptionHeaderFifo, grp, 0 /* mask not used */, NULL );
+       /*  bgcnsd.services->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionHeaderFifoInterrupt, 0, 0, xint ); */
+  }
+
+  local_irq_restore(flags);
+
+  return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoInitById);
+
+/*
+ *  Register interrupt handlers
+ */
+int Kernel_SetCommThreadConfig(int irq,
+			       int opcode,
+			       LockBox_Counter_t cntrid,
+			       Kernel_CommThreadHandler handler,
+			       uint32_t arg1,
+			       uint32_t arg2,
+			       uint32_t arg3,
+			       uint32_t arg4)
+{
+  int ret = 0;
+  int i;
+
+  CHECK_PARAM( arg2 == 0 && arg3 == 0 &&  arg4 == 0 );
+
+  for ( i = 0; i < MAX_NUM_IRQ; i++ )
+    if ( bgpdma_dev.irqInfo[i].irq == 0 || bgpdma_dev.irqInfo[i].irq == irq )
+      break;
+
+  if ( i == MAX_NUM_IRQ )
+    {
+      printk(KERN_INFO "bgpdma: Kernel_SetCommThreadConfig: No more irq info slot\n" );
+      return -ENOSPC;
+    }
+
+  bgpdma_dev.irqInfo[i].func = handler;
+  bgpdma_dev.irqInfo[i].arg1 = arg1;
+
+  if ( bgpdma_dev.irqInfo[i].irq == irq )
+    {
+      TRACE(( KERN_INFO "bgpdma: Kernel_SetCommThreadConfig: Re-registering handler "
+	      "for irq:%d func:%08x arg1:%d\n", irq, (int)handler, arg1 ));
+      return 0;
+    }
+
+  bgpdma_dev.irqInfo[i].irq  = irq;
+
+/*   bgp_dma_irq = irq ; */
+#if defined(TORNIC_TORUS_AFFINITY)
+  bic_set_cpu_for_irq(irq, k_TorusAffinityCPU);
+  TRACE(( KERN_INFO "bgpdma: setting affinity irq=%d affinity=%d\n", irq, k_TorusAffinityCPU ));
+#endif
+
+
+  ret = request_irq(irq,
+		    dmaIrqHandler,
+		    IRQF_DISABLED,
+		    BGP_DMA_NAME,
+		    &bgpdma_dev.irqInfo[i]);
+
+  TRACE(( KERN_INFO "bgpdma: request_irq irq=%d i=%d func=%p arg1=%08x ret=%d\n", irq, i, handler, arg1, ret ));
+  CHECK_RET(ret);
+
+  TRACE(( KERN_INFO "bgpdma: Kernel_SetCommThreadConfig() finished\n" ));
+  return ret;
+}
+
+EXPORT_SYMBOL(Kernel_SetCommThreadConfig);
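+
+/*
+ * Usage sketch: registering a handler for a DMA irq.  The irq number, the
+ * arg1 value, and the handler signature are assumptions for illustration
+ * (Kernel_CommThreadHandler is defined elsewhere in the tree); per the
+ * checks above, arg2..arg4 must be zero.
+ */
+static void ExampleDmaHandler( uint32_t arg1, uint32_t arg2,
+			       uint32_t arg3, uint32_t arg4 )
+{
+  /* invoked from dmaIrqHandler() with the arg1 registered below */
+}
+
+static inline int CommThreadConfigExample( void )
+{
+  return Kernel_SetCommThreadConfig( 60 /* irq, hypothetical */,
+				     0  /* opcode, unused here */,
+				     (LockBox_Counter_t)0,
+				     (Kernel_CommThreadHandler)ExampleDmaHandler,
+				     1 /* arg1 */, 0, 0, 0 );
+}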
+
+/*
+ * Remove commthread from the run queue ... not implemented
+ */
+int pthread_poof_np( void )
+{
+  printk(KERN_INFO "bgpdma: pthread_poof_np() called !!! (bgp_dma.c:%d)\n",
+	 __LINE__);
+  return 0;
+}
+
+
+
diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c b/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c
new file mode 100644
index 0000000..6f96f18
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c
@@ -0,0 +1,1588 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2006,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/*!
+ * \file DMA_Descriptors.c
+ *
+ * \brief Implementations of functions defined in bgp/arch/include/spi/DMA_Descriptors.h
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+
+#ifndef __LINUX_KERNEL__
+
+#include <bpcore/bgp_types.h>
+
+/*!
+ * \brief For kernel_interface.h so that rts_get_personality gets defined
+ */
+#define SPI_DEPRECATED 1
+#include <spi/kernel_interface.h>
+
+#include <spi/DMA_Descriptors.h>
+#include <spi/DMA_Counter.h>
+#include <spi/DMA_InjFifo.h>
+#include <spi/DMA_RecFifo.h>
+
+#include <spi/DMA_Assert.h>
+
+#ifdef __CNK__
+#include <cnk/PersUtils.h>
+#endif
+
+#else
+
+#include <spi/linux_kernel_spi.h>
+
+#endif /* ! __LINUX_KERNEL__ */
+
+
+/*!
+ * \brief Static Info from Personality
+ *
+ * The following structure holds information from the personality.
+ * It is static so that, once the info has been retrieved from the
+ * personality, it does not need to be retrieved again (retrieving
+ * personality info requires a system call).
+ *
+ * It is assumed that this is initialized to zero when the program is
+ * loaded.
+ *
+ */
+static DMA_PersonalityInfo_t personality_info;
+
+
+/*!
+ * \brief Get Personality Information
+ *
+ * Gets personality information into the "personality_info" static structure.
+ *
+ * \post The personality information is retrieved into the structure
+ *
+ */
+void DMA_GetPersonalityInfo(void)
+{
+  _BGP_Personality_t *pers_ptr;
+
+#ifndef __CNK__
+
+  _BGP_Personality_t pers;
+
+  rts_get_personality( &pers,
+		       sizeof(pers) );
+
+  pers_ptr = &pers;
+
+#else
+
+  pers_ptr = _bgp_GetPersonality();
+
+#endif
+
+  personality_info.nodeXCoordinate     = pers_ptr->Network_Config.Xcoord;
+  personality_info.nodeYCoordinate     = pers_ptr->Network_Config.Ycoord;
+  personality_info.nodeZCoordinate     = pers_ptr->Network_Config.Zcoord;
+  personality_info.xNodes              = pers_ptr->Network_Config.Xnodes;
+  personality_info.yNodes              = pers_ptr->Network_Config.Ynodes;
+  personality_info.zNodes              = pers_ptr->Network_Config.Znodes;
+
+  _bgp_msync(); /* Ensure the info has been stored before setting the flag */
+  personality_info.personalityRetrieved = 1;
+  _bgp_msync();
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Message
+ *
+ * A torus direct put message is one that is sent to another node and its data
+ * is directly put into memory by the DMA on the destination node...it does
+ * not go into a reception fifo.
+ *
+ * A torus direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_ctr_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a direct-put packet).
+ *   - Dynamic         = Set based on caller's "vc".
+ *   - VC              = Set to caller's "vc".
+ *   - X,Y,Z           = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = Destination message offset (from the reception
+ *                       counter's base address).  Set to caller's recv_offset.
+ *   - rDMA_Counter    = Reception counter ID.  This counter is located on the
+ *                       destination node and contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       recv_ctr_grp_id and recv_ctr_id.
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      x                The destination's x coordinate (8 bits).
+ * \param[in]      y                The destination's y coordinate (8 bits).
+ * \param[in]      z                The destination's z coordinate (8 bits).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  x+ and x- cannot both be set at the same
+ *                                  time...same with y and z.
+ * \param[in]      vc               The virtual channel that the packet must go
+ *                                  into if it fails to win the bypass
+ *                                  arbitration in the receiving node.
+ *                                  - 0 = Virtual channel dynamic 0
+ *                                  - 1 = Virtual channel dynamic 1
+ *                                  - 2 = Virtual channel deterministic bubble
+ *                                  - 3 = Virtual channel deterministic priority
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_ctr_grp_id  Reception counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      recv_ctr_id      Reception counter ID (within the recv counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      recv_offset      Offset of the payload from the pa_base
+ *                                  associated with the specified reception
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_ctr_grp_id:
+ *       - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the dma receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+*/
+int  DMA_TorusDirectPutDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         x,
+				  unsigned int         y,
+				  unsigned int         z,
+				  unsigned int         hints,
+				  unsigned int         vc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_ctr_grp_id,
+				  unsigned int         recv_ctr_id,
+				  unsigned int         recv_offset,
+				  unsigned int         msg_len
+				 )
+{
+  int c;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( (hints & 0x0000003F) == hints );
+  SPI_assert( vc <= 3 );
+  SPI_assert( inj_ctr_grp_id  < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id      < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( recv_ctr_id     < DMA_NUM_COUNTERS_PER_GROUP );
+
+#ifndef NDEBUG
+
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  SPI_assert( x < personality_info.xNodes );
+  SPI_assert( y < personality_info.yNodes );
+  SPI_assert( z < personality_info.zNodes );
+
+#endif
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits       */
+
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  msg_len;
+
+  /* Torus Headers */
+
+  desc->hwHdr.CSum_Skip = DMA_CSUM_SKIP;    /* Checksum all but header        */
+  desc->hwHdr.Sk        = DMA_CSUM_BIT;     /* Checksum entire packet         */
+  desc->hwHdr.Hint      = hints;            /* Hint Bits from caller          */
+
+  DMA_SetDescriptorPids( desc,
+			 recv_ctr_grp_id ); /* Pids based on recv group id    */
+
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first   */
+                                 /* packet.                                   */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,    */
+                                 /* 7 for 8 chunks).                          */
+
+  desc->hwHdr.Dm        = 1;                /* 1=DMA Mode, 0=Fifo Mode        */
+
+  DMA_SetVc( desc,
+	     vc );                          /* Virtual channel & Dynamic.     */
+
+  desc->hwHdr.X         = x;                /* Destination coordinates        */
+  desc->hwHdr.Y         = y;
+  desc->hwHdr.Z         = z;
+
+  desc->hwHdr.Put_Offset   = recv_offset;
+  desc->hwHdr.rDMA_Counter =
+    recv_ctr_id + recv_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP);
+
+  /* Note: The desc->hwHdr.Payload_Bytes field is set by the iDMA             */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
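+
+/*
+ * Usage sketch (hypothetical coordinates, counter ids, and offsets): a
+ * 1 KB direct put to node (1,2,3), letting the hardware compute the hint
+ * bits and using dynamic virtual channel 0.  The injection and reception
+ * counters are assumed to have been configured with the send and receive
+ * base addresses beforehand.
+ */
+static inline int TorusDirectPutExample( DMA_InjDescriptor_t *desc )
+{
+  return DMA_TorusDirectPutDescriptor( desc,
+				       1, 2, 3, /* x, y, z            */
+				       0,       /* hints: hw-computed */
+				       0,       /* vc: dynamic 0      */
+				       0, 1,    /* inj ctr grp, id    */
+				       0,       /* send_offset        */
+				       0, 1,    /* recv ctr grp, id   */
+				       0,       /* recv_offset        */
+				       1024 );  /* msg_len            */
+}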
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Direct Put Message
+ *
+ * A local direct put message is one that is targeted within the same node, and
+ * its data is directly put into memory by the DMA...it does not go into a
+ * reception fifo.  This is essentially a memcpy via DMA.
+ *
+ * A local direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = Set based on caller's "recv_ctr_grp_id".
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a direct-put packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = Destination message offset (from the reception
+ *                       counter's base address).  Set to caller's recv_offset.
+ *   - rDMA_Counter    = Reception counter ID.  This counter is located on the
+ *                       destination node and contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       recv_ctr_grp_id and recv_ctr_id.
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_ctr_grp_id  Reception counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      recv_ctr_id      Reception counter ID (within the recv counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      recv_offset      Offset of the payload from the pa_base
+ *                                  associated with the specified reception
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_ctr_grp_id:
+ *       - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       The only use for the pid bits is for debug, i.e., if headers are
+ *       being saved.
+ */
+int  DMA_LocalDirectPutDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_ctr_grp_id,
+				  unsigned int         recv_ctr_id,
+				  unsigned int         recv_offset,
+				  unsigned int         msg_len
+				 )
+{
+  int c;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id  < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id      < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( recv_ctr_id     < DMA_NUM_COUNTERS_PER_GROUP );
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->local_memcopy  = 1; /* 1 bit */
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  msg_len;
+
+   /*  Torus Headers */
+
+  DMA_SetDescriptorPids( desc,
+			 recv_ctr_grp_id );
+
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first   */
+                                 /* packet.                                   */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,    */
+                                 /* 7 for 8 chunks).                          */
+
+  desc->hwHdr.Dm        = 1;                /* 1=DMA Mode, 0=Fifo Mode        */
+
+  desc->hwHdr.Put_Offset   = recv_offset;
+  desc->hwHdr.rDMA_Counter =
+    recv_ctr_id + recv_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP);
+
+  /* Note: The desc->hwHdr.Payload_Bytes field is set by the iDMA             */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
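+
+/*
+ * The chunk sizing in the notes above follows from 32-byte chunks and a
+ * 16-byte packet header, so the first packet carries at most 240 payload
+ * bytes.  A minimal sketch of that arithmetic, assuming DMA_PacketChunks()
+ * rounds the first packet up to whole chunks and caps it at 8 (the actual
+ * implementation lives in the SPI headers):
+ */
+static inline int ExamplePacketChunks( unsigned int msg_len )
+{
+  unsigned int payload = msg_len > 240 ? 240 : msg_len; /* first packet */
+
+  /* header + payload, rounded up to 32B chunks:                 */
+  /* e.g. msg_len = 209 -> 8 chunks; msg_len = 208 -> 7 chunks   */
+  return (payload + 16 + 31) / 32;
+}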
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local L3 Prefetch Only Message
+ *
+ * A local prefetch is one in which the DMA simply prefetches the send buffer
+ * into L3.
+ *
+ * A local prefetch DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 1
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the message being
+ *                       prefetched.  This counter contains the base address of
+ *                       the message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = 0 (not used).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a DMA packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ */
+int  DMA_LocalPrefetchOnlyDescriptor(
+				     DMA_InjDescriptor_t *desc,
+				     unsigned int         inj_ctr_grp_id,
+				     unsigned int         inj_ctr_id,
+				     unsigned int         send_offset,
+				     unsigned int         msg_len
+				    )
+{
+  int c;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id  < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id      < DMA_NUM_COUNTERS_PER_GROUP );
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->local_memcopy  = 1; /* 1 bit */
+  desc->prefetch_only  = 1; /* 1 bit */
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  msg_len;
+
+   /*  Torus Headers */
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first   */
+                                 /* packet.                                   */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,    */
+                                 /* 7 for 8 chunks).                          */
+
+  desc->hwHdr.Dm        = 1;                /* 1=DMA Mode, 0=Fifo Mode        */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Remote-Get Message
+ *
+ * A torus remote-get message is one that is sent to another node and its data
+ * is directly put by the DMA into an injection fifo on the destination
+ * node...it does not go into a reception fifo.  Therefore, the payload of this
+ * message is one (or more) descriptors for another message that is to be sent
+ * back to the originating node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor.  Thus, Chunks = 2-1 = 1 (a 64-byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * It is also assumed that the payload is NOT checksummed, since it is not
+ * always reproducible.  Things like idma_counterId and base_offset may be
+ * different on another run, making checksumming inconsistent.
+ *
+ * A torus remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = 32.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used because Sk is 1).
+ *   - Sk              = 1 (do not checksum this packet).
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_inj_fifo_id" (see note).
+ *   - Chunks          = Set to (2)-1 = 1.
+ *   - Dm              = 1 (Indicates a DMA packet).
+ *   - Dynamic         = Set based on caller's "vc".
+ *   - VC              = Set to caller's "vc".
+ *   - X,Y,Z           = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 1.
+ *   - iDMA_Fifo_ID    = Injection fifo ID where the payload will be injected.
+ *                       Set based on caller's recv_inj_ctr_grp_id and
+ *                       recv_inj_ctr_id.
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      x                The destination's x coordinate (8 bits).
+ * \param[in]      y                The destination's y coordinate (8 bits).
+ * \param[in]      z                The destination's z coordinate (8 bits).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  x+ and x- cannot both be set at the same
+ *                                  time...same with y and z.
+ * \param[in]      vc               The virtual channel that the packet must go
+ *                                  into if it fails to win the bypass
+ *                                  arbitration in the receiving node.
+ *                                  - 0 = Virtual channel dynamic 0
+ *                                  - 1 = Virtual channel dynamic 1
+ *                                  - 2 = Virtual channel deterministic bubble
+ *                                  - 3 = Virtual channel deterministic priority
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_inj_fifo_grp_id  Injection fifo group ID where payload
+ *                                       will be injected on destination node
+ *                                       (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in]      recv_inj_fifo_id      Injection fifo ID (within the
+ *                                       recv_inj_fifo_grp_id group)
+ *                                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_inj_fifo_grp_id:
+ *       - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the dma receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+ */
+int  DMA_TorusRemoteGetDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         x,
+				  unsigned int         y,
+				  unsigned int         z,
+				  unsigned int         hints,
+				  unsigned int         vc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_inj_fifo_grp_id,
+				  unsigned int         recv_inj_fifo_id
+				 )
+{
+
+  SPI_assert( desc != NULL );
+  SPI_assert( (hints & 0x0000003F) == hints );
+  SPI_assert( vc <= 3 );
+  SPI_assert( inj_ctr_grp_id       < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id           < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_inj_fifo_grp_id < DMA_NUM_INJ_FIFO_GROUPS );
+  SPI_assert( recv_inj_fifo_id     < DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+#ifndef NDEBUG
+
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  SPI_assert( x < personality_info.xNodes );
+  SPI_assert( y < personality_info.yNodes );
+  SPI_assert( z < personality_info.zNodes );
+
+#endif
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  32;
+
+   /*  Torus Headers */
+
+  desc->hwHdr.Sk      =  1;  /* Don't checksum this packet */
+
+  desc->hwHdr.Hint    = hints;              /* Hint Bits from caller          */
+
+  DMA_SetDescriptorPids( desc,
+			 recv_inj_fifo_grp_id ); /* Pids based on recv fifo   */
+                                                 /* group id                  */
+
+  desc->hwHdr.Chunks    = 1;   /* Size in Chunks of 32B 1 => 64 bytes         */
+  desc->hwHdr.Dm        = 1;   /* 1=DMA Mode, 0=Fifo Mode                     */
+
+  DMA_SetVc(desc,vc);          /* Set virtual channel and dynamic             */
+
+  desc->hwHdr.X         = x;   /* Destination coordinates                     */
+  desc->hwHdr.Y         = y;
+  desc->hwHdr.Z         = z;
+
+  desc->hwHdr.Flags          = 0x1;  /* Flags[7]=Remote-Get                   */
+  desc->hwHdr.iDMA_Fifo_ID   =       /* Destination inj fifo ID               */
+    recv_inj_fifo_id + ( recv_inj_fifo_grp_id * DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
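+
+/*
+ * Usage sketch (hypothetical ids and coordinates): a remote get whose
+ * 32-byte payload is a single prepared direct-put descriptor that node
+ * (1,2,3) will inject into its own fifo, i.e. the "send one descriptor,
+ * get the data back" pattern described above.  The injection counter
+ * (grp 0, id 2) is assumed to cover the buffer holding that descriptor.
+ */
+static inline int TorusRemoteGetExample( DMA_InjDescriptor_t *rget )
+{
+  return DMA_TorusRemoteGetDescriptor( rget,
+				       1, 2, 3, /* x, y, z               */
+				       0,       /* hints: hw-computed    */
+				       0,       /* vc: dynamic 0         */
+				       0, 2,    /* inj ctr grp, id       */
+				       0,       /* send_offset           */
+				       0, 3 );  /* dest inj fifo grp, id */
+}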
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Remote-Get Message
+ *
+ * A local remote-get message is one whose data is directly put by the DMA into
+ * an injection fifo on the local node...it does not go into a reception fifo.
+ * Therefore, the payload of this message is one (or more) descriptors for
+ * another message that is to be injected on the local node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor.  Thus, Chunks = 2-1 = 1 (a 64-byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * A local remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = 32.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = Set based on caller's "recv_inj_fifo_id" (see note).
+ *   - Chunks          = Set to (2)-1 = 1.
+ *   - Dm              = 1 (Indicates a DMA packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 1.
+ *   - iDMA_Fifo_ID    = Injection fifo ID where the payload will be injected.
+ *                       Set based on caller's inj_ctr_grp_id and inj_ctr_id.
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_inj_fifo_grp_id  Injection fifo group ID where payload
+ *                                       will be injected on local node
+ *                                       (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in]      recv_inj_fifo_id      Injection fifo ID (within the
+ *                                       recv_inj_fifo_grp_id group)
+ *                                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ *       hardware packet header are determined by the recv_inj_fifo_grp_id:
+ *       - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *
+ */
+int  DMA_LocalRemoteGetDescriptor(
+				  DMA_InjDescriptor_t *desc,
+				  unsigned int         inj_ctr_grp_id,
+				  unsigned int         inj_ctr_id,
+				  unsigned int         send_offset,
+				  unsigned int         recv_inj_fifo_grp_id,
+				  unsigned int         recv_inj_fifo_id
+				 )
+{
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id       < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id           < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_inj_fifo_grp_id < DMA_NUM_INJ_FIFO_GROUPS );
+  SPI_assert( recv_inj_fifo_id     < DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->local_memcopy  =  1; /* 1 bit */
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  32;
+
+   /*  Torus Headers */
+  DMA_SetDescriptorPids( desc,
+			 recv_inj_fifo_grp_id ); /* Pids based on recv fifo   */
+                                                 /* group id                  */
+
+  desc->hwHdr.Chunks    = 1;   /* Chunks encodes (chunks-1): 1 => 64 bytes    */
+  desc->hwHdr.Dm        = 1;   /* 1=DMA Mode, 0=Fifo Mode                     */
+
+  desc->hwHdr.Flags          = 0x1;  /* Flags[7]=Remote-Get                   */
+  desc->hwHdr.iDMA_Fifo_ID   =       /* Destination inj fifo ID               */
+    recv_inj_fifo_id + ( recv_inj_fifo_grp_id * DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+  return 0;
+}
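+
+/*
+ * Illustrative usage sketch (not part of the original sources; all IDs are
+ * hypothetical): building a local remote-get descriptor whose 32-byte
+ * payload is itself a descriptor to be injected locally.
+ *
+ *   DMA_InjDescriptor_t rget;
+ *   int rc = DMA_LocalRemoteGetDescriptor(
+ *              &rget,
+ *              0,    // inj_ctr_grp_id:       injection counter group 0
+ *              5,    // inj_ctr_id:           counter 5 within that group
+ *              0,    // send_offset:          payload starts at counter base
+ *              1,    // recv_inj_fifo_grp_id: payload injected into group 1
+ *              3 );  // recv_inj_fifo_id:     fifo 3 within that group
+ *
+ * For a payload of N descriptors, the caller would then override
+ * rget.hwHdr.Chunks and rget.msg_length as described above.
+ */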
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Message
+ *
+ * A torus memory fifo message is one that is sent to another node and its data
+ * is put into a reception memory fifo by the DMA on the destination node.
+ *
+ * A torus memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_fifo_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 0 (Indicates a memory fifo packet).
+ *   - Dynamic         = Set based on caller's "vc".
+ *   - VC              = Set to caller's "vc".
+ *   - X,Y,Z           = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (initialized to 0, and unchanged in the first packet.
+ *                          Increased by 240 in each subsequent packet, reflecting
+ *                          the number of bytes transferred in all previous
+ *                          packets).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - SW_Arg          = User-defined 24 bits.  Set to caller's sw_arg.
+ *   - Func_Id         = The registration ID of a function to receive control
+ *                       on the destination node to process the packet.
+ *                       Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      x                The destination's x coordinate (8 bits).
+ * \param[in]      y                The destination's y coordinate (8 bits).
+ * \param[in]      z                The destination's z coordinate (8 bits).
+ * \param[in]      recv_fifo_grp_id Reception fifo group ID
+ *                                  (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  Both of x+ and x- cannot be set at the same
+ *                                  time...same with y and z.
+ * \param[in]      vc               The virtual channel that the packet must go
+ *                                  into if it fails to win the bypass
+ *                                  arbitration in the receiving node.
+ *                                  - 0 = Virtual channel dynamic 0
+ *                                  - 1 = Virtual channel dynamic 1
+ *                                  - 2 = Virtual channel deterministic bubble
+ *                                  - 3 = Virtual channel deterministic priority
+ * \param[in]      sw_arg           User-defined 24 bits to be placed into the
+ *                                  packets (bits 8-31).
+ * \param[in]      function_id      Function id (8 bit registration ID) of the
+ *                                  function to receive control on the
+ *                                  destination node to process packets for this
+ *                                  message.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_fifo_grp_id:
+ *       - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+*/
+int  DMA_TorusMemFifoDescriptor(
+				DMA_InjDescriptor_t *desc,
+				unsigned int         x,
+				unsigned int         y,
+				unsigned int         z,
+				unsigned int         recv_fifo_grp_id,
+				unsigned int         hints,
+				unsigned int         vc,
+				unsigned int         sw_arg,
+				unsigned int         function_id,
+				unsigned int         inj_ctr_grp_id,
+				unsigned int         inj_ctr_id,
+				unsigned int         send_offset,
+				unsigned int         msg_len
+			       )
+{
+  int c;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( (hints & 0x0000003F) == hints );
+  SPI_assert( vc <= 3 );
+  SPI_assert( inj_ctr_grp_id   < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id       < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS );
+
+#ifndef NDEBUG
+
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  SPI_assert( x < personality_info.xNodes );
+  SPI_assert( y < personality_info.yNodes );
+  SPI_assert( z < personality_info.zNodes );
+
+#endif
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits       */
+
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  msg_len;
+
+   /*  Torus Headers */
+
+  desc->hwHdr.CSum_Skip = DMA_CSUM_SKIP;    /* Checksum all but header        */
+  desc->hwHdr.Sk        = DMA_CSUM_BIT;     /* Checksum entire packet         */
+  desc->hwHdr.Hint      = hints;            /* Hint Bits from caller          */
+
+  DMA_SetDescriptorPids( desc,
+			 recv_fifo_grp_id ); /* Pids based on recv group id   */
+
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first   */
+                                 /* packet.                                   */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,    */
+                                 /* 7 for 8 chunks).                          */
+
+  DMA_SetVc( desc,
+	     vc );                          /* Virtual channel & Dynamic.     */
+
+  desc->hwHdr.X         = x;                /* Destination coordinates        */
+  desc->hwHdr.Y         = y;
+  desc->hwHdr.Z         = z;
+
+  desc->hwHdr.SW_Arg    = sw_arg;           /* User-defined                   */
+  desc->hwHdr.Func_Id   = function_id;      /* Registration id                */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
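+
+/*
+ * Illustrative usage sketch (not part of the original sources; IDs and
+ * lengths are hypothetical): a memory fifo message to node (1,2,3),
+ * letting the hardware pick the hint bits and using dynamic VC 0.
+ *
+ *   DMA_InjDescriptor_t desc;
+ *   int rc = DMA_TorusMemFifoDescriptor(
+ *              &desc,
+ *              1, 2, 3,   // destination x, y, z coordinates
+ *              0,         // recv_fifo_grp_id => (pid0,pid1) = (0,0)
+ *              0,         // hints: 0 lets the hardware compute them
+ *              0,         // vc: dynamic 0
+ *              0x123456,  // sw_arg: 24 user-defined bits
+ *              7,         // function_id: a registered receive function
+ *              0, 4,      // inj_ctr_grp_id, inj_ctr_id
+ *              0,         // send_offset from the counter's pa_base
+ *              1024 );    // msg_len in bytes
+ */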
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Memory Fifo Message
+ *
+ * A local memory fifo message is one whose data is put into a reception
+ * memory fifo on the same node by the DMA.
+ *
+ * A local memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 1
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = 0 (not used).
+ *   - Sk              = 0 (not used).
+ *   - Hint            = 0 (not used).
+ *   - Pid0, Pid1      = Set based on caller's "recv_fifo_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 0 (Indicates a memory fifo packet).
+ *   - Dynamic         = 0 (not used).
+ *   - VC              = 0 (not used).
+ *   - X,Y,Z           = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - SW_Arg          = User-defined 24 bits.  Set to caller's sw_arg.
+ *   - Func_Id         = The registration ID of a function to receive control
+ *                       on this local node to process the packet.
+ *                       Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      recv_fifo_grp_id Reception fifo group ID
+ *                                  (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]      sw_arg           User-defined 24 bits to be placed into the
+ *                                  packets (bits 8-31).
+ * \param[in]      function_id      Function id (8 bit registration ID) of the
+ *                                  function to receive control on this
+ *                                  local node to process packets for this
+ *                                  message.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for memory fifo DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_fifo_grp_id:
+ *       - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+*/
+int  DMA_LocalMemFifoDescriptor(
+				DMA_InjDescriptor_t *desc,
+				unsigned int         recv_fifo_grp_id,
+				unsigned int         sw_arg,
+				unsigned int         function_id,
+				unsigned int         inj_ctr_grp_id,
+				unsigned int         inj_ctr_id,
+				unsigned int         send_offset,
+				unsigned int         msg_len
+			       )
+{
+  int c;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id   < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id       < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS );
+
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->local_memcopy  =  1; /* 1 bit */
+
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits       */
+
+  desc->base_offset    =  send_offset;
+  desc->msg_length     =  msg_len;
+
+   /*  Torus Headers */
+  DMA_SetDescriptorPids( desc,
+			 recv_fifo_grp_id ); /* Pids based on recv group id   */
+
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first   */
+                                 /* packet.                                   */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,    */
+                                 /* 7 for 8 chunks).                          */
+
+  desc->hwHdr.SW_Arg    = sw_arg;           /* User-defined                   */
+  desc->hwHdr.Func_Id   = function_id;      /* Registration id                */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
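+
+/*
+ * Worked example (illustrative) of the default packet sizing described in
+ * the notes above: each packet carries a 16-byte header, and the Chunks
+ * field encodes (number of 32-byte chunks) - 1.  For msg_len = 500:
+ *   - first packet:  8 chunks = 256 bytes = 16 header + 240 payload
+ *   - second packet: 8 chunks, another 240 payload (480 bytes so far)
+ *   - last packet:   20 payload bytes remain; 20 + 16 header = 36 bytes,
+ *                    so 2 chunks (64 bytes)
+ * DMA_PacketChunks(msg_len) performs this first-packet calculation.
+ */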
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Broadcast Message
+ *
+ * A torus direct put broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, its data
+ * is directly put into memory on the nodes along that line by the DMA on those
+ * nodes...it does not go into a reception fifo.  Only one hint bit can be
+ * specified, dictating the direction (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum.  Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus direct-put broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_ctr_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 1 (Indicates a direct-put packet).
+ *   - Dynamic         = 0 (Deterministic).
+ *   - VC              = Virtual Channel: Deterministic Bubble Normal.
+ *   - X,Y,Z           = Set according to the hints:
+ *                       Two of the directions are set to this node's
+ *                       coordinates (no movement in those directions).
+ *                       One direction is set to the dest specified
+ *                       by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = Destination message offset (from the reception
+ *                       counter's base address).  Set to caller's recv_offset.
+ *   - rDMA_Counter    = Reception counter ID.  This counter is located on the
+ *                       destination node and contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       recv_ctr_grp_id and recv_ctr_id.
+ *   - Payload_Bytes   = Number of valid bytes in the payload.  Set by iDMA.
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - Func_Id         = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      dest             The final torus destination coordinate
+ *                                  along the line specified by the hints.
+ *                                  Should not exceed the number of nodes in
+ *                                  the direction of travel.
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  Only one bit may be specified.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      recv_ctr_grp_id  Reception counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      recv_ctr_id      Reception counter ID (within the recv counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      recv_offset      Offset of the payload from the pa_base
+ *                                  associated with the specified reception
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_ctr_grp_id:
+ *       - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+*/
+int  DMA_TorusDirectPutBcastDescriptor(
+				       DMA_InjDescriptor_t *desc,
+				       unsigned int         dest,
+				       unsigned int         hints,
+				       unsigned int         inj_ctr_grp_id,
+				       unsigned int         inj_ctr_id,
+				       unsigned int         send_offset,
+				       unsigned int         recv_ctr_grp_id,
+				       unsigned int         recv_ctr_id,
+				       unsigned int         recv_offset,
+				       unsigned int         msg_len
+				      )
+{
+
+  int dest_x,dest_y,dest_z;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id  < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id      < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( recv_ctr_id     < DMA_NUM_COUNTERS_PER_GROUP );
+
+  /*
+   * Previous code to retrieve our node's x,y,z coords:
+   *   BGLPartitionGetCoords( &dest_x, &dest_y, &dest_z );
+   *
+   * If the node's x,y,z coordinates have not yet been retrieved from the
+   * personality, go get the personality and cache the node's coordinates
+   * in the static personality_info structure.  Then, use them to init
+   * the dest_x,y,z variables.
+   */
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  dest_x = personality_info.nodeXCoordinate;
+  dest_y = personality_info.nodeYCoordinate;
+  dest_z = personality_info.nodeZCoordinate;
+
+  /*
+   * Examine the hint bits specified by the caller:
+   * - Ensure only one of them is specified
+   * - Ensure dest is valid for the direction of the broadcast
+   * - Override x, y, or z with dest for the specified direction
+   */
+
+  switch(hints) {
+
+  case  DMA_PACKET_HINT_XP:
+  case  DMA_PACKET_HINT_XM:
+    dest_x = dest;
+    SPI_assert( dest <= personality_info.xNodes );
+    break;
+
+  case  DMA_PACKET_HINT_YP:
+  case  DMA_PACKET_HINT_YM:
+    dest_y = dest;
+    SPI_assert( dest <= personality_info.yNodes );
+    break;
+
+  case  DMA_PACKET_HINT_ZP:
+  case  DMA_PACKET_HINT_ZM:
+    dest_z = dest;
+    SPI_assert( dest <= personality_info.zNodes );
+    break;
+
+  default:
+    SPI_assert(0);
+
+  }
+
+  /* Build the descriptor */
+  DMA_TorusDirectPutDescriptor(desc,
+			       dest_x,
+			       dest_y,
+			       dest_z,
+			       hints,
+			       DMA_PACKET_VC_BN,
+			       inj_ctr_grp_id,
+			       inj_ctr_id,
+			       send_offset,
+			       recv_ctr_grp_id,
+			       recv_ctr_id,
+			       recv_offset,
+			       msg_len);
+
+   /*  Set the deposit bit so the packet is deposited on each node along the line */
+  desc->hwHdr.Dp = 1;
+
+
+  return 0;
+}
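+
+/*
+ * Illustrative usage sketch (not part of the original sources; counter IDs
+ * and offsets are hypothetical): broadcast a direct-put message to every
+ * node in the x+ direction, up to the final x coordinate on the line.
+ *
+ *   DMA_InjDescriptor_t bcast;
+ *   int rc = DMA_TorusDirectPutBcastDescriptor(
+ *              &bcast,
+ *              7,                   // dest: final x coordinate on the line
+ *              DMA_PACKET_HINT_XP,  // exactly one hint bit: x+
+ *              0, 2,                // inj_ctr_grp_id, inj_ctr_id
+ *              0,                   // send_offset
+ *              0, 3,                // recv_ctr_grp_id, recv_ctr_id
+ *              0,                   // recv_offset
+ *              4096 );              // msg_len in bytes
+ */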
+
+
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Broadcast Message
+ *
+ * A torus memory fifo broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, its data is
+ * put into a reception memory fifo by the DMA on the destination nodes along
+ * that line.  Only one hint bit can be specified, dictating the direction
+ * (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum.  Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus memory fifo broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ *   - prefetch_only   = 0
+ *   - local_memcopy   = 0
+ *   - idma_counterId  = Injection counter ID associated with the data being
+ *                       sent.  This counter contains the base address of the
+ *                       message and the message length.  Set based on caller's
+ *                       inj_ctr_grp_id and inj_ctr_id.
+ *   - base_offset     = Message offset (from the injection counter's base
+ *                       address).  Set to caller's send_offset.
+ *   - msg_length      = Message length.  Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ *   - CSum_Skip       = DMA_CSUM_SKIP.
+ *   - Sk              = DMA_CSUM_BIT.
+ *   - Hint            = Set to caller's "hints".
+ *   - Pid0, Pid1      = Set based on caller's "recv_fifo_grp_id" (see note).
+ *   - Chunks          = Set to largest size consistent with msg_len.
+ *   - Dm              = 0 (Indicates a memory fifo packet).
+ *   - Dynamic         = 0 (Deterministic).
+ *   - VC              = Virtual Channel: Deterministic Bubble Normal.
+ *   - X,Y,Z           = Set according to the hints:
+ *                       Two of the directions are set to this node's
+ *                       coordinates (no movement in those directions).
+ *                       One direction is set to the dest specified
+ *                       by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ *   - Put_Offset      = 0 (not used).
+ *   - rDMA_Counter    = 0 (not used).
+ *   - Payload_Bytes   = 0 (not used).
+ *   - Flags           = Pacing     = 0.
+ *                       Remote-Get = 0.
+ *   - iDMA_Fifo_ID    = 0 (not used).
+ *   - SW_Arg          = User-defined 24 bits.  Set to caller's sw_arg.
+ *   - Func_Id         = The registration ID of a function to receive control
+ *                       on the destination node to process the packet.
+ *                       Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out]  desc             Pointer to the storage where the descriptor
+ *                                  will be created.
+ * \param[in]      dest             The final torus destination coordinate
+ *                                  along the line specified by the hints.
+ *                                  Should not exceed the number of nodes in
+ *                                  the direction of travel.
+ * \param[in]      recv_fifo_grp_id Reception fifo group ID
+ *                                  (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]      hints            Hint bits for torus routing (6 bits).
+ *                                  Each bit corresponds to x+, x-, y+, y-,
+ *                                  z+, z-.  If a bit is set, it indicates that
+ *                                  the packet wants to travel along the
+ *                                  corresponding direction.  If all bits are
+ *                                  zero, the hardware calculates the hint bits.
+ *                                  Only one bit may be specified.
+ * \param[in]      sw_arg           User-defined 24 bits to be placed into the
+ *                                  packets (bits 8-31).
+ * \param[in]      function_id      Function id (8 bit registration ID) of the
+ *                                  function to receive control on the
+ *                                  destination node to process packets for this
+ *                                  message.
+ * \param[in]      inj_ctr_grp_id   Injection counter group ID
+ *                                  (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in]      inj_ctr_id       Injection counter ID (within the inj counter
+ *                                  group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in]      send_offset      Offset of the send payload from the pa_base
+ *                                  associated with the specified injection
+ *                                  counter.
+ * \param[in]      msg_len          Total message length (in bytes).
+ *
+ * \retval  0         Success
+ * \retval  non-zero  Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ *       checksum.  In the first byte of the torus hardware packet header,
+ *       this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ *       The defaults can be changed by changing DMA_CSUM_SKIP and
+ *       DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ *       with the message size.  For example,
+ *       - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ *         with the possible exception of the last packet, which could contain
+ *         fewer chunks (209... of payload + 16 header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for memory fifo DMA messages, the pid0 and pid1 bits in the
+ *       torus hardware packet header are determined by the recv_fifo_grp_id:
+ *       - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ *       - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ *       - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ *       - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *       Pid0 determines into which physical torus fifo group on the destination
+ *       node the packet is put, prior to the DMA receiving it.  Other than that,
+ *       the only use for the pid bits is for debug, i.e., if headers are being
+ *       saved.
+*/
+int  DMA_TorusMemFifoBcastDescriptor(
+				     DMA_InjDescriptor_t *desc,
+				     unsigned int         dest,
+				     unsigned int         recv_fifo_grp_id,
+				     unsigned int         hints,
+				     unsigned int         sw_arg,
+				     unsigned int         function_id,
+				     unsigned int         inj_ctr_grp_id,
+				     unsigned int         inj_ctr_id,
+				     unsigned int         send_offset,
+				     unsigned int         msg_len
+				    )
+{
+  int dest_x,dest_y,dest_z;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id   < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id       < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS );
+
+  /*
+   * Previous code to retrieve our node's x,y,z coords:
+   *   BGLPartitionGetCoords( &dest_x, &dest_y, &dest_z );
+   *
+   * If the node's x,y,z coordinates have not yet been retrieved from the
+   * personality, go get the personality and cache the node's coordinates
+   * in the static personality_info structure.  Then, use them to init
+   * the dest_x,y,z variables.
+   */
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  dest_x = personality_info.nodeXCoordinate;
+  dest_y = personality_info.nodeYCoordinate;
+  dest_z = personality_info.nodeZCoordinate;
+
+  /*
+   * Examine the hint bits specified by the caller:
+   * - Ensure only one of them is specified
+   * - Ensure dest is valid for the direction of the broadcast
+   * - Override x, y, or z with dest for the specified direction
+   */
+
+  switch(hints) {
+
+  case  DMA_PACKET_HINT_XP:
+  case  DMA_PACKET_HINT_XM:
+    dest_x = dest;
+    SPI_assert( dest <= personality_info.xNodes );
+    break;
+
+  case  DMA_PACKET_HINT_YP:
+  case  DMA_PACKET_HINT_YM:
+    dest_y = dest;
+    SPI_assert( dest <= personality_info.yNodes );
+    break;
+
+  case  DMA_PACKET_HINT_ZP:
+  case  DMA_PACKET_HINT_ZM:
+    dest_z = dest;
+    SPI_assert( dest <= personality_info.zNodes );
+    break;
+
+  default:
+    SPI_assert(0);
+
+  }
+
+  /* Build the descriptor */
+  DMA_TorusMemFifoDescriptor(
+    desc,
+    dest_x,
+    dest_y,
+    dest_z,
+    recv_fifo_grp_id,
+    hints,
+    DMA_PACKET_VC_BN,
+    sw_arg,
+    function_id,
+    inj_ctr_grp_id,
+    inj_ctr_id,
+    send_offset,
+    msg_len);
+
+   /*  Set the deposit bit so the packet is deposited on each node along the line */
+  desc->hwHdr.Dp = 1;
+
+
+  return 0;
+}
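+
+/*
+ * Illustrative usage sketch (not part of the original sources; IDs are
+ * hypothetical): the memory fifo broadcast follows the same pattern as the
+ * direct-put broadcast above, but delivers into reception fifos, so it
+ * takes a fifo group and a receive function instead of a reception counter.
+ *
+ *   DMA_InjDescriptor_t bcast;
+ *   int rc = DMA_TorusMemFifoBcastDescriptor(
+ *              &bcast,
+ *              7,                   // dest: final coordinate on the line
+ *              0,                   // recv_fifo_grp_id
+ *              DMA_PACKET_HINT_YM,  // exactly one hint bit: y-
+ *              0x00BEEF,            // sw_arg: 24 user-defined bits
+ *              7,                   // function_id
+ *              0, 2,                // inj_ctr_grp_id, inj_ctr_id
+ *              0,                   // send_offset
+ *              2048 );              // msg_len in bytes
+ */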
+EXPORT_SYMBOL(DMA_GetPersonalityInfo) ;
+EXPORT_SYMBOL(DMA_TorusDirectPutDescriptor) ;
+EXPORT_SYMBOL(DMA_LocalDirectPutDescriptor) ;
+EXPORT_SYMBOL(DMA_LocalPrefetchOnlyDescriptor) ;
+EXPORT_SYMBOL(DMA_TorusRemoteGetDescriptor) ;
+EXPORT_SYMBOL(DMA_LocalRemoteGetDescriptor) ;
+EXPORT_SYMBOL(DMA_TorusMemFifoDescriptor) ;
+EXPORT_SYMBOL(DMA_LocalMemFifoDescriptor) ;
+EXPORT_SYMBOL(DMA_TorusDirectPutBcastDescriptor) ;
+EXPORT_SYMBOL(DMA_TorusMemFifoBcastDescriptor) ;
diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c b/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c
new file mode 100644
index 0000000..171dee1
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c
@@ -0,0 +1,207 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/*! \file DMA_InjFifo.c
+ *
+ * \brief Implementations for Functions Defined in bgp/arch/include/spi/DMA_InjFifo.h.
+ *
+ */
+
+#undef  DEBUG_PRINT
+/* #define DEBUG_PRINT 1 */
+
+#ifndef __LINUX_KERNEL__
+
+#include <common/bgp_personality_inlines.h>
+#include <spi/bgp_SPI.h>
+#include <stdio.h>
+#include <errno.h>
+
+#else
+
+#include <spi/linux_kernel_spi.h>
+
+#endif /* ! __LINUX_KERNEL__ */
+
+/*!
+ *
+ * \brief Remote Get Fifo Full Handler Table
+ *
+ * An array of entries, one per injection fifo.  Each entry specifies the fifo
+ * group structure and the handler function that will receive control to
+ * handle a remote get fifo full condition for fifos in that fifo group.
+ */
+DMA_InjFifoRgetFifoFullHandlerEntry_t DMA_RgetFifoFullHandlerTable[DMA_NUM_INJ_FIFOS];
+
+
+/*!
+ * \brief Remote Get Fifo Full Init Has Been Done Indicator
+ *
+ *  0 means the initialization has not been done.
+ *  1 means the initialization has been done.
+ */
+int DMA_InjFifoRgetFifoFullInitHasBeenDone = 0;
+
+
+/*!
+ * \brief Pointer to Barrier function Used By Remote Get Fifo Full Interrupt Handler
+ */
+static void (*DMA_RgetFifoFullHandlerBarrierFcn)(void *);
+/*!
+ * \brief Generic arg for Barrier function
+ */
+static void *DMA_RgetFifoFullHandlerBarrierArg;
+
+
+/*!
+ * \brief Remote Get Fifo Full Interrupt Handler
+ *
+ * This function receives control when a remote get fifo becomes full
+ * It attempts to recover from the condition and restart the DMA.
+ * It receives control in all cores (a broadcast interrupt).
+ *
+ * Upon entry, the DMA is assumed to have been stopped, both the iDMA
+ * and the rDMA.  This has been done by the kernel's interrupt
+ * handler that invoked this function.
+ */
+void DMA_InjFifoRgetFifoFullInterruptHandler(uint32_t arg1,
+                                             uint32_t arg2,
+                                             uint32_t arg3,
+                                             uint32_t arg4)
+{
+  uint32_t global_fnum, freeSpaceInBytes;
+  uint32_t core_num = Kernel_PhysicalProcessorID();
+
+  /* If Init has not been done yet, ignore the interrupt.
+   */
+  if ( DMA_InjFifoRgetFifoFullInitHasBeenDone == 0 )
+  {
+    pthread_poof_np();  /*  Return from this interrupt. */
+  }
+
+  /*
+   * Barrier across all cores.  This is needed to ensure that
+   * 1. The DMA has been stopped (only the last core to see this interrupt
+   *    stops the DMA).
+   * 2. We don't exit from this handler until the core that needs to handle
+   *    the rget fifo full condition has cleared the condition causing the
+   *    interrupt, or else it will fire right away again.
+   *
+   * This barrier, while allocated by the main core of each process on the
+   * compute node, has been modified during DMA SPI Setup to expect the
+   * appropriate number of cores to participate.
+   */
+
+  DMA_RgetFifoFullHandlerBarrierFcn( DMA_RgetFifoFullHandlerBarrierArg );
+
+  /*
+   * For each injection fifo...
+   *   For each entry of the RgetFifoFullHandlerTable that is managed
+   *   by our core and has a registered rget fifo full handler,
+   *   1. Determine whether this rget fifo is full (or nearly so)
+   *   2. If full, call the registered handler to handle the condition.
+   */
+  for ( global_fnum=0; global_fnum<DMA_NUM_INJ_FIFOS; global_fnum++)
+  {
+    if ( ( DMA_RgetFifoFullHandlerTable[global_fnum].core_num == core_num ) &&
+	 ( DMA_RgetFifoFullHandlerTable[global_fnum].handler ) )
+    {
+      /* The rget fifo is considered full (or nearly so) if it has only
+       * enough free space left to hold at most one descriptor.
+       */
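+      /* DMA_InjFifoGetFreeSpaceById appears to report free space in 16-byte
+       * units (quads), so the << 4 below converts that count to bytes; the
+       * *16 scaling of DMA_MIN_INJECT_SIZE_IN_QUADS in the comparison that
+       * follows matches this assumption.
+       */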
+      freeSpaceInBytes =
+	DMA_InjFifoGetFreeSpaceById (
+			      DMA_RgetFifoFullHandlerTable[global_fnum].fg_ptr,
+			      global_fnum & 0x1f,  /*  relative fifo number */
+			      1,
+			      1) << 4;
+      if ( freeSpaceInBytes <= (DMA_MIN_INJECT_SIZE_IN_QUADS*16) +
+	                        DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES )
+      {
+	/*
+	 * Call the handler function to free up space in the fifo,
+	 * if possible.
+	 */
+
+	(*(DMA_RgetFifoFullHandlerTable[global_fnum].handler))(
+			DMA_RgetFifoFullHandlerTable[global_fnum].fg_ptr,
+			global_fnum & 0x1F,
+			DMA_RgetFifoFullHandlerTable[global_fnum].handler_parm);
+      }
+    }
+  }
+
+  /*
+   * Barrier.  Wait here until all cores reach this point in the interrupt
+   * handler.
+   */
+
+  DMA_RgetFifoFullHandlerBarrierFcn( DMA_RgetFifoFullHandlerBarrierArg );
+
+  /*
+   * Exit from the interrupt.
+   */
+  pthread_poof_np();
+}
+
+/*!
+ * \brief Remote Get Fifo Full Initialization
+ *
+ * Initialize data structures and interrupt handlers to handle a remote get
+ * fifo full condition.
+ */
+void DMA_InjFifoRgetFifoFullInit( Kernel_InterruptGroup_t  rget_interruptGroup,
+                                  void                   (*rget_barrier)(void *) ,
+                                  void                    *rget_barrier_arg
+                                )
+{
+   int i;
+
+   /*
+    * Clear the handler table.
+    */
+   for ( i=0; i<DMA_NUM_INJ_FIFOS; i++ )
+   {
+     DMA_RgetFifoFullHandlerTable[i].fg_ptr       = NULL;
+     DMA_RgetFifoFullHandlerTable[i].handler      = NULL;
+     DMA_RgetFifoFullHandlerTable[i].handler_parm = NULL;
+     DMA_RgetFifoFullHandlerTable[i].core_num     = 0;
+   }
+
+   /*
+    * Clear the lockbox counter associated with this interrupt.
+    * The lockbox keeps track of which cores have entered and exited
+    * the kernel's interrupt handler.
+    */
+   LockBox_FetchAndClear( rget_interruptGroup );
+
+   DMA_RgetFifoFullHandlerBarrierFcn = rget_barrier;
+   DMA_RgetFifoFullHandlerBarrierArg = rget_barrier_arg;
+
+   /*
+    * Register the interrupt handler to handle the remote get
+    * fifo full condition.
+    */
+   Kernel_SetCommThreadConfig(Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G3_HIER_POS, 24),
+                              COMMTHRD_OPCODE_BCAST             |
+                              COMMTHRD_OPCODE_CALLFUNC,
+                              rget_interruptGroup,
+			      DMA_InjFifoRgetFifoFullInterruptHandler,
+			      0, 0, 0, 0);
+}
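+
+/*
+ * Illustrative setup sketch (not part of the original sources): wiring up
+ * the remote get fifo full handling at DMA SPI setup time.  The barrier
+ * function and interrupt group below are hypothetical placeholders, and
+ * the handler above implies DMA_InjFifoRgetFifoFullInitHasBeenDone must be
+ * set to 1 elsewhere once registration completes, or interrupts are ignored.
+ *
+ *   extern void my_core_barrier(void *arg);   // barrier across all cores
+ *   Kernel_InterruptGroup_t rget_grp;         // allocated elsewhere
+ *
+ *   DMA_InjFifoRgetFifoFullInit(rget_grp, my_core_barrier, NULL);
+ *   // ...then fill DMA_RgetFifoFullHandlerTable entries (fg_ptr, handler,
+ *   // handler_parm, core_num) for each remote get fifo of interest.
+ */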
+
diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c b/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c
new file mode 100644
index 0000000..c9292e9
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c
@@ -0,0 +1,3017 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2006,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/*! \file DMA_RecFifo.c
+ *
+ * \brief Implementations for Functions Defined in bgp/arch/include/spi/DMA_RecFifo.h.
+ *
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <asm/bitops.h>
+
+#undef  DEBUG_PRINT
+/* #define DEBUG_PRINT 1 */
+
+#ifndef __LINUX_KERNEL__
+
+#include <spi/DMA_RecFifo.h>
+#include <stdio.h>
+#include <bpcore/ppc450_inlines.h>
+#include <bpcore/ic_memmap.h>
+#include <common/bgp_bitnumbers.h>
+#include <errno.h>
+
+#else
+
+#include <spi/linux_kernel_spi.h>
+/* Interrupt encoding for Blue Gene/P hardware.
+ * Given a BIC group and bit index within the group,
+ * bic_hw_to_irq(group, gint) returns the Linux IRQ number.
+ * (really from asm/bluegene.h, but we get mismatches if we include it)
+ */
+
+#endif /* ! __LINUX_KERNEL__ */
+
+#include <linux/dma-mapping.h>
+
+#define TRACE(x) printk x
+
+
+#if defined(BGP_DD1_WORKAROUNDS)
+
+/*!
+ * \brief Number of times the poll functions have been called and returned
+ *        no packets processed.
+ *
+ * Special Value:  -1 means that the Kernel_ClearFullReceptionFifo() syscall
+ *                 has been invoked, but no packets have been processed
+ *                 since.  This tells the poll function that even if it
+ *                 does not process any packets, it should not increment
+ *                 this counter and ultimately issue the syscall again, because
+ *                 there is no need.
+ */
+int NumEmptyPollFunctionCalls = -1;
+
+/*!
+ * \brief Limit for NumEmptyPollFunctionCalls
+ */
+const int NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT = 10;
+
+#endif
+
+#if defined(CONFIG_BGP_STATISTICS)
+int reception_fifo_histogram[33] ;
+unsigned int reception_hi_watermark ;
+#endif
+static inline int get_tlb_pageid(int tlbindex)
+  {
+    int rc ;
+     /*  PPC44x_TLB_PAGEID is 0 */
+    asm volatile( "tlbre  %[rc],%[index],0"
+                    : [rc] "=r" (rc)
+                    : [index] "r" (tlbindex)
+                    ) ;
+    return rc ;
+ }
+
+static inline int get_tlb_xlat(int tlbindex)
+  {
+    int rc ;
+     /*  PPC44x_TLB_XLAT is 1 */
+    asm volatile( "tlbre  %[rc],%[index],1"
+                    : [rc] "=r" (rc)
+                    : [index] "r" (tlbindex)
+                    ) ;
+    return rc ;
+ }
+
+static inline int get_tlb_attrib(int tlbindex)
+  {
+    int rc ;
+     /*  PPC44x_TLB_ATTRIB is 2 */
+    asm volatile( "tlbre  %[rc],%[index],2"
+                    : [rc] "=r" (rc)
+                    : [index] "r" (tlbindex)
+                    ) ;
+    return rc ;
+ }
+
+static inline int search_tlb(unsigned int vaddr)
+  {
+    int rc ;
+     /*  tlbsx: search the TLB for the entry that maps vaddr */
+    asm volatile( "tlbsx  %[rc],0,%[vaddr]"
+                    : [rc] "=r" (rc)
+                    : [vaddr] "r" (vaddr)
+                    ) ;
+    return rc ;
+ }
+
+static void show_tlbs(unsigned int mioaddr) __attribute__((unused)) ;
+static void show_tlbs(unsigned int mioaddr)
+{
+  int i ;
+  int tlb_index = search_tlb(mioaddr) ;
+  for(i=0;i<64;i+=1)
+    {
+      int pageid=get_tlb_pageid(i) ;
+      int xlat=get_tlb_xlat(i) ;
+      int attrib=get_tlb_attrib(i) ;
+      if( pageid & 0x00000200)  /* PPC44x_TLB_VALID: entry is valid */
+        {
+          printk(KERN_INFO "tlb[%02d]=[%08x %08x %08x]\n",i,pageid,xlat,attrib) ;
+        }
+    }
+  printk(KERN_INFO "mioaddr=0x%08x tlb_index=%d\n", mioaddr,tlb_index) ;
+}
+
+/* char temp_packet[256] __attribute__ ((aligned ( 16))) ; */
+
+/*!
+ * \brief DMA Reception Fifo Shared Memory Structure
+ *
+ * This structure must be shared among the processors in a compute node.  It
+ * contains info that must be maintained and shared for the duration of a job.
+ * This storage is static, maintained across function calls.
+ * In sharedmemory mode, core 0 maintains this info.
+ * In virtual node mode, each core maintains its own info.
+ *
+ */
+typedef struct DMA_RecFifoSharedMemory_t
+{
+  DMA_RecFifoRecvFunction_t recvFunctions[256]; /*!< The registered "normal"
+                                         reception fifo receive functions.
+                                         Filled in by calls to
+                                         DMA_RecFifoRegisterRecvFunction().   */
+
+  void *recvFunctionsParms[256];    /*!< recvFunctionsParms[i] is the
+                                         parameter to pass to
+                                         recvFunctions[i].
+                                         Filled in by calls to
+                                         DMA_RecFifoRegisterRecvFunction().   */
+
+  DMA_RecFifoRecvFunction_t headerRecvFunction; /*!< The registered "header"
+                                         reception fifo receive function.
+                                         Filled in by a call to
+                                         DMA_RecFifoRegisterRecvFunction().   */
+
+  void *headerRecvFunctionParm;     /*!< The parameter to pass to
+                                         headerRecvFunction.
+                                         Filled in by a call to
+                                         DMA_RecFifoRegisterRecvFunction().   */
+
+  DMA_RecFifoRecvFunction_t errorRecvFunction; /*!< The registered "error"
+                                         reception fifo receive function.
+                                         Defaulted to
+                                         &DMA_RecFifoDefaultErrorRecvFunction.
+                                         Filled in by a call to
+                                         DMA_RecFifoRegisterRecvFunction().   */
+
+  void *errorRecvFunctionParm;      /*!< The parameter to pass to
+                                         errorRecvFunction.
+                                         Filled in by a call to
+                                         DMA_RecFifoRegisterRecvFunction().   */
+
+  DMA_RecFifoGroup_t groups[DMA_NUM_REC_FIFO_GROUPS]; /*!< Reception fifo
+                                         group structures, one for each group.
+                                         groups[i] is the group shared by all
+                                         users of reception fifo group i.     */
+
+  unsigned int groupsInitialized[DMA_NUM_REC_FIFO_GROUPS]; /*!< Indicator of
+                                         groups[i] having been initialized.
+                                         0 = not initialized by a call to
+                                             DMA_RecFifoGetFifoGroup() for
+                                             group i.
+                                         1 = initialized.                     */
+
+} DMA_RecFifoSharedMemory_t;
+
+
+/*!
+ * \brief Storage for the Reception Fifo Shared Memory Structure
+ *
+ * This storage is static, maintained across function calls.
+ * In sharedmemory mode, core 0 maintains reception fifo info.
+ * In virtual node mode, each core maintains its own reception fifo info.
+ */
+static DMA_RecFifoSharedMemory_t DMA_RecFifoInfo;
+
+
+/*!
+ * \brief DMA Packet I/O Vector Structure
+ *
+ * This structure describes the payload of a memory fifo packet.
+ * Because of fifo wrapping, the payload may consist of 0, 1, or 2 segments:
+ * - 0 segments:   this is a packet in the header-only, debug fifo.
+ * - 1 segment:    the packet does not wrap the fifo.
+ * - 2 segments:   the packet does wrap the fifo.
+ *
+ */
+typedef struct DMA_PacketIovec_t
+{
+  int   num_segments;    /*!< Number of segments in the payload               */
+  void *payload_ptr[2] ; /*!< Pointer to the payloads in each segment (NULL
+                              if not used).                                   */
+  int   num_bytes[2];    /*!< Number of payload bytes in each segment (0 if
+                              not used).                                      */
+}
+ALIGN_L1D_CACHE DMA_PacketIovec_t;
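+
+/*
+ * Illustrative example (not part of the original sources) of how the iovec
+ * describes a wrapped packet: if 64 payload bytes fit before the fifo's end
+ * and the packet carries 100 payload bytes, the payload splits as
+ *
+ *   iovec.num_segments   = 2;
+ *   iovec.payload_ptr[0] = <64 bytes at the end of the fifo>;
+ *   iovec.num_bytes[0]   = 64;
+ *   iovec.payload_ptr[1] = <36 bytes at the start of the fifo>;
+ *   iovec.num_bytes[1]   = 36;
+ */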
+
+
+static void dumpmem(const void *address, unsigned int length, const char * label)
+  {
+    int x ;
+    printk(KERN_INFO "(>)[%s:%d] Memory dump: %s\n",__func__, __LINE__,label) ;
+    for (x=0;x<length;x+=32)
+      {
+        int *v = (int *)(address+x) ;
+        printk(KERN_INFO "%p: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+            v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
+            ) ;
+      }
+    printk(KERN_INFO "(<)[%s:%d] Memory dump\n",__func__, __LINE__) ;
+  }
+
+
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs.  If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed).  In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * ie, only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in]  grp      The group number (0 through DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in]  target   The core that will receive the interrupt when a
+ *                      fifo in this group reaches its threshold
+ *                      (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ *                      Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler  A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             normal fifo in this group reaches its threshold.
+ *                             This function must be coded to take 4 uint32_t
+ *                             parameters:
+ *                             - A pointer to storage specific to this
+ *                               handler.  This is the normal_handler_parm
+ *                               specified on this function call.
+ *                             - 3 uint32_t parameters that are not used.
+ *                             If normal_handler is NULL, threshold interrupts
+ *                             are not delivered for normal fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  normal_handler_parm   A pointer to storage that should be passed
+ *                                   to the normal interrupt handling function
+ *                                   (see normal_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  header_handler  ** This parameter is deprecated.  Specify NULL.**
+ *                             A pointer to the function to receive control in
+ *                             the I/O thread to handle the interrupt when a
+ *                             header fifo in this group reaches its threshold.
+ *                             This function must be coded to take 2 parameters:
+ *                               void* A pointer to storage specific to this
+ *                                     handler.  This is the header_handler_parm
+ *                                     specified on this function call.
+ *                               int   The global fifo ID of the fifo that hit
+ *                                     its threshold (0 through
+ *                                     NUM_DMA_REC_FIFOS-1).
+ *                             If header_handler is NULL, threshold interrupts
+ *                             are not delivered for header fifos in this group.
+ *                             Ignored on subsequent call with the same group.
+ * \param[in]  header_handler_parm   ** This parameter is deprecated.  Specify
+ *                                      NULL. **
+ *                                   A pointer to storage that should be passed
+ *                                   to the header interrupt handling function
+ *                                   (see header_handler parameter).
+ *                                   Ignored on subsequent call with the same
+ *                                   group.
+ * \param[in]  interruptGroup  A InterruptGroup_t that identifies the
+ *                             group of interrupts that the fifos in this group
+ *                             will become part of.
+ *                             Ignored on subsequent call with the same group.
+ *
+ * \return  RecFifoGroupStruct  Pointer to a DMA Reception Fifo Group structure
+ *                              that reflects the fifos that are being used in
+ *                              this group.  This same structure is shared by
+ *                              all users of this reception fifo group.
+ *                              NULL is returned if an error occurs.
+ *
+ * \note  The following comments from Phil about the internals of the syscall:
+ *   - error checks
+ *     - 0 <= group_id < 4
+ *     - the start of the fifo group is a valid virtual address (tlb mapped)?
+ *   - disable the rDMA
+ *   - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information
+ *   - loop through the map to determine how many and which fifos in this group
+ *     are used, including headers
+ *   - filling in the addresses of used fifos
+ *     - In particular, any pointer to any fifo in the group that is not used
+ *       will have a null pointer
+ *   - furthermore,
+ *     - write starting values to all used fifos
+ *     - make sure all interrupts are cleared
+ *     - enable rDMA
+ *
+ */
+DMA_RecFifoGroup_t *
+DMA_RecFifoGetFifoGroup(
+			int                               grp,
+			int                               target,
+			Kernel_CommThreadHandler          normal_handler,
+			void                             *normal_handler_parm,
+			Kernel_CommThreadHandler          header_handler,
+			void                             *header_handler_parm,
+			Kernel_InterruptGroup_t           interruptGroup
+		       )
+{
+  int rc;
+
+  TRACE((
+		  KERN_INFO "(>) DMA_RecFifoGetFifoGroup\n"));
+
+  SPI_assert( (0 <= grp   ) && (grp    < DMA_NUM_REC_FIFO_GROUPS ) );
+  SPI_assert( (0 <= target) && (target < DMA_NUM_REC_FIFO_GROUPS ) );
+
+  if ( DMA_RecFifoInfo.groupsInitialized[grp] == 0 ) /* Is                    */
+                                             /* DMA_RecFifoInfo.groups[grp]   */
+                                             /* not filled-in yet?            */
+    {
+      /*
+       * If an interrupt handler has been specified, invoke the system call
+       * to configure the kernel to invoke the handler when the reception
+       * fifo threshold crossed interrupt fires.
+       */
+
+      if (normal_handler)
+      {
+	/*
+	 * Calculate the IRQ to be one of
+	 * - 28: rec fifo group 0 crossed threshold
+	 * - 29: rec fifo group 1 crossed threshold
+	 * - 30: rec fifo group 2 crossed threshold
+	 * - 31: rec fifo group 3 crossed threshold
+	 * based on the DMA group number.
+	 */
+	unsigned irqInGroup = 28 + grp;
+/*  tjcw ???? not sure what gets the right interrupts ... */
+/*  28+ gives something to do with memory transfers. */
+/*  we want 8+, which is related to FIFO fullness */
+/*   unsigned irqInGroup = 8 + grp; */
+
+	/*
+	 * Calculate an interrupt ID, which is the BIC interrupt group (2)
+	 * combined with the IRQ number.
+	 */
+/* 	int interruptID = Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G2_HIER_POS, */
+/* 					       irqInGroup); */
+	int interruptID = bic_hw_to_irq(_BGP_IC_DMA_NFT_G2_HIER_POS,irqInGroup);
+
+	/*
+	 * Calculate the opcode indicating
+	 * - the target core for interrupt
+	 * - to call the specified function when the interrupt fires
+	 * - to disable interrupts before calling the specified function
+	 * - to enable interrupts after calling the specified function
+	 */
+	int opcode = ( COMMTHRD_OPCODE_CORE0 + target ) |
+	               COMMTHRD_OPCODE_CALLFUNC |
+	               COMMTHRD_OPCODE_DISABLEINTONENTRY |
+	               COMMTHRD_OPCODE_ENABLEINTONPOOF  ;
+
+	/*
+	 * Configure this interrupt with the kernel.
+	 */
+	  TRACE((
+			  KERN_INFO "(=) DMA_RecFifoGetFifoGroup interruptID=%d\n",interruptID));
+	rc = Kernel_SetCommThreadConfig(interruptID,
+					opcode,
+					(uint32_t*)interruptGroup,
+					normal_handler,
+					(uint32_t)normal_handler_parm,
+					(uint32_t)NULL,
+					(uint32_t)NULL,
+					(uint32_t)NULL);
+	if (rc) return NULL;
+      }
+
+      /*
+       * Proceed to get the reception fifo group
+       */
+      rc = Kernel_RecFifoGetFifoGroup( (uint32_t*)&(DMA_RecFifoInfo.groups[grp]),
+				       grp,
+				       target,
+				       (uint32_t) NULL, /* Normal handler.       Not used */
+                                       (uint32_t) NULL, /* Normal handler parm.  Not used */
+                                       (uint32_t) NULL, /* Header handler.       Not used */
+                                       (uint32_t) NULL, /* Header handler parm.  Not used */
+                                       (uint32_t) NULL  /* InterruptGroup.       Not used */
+                                     );
+      if ( rc == 0 ) /* Success? */
+	{
+	  DMA_RecFifoInfo.groupsInitialized[grp] = 1; /* Remember success.    */
+	}
+      else
+	{
+	  return NULL; /* Failure */
+	}
+    }
+  TRACE((
+		  KERN_INFO "(<) DMA_RecFifoGetFifoGroup\n"));
+
+  return &(DMA_RecFifoInfo.groups[grp]);  /* Return the pointer.              */
+
+}
+
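+/*
+ * Usage sketch (illustrative, not from the original source): obtain the
+ * reception fifo group for DMA group 0, targeting core 0, with threshold
+ * interrupts handled by my_threshold_handler; my_threshold_handler and
+ * my_interrupt_group are hypothetical names.  The deprecated header handler
+ * parameters are passed as NULL.
+ *
+ *   void my_threshold_handler( void *parm, uint32_t u1, uint32_t u2,
+ *                              uint32_t u3 );   // hypothetical handler
+ *
+ *   DMA_RecFifoGroup_t *fg =
+ *     DMA_RecFifoGetFifoGroup( 0,     // group 0..DMA_NUM_REC_FIFO_GROUPS-1
+ *                              0,     // target core for the interrupt
+ *                              (Kernel_CommThreadHandler)my_threshold_handler,
+ *                              NULL,  // normal_handler_parm
+ *                              NULL,  // header_handler (deprecated)
+ *                              NULL,  // header_handler_parm (deprecated)
+ *                              my_interrupt_group ); // a Kernel_InterruptGroup_t
+ *   if ( fg == NULL ) { ... error ... }
+ */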
+
+/*!
+ * \brief Register a Reception Fifo Receive Function
+ *
+ * Register a specified receive function to handle packets having a specific
+ * "registration ID".  It returns a registration ID (0-255) that is to be used
+ * in the packet header Func_Id field, such that packets that arrive in a
+ * reception fifo will result in the corresponding receive function being called
+ * when that fifo is processed by a polling or interrupt handler function.
+ *
+ * \param[in]  recv_func          Pointer to the receive function.
+ * \param[in]  recv_func_parm     Arbitrary pointer to be passed to the
+ *                                recv_func when it is called.
+ * \param[in]  is_error_function  1 means this is the receiver function
+ *                                to be called if a packet contains an invalid
+ *                                (unregistered) registration ID.  The return
+ *                                value from this function is zero, indicating
+ *                                success, not indicating a registration ID.
+ *                                A default function is provided if one is not
+ *                                registered.  If there is already a non-default
+ *                                error receive function registered, -EBUSY is
+ *                                returned.
+ *                                0 means this is not the error receiver
+ *                                function.
+ * \param[in]  is_header_fifo     Indicates whether the fifo is normal or
+ *                                header:
+ *                                - 0 is normal.  The return code is the
+ *                                  registration ID.
+ *                                - 1 is header.  The return code is 0,
+ *                                  indicating success, because packets in
+ *                                  header fifos are direct-put packets, and
+ *                                  hence have no registration ID.
+ *                                If there is already a header receive function
+ *                                registered, -EBUSY is returned.
+ *
+ * If both is_error_function and is_header_fifo are 1, -EINVAL is returned.
+ *
+ * \retval   0            This is a registration ID if is_error_function=0 and
+ *                        is_header_fifo=0.  Otherwise, it indicates success.
+ *           1-255        This is a registration ID.  Successful.
+ *           negative     Failure.  This is a negative errno value.
+ *
+ * \see DMA_RecFifoDeRegisterRecvFunction
+ */
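+/* Registration ID at which the next free-slot search starts */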
+static int DMA_RecFifoRegisterRecvFunction_next_free_ID = 0;
+int DMA_RecFifoRegisterRecvFunction(
+			        DMA_RecFifoRecvFunction_t  recv_func,
+				void                      *recv_func_parm,
+				int                        is_error_function,
+				int                        is_header_fifo
+				)
+{
+  int next_free_ID = DMA_RecFifoRegisterRecvFunction_next_free_ID;
+  int i;
+
+  /* Perform error checks */
+  if ( ( recv_func == NULL ) ||
+       ( ( is_error_function != 0 ) &&
+	 ( is_error_function != 1 ) ) ||
+       ( ( is_header_fifo    != 0 ) &&
+	 ( is_header_fifo    != 1 ) ) ||
+       ( ( is_header_fifo == 1 ) && ( is_error_function == 1 ) ) )
+  {
+    return -EINVAL;
+  }
+
+  /*
+   * Handle a "header" receive function.
+   */
+  if ( is_header_fifo == 1 )
+    {
+      if ( DMA_RecFifoInfo.headerRecvFunction != NULL ) /* Already registered?*/
+	{
+	  return -EBUSY;
+	}
+      DMA_RecFifoInfo.headerRecvFunction     = recv_func;
+      DMA_RecFifoInfo.headerRecvFunctionParm = recv_func_parm;
+      return 0; /* Indicate success */
+    }
+
+  /*
+   * Handle a "error" receive function.
+   */
+  if ( is_error_function == 1 )
+    {
+      if ( DMA_RecFifoInfo.errorRecvFunction !=
+	     &DMA_RecFifoDefaultErrorRecvFunction ) /* Already registered? */
+	{
+	  return -EBUSY;
+	}
+      DMA_RecFifoInfo.errorRecvFunction     = recv_func;
+      DMA_RecFifoInfo.errorRecvFunctionParm = recv_func_parm;
+      return 0; /* Indicate success */
+    }
+
+  /*
+   * Handle a "normal" receive function.
+   */
+
+  for (i=next_free_ID; i < 256; i++) /* Search for an empty slot */
+    {
+      if ( DMA_RecFifoInfo.recvFunctions[i] == NULL ) /* Found a slot? */
+	{
+	  DMA_RecFifoInfo.recvFunctions[i]      = recv_func;
+	  DMA_RecFifoInfo.recvFunctionsParms[i] = recv_func_parm;
+	  DMA_RecFifoRegisterRecvFunction_next_free_ID = i; /* Resume the  */
+	                                 /* free-slot search here next time. */
+	  return i; /* Return the registration ID */
+	}
+    }
+
+  return -EBUSY; /* No open slots */
+
+}
+
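+/*
+ * Usage sketch (illustrative, not from the original source): register a
+ * normal receive function.  my_recv is hypothetical; it must match the
+ * DMA_RecFifoRecvFunction_t signature used by the poll functions.
+ *
+ *   int my_recv( DMA_RecFifo_t      *f_ptr,
+ *                DMA_PacketHeader_t *packet_ptr,
+ *                void               *recv_func_parm,
+ *                char               *payload_ptr,
+ *                int                 payload_bytes )
+ *   {
+ *     // Consume the payload here; the buffer may be overwritten after return.
+ *     return 0;  // non-zero stops the poll loop and is returned to its caller
+ *   }
+ *
+ *   int id = DMA_RecFifoRegisterRecvFunction( my_recv, NULL, 0, 0 );
+ *   if ( id < 0 ) { ... -EINVAL or -EBUSY ... }
+ *   // Senders then place 'id' in the packet header Func_Id field.
+ */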
+
+/*!
+ * \brief De-Register a Reception Fifo Receive Function
+ *
+ * De-register a previously registered receive function.
+ *
+ * \param[in]  registrationId     Registration Id returned from
+ *                                DMA_RecFifoRegisterRecvFunction (0..255).
+ *                                A negative value means that no
+ *                                registration id is specified.
+ * \param[in]  is_error_function  1 means the error receive function is
+ *                                to be de-registered.
+ *                                0 otherwise.
+ * \param[in]  is_header_fifo     1 means the header fifo receive function is
+ *                                to be de-registered.
+ *                                0 otherwise.
+ *
+ * \retval   0            Success
+ *           negative     Error value
+ *
+ * \see DMA_RecFifoRegisterRecvFunction
+ */
+int DMA_RecFifoDeRegisterRecvFunction(
+				      int registrationId,
+				      int is_error_function,
+				      int is_header_fifo
+				     )
+{
+  /* Perform error checks */
+  if ( ( registrationId > 255 ) ||
+       ( ( is_error_function != 0 ) &&
+	 ( is_error_function != 1 ) ) ||
+       ( ( is_header_fifo    != 0 ) &&
+	 ( is_header_fifo    != 1 ) ) )
+  {
+    return -EINVAL;
+  }
+
+  /*
+   * Handle a "header" receive function.
+   */
+  if ( is_header_fifo == 1 )
+  {
+    DMA_RecFifoInfo.headerRecvFunction     = NULL;
+    DMA_RecFifoInfo.headerRecvFunctionParm = NULL;
+  }
+
+  /*
+   * Handle a "error" receive function.
+   */
+  if ( is_error_function == 1 )
+  {
+    DMA_RecFifoInfo.errorRecvFunction     = NULL;
+    DMA_RecFifoInfo.errorRecvFunctionParm = NULL;
+  }
+
+  /*
+   * Handle a "normal" receive function.
+   */
+
+  if ( registrationId >= 0 )
+  {
+    DMA_RecFifoInfo.recvFunctions[registrationId]      = NULL;
+    DMA_RecFifoInfo.recvFunctionsParms[registrationId] = NULL;
+    DMA_RecFifoRegisterRecvFunction_next_free_ID = 0; /* Start at beginning next time */
+  }
+
+  return 0;
+
+}
+
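+/*
+ * Usage sketch (illustrative): release the normal registration obtained
+ * above, making slot 'id' available again.
+ *
+ *   rc = DMA_RecFifoDeRegisterRecvFunction( id, 0, 0 );
+ *   // To deregister the error or header function instead, pass a negative
+ *   // registrationId with is_error_function=1 or is_header_fifo=1.
+ */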
+
+/*!
+ * \brief DMA Reception Fifo Default Error Receive Function
+ *
+ * This is the default function that will handle packets having an
+ * unregistered registration ID.
+ *
+ * \param[in]  f_ptr           Pointer to the reception fifo.
+ * \param[in]  packet_ptr      Pointer to the packet header (== va_head).
+ *                             This is quad-aligned for optimized copying.
+ * \param[in]  recv_func_parm  Pointer to storage specific to this receive
+ *                             function.  This pointer was specified when the
+ *                             receive function was registered with the kernel,
+ *                             and is passed to the receive function
+ *                             unchanged.
+ * \param[in]  payload_ptr     Pointer to the beginning of the payload.
+ *                             This is quad-aligned for optimized copying.
+ * \param[in]  payload_bytes   Number of bytes in the payload
+ *
+ * \retval  -1  An unregistered packet was just processed.  This is considered
+ *              an error.
+ */
+int  DMA_RecFifoDefaultErrorRecvFunction(
+					 DMA_RecFifo_t      *f_ptr,
+					 DMA_PacketHeader_t *packet_ptr,
+					 void               *recv_func_parm,
+					 char               *payload_ptr,
+					 int                 payload_bytes
+					)
+{
+  int i;
+
+  printf ( "\nUnregistered Packet Received in Reception Fifo %d\n",
+	   f_ptr->global_fifo_id);
+
+  printf ( "Packet Header:\n");
+  printf ( "%08x%08x%08x%08x\n",*((int*)&packet_ptr[0]),
+	                        *((int*)&packet_ptr[4]),
+	                        *((int*)&packet_ptr[8]),
+	                        *((int*)&packet_ptr[12]));
+  printf ( "Packet Payload:\n");
+
+  for (i=0; i<payload_bytes; i+=16)
+    {
+      printf ( "%08x%08x%08x%08x\n",*((int*)&payload_ptr[i]),
+	                            *((int*)&payload_ptr[i+4]),
+                                    *((int*)&payload_ptr[i+8]),
+	                            *((int*)&payload_ptr[i+12]));
+    }
+
+  SPI_assert(0);
+
+  return -1;
+}
+
+
+/*!
+ * \brief DMA Reception Fifo Get Addresses
+ *
+ * Analyze the packet at the head of the reception fifo and return a
+ * DMA_PacketIovec_t describing the payload of the packet.  In particular,
+ * determine if the packet is contiguous in the fifo, or whether it wraps
+ * around to the start of the fifo.
+ *
+ * \param[in]      f_ptr   Pointer to the reception fifo structure.
+ * \param[in,out]  io_vec  Pointer to the packet I/O vector structure to
+ *                         be filled in.
+ *
+ * \return  The io_vec structure has been filled-in.
+ *
+ * \pre  The caller has determined that the fifo has a packet in it (it
+ *       is not empty).
+ *
+ * \note
+ * - For non-header packets, only non-DMA packets (memory fifo packets)
+ *   are in the fifo and need to be handled.
+ */
+void DMA_RecFifoGetAddresses(
+			     DMA_RecFifo_t     *f_ptr,
+			     DMA_PacketIovec_t *io_vec
+			    )
+{
+  DMA_PacketHeader_t *packet_ptr;
+  unsigned int        payload_bytes;
+  unsigned int        payload_bytes_to_end_of_fifo = 0;
+
+  SPI_assert( f_ptr  != NULL );
+  SPI_assert( io_vec != NULL );
+
+      if ( f_ptr->global_fifo_id < DMA_NUM_NORMAL_REC_FIFOS )  /* Is this a   */
+	                                                       /* normal fifo?*/
+	{ /* Yes.  Process a normal packet */
+	  packet_ptr = (DMA_PacketHeader_t*)f_ptr->dma_fifo.va_head; /* Point */
+    	                                                   /*  to the packet. */
+
+	  payload_bytes = ( (packet_ptr->Chunks + 1) << 5 ) -
+	    sizeof(DMA_PacketHeader_t);           /* Calculate payload bytes. */
+
+	  io_vec->payload_ptr[0] =
+	    (char*)packet_ptr +
+	      sizeof(DMA_PacketHeader_t);         /* Set first payload ptr    */
+
+	  /* Determine if the payload is contiguous in the fifo, and set up   */
+	  /* the iovec accordingly.                                           */
+	  if ( ( payload_bytes <= 16 ) || /* A 32-byte packet will always be  */
+               	                          /* contiguous...this is an          */
+                                          /* optimization to avoid the next   */
+                                          /* set of calculations.             */
+	       ( payload_bytes <=         /* Calculate how much space to the  */
+		 ( payload_bytes_to_end_of_fifo = /* end of the fifo.         */
+		   ( (unsigned)f_ptr->dma_fifo.va_end - /* Check if entire    */
+		     (unsigned)io_vec->payload_ptr[0] ) ) ) ) /* payload fits.*/
+	    {
+	      /* Set up io_vec for contiguous payload                         */
+	      io_vec->num_segments   = 1;  /* Indicate contiguous payload.    */
+	      io_vec->num_bytes[0]   = payload_bytes;
+	      io_vec->payload_ptr[1] = NULL;
+	      io_vec->num_bytes[1]   = 0;
+	      return;
+	    }
+	  else
+	    { /* Set up io_vec for non-contiguous payload.                    */
+
+	      io_vec->num_segments   = 2; /* Indicate split payload.          */
+	      io_vec->num_bytes[0]   = payload_bytes_to_end_of_fifo;
+	      io_vec->payload_ptr[1] = f_ptr->dma_fifo.va_start;
+	      io_vec->num_bytes[1]   = payload_bytes -
+		                         payload_bytes_to_end_of_fifo;
+	      return;
+	    }
+	} /* End: Non-header packet */
+      else
+	{ /* Header packet */
+	  io_vec->num_segments   = 0;    /* Indicate header fifo.             */
+	  io_vec->payload_ptr[0] = NULL; /* Everything else is NULL or zero.  */
+	  io_vec->payload_ptr[1] = NULL;
+	  io_vec->num_bytes[0]   = 0;
+	  io_vec->num_bytes[1]   = 0;
+	  return;
+	}
+
+} /* End: DMA_RecFifoGetAddresses() */
+
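+/*
+ * Usage sketch (illustrative): gather a possibly-wrapped payload into one
+ * contiguous buffer.  'buf' is a hypothetical caller buffer at least as
+ * large as the payload.
+ *
+ *   DMA_PacketIovec_t iov;
+ *   DMA_RecFifoGetAddresses( f_ptr, &iov );
+ *   if ( iov.num_segments == 1 )        // payload is contiguous in the fifo
+ *     memcpy( buf, iov.payload_ptr[0], iov.num_bytes[0] );
+ *   else if ( iov.num_segments == 2 ) { // payload wraps to the fifo start
+ *     memcpy( buf, iov.payload_ptr[0], iov.num_bytes[0] );
+ *     memcpy( buf + iov.num_bytes[0], iov.payload_ptr[1], iov.num_bytes[1] );
+ *   }
+ *   // num_segments == 0 indicates a header fifo (no memory-fifo payload).
+ */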
+
+/*!
+ * \brief Get Index of Next Reception Fifo in Group
+ *
+ * A reception fifo group contains up to DMA_NUM_REC_FIFOS_PER_GROUP fifos,
+ * held in an array.  Up to fg_ptr->num_normal_fifos normal fifos occupy
+ * the first array slots.  Up to 1 header fifo is in the last array slot.
+ *
+ * This function returns the array index of the next normal fifo in the group
+ * that is being used, based upon the desired fifo_index and the not-empty
+ * status.
+ *
+ * If *not_empty_status is 0, the status is fetched from the DMA SRAM (first
+ * time condition).
+ *
+ * If the DMA SRAM not-empty status for this group is all zero (all fifos are
+ * empty), the status is checked num_empty_passes times with a slight delay
+ * in between to give the DMA time to make progress before returning a -1,
+ * indicating that there is nothing more to process.
+ *
+ * \param[in]  fg_ptr              Pointer to the fifo group
+ * \param[in]  desired_fifo_index  Index of the fifo that is desired to be
+ *                                 processed.
+ * \param[in,out]  fifo_bit        Pointer to the bit in the not_empty_status
+ *                                 that corresponds to the desired_fifo_index
+ *                                 (on input) and the returned next_fifo_index
+ *                                 (on output).
+ * \param[in]  num_empty_passes    When the not-empty status indicates that all
+ *                                 fifos in the group are empty, this is the
+ *                                 number of times the not-empty status is
+ *                                 re-fetched and re-checked before officially
+ *                                 declaring that they are indeed empty
+ *                                 (0 means no extra passes are made).
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in,out]  not_empty_status  Pointer to the location to shadow the
+ *                                   not empty status.
+ *
+ * \retval  next_fifo_index  Index of the next fifo in the group to be
+ *                           processed.
+ * \retval  -1               Indicates that the normal fifos in the group are
+ *                           all empty.
+ *
+ * \post The va_tail of the fifo that is returned has been refreshed from
+ *       the DMA hardware.
+ *
+ */
+__INLINE__ int DMA_RecFifoGetNextFifo(
+				      DMA_RecFifoGroup_t *fg_ptr,
+				      int                 desired_fifo_index,
+				      unsigned int       *fifo_bit,
+				      int                 num_empty_passes,
+				      int                 not_empty_poll_delay,
+				      unsigned int       *not_empty_status
+				     )
+{
+  unsigned int status     = *not_empty_status; /* Make a local copy */
+  unsigned int status_bit = *fifo_bit;
+  int          fifo_index = desired_fifo_index;
+
+  /*
+   * If *not_empty_status is 0, either the status has not been fetched yet
+   * (first-time condition), or all fifos were emptied.  Go fetch the
+   * not-empty status again.
+   */
+  if ( status ==  0 )
+    {
+      status = DMA_RecFifoGetNotEmpty( fg_ptr,
+				       0 );   /* Get Normal fifo   */
+                                              /* not-empty status. */
+      *not_empty_status = status; /* Return the status to the caller */
+
+#ifdef DEBUG_PRINT
+      printf("New notEmptyStatus1=0x%08x\n",*not_empty_status);
+#endif
+    }
+
+  /*
+   * If the DMA SRAM not-empty status for this group is all zero (all fifos are
+   * empty), the status is checked num_empty_passes times with a slight delay
+   * in between to give the DMA time to make progress before returning a -1,
+   * indicating that there is nothing more to process.
+   */
+  while ( ( status == 0 ) &&
+	  ( num_empty_passes-- > 0 ) )
+    {
+      /* Delay, allowing the DMA to update its status */
+      unsigned int pclks = not_empty_poll_delay;
+      while( pclks-- )
+	{
+	  asm volatile("nop;");
+	}
+
+      /* Re-fetch the not-empty status */
+      status = DMA_RecFifoGetNotEmpty( fg_ptr,
+				       0 );   /* Get Normal fifo   */
+                                              /* not-empty status. */
+      *not_empty_status = status; /* Return the status to the caller */
+
+#ifdef DEBUG_PRINT
+      printf("New notEmptyStatus2=0x%08x\n",*not_empty_status);
+#endif
+    }
+
+  if ( status == 0 ) return (-1);  /* Can't find any not empty     */
+
+  /*
+   * We have some fifos that are not empty.
+   * Determine the fifo_index to be returned.
+   * Loop until we hit a non-empty fifo.
+   */
+#ifdef DEBUG_PRINT
+  printf("Checking status1 = 0x%08x for fifo_index %d, bit 0x%08x\n", status, fifo_index, status_bit);
+#endif
+
+  while ( ( status & status_bit ) == 0 )
+    {
+      fifo_index++;                     /* Try next fifo.                     */
+      if ( fifo_index >= fg_ptr->num_normal_fifos ) /* Wrap?                  */
+	fifo_index = 0;                 /* Start over with zero.              */
+
+      status_bit = _BN(fg_ptr->fifos[fifo_index].global_fifo_id); /* Map to   */
+     		                        /* proper not-empty bit.              */
+
+#ifdef DEBUG_PRINT
+      printf("Checking status2 = 0x%08x for fifo_index %d, bit 0x%08x\n", status, fifo_index, status_bit);
+#endif
+    }
+
+  /* Refresh the tail because the DMA may have moved it */
+  DMA_RecFifoGetTailById( fg_ptr,
+			  fifo_index );
+
+  *fifo_bit = status_bit;               /* Return the fifo index and its bit  */
+
+#ifdef DEBUG_PRINT
+  printf("Returning fifo_index=%d, status bit 0x%08x\n",fifo_index,status_bit);
+#endif
+
+  return (fifo_index);
+
+} /* End: DMA_RecFifoGetNextFifo() */
+
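+/*
+ * Usage sketch (illustrative): find the next non-empty normal fifo in a
+ * group, starting the scan at index 0, with 2 extra passes and a 50-pclk
+ * delay while everything looks empty.
+ *
+ *   unsigned int status = 0;    // 0 forces a fresh fetch of the status
+ *   unsigned int bit    = _BN( fg_ptr->fifos[0].global_fifo_id );
+ *   int idx = DMA_RecFifoGetNextFifo( fg_ptr, 0, &bit, 2, 50, &status );
+ *   if ( idx < 0 ) { ... all normal fifos in the group are empty ... }
+ */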
+
+/*!
+ * \brief Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1.  Total_packets packets are received
+ * 2.  All the fifos are empty
+ * 3.  A receive function returns a non-zero value
+ * 4.  The last packet removed from a fifo has an invalid registration id.  The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group, beginning with array index starting_index. If a fifo has a packet,
+ * the appropriate receive function is called.  Upon return, the packet is
+ * removed from the fifo (the fifo head is moved past the packet).
+ *
+ * After processing packets_per_fifo packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed.  When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  total_packets     The maximum number of packets that will be
+ *                               processed.
+ * \param[in]  packets_per_fifo  The maximum number of packets that will be
+ *                               processed in a given fifo before switching
+ *                               to the next fifo.
+ * \param[in]  starting_index    The fifos in the fifo group are maintained
+ *                               in an array.  This is the array index of the
+ *                               first fifo to be processed (0 through
+ *                               DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  num_empty_passes  The number of passes over the normal fifos
+ *                               while they are empty that this function
+ *                               should tolerate before giving up and
+ *                               returning.  This is an optimization
+ *                               to catch late arriving packets.
+ *                               (0 means no extra passes are made).
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ * \param[out] next_fifo_index   Pointer to an int where the recommended
+ *                               starting_index for the next call is returned.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ *                                next_fifo_index is set.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.  next_fifo_index is
+ *                                set.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
+ * \note next_fifo_index is set to the index of the fifo that received the
+ *       last packet if fewer than packets_per_fifo packets were received
+ *       from that fifo.  If all packets_per_fifo packets were received
+ *       from that fifo, the index of the next fifo is returned instead.
+ *
+ */
+int DMA_RecFifoPollNormalFifos(int                 total_packets,
+			       int                 packets_per_fifo,
+			       int                 starting_index,
+			       int                 num_empty_passes,
+			       int                 not_empty_poll_delay,
+			       DMA_RecFifoGroup_t *fg_ptr,
+			       int                *next_fifo_index
+			      )
+{
+  int fifo_index;                          /* Index of fifo being processed   */
+  unsigned int fifo_bit_number;            /* The bit number of the fifo      */
+                                           /* being processed.  Group0: 0-7,  */
+                                           /* Group1: 8-15, Group2: 16-23,    */
+                                           /* Group3: 24-31.  Corresponds to  */
+                                           /* the DMA not-empty status bits.  */
+  int num_fifos_in_group;                  /* Number of fifos in this group.  */
+  int num_packets_in_fifo;                 /* Count of packets processed in a */
+					   /* fifo.                           */
+  unsigned int not_empty_status=0;         /* Snapshot of the not empty status*/
+                                           /* for this group.  0 indicates    */
+                                           /* that no snapshot has occurred   */
+                                           /* yet.                            */
+  int rc = 0;                              /* Return code from recv_func.     */
+  int num_processed = 0;                   /* Number of packets processed     */
+  DMA_PacketIovec_t io_vec;                /* Payload I/O vector              */
+  DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function     */
+  void                     *recv_func_parm;/* Receive function parameter      */
+  int                       recv_func_id;  /* Function ID from the packet     */
+                                           /* header.                         */
+  void                  *recv_func_payload;/* Pointer to recv func payload    */
+  void                  *recv_func_packet; /* Pointer to recv func packet     */
+  DMA_RecFifo_t *fifo_ptr;                 /* Pointer to fifo being processed */
+  char temp_packet[256] ALIGN_QUADWORD;    /* Temporary packet copy.          */
+                                           /* Align for efficient copying.    */
+  char *load_ptr, *store_ptr;              /* Used for copying bytes          */
+  int num_quads;                           /* Number of quads to copy         */
+  DMA_PacketHeader_t *packet_ptr;          /* Pointer to packet header        */
+
+  SPI_assert( total_packets     > 0 );
+  SPI_assert( packets_per_fifo  > 0 );
+  SPI_assert( packets_per_fifo <= total_packets );
+  SPI_assert( num_empty_passes  >= 0 );
+  SPI_assert( fg_ptr           != NULL );
+  SPI_assert( next_fifo_index  != NULL );
+  SPI_assert( ( starting_index >= 0 ) &&
+	   ( starting_index < fg_ptr->num_normal_fifos ) );
+
+  num_fifos_in_group = fg_ptr->num_normal_fifos;
+  *next_fifo_index = starting_index; /* Tell caller to start with the same    */
+                                     /* fifo next time.                       */
+  fifo_index       = starting_index; /* Start with the fifo the caller says to*/
+
+#ifdef DEBUG_PRINT
+  int i;
+  for (i=0; i<fg_ptr->num_normal_fifos; i++)
+    printf("FifoIndex=%d <--> GlobalID=%d\n",i,fg_ptr->fifos[i].global_fifo_id);
+#endif
+
+  /*
+   * Circularly loop through the not-empty fifos in the fifo group.
+   * Keep going until one of the termination conditions documented in the
+   * prolog occurs.
+   *
+   */
+  for (;;)
+    {
+      /*
+       * Find the next fifo to process.
+       */
+      fifo_ptr        = &fg_ptr->fifos[fifo_index]; /* This is the fifo itself*/
+      fifo_bit_number = _BN(fifo_ptr->global_fifo_id);/* The fifo's status bit*/
+
+      fifo_index = DMA_RecFifoGetNextFifo(fg_ptr,
+					  fifo_index,
+					  &fifo_bit_number,
+					  num_empty_passes,
+					  not_empty_poll_delay,
+					  &not_empty_status);
+      if (fifo_index < 0) { /* No more packets to process? */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+	/*
+	 * If there are no more non-empty fifos, count the number of consecutive
+	 * times the poll function came up dry (num_processed == 0), and if it
+	 * exceeds a threshold, issue a system call to clear the rDMA's "full
+	 * reception fifo" condition so it begins to receive packets again.
+	 *
+	 * When a non-empty fifo is returned, its shadow va_tail pointer has been
+	 * updated to reflect the amount of packet data in the fifo.
+	 */
+	if (num_processed > 0) { /* Did we process at least 1 packet? */
+	  NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+	                                 /* likely not encountered a fifo full */
+ 	                                 /* condition and stopped.  Reset the  */
+    	                                 /* fifo counter so we will start      */
+                                         /* tracking empty calls to poll.      */
+	}
+	else {
+	  if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+	       (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+	     /*  printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+	    rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+                                             /* reception fifos filled and the */
+                                             /* DMA has stopped.               */
+	     /*  printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+	    NumEmptyPollFunctionCalls = -1; /* The DMA is active.  Reset the    */
+                                            /* fill-fifo counter.               */
+	  }
+	}
+#endif
+	 /* 	printf("Poll: returned %d processed\n",num_processed); */
+	return (num_processed);
+      }
+
+      *next_fifo_index = fifo_index; /* Tell caller to start with this fifo   */
+                                     /* next time.                            */
+      fifo_ptr = &(fg_ptr->fifos[fifo_index]);
+      num_packets_in_fifo = 0;
+
+      /*
+       * MSYNC before we look at the data in the fifo to ensure that snoops
+       * issued by the DMA have completed.  This ensures the L1 cache
+       * invalidations have completed so we don't look at stale data.
+       */
+      _bgp_msync();
+
+      /*
+       * Within a fifo: The area between the va_head and va_tail shadow pointers
+       * contains packets to be processed.  Loop, processing those packets until
+       * we have processed packets_per_fifo of them, or all of them, or other
+       * issues come up.
+       *
+       */
+#if defined(CONFIG_BGP_STATISTICS)
+      {
+	unsigned int tail  = (unsigned int) fifo_ptr->dma_fifo.va_tail ;
+	unsigned int head  = (unsigned int) fifo_ptr->dma_fifo.va_head ;
+	unsigned int end   = (unsigned int) fifo_ptr->dma_fifo.va_end ;
+	unsigned int start = (unsigned int) fifo_ptr->dma_fifo.va_start ;
+	unsigned int used_space = ( tail >= head ) ? (tail-head) : ((tail-start)+(end-head)) ;
+	reception_fifo_histogram[fls(used_space >> 4)] += 1 ;
+      }
+#endif
+      while ( ( num_packets_in_fifo < packets_per_fifo ) &&
+	      ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) )
+	{
+	  DMA_RecFifoGetAddresses( fifo_ptr,
+				   &io_vec ); /* Get the payload pointer(s)   */
+      	                                      /* for the packet at the head   */
+                                              /* of the fifo.                 */
+
+	  packet_ptr = (DMA_PacketHeader_t*)
+	                 fifo_ptr->dma_fifo.va_head; /* Point to packet header*/
+
+#ifdef DEBUG_PRINT
+	  printf("ReceivedPacketHead = 0x%08x\n",(unsigned)packet_ptr);
+	  printf("ReceivedPacketIovec= 0x%08x %d, 0x%08x %d\n",
+		 (unsigned)io_vec.payload_ptr[0], io_vec.num_bytes[0],
+		 (unsigned)io_vec.payload_ptr[1], io_vec.num_bytes[1]);
+#endif
+	  /*
+	   * Determine the receive function to call.  Index into
+	   * recvFunctions array is in the packet header.
+	   */
+	  recv_func_id  = packet_ptr->Func_Id;
+	  recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+	  if ( recv_func_ptr != NULL )
+	    {
+	      recv_func_parm =
+		DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+	    }
+	  else
+	    {
+	      recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
+	      recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+	    }
+	  /*
+	   * Use a temporary copy of the packet, when the payload
+	   * wraps.
+	   */
+	  if ( io_vec.num_segments > 1 )
+	    {
+#ifdef DEBUG_PRINT
+	      printf("Payload Wraps: Packet Header: 0x%08x, Iovecs: 0x%08x %d, 0x%08x %d\n",
+		     (unsigned)packet_ptr,
+		     (unsigned)io_vec.payload_ptr[0], io_vec.num_bytes[0],
+		     (unsigned)io_vec.payload_ptr[1], io_vec.num_bytes[1]);
+#endif
+
+	      /* Copy packet header and first payload segment */
+	      load_ptr  = (char*)packet_ptr;
+	      store_ptr = temp_packet;
+	      num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4;
+	      while ( num_quads > 0 )
+		{
+#ifdef DEBUG_PRINT
+		  printf("load_ptr =0x%08x, load_value =0x%08x%08x%08x%08x\n",
+			 (unsigned)load_ptr, *(unsigned*)load_ptr, *(unsigned*)(load_ptr+4),
+			 *(unsigned*)(load_ptr+8), *(unsigned*)(load_ptr+12));
+#endif
+		  _bgp_QuadLoad ( load_ptr,     0 );
+
+		  _bgp_QuadStore( store_ptr,    0 );
+#ifdef DEBUG_PRINT
+		  printf("store_ptr=0x%08x, store_value=0x%08x%08x%08x%08x\n",
+			 (unsigned)store_ptr, *(unsigned*)store_ptr, *(unsigned*)(store_ptr+4),
+			 *(unsigned*)(store_ptr+8), *(unsigned*)(store_ptr+12));
+#endif
+
+		  load_ptr  += 16;
+		  store_ptr += 16;
+		  num_quads--;
+		}
+	      /* Copy second payload segment */
+	      load_ptr  = (char*)io_vec.payload_ptr[1];
+	      num_quads = io_vec.num_bytes[1] >> 4;
+	      while ( num_quads > 0 )
+		{
+#ifdef DEBUG_PRINT
+		  printf("load_ptr =0x%08x, load_value =0x%08x%08x%08x%08x\n",
+			 (unsigned)load_ptr, *(unsigned*)load_ptr, *(unsigned*)(load_ptr+4),
+			 *(unsigned*)(load_ptr+8), *(unsigned*)(load_ptr+12));
+#endif
+		  _bgp_QuadLoad ( load_ptr,     0 );
+
+		  _bgp_QuadStore( store_ptr,    0 );
+#ifdef DEBUG_PRINT
+		  printf("store_ptr=0x%08x, store_value=0x%08x%08x%08x%08x\n",
+			 (unsigned)store_ptr, *(unsigned*)store_ptr, *(unsigned*)(store_ptr+4),
+			 *(unsigned*)(store_ptr+8), *(unsigned*)(store_ptr+12));
+#endif
+		  load_ptr  += 16;
+		  store_ptr += 16;
+		  num_quads--;
+		}
+	      recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t);
+	      recv_func_packet  = temp_packet;
+
+	    } /* End: Set up temporary copy of split packet */
+
+	  else /* Set up for contiguous packet */
+	    {
+	      recv_func_payload = (char*)packet_ptr +
+		sizeof(DMA_PacketHeader_t);
+	      recv_func_packet  = packet_ptr;
+	    }
+
+	  /* Call the receive function */
+	  if( recv_func_ptr )
+	    {
+	  rc = (*recv_func_ptr)(fifo_ptr,
+				recv_func_packet,
+				recv_func_parm,
+				recv_func_payload,
+				io_vec.num_bytes[0]+io_vec.num_bytes[1]);
+	    }
+	  else
+	    {
+	      printk(KERN_ERR "DMA_RecFifoPollNormalFifos recv_func_ptr was NULL recv_func_id=%02x fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+	          recv_func_id,fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ;
+	    }
+
+	  /* Increment the head by the size of the packet */
+	  DMA_RecFifoIncrementHead(fifo_ptr,
+				   (io_vec.num_bytes[0]+
+				    io_vec.num_bytes[1] +
+				    sizeof(DMA_PacketHeader_t))>> 4);
+
+	  num_processed++;
+
+	  if ( rc != 0 ) /* Did receive function fail? */
+	    {
+#if defined(BGP_DD1_WORKAROUNDS)
+  	      NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+	                                     /* likely not encountered a fifo full */
+ 	                                     /* condition and stopped.  Reset the  */
+    	                                     /* fifo counter so we will start      */
+                                             /* tracking empty calls to poll.      */
+#endif
+	      /* Clear the threshold crossed condition, in case we have gone below
+	       * the threshold.
+	       */
+	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+						   fifo_bit_number,
+						   0 );
+	      return (rc); /* Yes...return that return code */
+	    }
+
+	  if ( num_processed >= total_packets ) /* Got what they wanted? */
+	    {
+#if defined(BGP_DD1_WORKAROUNDS)
+  	      NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+	                                     /* likely not encountered a fifo full */
+ 	                                     /* condition and stopped.  Reset the  */
+    	                                     /* fifo counter so we will start      */
+                                             /* tracking empty calls to poll.      */
+#endif
+	      /* Clear the threshold crossed condition, in case we have gone below
+	       * the threshold.
+	       */
+	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+						   fifo_bit_number,
+						   0 );
+	      return (num_processed); /* Yes...all done */
+	    }
+
+	  num_packets_in_fifo++;
+
+	} /* End: Process up to packets_per_fifo packets in this fifo */
+
+      /*
+       * We exited the loop processing the fifo_index fifo.
+       * - If we exited because we reached the packets_per_fifo limit, we want
+       *   to turn off this fifo's not-empty status in our shadow copy of the
+       *   status so we process all of the other fifos before re-fetching the
+       *   true status, giving this fifo another chance.
+       * - If we exited because the fifo was empty according to our snapshot
+       *   of the fifo's tail (head == tail snapshot), we want to turn off this
+       *   fifo's not-empty status in our shadow copy of the status so we
+       *   process all of the other fifos before re-fetching the true status and
+       *   tail for this fifo, giving this fifo another chance.
+       * Either way, we turn off the status bit.
+       *
+       */
+      not_empty_status &= ~(fifo_bit_number);
+
+      /* Clear the threshold crossed condition, in case we have gone below
+       * the threshold.
+       */
+      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+					   fifo_bit_number,
+					   0 );
+
+#ifdef DEBUG_PRINT
+      printf("PollNormal: Turning off status bit 0x%08x, status=0x%08x\n",fifo_bit_number,not_empty_status);
+#endif
+
+      /* Bump to next fifo */
+      fifo_index = (fifo_index+1) % num_fifos_in_group;
+
+      /*
+       * If we have processed the max number of packets from the previous fifo,
+       * the recommended next fifo to process is the one after that.
+       *
+       */
+      if ( num_packets_in_fifo == packets_per_fifo )
+	{
+	  *next_fifo_index  = fifo_index;
+	}
+
+    } /* End: Keep looping through the fifos. */
+
+} /* End: DMA_RecFifoPollNormalFifos() */
+
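+/*
+ * Usage sketch (illustrative): a polling loop over the normal fifos of a
+ * group, resuming each call at the index recommended by the previous one.
+ * Per the prolog, the caller must disable interrupts around each call.
+ *
+ *   int next = 0;
+ *   for (;;) {
+ *     int n = DMA_RecFifoPollNormalFifos( 32,    // at most 32 packets per call
+ *                                         8,     // at most 8 packets per fifo
+ *                                         next,  // starting_index
+ *                                         0,     // no extra empty passes
+ *                                         0,     // no poll delay
+ *                                         fg_ptr,
+ *                                         &next );
+ *     if ( n < 0 ) { ... a receive function returned n; handle it ... }
+ *     // n >= 0 is the number of packets processed on this call
+ *   }
+ */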
+
+
+
+
+static int dumpmem_count ;
+
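+/* Copy one 16-byte quadword with integer loads and stores; unlike the
+ * double-hummer quad load/store, this does not require the caller to have
+ * floating-point enabled.  Both pointers must be word-aligned.
+ */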
+static inline void quadcpy(void *dest, const void *src)
+{
+	unsigned int *desti=(unsigned int *) dest ;
+	const unsigned int *srci=(const unsigned int *) src ;
+	unsigned int w0 = srci[0] ;
+	unsigned int w1 = srci[1] ;
+	unsigned int w2 = srci[2] ;
+	unsigned int w3 = srci[3] ;
+	desti[0] = w0 ;
+	desti[1] = w1 ;
+	desti[2] = w2 ;
+	desti[3] = w3 ;
+}
+/*!
+ * \brief Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1.  num_packets packets are received
+ * 2.  The specified fifo is empty
+ * 3.  A receive function returns a non-zero value
+ * 4.  The last packet removed from the fifo has an invalid registration id. The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  num_packets       The maximum number of packets that will be
+ *                               processed.
+ * \param[in]  fifo_id           The ID of the fifo to be polled.
+ *                               (0 through
+ *                               DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  num_empty_passes    When the not-empty status indicates that all
+ *                                 fifos in the group are empty, this is the
+ *                                 number of times the not-empty status is
+ *                                 re-fetched and re-checked before officially
+ *                                 declaring that they are indeed empty.
+ *                                 (0 means no extra passes are made).
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ *
+ * \param[in]  empty_callback    Function to call when spinning because the FIFO looks empty.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
+ */
+int DMA_RecFifoPollNormalFifoById( int                 num_packets,
+				   int                 fifo_id,
+				   int                 num_empty_passes,
+				   int                 not_empty_poll_delay,
+				   DMA_RecFifoGroup_t *fg_ptr,
+				   void 		(*empty_callback)(void)
+				 )
+{
+  int num_packets_in_fifo;                 /* Count of packets processed in a */
+					   /* fifo.                           */
+  unsigned int status;                     /* Snapshot of the not empty status*/
+                                           /* for this group.                 */
+  int rc = 0;                              /* Return code from recv_func.     */
+  int num_processed = 0;                   /* Number of packets processed     */
+  DMA_PacketIovec_t io_vec;                /* Payload I/O vector              */
+  DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function     */
+  void                     *recv_func_parm;/* Receive function parameter      */
+  int                       recv_func_id;  /* Function ID from the packet     */
+                                           /* header.                         */
+  void                  *recv_func_payload;/* Pointer to recv func payload    */
+  void                  *recv_func_packet; /* Pointer to recv func packet     */
+  DMA_RecFifo_t *fifo_ptr;                 /* Pointer to fifo being processed */
+  char temp_packet[256] ALIGN_QUADWORD;    /* Temporary packet copy.          */
+                                           /* Align for efficient copying.    */
+  char *load_ptr, *store_ptr;              /* Used for copying bytes          */
+  int num_quads;                           /* Number of quads to copy         */
+  DMA_PacketHeader_t *packet_ptr;          /* Pointer to packet header        */
+  int passes;                              /* Counter of not-empty passes     */
+
+  SPI_assert( num_packets       > 0 );
+  SPI_assert( num_empty_passes  >= 0 );
+  SPI_assert( fg_ptr           != NULL );
+  SPI_assert( ( fifo_id >= 0 ) &&
+	   ( fifo_id <  DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP ) );
+
+  fifo_ptr = &(fg_ptr->fifos[fifo_id]);
+
+  /*
+   * Loop until the specified fifo is declared empty, or
+   * until one of the termination conditions documented in the prolog occurs.
+   *
+   */
+  for (;;)
+    {
+      /*
+       * If the DMA SRAM not-empty status for this fifo is zero (the fifo is
+       * empty), the status is checked num_empty_passes times with a slight
+       * delay in between to give the DMA time to make progress before declaring
+       * that the fifo is truly empty.
+       */
+      passes = num_empty_passes;
+      status = DMA_RecFifoGetNotEmptyById( fg_ptr,
+					   fifo_id ); /* Get Normal fifo   */
+                                                      /* not-empty status. */
+      while ( ( status == 0 ) &&
+	      ( passes-- > 0 ) )
+	{
+	  /* Delay, allowing the DMA to update its status */
+	  unsigned int pclks = not_empty_poll_delay;
+	  if ( empty_callback ) (*empty_callback)() ;
+	  while( pclks-- )
+	    {
+	      asm volatile("nop;");
+	    }
+
+	  /* Re-fetch the not-empty status */
+	  status = DMA_RecFifoGetNotEmptyById(
+					    fg_ptr,
+					    fifo_id ); /* Get Normal fifo  */
+	                                               /* not-empty status.*/
+	}
+
+      if ( status == 0 ) {       /* Fifo is empty?                             */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+	if (num_processed > 0) { /* Did we process at least 1 packet?          */
+	  NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+	                                 /* likely not encountered a fifo full */
+ 	                                 /* condition and stopped.  Reset the  */
+    	                                 /* fifo counter so we will start      */
+                                         /* tracking empty calls to poll.      */
+	  /* Clear the threshold crossed condition, in case we have gone below
+	   * the threshold.
+	   */
+	  DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+					       _BN(fifo_ptr->global_fifo_id),
+					       0 );
+	}
+	else {
+	  if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+	       (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+	     /*  printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+	    rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+                                             /* reception fifos filled and the */
+                                             /* DMA has stopped.               */
+	     /*  printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+	    NumEmptyPollFunctionCalls = -1; /* The DMA is active.  Reset the    */
+                                            /* fill-fifo counter.               */
+	  }
+	}
+#endif
+
+	return (num_processed);
+      }
+
+      /* The fifo has something in it.
+       * Update its shadow va_tail pointer to reflect the amount of packet
+       * data in the fifo.
+       */
+      DMA_RecFifoGetTailById( fg_ptr,
+			      fifo_id );
+
+      num_packets_in_fifo = 0;
+
+      /*
+       * MSYNC before we look at the data in the fifo to ensure that snoops
+       * issued by the DMA have completed.  This ensures the L1 cache
+       * invalidations have completed so we don't look at stale data.
+       */
+      _bgp_msync();
+
+      /*
+       * Within a fifo: The area between the va_head and va_tail shadow pointers
+       * contains packets to be processed.  Loop, processing those packets until
+       * we have processed num_packets of them, or all of them, or other
+       * issues come up.
+       *
+       */
+#if defined(CONFIG_BGP_STATISTICS)
+      {
+	      unsigned int tail = (unsigned int) fifo_ptr->dma_fifo.va_tail ;
+	      unsigned int head = (unsigned int) fifo_ptr->dma_fifo.va_head ;
+	      unsigned int end  = (unsigned int) fifo_ptr->dma_fifo.va_end ;
+	      unsigned int start = (unsigned int) fifo_ptr->dma_fifo.va_start ;
+	      unsigned int used_space = ( tail >= head ) ? (tail-head) : ((tail-start)+(end-head)) ;
+	      reception_fifo_histogram[fls(used_space >> 4)] += 1 ;
+	      if( used_space > reception_hi_watermark ) reception_hi_watermark = used_space ;
+
+/*       unsigned int used_space = (fifo_ptr->dma_fifo.va_tail >= fifo_ptr->dma_fifo.va_head) */
+/*                  ? ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) */
+/*                  : (fifo_ptr->dma_fifo.fifo_size + ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) ) */
+/*                  ; */
+/*                  reception_fifo_histogram[fls(used_space)] += 1 ; */
+      }
+#endif
+      while ( ( num_packets_in_fifo < num_packets ) &&
+	      ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) )
+	{
+	  DMA_RecFifoGetAddresses( fifo_ptr,
+				   &io_vec ); /* Get the payload pointer(s)   */
+      	                                      /* for the packet at the head   */
+                                              /* of the fifo.                 */
+
+	  packet_ptr = (DMA_PacketHeader_t*)
+	                 fifo_ptr->dma_fifo.va_head; /* Point to packet header*/
+	  /*
+	   * Determine the receive function to call.  Index into
+	   * recvFunctions array is in the packet header.
+	   */
+	  recv_func_id  = packet_ptr->Func_Id;
+	  recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+	  if ( recv_func_ptr != NULL )
+	    {
+	      recv_func_parm =
+		DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+	    }
+	  else
+	    {
+	      recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
+	      recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+	    }
+	  /*
+	   * Use a temporary copy of the packet, when the payload
+	   * wraps.
+	   */
+	  if ( io_vec.num_segments > 1 )
+	    {
+	      /* Copy packet header and first payload segment */
+	      load_ptr  = (char*)packet_ptr;
+	      store_ptr = temp_packet;
+	      num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4;
+	      while ( num_quads > 0 )
+		{
+			 /*  Don't bother doing this via the double hummer; wraps happen only 'occasionally', and using it would require the caller to enable floating-point. */
+			quadcpy(store_ptr,load_ptr) ;
+/* 		  _bgp_QuadLoad ( load_ptr,     0 ); */
+/* 		  _bgp_QuadStore( store_ptr,    0 ); */
+		  load_ptr  += 16;
+		  store_ptr += 16;
+		  num_quads--;
+		}
+	      /* Copy second payload segment */
+	      load_ptr  = (char*)io_vec.payload_ptr[1];
+	      num_quads = io_vec.num_bytes[1] >> 4;
+	      while ( num_quads > 0 )
+		{
+			quadcpy(store_ptr,load_ptr) ;
+/* 		  _bgp_QuadLoad ( load_ptr,     0 ); */
+/* 		  _bgp_QuadStore( store_ptr,    0 ); */
+		  load_ptr  += 16;
+		  store_ptr += 16;
+		  num_quads --;
+		}
+	      recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t);
+	      recv_func_packet  = temp_packet;
+
+	    } /* End: Set up temporary copy of split packet */
+
+	  else /* Set up for contiguous packet */
+	    {
+	      recv_func_payload = (char*)packet_ptr +
+		sizeof(DMA_PacketHeader_t);
+	      recv_func_packet  = packet_ptr;
+	    }
+
+	  /* Call the receive function */
+          if( recv_func_ptr )
+            {
+/*               dumpmem(recv_func_packet-32, 128, "Software FIFO around call") ; */
+              rc = (*recv_func_ptr)(fifo_ptr,
+                                    recv_func_packet,
+                                    recv_func_parm,
+                                    recv_func_payload,
+                                    io_vec.num_bytes[0]+io_vec.num_bytes[1]);
+            }
+          else
+            {
+              printk(KERN_ERR "DMA_RecFifoPollNormalFifoById recv_func_ptr was NULL recv_func_id=%02x fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+                  recv_func_id,fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ;
+              if( dumpmem_count < 10 )
+                {
+                  dumpmem(recv_func_packet-256, 512, "Software FIFO around misread") ;
+                  dumpmem_count += 1 ;
+                }
+/*               show_tlbs((unsigned int) recv_func_packet) ; */
+/*               (void)dma_map_single(NULL,recv_func_packet-32, 128,DMA_FROM_DEVICE) ; */
+/*               dumpmem(recv_func_packet-32, 128, "Software FIFO around misread after cache discard") ; */
+            }
+
+	  /* Increment the head by the size of the packet */
+	  DMA_RecFifoIncrementHead(fifo_ptr,
+				   (io_vec.num_bytes[0]+
+				    io_vec.num_bytes[1] +
+				    sizeof(DMA_PacketHeader_t))>> 4);
+
+	  num_processed++;
+
+	  if ( rc != 0 ) /* Did receive function fail? */
+	    {
+	      /* Clear the threshold crossed condition, in case we have gone below
+	       * the threshold.
+	       */
+	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+						   _BN(fifo_ptr->global_fifo_id),
+						   0 );
+	      return (rc); /* Yes...return that return code */
+	    }
+
+	  if ( num_processed >= num_packets ) /* Got what they wanted? */
+	    {
+	      /* Clear the threshold crossed condition, in case we have gone below
+	       * the threshold.
+	       */
+	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+						   _BN(fifo_ptr->global_fifo_id),
+						   0 );
+	      return (num_processed); /* Yes...all done */
+	    }
+
+	  num_packets_in_fifo++;
+
+	} /* End: Process up to packets_per_fifo packets in this fifo */
+
+    } /* End: Keep looping through the fifo. */
+
+} /* End: DMA_RecFifoPollNormalFifoById() */
+
+
+
+
+/*!
+ *
+ * \brief Prime Receive Function Cache for Polling Function
+ *
+ * The reception fifo poll functions maintain a simple cache of information
+ * about the last receive function called.  This function is called to return
+ * that information for a given function ID.
+ *
+ * \param [in]   recv_func_id    The function ID whose receive function info
+ *                               is to be returned.
+ * \param [out]  recv_func_ptr   Pointer to the receive function's address,
+ *                               returned by this function.
+ * \param [out]  recv_func_parm  Pointer to the receive function's parameter.
+ *
+ * \return The information (function pointer and function parameter) for the
+ *         specified receive function is returned as described.
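+ *
+ * \par Example
+ * A minimal sketch of how a poll loop might use this cache; cached_id,
+ * recvFunc, and recvParm are hypothetical local names, and -1 marks the
+ * cache as not yet primed:
+ * \code
+ *   DMA_RecFifoRecvFunction_t  recvFunc  = NULL;
+ *   void                      *recvParm  = NULL;
+ *   int                        cached_id = -1;
+ *
+ *   if ( packet_ptr->Func_Id != cached_id ) {
+ *     cached_id = packet_ptr->Func_Id;
+ *     DMA_RecFifoPollPrimeRecvFuncCache( cached_id,
+ *                                        &recvFunc,
+ *                                        &recvParm );
+ *   }
+ * \endcode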
+ */
+inline
+void DMA_RecFifoPollPrimeRecvFuncCache( int                         recv_func_id,
+					DMA_RecFifoRecvFunction_t  *recv_func_ptr,
+					void                      **recv_func_parm )
+{
+  DMA_RecFifoRecvFunction_t  local_recv_func_ptr;
+  void                      *local_recv_func_parm;
+
+  local_recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+  if ( local_recv_func_ptr != NULL ) {
+    local_recv_func_parm =
+      DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+  }
+  else {
+    local_recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
+    local_recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+  }
+  *recv_func_ptr = local_recv_func_ptr;
+  *recv_func_parm = local_recv_func_parm;
+
+} /* End: DMA_RecFifoPollPrimeRecvFuncCache() */
+
+
+
+
+/*!
+ *
+ * \brief Process a Wrap of a Reception Fifo While Polling
+ *
+ * This function is meant to be called by a polling function that has processed
+ * packets in a reception fifo such that there are just a few left to be
+ * processed before it hits the end of the fifo and wraps.  This function
+ * processes those packets at the end of the fifo until the wrap occurs,
+ * and then returns, leaving the rest of the packets in the fifo to be
+ * processed by the calling function.
+ *
+ * \param[in]      rec_fifo_ptr             Pointer to reception fifo
+ * \param[in,out]  va_head                  Pointer to the fifo's virtual address
+ *                                          head.  Updated by this function.
+ * \param[in]      va_tail                  The fifo's virtual address tail,
+ *                                          as snapshotted by the caller.
+ * \param[in,out]  num_processed            Pointer to the number of packets
+ *                                          processed by the calling poll
+ *                                          function.  Updated by this function.
+ * \param[in,out]  num_processed_in_fifo    Pointer to the number of packets
+ *                                          in this particular fifo processed
+ *                                          by the calling poll function.
+ *                                          Updated by this function.
+ * \param[in]      max_num_packets          The max number of packets that can be
+ *                                          processed before the poll function
+ *                                          must return.
+ * \param[in]      max_num_packets_in_fifo  The max number of packets that can be
+ *                                          processed in this fifo.
+ *
+ * \retval  0                     Processing completed successfully.  Output
+ *                                parameters have been updated as described.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
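+ *
+ * \par Example
+ * A minimal sketch of a caller that imposes no packet limits, mirroring
+ * the simple poll functions later in this file:
+ * \code
+ *   if ( ( rc == 0 ) && wrap ) {
+ *     rc = DMA_RecFifoPollProcessWrap( rec_fifo_ptr,
+ *                                      &va_head,
+ *                                      va_tail,
+ *                                      &num_processed,
+ *                                      &num_processed_in_fifo,
+ *                                      0x7FFFFFFF,
+ *                                      0x7FFFFFFF );
+ *     wrap = 0;
+ *   }
+ * \endcode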
+ */
+
+int DMA_RecFifoPollProcessWrap ( DMA_RecFifo_t  *rec_fifo_ptr,
+				 void          **va_head,
+				 void           *va_tail,
+				 int            *num_processed,
+				 int            *num_processed_in_fifo,
+				 int             max_num_packets,
+				 int             max_num_packets_in_fifo) {
+  int                 rc = 0;
+  DMA_PacketIovec_t   io_vec;              /* Payload I/O vector              */
+  DMA_PacketHeader_t *packet_ptr;          /* Pointer to packet header        */
+  DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function     */
+  void                     *recv_func_parm;/* Receive function parameter      */
+  int                       recv_func_id;  /* Function ID from the packet     */
+                                           /* header.                         */
+  void                  *recv_func_payload;/* Pointer to recv func payload    */
+  void                  *recv_func_packet; /* Pointer to recv func packet     */
+  char temp_packet[256] ALIGN_QUADWORD;    /* Temporary packet copy.          */
+                                           /* Align for efficient copying.    */
+  char *load_ptr, *store_ptr;              /* Used for copying bytes          */
+  int num_quads;                           /* Number of quads to copy         */
+
+  while ( rc == 0 ) { /* Loop while things are good until we exit after       */
+                      /* processing the wrap.                                 */
+
+    DMA_RecFifoGetAddresses( rec_fifo_ptr,
+			     &io_vec ); /* Get the payload pointer(s)         */
+    	                                /* for the packet at the head         */
+                                        /* of the fifo.                       */
+
+    packet_ptr = (DMA_PacketHeader_t*)
+                   rec_fifo_ptr->dma_fifo.va_head; /* Point to packet header  */
+
+    /*
+     * Determine the receive function to call.  Index into
+     * recvFunctions array is in the packet header.
+     */
+    recv_func_id  = packet_ptr->Func_Id;
+    recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+    if ( recv_func_ptr != NULL )
+      {
+	recv_func_parm =
+	  DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+      }
+    else
+      {
+	recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
+	recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+      }
+    /*
+     * Use a temporary copy of the packet, when the payload
+     * wraps.
+     */
+    if ( io_vec.num_segments > 1 )
+      {
+	/* Copy packet header and first payload segment */
+	load_ptr  = (char*)packet_ptr;
+	store_ptr = temp_packet;
+	num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4;
+	while ( num_quads > 0 )
+	  {
+	    /* Copy via quadcpy, as above, so the caller need not enable
+	     * floating-point for double-hummer loads. */
+	    quadcpy(store_ptr,load_ptr) ;
+	    load_ptr  += 16;
+	    store_ptr += 16;
+	    num_quads --;
+	  }
+	/* Copy second payload segment */
+	load_ptr  = (char*)io_vec.payload_ptr[1];
+	num_quads = io_vec.num_bytes[1] >> 4;
+	while ( num_quads > 0 )
+	  {
+	    quadcpy(store_ptr,load_ptr) ;
+	    load_ptr  += 16;
+	    store_ptr += 16;
+	    num_quads --;
+	  }
+	recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t);
+	recv_func_packet  = temp_packet;
+
+      } /* End: Set up temporary copy of split packet */
+
+    else /* Set up for contiguous packet */
+      {
+	recv_func_payload = (char*)packet_ptr +
+	                       sizeof(DMA_PacketHeader_t);
+	recv_func_packet  = packet_ptr;
+      }
+
+    /* Call the receive function */
+    if( recv_func_ptr)
+      {
+    rc = (*recv_func_ptr)(rec_fifo_ptr,
+			  recv_func_packet,
+			  recv_func_parm,
+			  recv_func_payload,
+			  io_vec.num_bytes[0]+io_vec.num_bytes[1]);
+      }
+    else
+      {
+        printk(KERN_ERR "DMA_RecFifoPollProcessWrap recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+            recv_func_id,rec_fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ;
+
+      }
+
+    /* Increment the head by the size of the packet */
+    DMA_RecFifoIncrementHead(rec_fifo_ptr,
+			     (io_vec.num_bytes[0]+
+			      io_vec.num_bytes[1] +
+			      sizeof(DMA_PacketHeader_t))>> 4);
+    *va_head = rec_fifo_ptr->dma_fifo.va_head; /* Refresh caller's head */
+
+    (*num_processed)++;
+    (*num_processed_in_fifo)++;
+
+#ifdef DEBUG_PRINT
+    printf("PollWrap: num_processed=%d, va_head=0x%08x, Part1Len=%d, Part2Len=%d, Part1Ptr=0x%08x, Part2Ptr=0x%08x\n",*num_processed,(unsigned)*va_head,io_vec.num_bytes[0],io_vec.num_bytes[1],(unsigned)io_vec.payload_ptr[0],(unsigned)io_vec.payload_ptr[1]);
+#endif
+
+    if ( ( (unsigned)*va_head < (unsigned)packet_ptr ) || /* Did we wrap? */
+	 ( *num_processed >= max_num_packets ) || /* Got enough packets? */
+	 ( *num_processed_in_fifo > max_num_packets_in_fifo ) ) /* Got enough */
+                                                  /* packets for this fifo?   */
+      {
+	break;
+      }
+
+  } /* End: Keep looping through the fifo. */
+
+  return(rc);
+
+} /* End: DMA_RecFifoPollProcessWrap() */
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1.  All packets in all of the fifos have been received.
+ * 2.  A receive function returns a non-zero value.
+ * 3.  The last packet removed from a fifo has an invalid registration id.  The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ * 4.  There have been fruitfulPollLimit polls attempted (summed across all
+ *     fifos).
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group.  If a fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed.  When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur until all
+ * fifos are empty or fruitfulPollLimit polls have been completed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed.  This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  fg_ptr             Pointer to the fifo group.
+ * \param[in]  fruitfulPollLimit  The limit on the number of fruitful polls that
+ *                                will be attempted (summed across all fifos).
+ *                                If the limit is reached, this function
+ *                                returns.  A value of zero means there is no
+ *                                limit imposed.  A fruitful poll is one where
+ *                                at least one packet has arrived in the fifo
+ *                                since the last poll.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ *
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
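+ * \par Example
+ * A minimal sketch of registering a receive function and then polling.
+ * The names my_recv and my_parm, and the flag arguments to the
+ * registration call, are illustrative assumptions, not taken from this
+ * file:
+ * \code
+ *   static int my_parm;
+ *
+ *   int my_recv( DMA_RecFifo_t      *f_ptr,
+ *                DMA_PacketHeader_t *packet_ptr,
+ *                void               *recv_func_parm,
+ *                char               *payload_ptr,
+ *                int                 payload_bytes )
+ *   {
+ *     return 0;
+ *   }
+ *
+ *   int func_id = DMA_RecFifoRegisterRecvFunction( my_recv, &my_parm, 0, 0 );
+ *   int n       = DMA_RecFifoSimplePollNormalFifos( fg_ptr, 16 );
+ * \endcode
+ *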
+ */
+int DMA_RecFifoSimplePollNormalFifos( DMA_RecFifoGroup_t *fg_ptr,
+				      int                 fruitfulPollLimit)
+{
+  int rc = 0;                              /* Return code from recv_func.     */
+  int num_processed = 0;                   /* Number of packets processed     */
+  int num_processed_in_fifo = 0;           /* Not used, but needed for calling*/
+                                           /* wrap function.                  */
+  int fruitfulPollCount;                   /* Number of fruitful polls.       */
+
+  /*
+   * The following is actually a cache of the last receive function called.
+   * We cache it so we don't need to keep looking up the receive function
+   * info on each packet.
+   */
+  DMA_RecFifoRecvFunction_t recv_func_ptr=NULL;  /* Pointer to receive function*/
+  void                     *recv_func_parm=NULL;/* Receive function parameter */
+  int                       recv_func_id=-1;  /* Function ID from the packet  */
+                                              /* header. Init to -1 means     */
+                                              /* recv_func_ptr and            */
+                                              /* recv_func_parm do not cache  */
+                                              /* the previous packet values.  */
+
+  DMA_PacketHeader_t *packet_ptr;          /* Pointer to packet header        */
+  unsigned int        packet_bytes;        /* Number of bytes in the packet.  */
+  unsigned int        wrap;                /* 1: A wrap of the fifo is going  */
+                                           /*    to occur.                    */
+                                           /* 0: No wrap is going to occur.   */
+
+  /*
+   * Processing of packets occurs in the fifo in three phases:
+   * Normal Phase 1   :  Packets before the wrap.
+   * Handle Wrap Phase:  Packets during the wrap.
+   * Normal Phase 2   :  Packets after the wrap.
+   */
+  void *va_logical_tail;                   /* The point beyond which normal   */
+                                           /* processing of packets ends.     */
+  void *va_starting_head;                  /* Pointer to the first packet in  */
+                                           /* a contiguous group extracted    */
+                                           /* from the fifo.                  */
+  void *va_nextHead;                       /* Pointer to the next packet to   */
+                                           /* be processed.                   */
+  void *va_tail;                           /* Snapshot of the fifo's tail.    */
+  unsigned int num_packets_processed_since_moving_fifo_head;
+                                           /* Tells us when we should move    */
+                                           /* the hardware head.              */
+
+  /*
+   * Control variables for looping through the fifos
+   */
+  int fifo_index=0;                        /* Index of fifo being processed.  */
+                                           /* Start with first fifo.          */
+  unsigned int fifo_bit_number;            /* The bit number of the fifo      */
+                                           /* being processed.  Group0: 0-7,  */
+                                           /* Group1: 8-15, Group2: 16-23,    */
+                                           /* Group3: 24-31.  Corresponds to  */
+                                           /* the DMA not-empty status bits.  */
+  int num_fifos_in_group;                  /* Number of fifos in this group.  */
+  int num_packets_in_fifo;                 /* Count of packets processed in a */
+					   /* fifo.                           */
+  unsigned int not_empty_status=0;         /* Snapshot of the not empty status*/
+                                           /* for this group.  0 indicates    */
+                                           /* that no snapshot has occurred   */
+                                           /* yet.                            */
+  DMA_RecFifo_t *rec_fifo_ptr;             /* Pointer to reception fifo being */
+                                           /* processed.                      */
+
+
+  SPI_assert( fg_ptr != NULL );
+
+  num_fifos_in_group = fg_ptr->num_normal_fifos;
+
+  /*
+   * Start the fruitful poll count at the max.
+   * For unlimited, set to a very high value.
+   */
+  fruitfulPollCount = (fruitfulPollLimit == 0) ? 0x7FFFFFFF : fruitfulPollLimit;
+
+  /*
+   * Circularly loop through the not-empty fifos in the fifo group.
+   * Keep going until one of the termination conditions documented in the
+   * prolog occurs.
+   *
+   */
+  for (;;) {
+    /*
+     * Find the next fifo to process.
+     */
+    rec_fifo_ptr    = &fg_ptr->fifos[fifo_index]; /* This is the fifo itself*/
+    fifo_bit_number = _BN(rec_fifo_ptr->global_fifo_id);/* fifo's status bit*/
+
+    fifo_index = DMA_RecFifoGetNextFifo(fg_ptr,
+					fifo_index,
+					&fifo_bit_number,
+					0, /*  num_empty_passes */
+					0, /*  not_empty_poll_delay */
+					&not_empty_status);
+    if (fifo_index < 0) { /* No more packets to process? */
+#if defined(BGP_DD1_WORKAROUNDS)
+      /*
+       *
+       * If there are no more non-empty fifos, count the number of consecutive
+       * times the poll function came up dry (num_processed == 0), and if it
+       * exceeds a threshold, issue a system call to clear the rDMA's "full
+       * reception fifo" condition so it begins to receive packets again.
+       *
+       * When a non-empty fifo is returned, its shadow va_tail pointer has been
+       * updated to reflect the amount of packet data in the fifo.
+       */
+      if (num_processed > 0) { /* Did we process at least 1 packet? */
+	NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+	                               /* likely not encountered a fifo full */
+ 	                               /* condition and stopped.  Reset the  */
+    	                               /* fifo counter so we will start      */
+                                       /* tracking empty calls to poll.      */
+      }
+      else {
+	if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+	     (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+	   /*  printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+	  rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+                                                /* reception fifos filled and the */
+                                                /* DMA has stopped.               */
+	   /*  printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+	  NumEmptyPollFunctionCalls = -1; /* The DMA is active.  Reset the    */
+                                          /* fill-fifo counter.               */
+	}
+      }
+#endif
+       /* 	printf("Poll: returned %d processed\n",num_processed); */
+      return (num_processed);
+    }
+
+    num_packets_in_fifo = 0;
+
+    /*
+     * Establish pointers to the reception fifo and the DMA fifo.
+     * Snapshot the hardware head and tail pointers...they may change while we
+     * are running.  We will snapshot the tail again after processing everything
+     * up to this snapshot, until the fifo is empty (head == tail).
+     */
+    rec_fifo_ptr = &(fg_ptr->fifos[fifo_index]);
+    DMA_Fifo_t    *fifo_ptr     = &(rec_fifo_ptr->dma_fifo);
+    void          *va_head      = fifo_ptr->va_head;
+    va_tail      = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+                                                                 /* tail.       */
+    num_packets_processed_since_moving_fifo_head =
+      rec_fifo_ptr->num_packets_processed_since_moving_fifo_head; /* Fetch      */
+                                                              /* for later use. */
+
+#if defined(CONFIG_BGP_STATISTICS)
+      {
+      /* Used space in 16-byte fifo units; head > tail means the data wraps. */
+      unsigned int used_space = (fifo_ptr->va_tail >= fifo_ptr->va_head)
+                 ? ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 )
+                 : (fifo_ptr->fifo_size - ( ((unsigned)(fifo_ptr->va_head) - (unsigned)(fifo_ptr->va_tail)) >> 4 ) )
+                 ;
+                 reception_fifo_histogram[fls(used_space)] += 1 ;
+      }
+#endif
+    /*
+     * Loop processing packets until the fifo is empty or until the fruitful poll
+     * limit is reached.
+     * At the top of the loop, we have a new snapshot of the tail, so something
+     * may have appeared in the fifo.
+     */
+    while ( ( rc == 0 ) &&
+	    ( va_tail != va_head ) &&
+	    ( fruitfulPollCount > 0) ) { /* Is there something in this fifo?  */
+                                         /* Yes...                            */
+      fruitfulPollCount--; /* Count the polls */
+
+      /*
+       * MSYNC before we look at the data in the fifo to ensure that snoops
+       * issued by the DMA have completed.  This ensures the L1 cache
+       * invalidations have completed so we don't look at stale data.
+       */
+      _bgp_msync();
+
+      /*
+       * Touch the first packet right away so it is loaded into the memory
+       * cache before we try to use it.
+       */
+      _bgp_dcache_touch_line( va_head );
+
+      /*
+       * Prepare to split up the processing between "normal" and "handleWrap".
+       * Establish a "logicalTail" which is the point beyond which "normal"
+       * processing changes to "handleWrap" processing.
+       */
+      if ( va_head < va_tail ) { /* No wrap will occur? */
+	wrap            = 0;
+	va_logical_tail = va_tail; /* Logical tail is the physical tail */
+      }
+      else { /* Wrap will occur.  Logical tail is 256 bytes before the end
+	      * of the fifo.  We need to stop normal phase 1 there because
+	      * that is the first point at which the next packet could wrap.
+	      */
+	wrap             = 1;
+	va_logical_tail  = (void*)( ((unsigned)fifo_ptr->va_end) - 256 );
+      }
+
+      /* Loop processing packets until we hit our tail snapshot */
+      while ( ( rc == 0 ) &&
+	      ( va_head != va_tail ) ) {
+	/*
+	 * Process packets that do not wrap.  This is everything up to the
+	 * logical tail.  This gets executed both before and after wrapping.
+	 * This is normal phase 1 and normal phase 2.
+	 */
+	va_starting_head = va_head;
+
+	while ( ( rc == 0 ) &&
+		( va_head  < va_logical_tail ) ) {
+
+	  packet_ptr    = (DMA_PacketHeader_t*)va_head;
+	  packet_bytes  = (packet_ptr->Chunks + 1) << 5;
+
+	  /*
+	   * Touch the NEXT packet to ensure it will be in L1 cache when we
+	   * are ready for it on the next iteration.  Even though the packet will
+	   * likely be touched in its entirety by the receive function, and that
+	   * will likely cause the processor to perform prefetching of the next
+	   * packet, bringing in the next packet now has been shown to improve
+	   * bandwidth from 1.41 bytes/cycle to 1.44 bytes/cycle, so we put
+	   * this dcbt here.
+	   */
+	  va_nextHead = (void*) ( (unsigned)va_head + packet_bytes );
+
+	  if ( va_nextHead < va_logical_tail )
+	    _bgp_dcache_touch_line( va_nextHead );
+
+	  /*
+	   * Determine the receive function to call.
+	   * The packet header Func_Id contains the ID of the function to call.
+	   * We cache the previous packet's values because it is likely this
+	   * packet will be the same.  If not, call out of line function to
+	   * re-prime the cache.
+	   */
+	  if ( packet_ptr->Func_Id != recv_func_id ) {
+	    recv_func_id = packet_ptr->Func_Id;
+	    DMA_RecFifoPollPrimeRecvFuncCache( recv_func_id,
+					       &recv_func_ptr,
+					       &recv_func_parm );
+	  }
+
+	  /* Call the receive function, and no matter what happens, increment
+	   * the number of packets processed and move our head snapshot to the
+	   * next packet.
+	   */
+	    if( recv_func_ptr)
+	      {
+	          rc = (*recv_func_ptr)( rec_fifo_ptr,
+	                                 packet_ptr,
+	                                 recv_func_parm,
+	                                 (char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),
+	                                 packet_bytes - sizeof(DMA_PacketHeader_t) );
+	      }
+	    else
+	      {
+	        printk(KERN_ERR "DMA_RecFifoSimplePollNormalFifos recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+	            recv_func_id,rec_fifo_ptr,packet_ptr,recv_func_parm,(char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),packet_bytes - sizeof(DMA_PacketHeader_t)) ;
+
+	      }
+	  num_packets_processed_since_moving_fifo_head++;
+	  num_packets_in_fifo++;
+
+#ifdef DEBUG_PRINT
+	printf("PollNormal: num_packets_in_fifo=%d, va_head=0x%08x, va_tail=0x%08x, va_logical_tail=0x%08x, va_end=0x%08x, willWrap=%d\n",num_packets_in_fifo,(unsigned)va_head,(unsigned)va_tail,(unsigned)va_logical_tail,(unsigned)fifo_ptr->va_end,wrap);
+#endif
+
+  	  va_head = va_nextHead;
+
+	} /* End: Process packets that do not wrap */
+
+	/*
+	 * We are done processing all packets prior to the wrap.
+	 * If the shadow va_head is not in sync with the hardware head, or if
+	 * we are going to wrap, sync up the hardware head and recalculate the
+	 * free space.  The movement of the head causes the fifo's free space
+	 * to be recalculated.
+	 *
+	 * The wrap function requires that the shadow and hardware heads be in
+	 * sync.  If we are not wrapping, we condition the syncing of the heads
+	 * on whether we have exceeded our limit on the number of packets we
+	 * processed in a fifo since the last time we moved the
+	 * hardware head.  If we have only processed a few packets, we just
+	 * leave the hardware head where it is and don't incur the expense of
+	 * moving the hardware head.  If we have processed at least our limit
+	 * of packets, then it is good to move the hardware head.
+	 */
+	if ( ( num_packets_processed_since_moving_fifo_head >
+	       DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD ) ||
+	     ( wrap ) ) {
+
+	  DMA_FifoSetHead( fifo_ptr, va_head );
+
+	  num_packets_processed_since_moving_fifo_head = 0;
+	}
+
+	/*
+	 * If we are anticipating a wrap, go handle the wrap.
+	 */
+	if ( ( rc == 0 ) && wrap ) {
+	  /*
+	   * Handle the wrapping of the fifo.  This requires extra checking
+	   * and moving of the head, and thus is in its own function.
+	   * It is a generic function, used by other poll functions.  Some of
+	   * these other poll functions have the ability to quit processing
+	   * packets when a specified limit is reached overall, or per fifo.
+	   * That is what the last two parameters specify.  For this poll
+	   * function, we don't have any limit...we process packets until the
+	   * fifo is empty, so we pass in large unreachable limits.
+	   */
+	  rc = DMA_RecFifoPollProcessWrap (
+			   rec_fifo_ptr,
+			   &va_head,
+			   va_tail,
+			   &num_processed,
+			   &num_processed_in_fifo,
+			   0x7FFFFFFF, /* Infinite packet limit, overall */
+			   0x7FFFFFFF);/* Infinite packet limit per fifo */
+
+	  va_logical_tail = va_tail;     /* Set to actual tail now.        */
+	  wrap = 0;     /* Next time around, don't do wrap processing.     */
+	}
+
+      } /* End: Process packets until we hit our snapshotted tail */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+      NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+                                     /* likely not encountered a fifo full */
+	                             /* condition and stopped.  Reset the  */
+	                             /* fifo counter so we will start      */
+	                             /* tracking empty calls to poll.      */
+#endif
+
+      va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+                                                              /* tail again. */
+
+    } /* End: Loop while there is something in the fifo */
+
+    /*
+     * The fifo is now empty.  If we have processed at least one packet,
+     * return the number, or if the receive function returned an error,
+     * return that return code.
+     */
+    if ( num_packets_in_fifo > 0 ) {
+      /* Store in the fifo structure the number of packets processed since
+       * last moving the hardware head, and the current head */
+      rec_fifo_ptr->num_packets_processed_since_moving_fifo_head =
+	num_packets_processed_since_moving_fifo_head;
+      fifo_ptr->va_head = va_head;
+      num_processed += num_packets_in_fifo;
+      /* Clear the threshold crossed condition, in case we have gone below
+       * the threshold.
+       */
+      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+					   _BN(rec_fifo_ptr->global_fifo_id),
+					   0 );
+
+      /* If the receive function returned an error, exit with that error now */
+      if ( rc )	return (rc);
+    }
+    /*
+     * We exited the loop processing the fifo_index fifo.
+     * - If we exited because the fifo was empty according to our snapshot
+     *   of the fifo's tail (head == tail snapshot), we want to turn off this
+     *   fifo's not-empty status in our shadow copy of the status so we
+     *   process all of the other fifos before re-fetching the true status and
+     *   tail for this fifo, giving this fifo another chance.
+     */
+    not_empty_status &= ~(fifo_bit_number);
+
+#ifdef DEBUG_PRINT
+    printf("PollNormal: Turning off status bit 0x%08x, status=0x%08x\n",fifo_bit_number,not_empty_status);
+#endif
+
+    /* Bump to next fifo */
+    fifo_index = (fifo_index+1) % num_fifos_in_group;
+
+  } /* End: for loop processing reception fifos */
+
+} /* End: DMA_RecFifoSimplePollNormalFifos() */
+
+
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1.  All packets in the fifo have been received.
+ * 2.  The specified fifo is empty.
+ * 3.  A receive function returns a non-zero value.
+ * 4.  The last packet removed from the fifo has an invalid registration id. The
+ *     error receive function will have been called, but polling ends.
+ *     The invalid packet is counted as a processed packet, and the return
+ *     code from the error receive function is returned.
+ * 5.  There have been fruitfulPollLimit polls attempted.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in the fifo (emptying that fifo),
+ * or the fruitfulPollLimit has been reached, the function returns the number
+ * of packets processed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed.  This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function has
+ * to copy it to some other location.  The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in]  fifo_id           The ID of the fifo to be polled.
+ *                               (0 through
+ *                               DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ * \param[in]  fruitfulPollLimit  The limit on the number of fruitful polls that
+ *                                will be attempted.
+ *                                If the limit is reached, this function
+ *                                returns.  A value of zero means there is no
+ *                                limit imposed.  A fruitful poll is one where
+ *                                at least one packet has arrived in the fifo
+ *                                since the last poll.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
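+ * \par Example
+ * A minimal sketch, assuming receive functions have already been
+ * registered as described above; total is a hypothetical running counter:
+ * \code
+ *   int n = DMA_RecFifoSimplePollNormalFifoById( 0, fg_ptr, 16 );
+ *   if ( n >= 0 )
+ *     total += n;
+ * \endcode
+ *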
+ */
+int DMA_RecFifoSimplePollNormalFifoById( int                 fifo_id,
+					 DMA_RecFifoGroup_t *fg_ptr,
+					 int                 fruitfulPollLimit
+				       )
+{
+  int rc = 0;                              /* Return code from recv_func.     */
+  int num_processed = 0;                   /* Number of packets processed     */
+  int num_processed_in_fifo = 0;           /* Not used, but needed for calling*/
+                                           /* wrap function.                  */
+  int fruitfulPollCount;                   /* Number of fruitful polls.       */
+
+  /*
+   * The following is actually a cache of the last receive function called.
+   * We cache it so we don't need to keep looking up the receive function
+   * info on each packet.
+   */
+  DMA_RecFifoRecvFunction_t recv_func_ptr=NULL; /* Pointer to receive function*/
+  void                     *recv_func_parm=NULL;/* Receive function parameter */
+  int                       recv_func_id=-1;  /* Function ID from the packet  */
+                                              /* header. Init to -1 means     */
+                                              /* recv_func_ptr and            */
+                                              /* recv_func_parm do not cache  */
+                                              /* the previous packet values.  */
+
+  DMA_PacketHeader_t *packet_ptr;          /* Pointer to packet header        */
+  unsigned int        packet_bytes;        /* Number of bytes in the packet.  */
+  unsigned int        wrap;                /* 1: A wrap of the fifo is going  */
+                                           /*    to occur.                    */
+                                           /* 0: No wrap is going to occur.   */
+
+  /*
+   * Processing of packets occurs in the fifo in three phases:
+   * Normal Phase 1   :  Packets before the wrap.
+   * Handle Wrap Phase:  Packets during the wrap.
+   * Normal Phase 2   :  Packets after the wrap.
+   */
+  void *va_logical_tail;                   /* The point beyond which normal   */
+                                           /* processing of packets ends.     */
+  void *va_starting_head;                  /* Pointer to the first packet in  */
+                                           /* a contiguous group extracted    */
+                                           /* from the fifo.                  */
+  void *va_nextHead;                       /* Pointer to the next packet to   */
+                                           /* be processed.                   */
+  void *va_tail;                           /* Snapshot of the fifo's tail.    */
+  unsigned int num_packets_processed_since_moving_fifo_head;
+                                           /* Tells us when we should move    */
+                                           /* the hardware head.              */
+
+  SPI_assert( fg_ptr           != NULL );
+  SPI_assert( ( fifo_id >= 0 ) &&
+	      ( fifo_id <  DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP ) );
+  /*
+   * Start the fruitful poll count at the max.
+   * For unlimited, set to a very high value.
+   */
+  fruitfulPollCount = (fruitfulPollLimit == 0) ? 0x7FFFFFFF : fruitfulPollLimit;
+
+  /*
+   * Establish pointers to the reception fifo and the DMA fifo.
+   * Snapshot the hardware head and tail pointers...they may change while we
+   * are running.  We will snapshot the tail again after processing everything
+   * up to this snapshot, until the fifo is empty (head == tail).
+   */
+  DMA_RecFifo_t *rec_fifo_ptr = &(fg_ptr->fifos[fifo_id]);
+  DMA_Fifo_t    *fifo_ptr     = &(rec_fifo_ptr->dma_fifo);
+  void          *va_head      = fifo_ptr->va_head;
+  va_tail      = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+                                                               /* tail.       */
+  num_packets_processed_since_moving_fifo_head =
+      rec_fifo_ptr->num_packets_processed_since_moving_fifo_head; /* Fetch    */
+                                                            /* for later use. */
+
+#if defined(CONFIG_BGP_STATISTICS)
+      {
+      /* Used space in 16-byte fifo units; head > tail means the data wraps. */
+      unsigned int used_space = (fifo_ptr->va_tail >= fifo_ptr->va_head)
+                 ? ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 )
+                 : (fifo_ptr->fifo_size - ( ((unsigned)(fifo_ptr->va_head) - (unsigned)(fifo_ptr->va_tail)) >> 4 ) )
+                 ;
+                 reception_fifo_histogram[fls(used_space)] += 1 ;
+      }
+#endif
+  /*
+   * Loop processing packets until the fifo is empty or the fruitfulPollLimit
+   * has been reached.
+   * At the top of the loop, we have a new snapshot of the tail, so something
+   * may have appeared in the fifo.
+   */
+  while ( ( rc == 0 ) &&
+	  ( va_tail != va_head ) &&
+	  ( fruitfulPollCount > 0 ) ) { /* Is there something in this fifo?   */
+                                        /* Yes...                             */
+    fruitfulPollCount--; /* Count the polls */
+
+    /*
+     * MSYNC before we look at the data in the fifo to ensure that snoops
+     * issued by the DMA have completed.  This ensures the L1 cache
+     * invalidations have completed so we don't look at stale data.
+     */
+    _bgp_msync();
+
+    /*
+     * Touch the first packet right away so it is loaded into the memory
+     * cache before we try to use it.
+     */
+    _bgp_dcache_touch_line( va_head );
+
+    /*
+     * Prepare to split up the processing between "normal" and "handleWrap".
+     * Establish a "logicalTail" which is the point beyond which "normal"
+     * processing changes to "handleWrap" processing.
+     */
+    if ( va_head < va_tail ) { /* No wrap will occur? */
+      wrap            = 0;
+      va_logical_tail = va_tail; /* Logical tail is the physical tail */
+    }
+    else { /* Wrap will occur.  Logical tail is 256 bytes before the end
+	    * of the fifo.  We need to stop normal phase 1 there because
+	    * that is the first point at which the next packet could wrap.
+	    */
+      wrap             = 1;
+      va_logical_tail  = (void*)( ((unsigned)fifo_ptr->va_end) - 256 );
+    }
+
+    /* Loop processing packets until we hit our tail snapshot */
+    while ( ( rc == 0 ) &&
+	    ( va_head != va_tail ) ) {
+      /*
+       * Process packets that do not wrap.  This is everything up to the
+       * logical tail.  This gets executed both before and after wrapping.
+       * This is normal phase 1 and normal phase 2.
+       */
+      va_starting_head = va_head;
+
+      while ( ( rc == 0 ) &&
+	      ( va_head  < va_logical_tail ) ) {
+
+	packet_ptr    = (DMA_PacketHeader_t*)va_head;
+	packet_bytes  = (packet_ptr->Chunks + 1) << 5;
+
+	/*
+	 * Touch the NEXT packet to ensure it will be in L1 cache when we
+	 * are ready for it on the next iteration.  Even though the packet will
+	 * likely be touched in its entirety by the receive function, and that
+	 * will likely cause the processor to perform prefetching of the next
+	 * packet, bringing in the next packet now has been shown to improve
+	 * bandwidth from 1.41 bytes/cycle to 1.44 bytes/cycle, so we put
+	 * this dcbt here.
+	 */
+	va_nextHead = (void*) ( (unsigned)va_head + packet_bytes );
+
+	if ( va_nextHead < va_logical_tail )
+	  _bgp_dcache_touch_line( va_nextHead );
+
+	/*
+	 * Determine the receive function to call.
+	 * The packet header Func_Id contains the ID of the function to call.
+	 * We cache the previous packet's values because it is likely this
+	 * packet will be the same.  If not, call out of line function to
+	 * re-prime the cache.
+	 */
+	if ( packet_ptr->Func_Id != recv_func_id ) {
+	  recv_func_id = packet_ptr->Func_Id;
+	  DMA_RecFifoPollPrimeRecvFuncCache( recv_func_id,
+					     &recv_func_ptr,
+					     &recv_func_parm );
+	}
+
+	/* Call the receive function, and no matter what happens, increment
+	 * the number of packets processed and move our head snapshot to the
+	 * next packet.
+	 */
+	SPI_assert ( recv_func_ptr != NULL );
+
+        if( recv_func_ptr)
+          {
+            rc = (*recv_func_ptr)( rec_fifo_ptr,
+                                   packet_ptr,
+                                   recv_func_parm,
+                                   (char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),
+                                   packet_bytes - sizeof(DMA_PacketHeader_t) );
+          }
+        else
+          {
+            printk(KERN_ERR "DMA_RecFifoSimplePollNormalFifoById recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+                recv_func_id,rec_fifo_ptr,packet_ptr,recv_func_parm,(char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),packet_bytes - sizeof(DMA_PacketHeader_t)) ;
+
+          }
+	num_processed++;
+	num_packets_processed_since_moving_fifo_head++;
+
+#ifdef DEBUG_PRINT
+	printf("SimplePollById: num_processed=%d, va_head=0x%08x, va_tail=0x%08x, va_logical_tail=0x%08x, va_end=0x%08x, willWrap=%d\n",num_processed,(unsigned)va_head,(unsigned)va_tail,(unsigned)va_logical_tail,(unsigned)fifo_ptr->va_end,wrap);
+#endif
+
+	va_head = va_nextHead;
+
+      } /* End: Process packets that do not wrap */
+
+      /*
+       * We are done processing all packets prior to the wrap.
+       * If the shadow va_head is not in sync with the hardware head, or if
+       * we are going to wrap, sync up the hardware head and recalculate the
+       * free space.  The movement of the head causes the fifo's free space
+       * to be recalculated.
+       *
+       * The wrap function requires that the shadow and hardware heads be in
+       * sync.  If we are not wrapping, we condition the syncing of the heads
+       * on whether we have exceeded our limit on the number of packets we
+       * processed in a fifo since the last time we moved the
+       * hardware head.  If we have only processed a few packets, we just
+       * leave the hardware head where it is and don't incur the expense of
+       * moving the hardware head.  If we have processed at least our limit
+       * of packets, then it is good to move the hardware head.
+       */
+      if ( ( num_packets_processed_since_moving_fifo_head >
+	     DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD ) ||
+	   ( wrap ) ) {
+
+	DMA_FifoSetHead( fifo_ptr, va_head );
+
+	num_packets_processed_since_moving_fifo_head = 0;
+      }
+
+      /*
+       * If we are anticipating a wrap, go handle the wrap.
+       */
+      if ( ( rc == 0 ) && wrap ) {
+	/*
+	 * Handle the wrapping of the fifo.  This requires extra checking
+	 * and moving of the head, and thus is in its own function.
+	 * It is a generic function, used by other poll functions.  Some of
+	 * these other poll functions have the ability to quit processing
+	 * packets when a specified limit is reached overall, or per fifo.
+	 * That is what the last two parameters specify.  For this poll
+	 * function, we don't have any limit...we process packets until the
+	 * fifo is empty, so we pass in large unreachable limits.
+	 */
+	rc = DMA_RecFifoPollProcessWrap (
+			   rec_fifo_ptr,
+			   &va_head,
+			   va_tail,
+			   &num_processed,
+			   &num_processed_in_fifo,
+			   0x7FFFFFFF, /* Infinite packet limit, overall */
+			   0x7FFFFFFF);/* Infinite packet limit per fifo */
+
+	va_logical_tail = va_tail;     /* Set to actual tail now.        */
+	wrap = 0;     /* Next time around, don't do wrap processing.     */
+      }
+
+    } /* End: Process packets until we hit our snapshotted tail */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+    NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+ 	                           /* likely not encountered a fifo full */
+	                           /* condition and stopped.  Reset the  */
+	                           /* fifo counter so we will start      */
+	                           /* tracking empty calls to poll.      */
+#endif
+
+    va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+                                                            /* tail again. */
+
+  } /* End: Loop while there is something in the fifo */
+
+  /*
+   * The fifo is now empty.  If we have processed at least one packet,
+   * return the number, or if the receive function returned an error,
+   * return that return code.
+   * Also, clear the reception fifo threshold crossed interrupt condition.
+   */
+  if ( num_processed > 0 ) {
+    /* Store in the fifo structure the number of packets processed since
+     * last moving the hardware head, and the current head */
+    rec_fifo_ptr->num_packets_processed_since_moving_fifo_head =
+      num_packets_processed_since_moving_fifo_head;
+    fifo_ptr->va_head = va_head;
+    DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+					 _BN(rec_fifo_ptr->global_fifo_id),
+					 0 );
+
+    if ( rc == 0 ) return (num_processed);
+    else return (rc);
+  }
+
+  /*
+   * We didn't process any packets.  This could be because the rDMA has
+   * shut-down (a DD1 hardware behavior) because the reception fifo became full.
+   * We count the number of times we consecutively come up empty, and reactivate
+   * the rDMA via a system call.
+   */
+  else {
+
+#if defined(BGP_DD1_WORKAROUNDS)
+    if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+	 (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+       /*  printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+      rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+                                            /* reception fifos filled and the */
+	                                    /* DMA has stopped.               */
+       /*  printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+      NumEmptyPollFunctionCalls = -1; /* The DMA is active.  Reset the    */
+	                              /* fill-fifo counter.               */
+    }
+#endif
+
+    return (0); /* Return no packets processed */
+  }
+
+} /* End: DMA_RecFifoSimplePollNormalFifoById() */
+
+
+/*!
+ * \brief Poll Header Reception Fifo Given a Fifo Group
+ *
+ * Poll the "header" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1.  Total_packets packets are received
+ * 2.  The specified fifo is empty
+ * 3.  A receive function returns a non-zero value
+ *
+ * If the header fifo has a packet, the appropriate receive function is
+ * called.  Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive function must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header. The packet header is always
+ * 16 bytes of contiguous storage, in the fifo.  When the
+ * receive function returns, user code cannot assume that the buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.  To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.  The packet header is 16-byte aligned
+ * for optimized copying.
+ *
+ * \param[in]  num_packets       The maximum number of packets that will be
+ *                               processed.
+ * \param[in]  num_empty_passes    When the not-empty status indicates that all
+ *                                 fifos in the group are empty, this is the
+ *                                 number of times the not-empty status is
+ *                                 re-fetched and re-checked before officially
+ *                                 declaring that they are indeed empty.
+ *                                 (0 means no extra passes are made).
+ * \param[in]  not_empty_poll_delay  The number of pclks to delay between polls
+ *                                   of the not-empty status when the fifos are
+ *                                   empty.
+ * \param[in]  fg_ptr            Pointer to the fifo group.
+ *
+ * \retval  num_packets_received  The number of packets received and processed.
+ * \retval  negative_value        The return code from the receive function that
+ *                                caused polling to end.
+ *
+ * \pre  The caller is responsible for disabling interrupts before invoking this
+ *       function.
+ *
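+ * \par Example
+ * A minimal sketch: drain up to 32 header packets, re-checking an empty
+ * not-empty status twice with a 100-pclk delay between checks:
+ * \code
+ *   int n = DMA_RecFifoPollHeaderFifo( 32, 2, 100, fg_ptr );
+ * \endcode
+ *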
+ */
+int DMA_RecFifoPollHeaderFifo( int                 num_packets,
+			       int                 num_empty_passes,
+			       int                 not_empty_poll_delay,
+			       DMA_RecFifoGroup_t *fg_ptr
+			     )
+{
+  int fifo_index;                          /* Index of fifo being processed   */
+  int num_packets_in_fifo;                 /* Count of packets processed in a */
+					   /* fifo.                           */
+  unsigned int status;                     /* Snapshot of the not empty status*/
+                                           /* for this group.                 */
+  int rc = 0;                              /* Return code from recv_func.     */
+  int num_processed = 0;                   /* Number of packets processed     */
+  DMA_PacketIovec_t io_vec;                /* Payload I/O vector              */
+  DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function     */
+  void                     *recv_func_parm;/* Receive function parameter      */
+  DMA_RecFifo_t *fifo_ptr;                 /* Pointer to fifo being processed */
+  DMA_PacketHeader_t *packet_ptr;          /* Pointer to packet header        */
+  int passes;                              /* Counter of not-empty passes     */
+
+  SPI_assert( num_packets       > 0 );
+  SPI_assert( num_empty_passes  >= 0 );
+  SPI_assert( fg_ptr           != NULL );
+
+
+  fifo_index = DMA_HEADER_REC_FIFO_ID;     /* We are working with the header  */
+                                           /* fifo.                           */
+  fifo_ptr = &(fg_ptr->fifos[fifo_index]);
+
+  /*
+   * Loop until the header fifo is declared empty, or
+   * until one of the termination conditions documented in the prolog occurs.
+   *
+   */
+  for (;;)
+    {
+      /*
+       * If the DMA SRAM not-empty status for this fifo is zero (the fifo is
+       * empty), the status is checked num_empty_passes times with a slight
+       * delay in between to give the DMA time to make progress before declaring
+       * that the fifo is truly empty.
+       */
+      passes = num_empty_passes;
+      status = DMA_RecFifoGetNotEmptyById( fg_ptr,
+					   fifo_index ); /* Get Header fifo   */
+                                                         /* not-empty status. */
+      while ( ( status == 0 ) &&
+	      ( passes-- > 0 ) )
+	{
+	  /* Delay, allowing the DMA to update its status */
+	  unsigned int pclks = not_empty_poll_delay;
+	  while( pclks-- )
+	    {
+	      asm volatile("nop;");
+	    }
+
+	  /* Re-fetch the not-empty status */
+	  status = DMA_RecFifoGetNotEmptyById(
+					    fg_ptr,
+				  	    fifo_index ); /* Get Header fifo  */
+	                                                  /* not-empty status.*/
+	}
+
+      if ( status == 0 ) {       /* Fifo is empty?                             */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+	if (num_processed > 0) { /* Did we process at least 1 packet?          */
+	  NumEmptyPollFunctionCalls = 0; /* The DMA must be active.  It has    */
+	                                 /* likely not encountered a fifo full */
+ 	                                 /* condition and stopped.  Reset the  */
+    	                                 /* fifo counter so we will start      */
+                                         /* tracking empty calls to poll.      */
+	}
+	else {
+	  if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+	       (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+	     /*  printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+	    rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+                                             /* reception fifos filled and the */
+                                             /* DMA has stopped.               */
+	     /*  printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+	    NumEmptyPollFunctionCalls = -1; /* The DMA is active.  Reset the    */
+                                            /* fill-fifo counter.               */
+	  }
+	}
+#endif
+
+	return (num_processed);
+      }
+
+      /* The fifo has something in it.
+       * Update its shadow va_tail pointer to reflect the amount of packet
+       * data in the fifo.
+       */
+      DMA_RecFifoGetTailById( fg_ptr,
+			      fifo_index );
+
+      num_packets_in_fifo = 0;
+
+      /*
+       * MSYNC before we look at the data in the fifo to ensure that snoops
+       * issued by the DMA have completed.  This ensures the L1 cache
+       * invalidations have completed so we don't look at stale data.
+       */
+      _bgp_msync();
+
+      /*
+       * Within a fifo: the area between the va_head and va_tail shadow pointers
+       * contains packets to be processed.  Loop, processing those packets until
+       * we have processed num_packets of them, or all of them, or another
+       * termination condition occurs.
+       */
+      while ( ( num_packets_in_fifo < num_packets ) &&
+	      ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) )
+	{
+	  DMA_RecFifoGetAddresses( fifo_ptr,
+				   &io_vec ); /* Get the payload pointer(s)   */
+      	                                      /* for the packet at the head   */
+                                              /* of the fifo.                 */
+
+	  packet_ptr = (DMA_PacketHeader_t*)
+	                 fifo_ptr->dma_fifo.va_head; /* Point to packet header*/
+
+	  /* Determine the receive function to call */
+	  recv_func_ptr = DMA_RecFifoInfo.headerRecvFunction;
+	  if ( recv_func_ptr != NULL )
+	    {
+	      recv_func_parm = DMA_RecFifoInfo.headerRecvFunctionParm;
+	    }
+	  else
+	    {
+	      recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
+	      recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+	    }
+
+	  /* Call the receive function */
+	  if ( recv_func_ptr != NULL )
+	    {
+	      rc = (*recv_func_ptr)(fifo_ptr,
+				    packet_ptr,
+				    recv_func_parm,
+				    NULL, /* No payload       */
+				    0);   /* No payload bytes */
+	    }
+	  else
+	    {
+	      printk(KERN_ERR "DMA_RecFifoPollHeaderFifo recv_func_ptr was NULL fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+		     fifo_ptr, packet_ptr, recv_func_parm, NULL, 0);
+	    }
+
+	  DMA_RecFifoIncrementHead(fifo_ptr,
+				   1);/* Increment head by 16 bytes   */
+
+	  num_processed++;
+
+	  if ( rc != 0 ) /* Did receive function fail? */
+	    {
+	      /* Clear the threshold crossed condition, in case we have gone below
+	       * the threshold.
+	       */
+	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+						   _BN(fifo_ptr->global_fifo_id),
+						   0 );
+	      return (rc); /* Yes...return that return code */
+	    }
+
+	  if ( num_processed >= num_packets ) /* Got what they wanted? */
+	    {
+	      /* Clear the threshold crossed condition, in case we have gone below
+	       * the threshold.
+	       */
+	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+						   _BN(fifo_ptr->global_fifo_id),
+						   0 );
+	      return (num_processed); /* Yes...all done */
+	    }
+
+	  num_packets_in_fifo++;
+
+	} /* End: Process up to num_packets packets in this fifo */
+
+    } /* End: Keep looping through the fifo. */
+
+} /* End: DMA_RecFifoPollHeaderFifo() */
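+
+/*
+ * Illustrative caller sketch -- an assumption, not from the original
+ * source (the authoritative prototype lives in spi/DMA_RecFifo.h).
+ * A typical polling loop drains the header fifo until it reports empty:
+ *
+ *   int n;
+ *   do {
+ *       n = DMA_RecFifoPollHeaderFifo( num_packets, num_empty_passes,
+ *                                      not_empty_poll_delay, fg_ptr );
+ *   } while ( n > 0 );
+ *
+ * A nonzero receive-function return code is propagated to the caller
+ * as-is; otherwise the number of packets processed is returned.
+ */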
+
+EXPORT_SYMBOL(DMA_RecFifoRegisterRecvFunction) ;
+EXPORT_SYMBOL(DMA_RecFifoGetFifoGroup) ;
+EXPORT_SYMBOL(DMA_RecFifoPollNormalFifoById) ;
+#if defined(CONFIG_BGP_STATISTICS)
+EXPORT_SYMBOL(reception_fifo_histogram) ;
+EXPORT_SYMBOL(reception_hi_watermark) ;
+#endif
+
diff --git a/arch/powerpc/syslib/bgdd/zepto_bigmem_explicit_mmap.c b/arch/powerpc/syslib/bgdd/zepto_bigmem_explicit_mmap.c
new file mode 100644
index 0000000..2ca1679
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/zepto_bigmem_explicit_mmap.c
@@ -0,0 +1,154 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+/*
+   bigmem explicit mmap is only available on the I/O node (ION)
+*/
+
+#ifndef CONFIG_ZEPTO_COMPUTENODE
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+
+#include <linux/zepto_debug.h>
+
+#define __ZCL_KERNEL__
+
+#include <linux/zepto_task.h>
+
+static int zeptobigmem_open( struct inode* inode, struct file* filp )
+{
+    zepto_debug(2,"zeptobigmem_open()\n");
+
+    if( !enable_bigmem ) {
+	printk(KERN_ERR "bigmem is not enabled\n");
+	return -ENOMEM;
+    }
+    if( get_bigmem_region_start() < 0xffffffff ) { 
+	printk(KERN_WARNING "bigmem is in use\n");
+	return -ENOMEM;
+    }
+    return 0;
+}
+
+static int zeptobigmem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+    unsigned size = vma->vm_end - vma->vm_start;
+
+    zepto_debug(2,"zeptobigmem_mmap [%08lx, %08lx)\n",  vma->vm_start, vma->vm_end );
+
+    if( (vma->vm_start&0x0fffffff) != 0 ) {
+	printk(KERN_ERR "[Z] bigmem start address should be 256MB-aligned.  vma_start=%08lx\n",
+	       vma->vm_start );
+	return -EAGAIN;
+    }
+    if( ((size&0x00ffffff)!=0)  || size > get_bigmem_size() ) {
+	printk(KERN_ERR "[Z] invalid bigmem size.  size=%08x\n", size);
+	return -EAGAIN;
+    }
+
+    /* set bigmem start (virtual) address */
+    if( init_bigmem_tlb(vma->vm_start) == -1 ) {
+	return -EBUSY;
+    }
+
+    /* just set flags ( no PTEs ) */
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+    /* MAP_FIXED/MAP_PRIVATE are mmap() flags, not vm_flags bits; OR-ing
+     * them in here would set unrelated VM_* bits. */
+    vma->vm_flags |= (VM_IO|VM_DONTEXPAND|VM_RESERVED|VM_PFNMAP);
+
+    if( bigmem_mmap_init(get_bigmem_region_start(),
+			 get_bigmem_region_end() )!=BIGMEM_MMAP_SUCCESS ) { 
+	printk(KERN_ERR "[Z] bigmem_mmap_init() failed\n");
+	free_bigmem_tlb();
+	return -EAGAIN;
+    }
+
+    /* don't call bigmem_process_new for explicit mmap usage */
+    SET_ZEPTO_TASK(current, 1);
+		
+    zepto_debug(2,"bigmem VA:[%08x,%08x)  PA:[%08x,%08x)  size=%08x\n",
+		get_bigmem_region_start(),
+		get_bigmem_region_end(),
+		get_bigmem_pa_start(),
+		get_bigmem_pa_end(),
+		get_bigmem_size() );
+
+    return 0;
+}
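+
+/*
+ * Usage sketch (assumptions: a /dev/zeptobigmem node exists for the char
+ * device registered below, and the caller picks a free 256MB-aligned
+ * window).  The handler above enforces the alignment and size checks:
+ *
+ *   int fd = open( "/dev/zeptobigmem", O_RDWR );
+ *   void *p = mmap( (void*)0x30000000,   // 256MB-aligned address
+ *                   0x01000000,          // multiple of 16MB, <= bigmem size
+ *                   PROT_READ|PROT_WRITE,
+ *                   MAP_FIXED|MAP_PRIVATE, fd, 0 );
+ */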
+
+
+
+
+static const struct file_operations zeptobigmem_fops = {
+    .open = zeptobigmem_open , 
+    .mmap = zeptobigmem_mmap,
+};
+
+static struct cdev  zeptobigmem_cdev;
+
+int __init zeptobigmem_init(void)
+{
+    int rc;
+    static int zeptobigmem_maj = 127;
+    static int zeptobigmem_min = 0;
+    dev_t devnum;
+
+    /*  registering zeptobigmem char device */
+    devnum = MKDEV(zeptobigmem_maj, zeptobigmem_min);
+
+    rc = register_chrdev_region( devnum, 1, "zeptobigmem" );
+    if( rc ) {
+	printk(KERN_WARNING "register_chrdev_region() failed. zeptobigmem(%d:%d) rc=%d\n",
+	       zeptobigmem_maj, zeptobigmem_min, rc );
+	return -1;
+    }
+
+    /* connecting up the device */
+    cdev_init(&zeptobigmem_cdev, &zeptobigmem_fops);
+    kobject_set_name(&zeptobigmem_cdev.kobj, "zeptobigmem%d",devnum);
+
+    rc = cdev_add(&zeptobigmem_cdev, devnum, 1 );
+    if (rc)   {
+	printk(KERN_WARNING "cdev_add() failed. zeptobigmem(%d:%d) rc=%d\n",
+	       zeptobigmem_maj, zeptobigmem_min, rc );
+	return -1;
+    }
+
+    zepto_debug(2,"zeptobigmem mmap driver is registered\n");
+
+    return 0;
+}
+__initcall(zeptobigmem_init);
+
+#endif
diff --git a/arch/powerpc/syslib/bgdd/zepto_bluegene_dma.c b/arch/powerpc/syslib/bgdd/zepto_bluegene_dma.c
new file mode 100644
index 0000000..77bed42
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/zepto_bluegene_dma.c
@@ -0,0 +1,1253 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+/*
+  BGP dma driver for ZCL
+*/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/zepto_debug.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+
+#include <asm/bgcns.h>
+#include <asm/bgp_personality.h>
+
+#define __ZCL_KERNEL__
+
+/* #include <bpcore/ppc450_inlines.h> */
+/* #include <bpcore/bgp_global_ints.h> */
+/* #include <bpcore/collective.h> */
+#define  __INLINE__  extern inline
+/* #include <spi/GlobInt.h> */
+
+#include <spi/DMA_Counter.h>
+#include <spi/DMA_InjFifo.h>
+#include <spi/DMA_RecFifo.h>
+
+
+#include <linux/zepto_task.h>
+
+#include <zepto/zepto_syscall.h>
+
+extern BGCNS_Descriptor bgcnsd;  /* defined in platforms/44x/bgp_cns.c */
+
+BGCNS_ServiceDirectory *bgcns(void) {
+    return bgcnsd.services;
+}
+
+
+#define FUNCT    zepto_debug(3,"func: %s()\n",__func__)
+#define FUNCB    zepto_debug(3," ret=%d\n",rc)
+#define FUNCBP   zepto_debug(3," ret=%p\n",rc)
+#define FUNCBNR  zepto_debug(3," ret=none\n")
+#define Z        zepto_debug(3,"%s() %s:%d\n",__func__,__FILE__,__LINE__)
+#define SCDEBUG  zepto_debug(3,"dmasc %s()\n", __func__)
+
+
+static const char* str_BGCNS_FifoOperation(BGCNS_FifoOperation op)
+{
+    switch( op ) 
+    {
+	case BGCNS_Disable:
+	    return "BGCNS_Disable";
+	case BGCNS_Enable:
+	    return "BGCNS_Enable";
+	case BGCNS_Reenable:
+	    return "BGCNS_Reenable";
+    }
+    return "????";
+}
+
+static const char* str_BGCNS_FifoFacility(BGCNS_FifoFacility facility)	    
+{
+    switch(facility) {
+	case BGCNS_InjectionFifo:
+	    return "BGCNS_InjectionFifo";
+	case BGCNS_ReceptionFifo:
+	    return "BGCNS_ReceptionFifo";
+	case BGCNS_ReceptionHeaderFifo:
+	    return "BGCNS_ReceptionHeaderFifo";
+	case BGCNS_InjectionFifoInterrupt:
+	    return "BGCNS_InjectionFifoInterrupt";
+	case BGCNS_ReceptionFifoInterrupt:
+	    return "BGCNS_ReceptionFifoInterrupt";
+	case BGCNS_ReceptionHeaderFifoInterrupt:
+	    return "BGCNS_ReceptionHeaderFifoInterrupt";
+	case BGCNS_InjectionCounterInterrupt:
+	    return "BGCNS_InjectionCounterInterrupt";
+	case BGCNS_ReceptionCounterInterrupt:
+	    return "BGCNS_ReceptionCounterInterrupt";
+    }
+    return "?????";
+}
+	    
+
+int bgcns_setDmaFifoControls(BGCNS_FifoOperation op, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer)
+{
+    int rc;
+    FUNCT;
+    zepto_debug(3,"  op=%s facility=%s group=%d mask=0x%08x buffer=%p\n",
+	    str_BGCNS_FifoOperation(op), 
+	    str_BGCNS_FifoFacility(facility), group, mask, buffer );
+    local_irq_disable();
+    rc = bgcns()->setDmaFifoControls(op,facility,group, mask,buffer);
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+
+
+int bgcns_setDmaLocalCopies(BGCNS_FifoOperation operation, unsigned group, unsigned bits)
+{
+    int rc;
+    FUNCT;
+    zepto_debug(3, "  operation=%d group=%d bits=0x%08x\n",
+	     operation,group,bits);
+
+    local_irq_disable();
+    rc = bgcns()->setDmaLocalCopies(operation, group, bits);
+    local_irq_enable();
+
+    FUNCB;
+    return rc;
+}
+
+
+int bgcns_setDmaPriority(BGCNS_FifoOperation operation, unsigned group, unsigned bits)
+{
+    int rc;
+    FUNCT;
+    zepto_debug(3,"  operation=%d group=%d bits=0x%08x\n", operation, group, bits);
+    local_irq_disable();
+    rc = bgcns()->setDmaPriority(operation, group,  bits);
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+int bgcns_setDmaReceptionMap( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[])
+{
+    int rc;
+    int i;
+
+    FUNCT;
+    for(i=0; i<BGCNS_NUM_DMA_RECEPTION_GROUPS; i++ ) {
+	zepto_debug(3,
+	    "  recmap[%2d] %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
+	    i,
+	    torus_reception_map[i][0],
+	    torus_reception_map[i][1],
+	    torus_reception_map[i][2],
+	    torus_reception_map[i][3],
+	    torus_reception_map[i][4],
+	    torus_reception_map[i][5],
+	    torus_reception_map[i][6],
+	    torus_reception_map[i][7]   );
+    }
+    if( fifo_types ) {
+	for(i=0; i<DMA_NUM_NORMAL_REC_FIFOS; i++ ) {
+	    zepto_debug(3,"  fifo_types[%2d] = %d\n", 
+		    i, fifo_types[i]);
+	}
+    }
+    if( header_types ) {
+	for(i=0; i<DMA_NUM_HEADER_REC_FIFOS; i++ ) {
+	    zepto_debug(3,"  header_types[%2d] = %d\n",
+		    i, header_types[i]);
+	}
+    }
+    zepto_debug(3,"  threadhold = %08x:%08x\n",
+	    threshold[0], threshold[1]);
+
+    local_irq_disable();
+    rc = bgcns()->setDmaReceptionMap(torus_reception_map, fifo_types,  
+				   header_types,
+				   threshold);
+    local_irq_enable();
+
+
+    FUNCB;
+    return rc;
+}
+
+
+int  bgcns_getDmaReceptionMap( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], 
+			       unsigned short* store_headers, unsigned header_types[], unsigned threshold[])
+
+{
+    int rc;
+    int i;
+    FUNCT;
+
+    local_irq_disable();
+    rc =  bgcns()->getDmaReceptionMap(
+	torus_reception_map, fifo_types,
+	store_headers,  header_types, threshold);
+    local_irq_enable();
+
+    for(i=0; i<BGCNS_NUM_DMA_RECEPTION_GROUPS; i++ ) {
+	zepto_debug(3,
+	    "  recmap[%2d] %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
+	    i,
+	    torus_reception_map[i][0],
+	    torus_reception_map[i][1],
+	    torus_reception_map[i][2],
+	    torus_reception_map[i][3],
+	    torus_reception_map[i][4],
+	    torus_reception_map[i][5],
+	    torus_reception_map[i][6],
+	    torus_reception_map[i][7]   );
+    }
+    if( fifo_types ) {
+	for(i=0; i<DMA_NUM_NORMAL_REC_FIFOS; i++ ) {
+	    zepto_debug(3,"  fifo_types[%2d] = %d\n", 
+		    i, fifo_types[i]);
+	}
+    }
+    if( header_types ) {
+	for(i=0; i<DMA_NUM_HEADER_REC_FIFOS; i++ ) {
+	    zepto_debug(3,"  header_types[%2d] = %d\n",
+		    i, header_types[i]);
+	}
+    }
+    zepto_debug(3,"  threadhold = %08x:%08x\n",
+	    threshold[0], threshold[1]);
+
+
+    FUNCB;
+    return rc;
+}
+
+int bgcns_setDmaInjectionMap(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos)
+{
+    int rc;
+    int i;
+    FUNCT;
+    zepto_debug(3,"  group=%d numberOfFifos=%d\n", group, numberOfFifos);
+    for(i=0;i<numberOfFifos;i++) 
+	zepto_debug(3,"  fifoIds[%2d] = %d\n", i, fifoIds[i]);
+    for(i=0;i<numberOfFifos;i++) 
+	zepto_debug(3,"  injection_map[%2d] = 0x%08x\n", i, injection_map[i]);
+
+    local_irq_disable();
+    rc = bgcns()->setDmaInjectionMap(group, fifoIds, 
+				     injection_map, numberOfFifos);
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+
+int bgcns_disableInterrupt(unsigned group, unsigned irq)
+{
+    int rc;
+    FUNCT;
+    zepto_debug(3,"  group=%d irq=%d\n", group, irq);
+
+    if( group>=10 || irq>=32 ) {
+	return -EINVAL;
+    }
+
+    local_irq_disable();
+    rc = bgcns()->disableInterrupt(group, irq);
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+
+
+int bgcns_dmaSetRange(unsigned numreadranges,  unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr, unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr)
+{
+    int rc;
+    FUNCT;
+    local_irq_disable();
+    rc = bgcns()->dmaSetRange(
+	numreadranges,  read_lower_paddr, read_upper_paddr, 
+	numwriteranges, write_lower_paddr, write_upper_paddr);
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+
+int bgcns_globalBarrier(void)
+{
+    int rc;
+    FUNCT;
+    local_irq_disable();
+    rc = bgcns()->globalBarrier();
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+int bgcns_globalBarrierWithTimeout(unsigned timeoutInMillis)
+{
+    int rc;
+    FUNCT;
+    local_irq_disable();
+    rc =bgcns()->globalBarrierWithTimeout(timeoutInMillis);
+    local_irq_enable();
+    FUNCB;
+    return rc;
+}
+
+
+
+
+/* ==================================================
+   Misc. functions
+   ================================================== */
+
+asmlinkage uint32_t sys_bg_sc_donothing(void) /* for benchmarking purposes */
+{
+    return 0;
+}
+
+asmlinkage uint32_t sys_bg_sc_barrier(unsigned msec)
+{
+    if( msec==0 ) {
+	bgcns_globalBarrier();
+    } else {
+	bgcns_globalBarrierWithTimeout(msec);
+    }
+    return 0;
+}
+
+asmlinkage uint32_t sys_bg_sc_wildcard(int cmd)
+{
+    zepto_debug(3,"sys_bg_sc_wildcard()\n");
+    switch(cmd) {
+	case 0:
+	    asm volatile("dccci 0,0");
+	    break;
+	default:
+	    printk("cmd=%d is not implemented\n",cmd);
+    }
+    return 0;
+}
+
+
+
+/* ==================================================
+   DMA driver impl.
+   ================================================== */
+
+
+/* Keep track of per-node DMA resource usage */
+
+typedef struct _BGP_DMA_Resource
+{
+    uint32_t  inj_ctr_used[4];       /* uses bits 0-7  */
+    uint32_t  rec_ctr_used[4];       /* uses bits 0-7  */
+
+    uint32_t  inj_fifo_used[4];      /* uses bits 0-31 */
+
+    uint32_t  rec_fifo_set_map;      /* set non-zero once setDmaReceptionMap has been called */
+    uint32_t  rec_normal_fifo_init;  /* uses bits 0-31 */
+    uint32_t  rec_header_fifo_init;  /* uses bits 0-3  */
+
+}  BGP_DMA_Usage;
+
+static BGP_DMA_Usage  _bgp_dma_usage;  
+
+void force_clear_dma_usage(void)
+{
+    memset( &_bgp_dma_usage, 0, sizeof(_bgp_dma_usage) );
+    zepto_debug(2, "clear dma usage\n");
+}
+
+/* ========================================
+   DMA device MMIO map
+   ======================================== */
+   
+static unsigned  bgp_dma_base;
+
+static unsigned get_dma_inj_start(int gn, int fn)            { return bgp_dma_base + gn*0x1000 + fn*0x0010; }
+static unsigned get_dma_inj_not_empty(int gn)                { return bgp_dma_base + gn*0x1000 +    0x0200; }
+static unsigned get_dma_inj_counter_enabled(int gn, int fn)  { return bgp_dma_base + gn*0x1000 + 0x0300 + fn*0x0004; }
+static unsigned get_dma_inj_counter(int gn, int fn)          { return bgp_dma_base + gn*0x1000 + 0x0400 + fn*0x0010; }
+
+static unsigned get_dma_rec_start(int gn, int fn)            { return bgp_dma_base + gn*0x1000 + 0x0800 + fn*0x0010; }
+static unsigned get_dma_rec_not_empty(int gn, int fn)        { return bgp_dma_base + gn*0x1000 + 0x0a00 + fn*0x0004; }
+static unsigned get_dma_rec_counter_enabled(int gn, int fn ) { return bgp_dma_base + gn*0x1000 + 0x0b00 + fn*0x0004; }
+static unsigned get_dma_rec_counter(int gn, int fn)          { return bgp_dma_base + gn*0x1000 + 0x0c00 + fn*0x0010; }
+
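+/*
+ * Worked example (illustrative): with bgp_dma_base = 0xFFFD0000,
+ * get_dma_rec_start(1,2) = 0xFFFD0000 + 1*0x1000 + 0x0800 + 2*0x0010
+ * = 0xFFFD1820; each group owns a 4KB register window and each fifo a
+ * 16-byte register block within it.
+ */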
+
+
+static int valid_dma_vaddr(unsigned vaddr)
+{
+    return (get_bigmem_region_start() <= vaddr)
+	&& (vaddr < get_bigmem_region_end());
+}
+
+static void print_region_info(void) 
+{
+    zepto_debug(2,"region=[0x%08x,0x%08x)\n",
+		get_bigmem_region_start(),
+		get_bigmem_region_end() );
+}
+
+
+static int dma_CounterGroupQueryFree( 
+    struct CounterGroupQueryFree_struct* commbuf )
+{
+    uint32_t  type = commbuf->type;
+    uint32_t  group = commbuf->group;
+    uint32_t  *n_subgroups = &(commbuf->n_subgroups); 
+    uint32_t  subgroups[DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP];
+    uint32_t  counters;
+    int i;
+
+    zepto_debug(2,"dma_CounterGroupQueryFree()\n");
+
+
+    /* The SPI wrapper function has already validated the parameters,
+       so we can skip the checks here. */
+
+    /* LOCK */
+
+    switch(type) {
+        case DMA_Type_Injection:
+            counters = _bgp_dma_usage.inj_ctr_used[group];
+            break;
+        case DMA_Type_Reception:
+            counters = _bgp_dma_usage.rec_ctr_used[group];
+            break;
+        default:
+            return -EINVAL;
+    }
+
+    *n_subgroups = 0;
+    for(i=0;i< DMA_NUM_COUNTERS_PER_SUBGROUP; i++) {
+        if( (counters & _BN(i)) == 0 ) {
+            subgroups[*n_subgroups] = i;
+            (*n_subgroups)++;
+        }
+    }
+    /* UNLOCK */
+    
+    /* commbuf->subgroups points to a user-space buffer */
+    if( copy_to_user( commbuf->subgroups, subgroups,
+		      (*n_subgroups) * sizeof(uint32_t)) ) {
+	return -EINVAL;
+    }
+    
+    return 0;
+}
+
+
+static int dma_CounterGroupAllocate( 
+    struct   CounterGroupAllocate_struct* commbuf )
+{
+    uint32_t  type = commbuf->type;
+    uint32_t  group = commbuf->group;
+    uint32_t  num_subgroups = commbuf->num_subgroups;
+    DMA_CounterGroup_t* cg_ptr = 
+	(DMA_CounterGroup_t*)commbuf->cg_ptr; /* points to a special buffer,
+					       * so no need to
+					       * copy_from_user */
+    /****/
+    int subgroups[DMA_NUM_COUNTERS_PER_SUBGROUP];
+    int i,j;
+    int min_id,max_id,global_subgroup,word_id,bit_id;
+    uint32_t *counters_ptr;
+    uint32_t x;
+    unsigned counterGroupMask = 0;
+    unsigned counterNum;
+
+    SCDEBUG;    
+        
+    if(!valid_dma_vaddr((unsigned)cg_ptr)) {
+	printk("Error!  cg_ptr=%p %s(%d)\n",
+	       cg_ptr, 
+	       __FILE__, __LINE__);
+	print_region_info();
+
+	return -EINVAL;
+    }
+
+    switch(type) {
+	case DMA_Type_Injection:
+	    counters_ptr = &(_bgp_dma_usage.inj_ctr_used[group]);
+	    break;
+	case DMA_Type_Reception:
+	    counters_ptr = &(_bgp_dma_usage.rec_ctr_used[group]);
+	    break;
+	default:
+	    return -EINVAL;
+    }
+
+    if( copy_from_user(subgroups, commbuf->subgroups,
+		       sizeof(int)*num_subgroups) != 0 ) {
+	return -EFAULT;
+    }
+
+    for(i=0;i<num_subgroups;i++) {
+	if(subgroups[i] < 0) {
+	    return -EINVAL;
+	}
+	if(subgroups[i] >= DMA_NUM_COUNTERS_PER_SUBGROUP){
+	    return -EINVAL;
+	}
+	if((*counters_ptr) & _BN(subgroups[i])){
+	    return -EINVAL;
+	}
+    }
+
+
+
+    memset( (void *)cg_ptr, 0, sizeof(DMA_CounterGroup_t));
+
+    cg_ptr->type = type;
+    cg_ptr->group_id = group;
+    if(type == DMA_Type_Injection){
+	cg_ptr->status_ptr = 
+	    (DMA_CounterStatus_t *)get_dma_inj_counter_enabled(group,0);
+    } else {
+	cg_ptr->status_ptr = 
+	    (DMA_CounterStatus_t *)get_dma_rec_counter_enabled(group,0);
+    }
+
+
+    zepto_debug(3,"cg_ptr->status_ptr=%p\n", (void*)cg_ptr->status_ptr);
+
+
+    for(i=0;i<num_subgroups;i++) {
+	*counters_ptr |= _BN(subgroups[i]);
+	min_id = subgroups[i]*DMA_NUM_COUNTERS_PER_SUBGROUP;
+	max_id = min_id + DMA_NUM_COUNTERS_PER_SUBGROUP;
+	global_subgroup = (group * DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP)
+	    + subgroups[i];
+
+	cg_ptr->grp_permissions |= _BN(global_subgroup);
+
+	for(j=min_id;j<max_id;j++){
+	    word_id = DMA_COUNTER_GROUP_WORD_ID(j);
+	    bit_id = DMA_COUNTER_GROUP_WORD_BIT_ID(j);
+	    cg_ptr->permissions[word_id] |= _BN(bit_id);
+	    
+	    if(type == DMA_Type_Injection){
+		cg_ptr->counter[j].counter_hw_ptr =
+		    (DMA_CounterHw_t *)get_dma_inj_counter(group,j);
+		DMA_CounterSetValueBaseHw(cg_ptr->counter[j].counter_hw_ptr,DMA_COUNTER_INIT_VAL,0);
+	    }else{
+		cg_ptr->counter[j].counter_hw_ptr =
+		    (DMA_CounterHw_t *)get_dma_rec_counter(group,j);
+		DMA_CounterSetValueBaseMaxHw(cg_ptr->counter[j].counter_hw_ptr,DMA_COUNTER_INIT_VAL,0,0);
+	    }
+	    DMA_CounterSetDisableById(cg_ptr,j);
+	    DMA_CounterClearHitZeroById(cg_ptr,j);
+	}
+    }
+
+
+    for(counterNum=0; counterNum < DMA_NUM_COUNTERS_PER_GROUP;
+	counterNum += DMA_NUM_COUNTERS_PER_SUBGROUP){
+	if(cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counterNum)] &
+	   _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counterNum))){
+	    counterGroupMask |= _BN(counterNum / DMA_NUM_COUNTERS_PER_SUBGROUP);
+	}
+    }
+
+    switch(type) {
+	case DMA_Type_Injection:
+	    bgcns_setDmaFifoControls(BGCNS_Enable,
+				     BGCNS_InjectionCounterInterrupt,
+				     cg_ptr->group_id,
+				     counterGroupMask >> ((cg_ptr->group_id) * 8),
+				     NULL);
+	    break;
+	case DMA_Type_Reception:
+	    bgcns_setDmaFifoControls(BGCNS_Enable,
+					BGCNS_ReceptionCounterInterrupt,
+					cg_ptr->group_id,
+					counterGroupMask >> ((cg_ptr->group_id) * 8),
+					NULL);
+    }
+
+    _bgp_mbar();
+
+
+    x = DMA_CounterGetHitZero(cg_ptr,0);
+    if(x != 0) {
+	printk("[DMA_Counter_Alloc] Hit Zero Error x = 0x%08x\n",x);
+	return -EFAULT;
+    }
+    _bgp_msync();
+    _bgp_isync();
+
+
+    zepto_debug(3,"cg_ptr: status_ptr=%p  counter[0].counter_hw_ptr=%p\n",
+		cg_ptr->status_ptr, cg_ptr->counter[0].counter_hw_ptr);
+
+
+    return 0;
+}
+
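+/*
+ * Worked example (assumption: 8 subgroups of 8 counters per group, as on
+ * BG/P): allocating subgroup 2 of group 1 in the function above yields
+ * global_subgroup = 1*DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP + 2 = 10 and
+ * marks counters 16..23 (min_id = 2*8) in the permissions words.
+ */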
+
+static int dma_InjFifoGroupQueryFree( 
+    struct   InjFifoGroupQueryFree_struct* commbuf )
+{
+    uint32_t group = commbuf->group;
+    /****/
+    int i;
+    int num_fifos;
+    int fifo_ids[DMA_NUM_INJ_FIFOS_PER_GROUP];
+    uint32_t fifos;
+
+    SCDEBUG;    
+
+    fifos = _bgp_dma_usage.inj_fifo_used[group];
+	
+    num_fifos = 0;
+    for(i=0;i< DMA_NUM_INJ_FIFOS_PER_GROUP; i++) {
+	if((fifos & _BN(i)) == 0) {
+	    fifo_ids[num_fifos] = i;
+	    num_fifos++;
+	}
+    }
+
+    /* return */
+    commbuf->num_fifos = num_fifos;
+    if( copy_to_user(commbuf->fifo_ids, fifo_ids,
+		     DMA_NUM_INJ_FIFOS_PER_GROUP*sizeof(int)) != 0 ) {
+	return -EFAULT;
+    }
+    
+
+
+    return 0;
+}
+
+static int dma_InjFifoGroupAllocate( 
+    struct   InjFifoGroupAllocate_struct* commbuf )
+{
+    uint32_t group = commbuf->group;
+    uint32_t num_fifos = commbuf->num_fifos;
+    DMA_InjFifoGroup_t* fg_ptr  = (DMA_InjFifoGroup_t*)commbuf->fg_ptr;
+    /*****/
+    int i;
+    uint32_t       value, f_ids, pri_bits = 0, local_bits = 0;
+    unsigned short priorities[DMA_NUM_INJ_FIFOS_PER_GROUP];
+    int            fifo_ids[DMA_NUM_INJ_FIFOS_PER_GROUP];
+    char           ts_inj_maps[DMA_NUM_INJ_FIFOS_PER_GROUP];
+    short          locals[DMA_NUM_INJ_FIFOS_PER_GROUP];
+
+    SCDEBUG;    
+
+
+    /* Copy the parameters from user space to the kernel */
+    if( copy_from_user(priorities,  commbuf->priorities,  sizeof(short)*num_fifos) ||
+	copy_from_user(fifo_ids,    commbuf->fifo_ids,    sizeof(int)*num_fifos)   ||
+	copy_from_user(ts_inj_maps, commbuf->ts_inj_maps, sizeof(char)*num_fifos)  ||
+	copy_from_user(locals,      commbuf->locals,      sizeof(short)*num_fifos) ) {
+	return -EFAULT;
+    }
+
+    f_ids = 0;
+
+    for(i=0;i<num_fifos;i++) {
+	if(fifo_ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP){
+	    return -EINVAL;
+	}
+	f_ids |= _BN(fifo_ids[i]);
+	if(priorities[i] > 1) {
+	    return -EINVAL;
+	}
+	if(locals[i] > 1) {
+	    return -EINVAL;
+	}
+	if(locals[i] == 0 && ts_inj_maps[i] == 0) {
+	    return -EINVAL;
+	}
+	if(locals[i] == 1 && ts_inj_maps[i] != 0) {
+	    return -EINVAL;
+	}
+	if(locals[i] == 1){
+	    local_bits |= _BN(i);
+	}
+	if(priorities[i] == 1) {
+	    pri_bits |= _BN(i);
+	}
+	if(_bgp_dma_usage.inj_fifo_used[group] & _BN(fifo_ids[i])) {
+	    return -EBUSY;
+	}
+    }
+
+    fg_ptr->status_ptr = (DMA_InjFifoStatus_t *)get_dma_inj_not_empty(group);
+    fg_ptr->group_id   = group;
+
+
+    zepto_debug(3,"fg_ptr->status_ptr=%p\n", (void*)fg_ptr->status_ptr);
+
+
+    _bgp_dma_usage.inj_fifo_used[group] |= f_ids;
+
+    fg_ptr->permissions = f_ids;
+
+    bgcns_setDmaFifoControls(BGCNS_Disable,BGCNS_InjectionFifoInterrupt,group,f_ids,NULL);
+    bgcns_setDmaFifoControls(BGCNS_Disable,BGCNS_InjectionFifo,group,f_ids,NULL);
+
+    DMA_InjFifoSetDeactivate(fg_ptr,f_ids);
+
+    _bgp_mbar();
+
+    bgcns_setDmaInjectionMap(group,(unsigned *)fifo_ids,ts_inj_maps,num_fifos);
+    
+    for(i=0;i<num_fifos;i++) {
+
+	fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr = 
+	    (DMA_FifoHW_t *)get_dma_inj_start(group,fifo_ids[i]);
+	fg_ptr->fifos[fifo_ids[i]].fifo_id = fifo_ids[i];
+	fg_ptr->fifos[fifo_ids[i]].desc_count = 0;
+	fg_ptr->fifos[fifo_ids[i]].occupiedSize = 0;
+	fg_ptr->fifos[fifo_ids[i]].priority = priorities[i];
+	fg_ptr->fifos[fifo_ids[i]].local = locals[i];
+	fg_ptr->fifos[fifo_ids[i]].ts_inj_map = ts_inj_maps[i];
+
+	DMA_FifoSetStartPa( fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr,0 );
+	DMA_FifoSetHeadPa(  fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr,0 );
+	DMA_FifoSetTailPa(  fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr,0 );
+	DMA_FifoSetEndPa(   fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr,0 );
+    }
+    _bgp_mbar();
+
+
+    zepto_debug(3,"fg_ptr->fifos[fifo_ids[0]].dma_fifo.fifo_hw_ptr=%p\n",
+		fg_ptr->fifos[fifo_ids[0]].dma_fifo.fifo_hw_ptr);
+
+
+    DMA_InjFifoSetClearThresholdCrossed(fg_ptr,f_ids);
+
+    bgcns_setDmaLocalCopies(BGCNS_Enable, group, local_bits);
+
+    bgcns_setDmaPriority(BGCNS_Enable,group,pri_bits);
+
+    _bgp_mbar();
+
+    zepto_debug(3,"fg_ptr->fifos[fifo_ids[0]].dma_fifo.fifo_hw_ptr=%p\n",
+		fg_ptr->fifos[fifo_ids[0]].dma_fifo.fifo_hw_ptr );
+
+    value = DMA_FifoGetStartPa(fg_ptr->fifos[fifo_ids[0]].dma_fifo.fifo_hw_ptr);
+    if(value != 0) {
+	return -EFAULT;
+    }
+
+    bgcns_setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifoInterrupt, 
+				group, f_ids, NULL );
+
+
+    return 0;
+}
+
+
+
+static int _bgp_DMA_FifoInit(
+    DMA_Fifo_t *f_ptr,
+    void *va_start,
+    void *va_head,
+    void *va_end)
+{
+    phys_addr_t pa_start, pa_head, pa_end;
+
+    if( !valid_dma_vaddr((unsigned)va_start) ) {
+	printk( KERN_WARNING "va_start %p is invalid\n",va_start);
+	print_region_info();
+    }
+    if( !valid_dma_vaddr((unsigned)va_end) ) {
+	printk( KERN_WARNING "va_end %p is invalid\n",va_end);
+	print_region_info();
+    }
+    if( !valid_dma_vaddr((unsigned)va_head) ) {
+	printk( KERN_WARNING "va_head %p is invalid\n",va_head);
+	print_region_info();
+    }
+    /*
+    pa_start = iopa((unsigned long)va_start);
+    pa_end   = iopa((unsigned long)va_end);
+    pa_head  = iopa((unsigned long)va_head);
+    */
+    pa_start = (phys_addr_t)bigmem_virt2phy((unsigned)va_start);
+    pa_end   = (phys_addr_t)bigmem_virt2phy((unsigned)va_end);
+    pa_head  = (phys_addr_t)bigmem_virt2phy((unsigned)va_head);
+
+
+
+    zepto_debug(3,"va_start=%p pa_start=0x%08llx\n",  va_start, pa_start);
+
+
+    /* fifo_hw_ptr->pa_* holds a 4-bit-shifted physical address. */
+    f_ptr->fifo_hw_ptr->pa_start = (unsigned long)(pa_start >> 4);
+    f_ptr->fifo_hw_ptr->pa_head  = (unsigned long)(pa_head >> 4);
+    f_ptr->fifo_hw_ptr->pa_tail  = (unsigned long)(pa_head >> 4);
+    f_ptr->fifo_hw_ptr->pa_end   = (unsigned long)(pa_end >> 4);
+    
+    _bgp_mbar();
+
+    /* shadow variables */
+    f_ptr->pa_start = (pa_start >> 4);
+
+    f_ptr->va_start = va_start;
+    f_ptr->va_end   = va_end;
+    f_ptr->va_head  = va_head;
+    f_ptr->va_tail  = va_head;
+
+
+    zepto_debug(3,"va = %p, pa = %llx\n",va_start,pa_start);
+
+	
+    f_ptr->fifo_size = (pa_end - pa_start) >> 4;
+    f_ptr->free_space = f_ptr->fifo_size;
+
+    return 0;
+}
+
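+/*
+ * Worked example (illustrative): a 16-byte-aligned physical address such
+ * as 0x12345670 is stored in the fifo hardware registers as
+ * 0x12345670 >> 4 = 0x01234567, so fifo_size and free_space above are
+ * counted in 16-byte units rather than bytes.
+ */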
+
+static int dma_InjFifoInitByID( 
+    struct   InjFifoInitByID_struct* commbuf )
+{
+    DMA_InjFifoGroup_t *fg_ptr = (DMA_InjFifoGroup_t *)commbuf->fg_ptr;
+    int  fifo_id = commbuf->fifo_id;
+    uint32_t* va_start = commbuf->va_start;
+    uint32_t* va_head = commbuf->va_head;
+    uint32_t* va_end = commbuf->va_end;
+    /***/
+    int group;
+    int ret;
+    void *x;
+
+    SCDEBUG;    
+
+    group = fg_ptr->group_id;
+
+    bgcns_setDmaFifoControls(BGCNS_Disable,BGCNS_InjectionFifo,
+				group,_BN(fifo_id),NULL);
+    bgcns_setDmaFifoControls(BGCNS_Disable,BGCNS_InjectionFifoInterrupt,
+				group,_BN(fifo_id),NULL);
+
+    DMA_InjFifoSetDeactivate(fg_ptr, _BN(fifo_id));
+
+    ret = _bgp_DMA_FifoInit(&(fg_ptr->fifos[fifo_id].dma_fifo),
+			    va_start, va_head, va_end);
+    if(ret != 0) {
+	return ret;
+    }
+    fg_ptr->fifos[fifo_id].desc_count = 0;
+    fg_ptr->fifos[fifo_id].occupiedSize = 0;
+
+    DMA_InjFifoSetClearThresholdCrossedById(fg_ptr,fifo_id);
+
+    x = DMA_FifoGetHead(&(fg_ptr->fifos[fifo_id].dma_fifo) );
+    if( x != fg_ptr->fifos[fifo_id].dma_fifo.va_tail) {
+	printk( "[Z] Error @ %s(%d)\n", __FILE__, __LINE__);
+	printk( "[Z] x=%p tail=%p fifo_id=%d\n",
+		x, fg_ptr->fifos[fifo_id].dma_fifo.va_tail, fifo_id);
+	return 0x03; // = _bgp_err_dma_sram_init
+    }
+    
+    bgcns_setDmaFifoControls(BGCNS_Enable,BGCNS_InjectionFifo, 
+				group,_BN(fifo_id),NULL);
+    bgcns_setDmaFifoControls(BGCNS_Enable,BGCNS_InjectionFifoInterrupt,
+				group,_BN(fifo_id),NULL);
+
+    /* Activate the fifo */
+    DMA_InjFifoSetActivate(fg_ptr, _BN(fifo_id));
+
+    return 0;
+}
+
+
+static int dma_RecFifoSetMap(uint32_t __user *rec_map)
+{
+    DMA_RecFifoMap_t map;
+    int i,g;
+
+    SCDEBUG;    
+   
+    if(copy_from_user(&map,rec_map,sizeof(DMA_RecFifoMap_t)) != 0) {
+	return -EINVAL;
+    }
+
+    for(i=0;i<DMA_NUM_NORMAL_REC_FIFOS;i++) {
+	if(map.fifo_types[i] < 0 || map.fifo_types[i] > 1) {
+	    return -EINVAL;
+	}
+    }
+
+    if(map.save_headers > 1) {
+	return -EINVAL;
+    }
+
+    if(_bgp_dma_usage.rec_fifo_set_map != 0) {
+	/* called twice */
+	return -EFAULT;
+    }
+
+    if(map.save_headers == 1){
+	for(i=0; i<DMA_NUM_HEADER_REC_FIFOS; i++) {
+	    if(map.hdr_fifo_types[i] < 0 || map.hdr_fifo_types[i] > 1) {
+		return -EINVAL;
+	    }
+	}
+    }
+
+    for(g=0;g<DMA_NUM_REC_FIFO_GROUPS;g++) {
+	for(i=0;i<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;i++) {
+	    if(map.ts_rec_map[g][i] >= DMA_NUM_NORMAL_REC_FIFOS) {
+		return -EINVAL;
+	    }
+	}
+    }
+
+    
+    bgcns_setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifo,        0,                   0xFFFFFFFF,NULL);
+
+
+    bgcns_setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo, BGCNS_DMA_ALL_GROUPS, 0 /* mask not used */, NULL );
+
+
+    bgcns_setDmaReceptionMap(map.ts_rec_map, map.fifo_types,
+				map.save_headers ? map.hdr_fifo_types : NULL,
+				map.threshold );
+
+
+    _bgp_dma_usage.rec_fifo_set_map = 1;
+
+
+    return 0;
+}
+
+static int _bgp_DMA_RecFifoGetMap(DMA_RecFifoMap_t *rec_map)
+{
+    if(rec_map == NULL){
+	return -EINVAL;
+    }
+    memset(rec_map,0,sizeof(DMA_RecFifoMap_t));
+
+    return bgcns_getDmaReceptionMap(rec_map->ts_rec_map,rec_map->fifo_types,&(rec_map->save_headers),rec_map->hdr_fifo_types,rec_map->threshold);
+}
+
+static int dma_RecFifoGetFifoGroup( struct RecFifoGetFifoGroup_struct* commbuf)
+{
+    DMA_RecFifoGroup_t *fg_ptr = (DMA_RecFifoGroup_t *)commbuf->fg_ptr;
+    int group = commbuf->group;
+    // int target = commbuf->target;
+    /***/
+    DMA_RecFifoMap_t rec_map;
+    int min_id,max_id,g,i,j,idx;
+    uint32_t used_fifos = 0,x;
+    unsigned long fifoMask;
+    int fifoIndex;
+
+    SCDEBUG;
+
+    _bgp_DMA_RecFifoGetMap(&rec_map);
+
+    fg_ptr->group_id = group;
+    switch(group) {
+	case 0:  fg_ptr->mask  = 0xFF000000; break;
+	case 1:  fg_ptr->mask  = 0x00FF0000; break;
+	case 2:  fg_ptr->mask  = 0x0000FF00; break;
+	default: fg_ptr->mask  = 0x000000FF; break;
+    }
+    fg_ptr->status_ptr = (DMA_RecFifoStatus_t *)get_dma_rec_not_empty(group,0);
+
+    min_id = (group*DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP);
+    max_id =  min_id + DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1;
+
+    for(g=0;g<DMA_NUM_REC_FIFO_GROUPS;g++) {
+	for(i=0;i<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;i++) {
+	    if(rec_map.ts_rec_map[g][i] >= min_id &&
+	       rec_map.ts_rec_map[g][i] <= max_id) {
+		used_fifos |= _BN(rec_map.ts_rec_map[g][i]);
+	    }
+	}
+    }
+
+    idx = 0;
+    for(j=0;j<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;j++) {
+	i = min_id + j;
+	if((_BN(i) & used_fifos) != 0){
+	    fg_ptr->fifos[idx].type = rec_map.fifo_types[j];
+	    fg_ptr->fifos[idx].global_fifo_id = i;
+	    fg_ptr->fifos[idx].num_packets_processed_since_moving_fifo_head = 0;
+	    fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr = 
+		(DMA_FifoHW_t *)get_dma_rec_start(group,j);
+	    
+	    DMA_FifoSetStartPa(fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr,0);
+	    DMA_FifoSetHeadPa( fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr,0);
+	    DMA_FifoSetTailPa( fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr,0);
+	    DMA_FifoSetEndPa(  fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr,0);
+	    idx++;
+	}
+    }
+
+    if(rec_map.save_headers == 1){
+	fg_ptr->num_hdr_fifos = 1;
+	fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].type = rec_map.hdr_fifo_types[group];
+	fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].global_fifo_id = DMA_NUM_NORMAL_REC_FIFOS+group;
+	fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].num_packets_processed_since_moving_fifo_head = 0;
+	fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr =
+	    ( DMA_FifoHW_t *) get_dma_rec_start(group, DMA_HEADER_REC_FIFO_ID);
+	
+	DMA_FifoSetStartPa( fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr,0);
+	DMA_FifoSetHeadPa(  fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr,0);
+	DMA_FifoSetTailPa(  fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr,0);
+	DMA_FifoSetEndPa(   fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr,0);
+    }
+
+    fg_ptr->num_normal_fifos = idx;
+    fg_ptr->status_ptr->clear_threshold_crossed[0] = fg_ptr->mask;
+    fg_ptr->status_ptr->clear_threshold_crossed[1] = fg_ptr->mask;
+
+
+    _bgp_mbar();
+
+    x = (fg_ptr->status_ptr->threshold_crossed[0] & fg_ptr->mask);
+    if(x != 0) {
+	printk(KERN_WARNING "Error: _bgp_err_dma_sram_init  %s(%d)\n", __FILE__,__LINE__ );
+	return -EFAULT;
+    }
+
+    fifoMask = 0;
+
+    for(fifoIndex=0;fifoIndex< fg_ptr->num_normal_fifos;fifoIndex++) {
+	fifoMask |= _BN(fg_ptr->fifos[fifoIndex].global_fifo_id);
+    }
+
+    bgcns_setDmaFifoControls(BGCNS_Enable,
+				BGCNS_ReceptionFifoInterrupt,
+				fg_ptr->group_id,
+				fifoMask,NULL);
+    _bgp_msync();
+    _bgp_isync();
+
+    return 0;
+}
+
+/* ============== */
+
+static int dma_RecFifoInitByID( struct RecFifoInitByID_struct* commbuf)
+{
+    DMA_RecFifoGroup_t *fg_ptr = (DMA_RecFifoGroup_t *)commbuf->fg_ptr;
+    int                fifo_id = commbuf->fifo_id;
+    void               *va_start = commbuf->va_start;
+    void               *va_head = commbuf->va_head;
+    void               *va_end  = commbuf->va_end;
+    /****/
+    int group;
+    int g_fifo_id;
+    int i;
+    uint32_t xint[4] = {0,0,0,0};
+    void *x;
+
+    SCDEBUG;
+
+
+    group = fg_ptr->group_id;
+    g_fifo_id = fg_ptr->fifos[fifo_id].global_fifo_id;
+    
+    
+    if(g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS) {
+	if((_bgp_dma_usage.rec_normal_fifo_init & _BN(g_fifo_id)) != 0){
+	    printk( KERN_WARNING "Error: %s(%d)\n", __FILE__,__LINE__);
+	    return -EFAULT;
+	}
+	_bgp_dma_usage.rec_normal_fifo_init |= _BN(g_fifo_id);
+	bgcns_setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifo,0,_BN(g_fifo_id),NULL);
+	for(i=0;i<4;i++) {
+	    bgcns_setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifoInterrupt,i,0xffffffff,&(xint[i]));
+	}
+    } else {
+	if((_bgp_dma_usage.rec_header_fifo_init & _BN(g_fifo_id - 32)) != 0) {
+	    printk( KERN_WARNING "Error: %s(%d)\n", __FILE__,__LINE__);
+	    return -EFAULT;
+	}
+
+	// remember the reception header FIFO has been initialized
+	_bgp_dma_usage.rec_header_fifo_init |= _BN(g_fifo_id-32);
+
+	bgcns_setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo,group,0,NULL);
+	bgcns_setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifoInterrupt,0,0xffffffff,xint);
+    }
+
+    _bgp_DMA_FifoInit( &(fg_ptr->fifos[fifo_id].dma_fifo),
+		       va_start,va_head,va_end);
+
+    DMA_RecFifoSetClearThresholdCrossedById(fg_ptr,fifo_id);
+
+    /* DMA_FifoGetHead */
+    x =  DMA_FifoGetHead(&(fg_ptr->fifos[fifo_id].dma_fifo) );
+    if ( x != fg_ptr->fifos[fifo_id].dma_fifo.va_tail) {
+	printk( KERN_WARNING "Error: %s(%d)\n", __FILE__,__LINE__);
+	return -EFAULT;
+    }
+
+    if(g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS) {
+	bgcns_setDmaFifoControls(BGCNS_Enable, BGCNS_ReceptionFifo, 0, _BN(g_fifo_id), NULL);
+	for(i=0;i<4;i++) {
+	    bgcns_setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionFifoInterrupt,i,0,&(xint[i]));
+	}
+    } else {
+	bgcns_setDmaFifoControls(BGCNS_Enable,   BGCNS_ReceptionHeaderFifo,         group,0,NULL);
+	bgcns_setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionHeaderFifoInterrupt,0,  0,xint);
+    }
+
+    return 0;
+}
+
+
+int __init bgpdma_device_init(void)
+{
+    unsigned long long dma_phy_lower, dma_phy_upper;
+    int rc;
+    extern int bgp4GB; /* defined in arch/powerpc/mm/init_32.c */
+    extern char _end[];  /* ELF symbol */
+
+    bgp_dma_base = 0xFFFD0000;  /* XXX: fix hard-coded address */
+
+    zepto_debug(2,"bgp_dma_base=%08x\n", bgp_dma_base);
+
+    dma_phy_lower = (unsigned long long)((unsigned long)_end-PAGE_OFFSET);
+
+#ifdef CONFIG_ZEPTO_CNS_RELOCATION
+    if( bgp4GB )  dma_phy_upper = 0x100000000ULL;
+    else  	  dma_phy_upper = 0x80000000ULL;
+#else
+    if( bgp4GB )  dma_phy_upper = 0x100000000ULL - 0x01000000ULL ;
+    else  	  dma_phy_upper = 0x80000000ULL - 0x01000000ULL;
+#endif
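+    /*
+     * Illustrative arithmetic (assumption: the 16MB held back is the CNS
+     * region): with bgp4GB set and no CNS relocation, dma_phy_upper =
+     * 0x100000000ULL - 0x01000000ULL = 0xFF000000, so the top 16MB of
+     * physical memory is excluded from the DMA-addressable range.
+     */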
+
+    
+    zepto_debug(2,"dma_phy_lower=%08llx dma_phy_upper=%08llx\n", 
+		dma_phy_lower, dma_phy_upper );
+
+    rc = bgcns_dmaSetRange(1, &dma_phy_lower, &dma_phy_upper,
+			   1, &dma_phy_lower, &dma_phy_upper);
+
+    if( rc!=0 ) {
+	panic("ERROR: Failed to dmaSetRange()  dma_phy_lower=0x%08llx dma_phy_upper=0x%08llx\n",
+	      dma_phy_lower, dma_phy_upper);
+    }
+
+    return 0;
+}
+
+
+asmlinkage  long sys_zepto_dma(unsigned cmd, unsigned arg)
+{
+    int rc=0;
+
+    /* arg contains the address of bgpdma communication buffer which is
+     * allocated from statictlb area.
+     */
+
+    zepto_debug(3,"sys_zepto_dma cmd=%08x arg=%08x\n",cmd, arg);
+
+    switch(cmd) {
+	case ZEPTOSC_DMA_COUNTERGROUPQUERYFREE:
+	    rc = dma_CounterGroupQueryFree( (struct CounterGroupQueryFree_struct*)arg );
+	    break;
+	case ZEPTOSC_DMA_COUNTERGROUPALLOCATE:
+	    rc = dma_CounterGroupAllocate( (struct CounterGroupAllocate_struct*)arg );
+	    break;
+	case ZEPTOSC_DMA_INJFIFOGROUPQUERYFREE:
+	    rc = dma_InjFifoGroupQueryFree( (struct InjFifoGroupQueryFree_struct*)arg );
+	    break;
+	case ZEPTOSC_DMA_INJFIFOGROUPALLOCATE:
+	    rc = dma_InjFifoGroupAllocate( (struct InjFifoGroupAllocate_struct*)arg );
+	    break;
+	case ZEPTOSC_DMA_INJFIFOINITBYID:
+	    rc = dma_InjFifoInitByID( (struct InjFifoInitByID_struct*)arg );
+	    break;
+	case ZEPTOSC_DMA_RECFIFOSETMAP:
+	    rc = dma_RecFifoSetMap( (uint32_t __user *)arg );
+	    break;
+	case ZEPTOSC_DMA_RECFIFOGETFIFOGROUP:
+	    rc = dma_RecFifoGetFifoGroup( (struct RecFifoGetFifoGroup_struct*)arg);
+	    break;
+	case ZEPTOSC_DMA_RECFIFOINITBYID:
+	    rc = dma_RecFifoInitByID( (struct RecFifoInitByID_struct*)arg );
+	    break;
+	case ZEPTOSC_DMA_CHGCOUNTERINTERRUPTENABLES:
+	    if( arg ) {
+		rc = -EINVAL;
+	    } else {
+		bgcns_disableInterrupt( 3, 10 );
+		bgcns_disableInterrupt( 3, 11 );
+		bgcns_disableInterrupt( 3, 12 );
+		bgcns_disableInterrupt( 3, 13 );
+		rc = 0;
+	    }
+	    break;
+	default:
+	    printk(KERN_ERR "[Z] sys_zepto_dma: unknown cmd=%u arg=%08x\n", cmd, arg);
+	    return -EINVAL;
+    }
+    
+    zepto_debug(3,"sys_zepto_dma cmd=%08x passed.\n",cmd);
+
+    return rc;
+}
+
+int __init bgpdma_init(void)
+{
+    extern BGCNS_Descriptor bgcnsd;
+
+    bgpdma_device_init();
+
+    zepto_debug(2,"bgpdma is initialized\n");
+
+    /* just debug */
+
+    zepto_debug(2,"baseVirtualAddress=0x%08x  size=0x%08x basePhysicalAddress=0x%08x basePhysicalAddressERPN=0x%08x\n",
+		bgcnsd.baseVirtualAddress,
+		bgcnsd.size,
+		bgcnsd.basePhysicalAddress,
+		bgcnsd.basePhysicalAddressERPN);
+    return 0;
+} 
+__initcall(bgpdma_init);
diff --git a/arch/powerpc/syslib/bgdd/zepto_bluegene_lockbox.c b/arch/powerpc/syslib/bgdd/zepto_bluegene_lockbox.c
new file mode 100644
index 0000000..64efabb
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/zepto_bluegene_lockbox.c
@@ -0,0 +1,212 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+/*
+  BGP lockbox driver for ZCL
+*/
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/atomic.h>
+#include <asm/time.h>
+
+#include <linux/zepto_bigmem.h>
+
+#include <zepto/zepto_syscall.h>
+
+#include <asm/bgcns.h>
+
+#include <linux/zepto_debug.h>
+
+
+#define __ZCL_KERNEL__
+#define __ZCL__
+
+#include <bpcore/lockbox.h> 
+#include <bpcore/bgp_lockbox_inlines.h>
+
+#include <linux/zepto_task.h>
+
+
+/* static variables */
+
+static void *bgplockbox_supervisor;
+static void *bgplockbox_user;
+
+static unsigned long _bgplockbox_array[4][_BGP_LOCKBOX_LOCKS_PER_PAGE/32];
+/* static struct   semaphore  _sem_bgplockbox_array; */
+
+/* XXX: not a good idea, but no header defines these masks */
+#define LOCKBOX_FLAGS_ORDERED_ALLOC         0x100
+#define LOCKBOX_FLAGS_MASTER_PROCESSOR_MASK  0xf0
+#define LOCKBOX_FLAGS_PROCESSOR_MASK         0x0f
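+
+/*
+ * Worked example (illustrative): flags = 0x104 decodes as ORDERED_ALLOC
+ * set, master core (0x104 & 0xf0) >> 4 = 0 and (0x104 & 0x0f) = 4
+ * participating cores, i.e. an ordered VN-mode allocation driven by
+ * core 0.
+ */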
+
+static atomic_t n_procs_joined1 = ATOMIC_INIT(0);
+static atomic_t n_procs_joined2 = ATOMIC_INIT(0);
+
+void internode_barrier(int mastercore, int num_cores)
+{
+    _bgp_LockBox_Barrier_Group((unsigned)bgplockbox_supervisor,mastercore,num_cores);
+}
+
+
+static uint32_t _bgplockbox_allocate(struct AllocateLockBox_struct *lb)
+{
+    int i;
+    int coreid = smp_processor_id();
+    unsigned flags = lb->flags;
+    int master_core, num_cores;
+
+    master_core = (flags&LOCKBOX_FLAGS_MASTER_PROCESSOR_MASK)>>4;
+    num_cores = (flags&LOCKBOX_FLAGS_PROCESSOR_MASK);
+
+    zepto_debug(3,"core=%d _bgplockbox_allocate() locknum=%d numlocks=%d flags=%08x master_core=%d num_cores=%d\n",
+		coreid, lb->locknum, lb->numlocks, flags, master_core, num_cores );
+    
+    if( lb->locknum + lb->numlocks > _BGP_LOCKBOX_LOCKS_PER_PAGE ) {
+	printk(KERN_WARNING "bgplockbox: locknum %d is invalid\n",
+	       lb->locknum );
+	return -EINVAL;
+    }
+
+    /* check whether the desired lockboxes are already taken */
+    for( i=lb->locknum; i<lb->locknum + lb->numlocks; i++ ) {
+	if( test_bit(i%32,  &(_bgplockbox_array[coreid][i/32]) ) ) {
+	    printk(KERN_WARNING "Error: lockbox %d is already in use\n", i);
+	    return -EAGAIN;
+	}
+    }
+
+    /* VN or DUAL mode, we wait all processor to join */
+    if( flags & LOCKBOX_FLAGS_ORDERED_ALLOC ) {
+#if 0
+	zepto_debug(3, "%10u: core=%d waits all %d procs to join. 1st\n", (unsigned)get_tbl(), coreid, bigmem_nprocs_per_node );
+	atomic_inc( &n_procs_joined1 );
+	/* generally this kind of coding is not good but probably ok for our CN usage */
+	while( atomic_read( &n_procs_joined1 ) < bigmem_nprocs_per_node )   ;
+	zepto_debug(3, "%10u: all procs joined. 1st\n", (unsigned)get_tbl() );
+	atomic_dec( &n_procs_joined1 );
+#else
+	zepto_debug(3, "%10u: core=%d waits all %d procs to join. 1st\n", (unsigned)get_tbl(), coreid, bigmem_nprocs_per_node );
+	_bgp_LockBox_Barrier_Group((unsigned)bgplockbox_supervisor,master_core, num_cores);
+	zepto_debug(3, "%10u: all procs joined. 1st\n", (unsigned)get_tbl() );
+#endif
+    }
+
+    for (i=lb->locknum; i<lb->locknum + lb->numlocks; i++) {
+	if( coreid == master_core ) {
+	    _bgp_LockBox_Write((uint32_t)bgplockbox_user, i, 0);
+	}
+
+	set_bit( i%32,  &(_bgplockbox_array[coreid][i/32]) );  
+
+	lb->lockbox_va[i - lb->locknum] = 
+	    (unsigned long)bgplockbox_user + _BGP_LOCKBOX_NUM2ADDR(i);
+
+	zepto_debug(3, "core=%d  lockbox_va=%p\n", coreid, (void*)(lb->lockbox_va[i - lb->locknum]) );
+    }
+
+    /* In VN or DUAL mode, wait for all processors to join */
+    if( flags & LOCKBOX_FLAGS_ORDERED_ALLOC ) {
+#if 0
+	zepto_debug(3, "%10u: core=%d waits all %d procs to join. 2nd\n", (unsigned)get_tbl(), coreid, bigmem_nprocs_per_node );
+	atomic_inc( &n_procs_joined2 );
+	/* generally this kind of coding is not good but probably ok for our CN usage */
+	while( atomic_read( &n_procs_joined2 ) < bigmem_nprocs_per_node )   ;
+	zepto_debug(3, "%10u: all procs joined. 2nd\n", (unsigned)get_tbl() );
+	atomic_dec( &n_procs_joined2 );
+#else
+	zepto_debug(3, "%10u: core=%d waits all %d procs to join. 2nd\n", (unsigned)get_tbl(), coreid, bigmem_nprocs_per_node );
+	_bgp_LockBox_Barrier_Group((unsigned)bgplockbox_supervisor,master_core, num_cores);
+	zepto_debug(3, "%10u: all procs joined. 2nd\n", (unsigned)get_tbl() );
+#endif
+    }
+
+    return 0;
+}
+
+void bgplockbox_reset(void)
+{
+    memset( _bgplockbox_array, 0, sizeof(_bgplockbox_array) );
+    zepto_debug(4,"Reset lockbox\n");
+}
+
+asmlinkage  long sys_zepto_lockbox(unsigned key, unsigned val)
+{
+    long ret = -EINVAL;
+
+    switch( key ) {
+	case ZEPTOSC_LOCKBOX_ALLOCATE:
+	    if( !(enable_bigmem&&IS_ZEPTO_TASK(current)) ) return -EINVAL;
+	    ret = _bgplockbox_allocate( (struct AllocateLockBox_struct*) val );
+	    break;
+	case ZEPTOSC_LOCKBOX_RESET:
+	    if( !(enable_bigmem&&IS_ZEPTO_TASK(current)) ) return -EINVAL;
+	    bgplockbox_reset();
+	    break;
+	default:
+	    ret = -EINVAL;
+	    break;
+    }
+    return ret;
+}
+
+
+int __init bgplockbox_init(void)
+{
+    bgplockbox_reset();
+
+    /* TLBs are statically mapped for lockbox */
+    bgplockbox_supervisor = (void*)0xffff0000;
+    bgplockbox_user       = (void*)0xffff4000;
+
+    zepto_debug(4,"bgplockbox_init()\n");
+
+    return 0;
+} 
+__initcall(bgplockbox_init);
diff --git a/arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c b/arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c
new file mode 100644
index 0000000..0f45af1
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c
@@ -0,0 +1,360 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/zepto_debug.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+
+#include <asm/bgcns.h>
+
+#include <asm/bgp_personality.h>
+
+#define __ZCL_KERNEL__
+
+#define __ZCL__  /* we need MMIO address definition for linux*/
+
+#include <bpcore/bgp_collective_inlines.h>
+#include <common/bgp_personality_inlines.h>
+#include <bpcore/ppc450_inlines.h>
+#include <bpcore/bgp_global_ints.h>
+#include <bpcore/collective.h>
+#define  __INLINE__  extern inline
+#include <spi/GlobInt.h>
+
+#include <linux/zepto_task.h>
+
+int bigmem_nprocs_per_node = 1;
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+
+extern BGCNS_Descriptor bgcnsd;  /* defined in platforms/44x/bgp_cns.c */
+extern int bluegene_getPersonality(void *buff, unsigned buffSize); /* defined in platforms/44x/bgp_cns.c */
+
+static void _do_global_barrier(void)
+{
+    zepto_debug(3,"entering globalBarrier()...\n");
+    local_irq_disable();
+    bgcnsd.services->globalBarrier();
+    local_irq_enable();
+    zepto_debug(3,"globalBarrier() out\n");
+}
+
+static uint16_t _get_port_masks(int i)
+{
+    uint16_t masks;
+    switch(i) {
+	case 0: masks = _BGP_TREE_RDR_SRC0|_BGP_TREE_RDR_TGT0; break;
+	case 1: masks = _BGP_TREE_RDR_SRC1|_BGP_TREE_RDR_TGT1; break;
+	case 2: masks = _BGP_TREE_RDR_SRC2|_BGP_TREE_RDR_TGT2; break;
+	case 3: masks = _BGP_TREE_RDR_SRCL|_BGP_TREE_RDR_TGTL; break; /* local node */
+	default: masks = 0; /* unknown port: contribute no routing bits */
+    }
+    return masks;
+}
+
+static void _write_classroute3_to_rdr1(uint16_t  classroute3)
+{
+    uint32_t  rdr1;
+    _bgp_mbar();
+    rdr1 = _bgp_mfdcrx(_BGP_DCR_TR_CLASS_RDR1);
+    _bgp_mbar();
+    _bgp_mtdcrx(_BGP_DCR_TR_CLASS_RDR1, (rdr1 & 0xFFFF0000) | classroute3);
+    _bgp_mbar();
+}
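+
+/*
+ * Illustrative note: only the low 16 bits of RDR1 carry the class-3
+ * route, so (rdr1 & 0xFFFF0000) | classroute3 installs the new route
+ * while preserving the unrelated upper half of the register.
+ */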
+
+
+static uint32_t  _get_target_port(_BGP_TreePayload status)
+{
+    return status.u32[0];
+}
+static uint32_t  _get_subtree_status(_BGP_TreePayload status)
+{
+    return status.u32[1];
+}
+
+static _BGP_TreePayload  _set_target_port(_BGP_TreePayload status,uint32_t port)
+{
+    status.u32[0]=port;
+    return status;
+}
+static _BGP_TreePayload  _set_subtree_status(_BGP_TreePayload status,uint32_t subtree_status)
+{
+    status.u32[1]=subtree_status;
+    return status;
+}
+
+static void set_MSR_FP(int val)
+{
+    unsigned long msr;
+    __asm__ __volatile__ ("mfmsr %0" : "=r" (msr));
+    if( val ) {
+	msr |= MSR_FP;
+    }  else  {
+	msr &= (~MSR_FP);
+    }
+    __asm__ __volatile__ ("mtmsr %0" : : "r" (msr));
+}
+
+static void setup_treeroute(int active, int job_size)
+{
+    BGP_Personality_t bgpers;
+    uint16_t   classroute3=0;
+    uint16_t   port_masks[4];
+    unsigned   n_children = 0;
+    int child_status = 0;
+    int rc, tsize,  n_nodes, i;
+    int parent_port = -1;
+
+    set_MSR_FP(1);     /* Enable FPU. XXX: this is a bit brute-force. clean it up later */
+
+    /* enable barrier ch0 for usermode */
+    local_irq_disable();
+    rc = bgcnsd.services->enableBarrier(0, 1); 
+    local_irq_enable();
+    zepto_debug(2,"enableBarrier(0,1) rc=%d cpu=%d\n",rc,smp_processor_id() );
+
+    zepto_debug(3,"setup_treeroute(active=%d)\n",active);
+    _bgp_mtdcrx(_BGP_DCR_GLOBINT_ASSERT_CH(0),active);
+    _do_global_barrier();
+
+    zepto_debug(3,"Set class3 route. cpu=%d\n",smp_processor_id() );
+
+    bluegene_getPersonality(&bgpers, sizeof(bgpers));
+    classroute3 = bgpers.Network_Config.TreeRoutes[1];
+    zepto_debug(3,"classroute3=%08x\n", classroute3);
+
+    /* calculate the number of nodes */
+
+    switch( bgpers.Kernel_Config.ProcessConfig )     {
+	case _BGP_PERS_PROCESSCONFIG_2x2 : tsize = 2; break;
+	case _BGP_PERS_PROCESSCONFIG_VNM : tsize = 4; break;
+	default:
+	    tsize = 1;
+    }
+    zepto_debug(3,"tsize=%d\n", tsize);
+
+    n_nodes =   
+	bgpers.Network_Config.Xnodes * 
+	bgpers.Network_Config.Ynodes * 
+	bgpers.Network_Config.Znodes * tsize;
+
+    zepto_debug(3,"n_nodes=%d job_size=%d\n", n_nodes, job_size);
+
+    if( n_nodes == job_size ) {  /* fully occupied */
+	_write_classroute3_to_rdr1(classroute3);
+	zepto_debug(3,"configured as a fully occupied partition.\n");
+	return ;
+    }
+
+    /* count up the number of children */
+    n_children=0;
+    for(i=0; i<3; i++) {
+	if ( !BGP_Personality_treeInfo_isRedundant(&bgpers,i) ) {
+	    if( BGP_Personality_treeInfo_commWorld(&bgpers,i)==_BGP_PERS_TREEINFO_COMMWORLD_CHILD ) {
+		n_children++;
+	    }
+	}
+    }
+    zepto_debug(3,"n_children=%d\n", n_children);
+
+    /* disable all nodes if not active */
+    if( !active )   classroute3 &= (~ _get_port_masks(3));
+
+    zepto_debug(3,"classroute3=%08x\n", classroute3);
+
+    /* wait a msg from children */
+    for( i=0; i<n_children; ++i ) {
+	_BGP_TreeHwHdr    hdr;
+	_BGP_TreePayload  status;
+
+	/* check whether VC0 contains a header and a payload */
+
+	while(!_bgp_TreeReadyToReceiveVC0() );
+	_bgp_TreeRawReceivePacketVC0(&hdr, &status);
+
+	zepto_debug(3,"SUBTREE_STATUS=%d TARGET_PORT=%d\n",
+		 _get_subtree_status(status), 
+		 _get_target_port(status) );
+
+	if( _get_subtree_status(status) ) {
+	    child_status = 1;
+	} else  {
+	    uint32_t target_port = _get_target_port(status);
+	    classroute3 &= (~ _get_port_masks(target_port) );
+
+	    zepto_debug(3,"classroute3=%08x target_port=%08x\n", classroute3,target_port);
+	}                                  
+    }
+
+    /* send information to the parent */
+    parent_port = -1;
+    for(i=0; i< 3; i++) {
+	if( BGP_Personality_treeInfo_commWorld(&bgpers,i)==_BGP_PERS_TREEINFO_COMMWORLD_PARENT) {
+	    parent_port=i;
+	    break;
+	}
+    }
+    zepto_debug(3,"parent_port=%d\n",parent_port);
+
+    if(parent_port>=0) {
+	_BGP_TreeHwHdr hdr;
+	int ptptarget;
+	_BGP_TreePayload status;
+
+	memset(&status, 0, sizeof(status));	/* words 0 and 1 are filled in below */
+	ptptarget = BGP_Personality_treeInfo_destP2Paddr(&bgpers,parent_port);
+	zepto_debug(3,"ptptarget=%d\n", ptptarget);
+
+	_bgp_TreeMakePtpHdr(&hdr,
+			    1,     /* class route? */
+			    false, /* interrupt?  */
+			    ptptarget, 
+			    _BGP_TREE_CSUM_SOME    );
+
+	status = _set_target_port(status, 
+				  BGP_Personality_treeInfo_destPort(&bgpers, parent_port) );
+
+	status = _set_subtree_status(status,
+				     (active|child_status));
+
+	_bgp_TreeRawSendPacket(0, &hdr, &status);
+    }
+
+    _write_classroute3_to_rdr1(classroute3);
+    _do_global_barrier();
+    set_MSR_FP(0);     /* Disable FPU */
+
+    zepto_debug(3,"configured as a partially occupied partition.\n");
+}
+
+
+
+static int setup_treeroute_write(struct file *file, const char *buffer,
+				 unsigned long len, void *data)
+{
+    char tmp[20];
+
+    if( bigmem_process_active_count() > 0 ) {
+	printk("[Z] bigmem is in use, so unable to reset treeroute!\n");
+	return -EBUSY;
+    }
+
+    printk("[Z] Reset treeroute\n");
+
+    bigmem_nprocs_per_node=1;
+
+    if( len > sizeof(tmp)-1 ) len = sizeof(tmp)-1;
+
+    if(copy_from_user(tmp, buffer,len) == 0 ) {
+	extern void  bluegene_set_Kernel_Config_ProcessConfig(int nprocs); /* defined in arch/powerpc/platforms/44x/bgp_cns.c */
+
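+	/* Input format, inferred from the offsets used below:
+	 * "<active> <nprocs> <job_size>", e.g. "1 4 64". */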
+	int active = 0;
+	int job_size;
+	char* p;
+	tmp[len] = '\0';	/* make sure simple_strtol() below sees a terminated string */
+	if( tmp[0] == '1' )  active = 1;
+	p = tmp+2;
+	switch( *p ) {
+	    case '4': bigmem_nprocs_per_node=4; break;
+	    case '2': bigmem_nprocs_per_node=2; break;
+	    default:
+		bigmem_nprocs_per_node=1;
+	}
+
+	bluegene_set_Kernel_Config_ProcessConfig(bigmem_nprocs_per_node);
+
+	p = tmp+4;
+	job_size = simple_strtol(p,&p,0);
+	setup_treeroute(active,job_size);
+
+	zepto_debug(2, "setup_treeroute: active=%d nprocs=%d job_size=%d\n",active, bigmem_nprocs_per_node, job_size);
+    } else {
+	return -EFAULT;
+    }
+
+    init_bigmem_pa();
+    
+    return len;
+}
+
+static int __init  zeptorc_init(void)
+{
+    struct proc_dir_entry *p_setup_treeroute;
+
+    bigmem_nprocs_per_node=1; /* SMP is default */
+    init_bigmem_pa();
+
+    p_setup_treeroute = create_proc_entry("setup_treeroute", S_IFREG|S_IRUGO|S_IWUGO, NULL );
+    if( p_setup_treeroute ) {
+	p_setup_treeroute->nlink = 1;
+	p_setup_treeroute->write_proc = setup_treeroute_write; 
+    } else {
+	panic("Failed to register /proc/setup_treeroute\n");
+    }
+    return 0;
+}
+
+#else   /* for ION */
+
+static int __init  zeptorc_init(void)
+{
+    bigmem_nprocs_per_node=1; /* SMP is default */
+    init_bigmem_pa();
+    return 0;
+}
+
+#endif
+
+__initcall(zeptorc_init);
diff --git a/arch/powerpc/syslib/bgdd/zepto_task.c b/arch/powerpc/syslib/bgdd/zepto_task.c
new file mode 100644
index 0000000..a4831bd
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/zepto_task.c
@@ -0,0 +1,262 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+/*
+  Support code for zepto tasks
+*/
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>		/* kzalloc()/kfree() in bgprint_write() */
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/uaccess.h>
+
+#include <linux/zepto_task.h>
+
+int  enable_bigmem;
+
+static int bigmem_proc_read(char *page, char **start, off_t off,
+				    int count, int *eof, void *data)
+{
+    int rc;
+    rc = snprintf(page, count, "%d", enable_bigmem);
+    *eof = 1;
+    return (rc >= 0 ? rc : 0);
+}
+
+static int bigmem_proc_write(struct file * filp, const char __user *buf,
+					  unsigned long len, void * data)
+{
+    char c;
+
+    /* 'buf' is a user pointer and must not be dereferenced directly */
+    if( len < 1 || get_user(c, buf) )
+	return -EFAULT;
+
+    enable_bigmem = (c != '0');
+    zepto_debug(2,"set enable_bigmem %d\n",
+	   enable_bigmem);
+    return len;
+}
+
+
+#ifdef CONFIG_ZEPTO_EXPERIMENTAL
+#include <linux/random.h>
+
+static int rtest_proc_read(char *page, char **start, off_t off,
+				    int count, int *eof, void *data)
+{
+    int rc;
+    rc = snprintf(page, count, "%d\n", random32());
+    *eof = 1;
+    return (rc >= 0 ? rc : 0);
+}
+
+static int rtest_proc_write(struct file * filp, const char __user *buf,
+					  unsigned long len, void * data)
+{
+    char tmp[16];
+    u32 entropy;
+
+    /* copy to a terminated kernel buffer before parsing */
+    if( len > sizeof(tmp)-1 ) len = sizeof(tmp)-1;
+    if( copy_from_user(tmp, buf, len) )
+	return -EFAULT;
+    tmp[len] = '\0';
+
+    entropy = simple_strtoul(tmp,NULL,0);
+    srandom32(entropy);
+    zepto_debug(1,"rtest: entropy=%d\n", entropy);
+    return len;
+}
+#endif
+
+
+static int bigmem_reset_proc_write(struct file *file, const char *buffer,
+			 unsigned long len, void *data)
+{
+    char tmp[2];
+
+    if( len < 1 || copy_from_user(tmp, buffer, 1) != 0 )
+	return -EFAULT;
+
+    tmp[1] = 0;
+    zepto_debug(1,"bigmem_reset_proc_write %s",tmp);
+
+    if( tmp[0] == '1' ) {
+	if( enable_bigmem ) {
+	    bigmem_process_reset();
+	    if( bigmem_mmap_finalize() !=BIGMEM_MMAP_SUCCESS ) {
+		printk( KERN_ALERT  "[Z] bigmem_mmap_finalize() failed.\n");
+	    }
+	    free_bigmem_tlb();
+	}
+	zepto_debug(1, "bigmem is hard-reset.\n");
+    }
+
+    return len;
+}
+
+
+#ifdef CONFIG_ZEPTO_EXPERIMENTAL
+static int bgprint_write(struct file *file, const char *buffer,
+			 unsigned long len, void *data)
+{
+    char *tmp;
+    int rc;
+
+    tmp = kzalloc(len+1,GFP_KERNEL);
+    if( !tmp ) return -ENOMEM;
+
+    if(copy_from_user(tmp, buffer, len) == 0 ) {
+	tmp[len] = 0;
+	zepto_debug(1,"%s",tmp);
+	rc = len;
+    } else {
+	rc = -EFAULT;
+    }
+
+    kfree(tmp);
+    return rc;
+}
+#endif
+
+static int zeptonext_proc_read(char *page, char **start, off_t off,
+				    int count, int *eof, void *data)
+{
+    int rc;
+    rc = snprintf(page, count, "zepto next code compiled on %s\n", __DATE__);
+    *eof = 1;
+    return (rc >= 0 ? rc : 0);
+}
+
+static int __init  zepto_task_init(void)
+{
+    struct proc_dir_entry *p;
+
+    enable_bigmem = 1; /* bigmem is enabled by default */
+
+    p = create_proc_entry("bigmem_ctrl", S_IRUGO|S_IWUGO, NULL);
+    if(p) {
+	p->nlink = 1;
+	p->read_proc  = bigmem_proc_read;
+	p->write_proc = bigmem_proc_write;
+    }
+    zepto_debug(2,"/proc/bigmem_ctrl is registered\n");
+
+    /* bigmem hard reset interface */
+    p = create_proc_entry("bigmem_reset", S_IFREG|S_IRUGO|S_IWUGO, NULL );
+    if(p ) {
+	p->nlink = 1;
+	p->write_proc = bigmem_reset_proc_write; 
+    } else {
+	printk(KERN_WARNING "Failed to register /proc/bigmem_reset\n");
+    }
+    zepto_debug(2,"/proc/bigmem_reset is registered\n");
+
+    p = create_proc_entry("zeptonext", S_IFREG|S_IRUGO|S_IWUGO, NULL );
+    if(p ) {
+	p->nlink = 1;
+	p->read_proc = zeptonext_proc_read; 
+    } else {
+	printk(KERN_WARNING "Failed to register /proc/zeptonext\n");
+    }
+    zepto_debug(2,"/proc/zeptonext is registered\n");
+
+
+#ifdef CONFIG_ZEPTO_EXPERIMENTAL
+    p = create_proc_entry("rtest", S_IRUGO|S_IWUGO, NULL);
+    if(p) {
+	p->nlink = 1;
+	p->read_proc  = rtest_proc_read;
+	p->write_proc = rtest_proc_write;
+    }
+    zepto_debug(2,"/proc/rtest is registered\n");
+
+    p = create_proc_entry("bgprint", S_IFREG|S_IRUGO|S_IWUGO, NULL );
+    if(p ) {
+	p->nlink = 1;
+	p->write_proc = bgprint_write; 
+    } else {
+	printk(KERN_WARNING "Failed to register /proc/bgprint\n");
+    }
+    zepto_debug(2,"/proc/bgprint is registered\n");
+#endif
+
+    return 0;
+}
+__initcall(zepto_task_init);
+
+int zepto_task_error(const char* fmt,...)
+{
+    extern int bgWriteConsoleBlockDirect(const char* fmt,...); /* ./drivers/char/bluegene_console.c */
+    char buf[256];
+    int rc;
+    va_list  args;
+
+    /* format locally: a va_list cannot be passed through a "..." parameter */
+    va_start(args,fmt);
+    vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+
+    rc = bgWriteConsoleBlockDirect("%s", buf);
+    return rc;
+}
+
+
+#ifdef CONFIG_ZEPTO_NOTUSED
+
+/* code fragments for tick disabling */
+
+static inline unsigned long long get_tb(void)
+{
+    unsigned long tbu0, tbu1, tbl;
+    do {
+	tbu0 = get_tbu();
+	tbl  = get_tbl();
+	tbu1 = get_tbu();
+    } while (tbu0 != tbu1);
+    
+    return ((unsigned long long) tbu0 << 32) | tbl;
+}
+
+static inline void disable_decrementer(void)
+{
+    unsigned int tcr = mfspr(SPRN_TCR);
+    mtspr(SPRN_TCR, tcr & (~TCR_DIE));
+}
+
+static inline void enable_decrementer(void)
+{
+    unsigned int tcr = mfspr(SPRN_TCR);
+    mtspr(SPRN_TCR, tcr | TCR_DIE);
+}
+
+
+void zc_enter_computation(void)
+{
+    disable_decrementer();
+}
+
+void zc_exit_computation(void)
+{
+    /*unsigned int r1;
+      __asm__ __volatile__ ("mr %0,1" : "=r"(r1));
+      printk(KERN_CRIT "running zc_exit_computation, r1=%u\n", r1);*/
+    enable_decrementer();
+}
+
+#endif //  CONFIG_ZEPTO_NOTUSED
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 9caf5b5..6585ac4 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -111,6 +111,8 @@
 obj-$(CONFIG_JS_RTC)		+= js-rtc.o
 js-rtc-y = rtc.o
 
+obj-$(CONFIG_BGP)		+= bluegene_console.o bluegene_networks.o
+
 # Files generated that shall be removed upon make clean
 clean-files := consolemap_deftbl.c defkeymap.c
 
diff --git a/drivers/char/bluegene_console.c b/drivers/char/bluegene_console.c
new file mode 100644
index 0000000..0af84ec
--- /dev/null
+++ b/drivers/char/bluegene_console.c
@@ -0,0 +1,915 @@
+/*
+ * Blue Gene Console over JTAG.
+ *
+ * (C) Copyright IBM Corp. 2003,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Todd Inglett <tinglett@vnet.ibm.com>
+ *
+ *
+ */
+
+#include <linux/unistd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/major.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/kbd_kern.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/sysrq.h>
+#include <linux/syscalls.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+
+#include <asm/bluegene.h>
+#include <asm/bluegene_ras.h>
+
+static struct proc_dir_entry *proc_ras;         /* /proc/ras */
+static struct proc_dir_entry *proc_ras_ascii;   /* /proc/ras_ascii */
+
+
+/* ToDo: figure out what to do with bgprintf... */
+#define bgprintf udbg_printf
+#include <asm/udbg.h>
+
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+/* console message output control */
+
+#include <linux/zepto_debug.h>
+#include <asm/bgp_personality.h>
+static int enable_console;
+#endif
+
+
+#define BLUEGENE_MAJOR	229
+#define BLUEGENE_MINOR	0
+
+
+typedef struct _BG_CONSOLE {
+	struct tty_struct*	tty;
+	spinlock_t		ttyLock;
+	struct tty_driver*	ttyDriver;
+#define BG_OUTBOX_BUFF_SIZE 8192
+	unsigned char		outboxBuff[BG_OUTBOX_BUFF_SIZE];
+	spinlock_t		outboxLock;
+#define BG_RAS_MAGIC_CHAR	((unsigned char) 0xff)
+#define BG_RAS_TYPE_BINARY	((unsigned char) 0x82)
+#define BG_RAS_TYPE_ASCII	((unsigned char) 0x88)
+#define BG_OUTBOX_MSG_SIZE 248
+	unsigned int 		outboxHead;
+	unsigned int		outboxTail;
+	unsigned int		outboxMsgAge;
+#define BG_OUTBOX_MAX_AGE 36
+	unsigned int		outboxRetry;
+#define BG_OUTBOX_MAX_RETRY 2
+	int			outboxMsgSent;
+	struct task_struct*	kmboxdTask;
+	/* Wait queue to wakeup kmboxd.  For now it runs strictly on timeout (polling),
+	 * but in the future an interrupt or other means could wake it.
+	 */
+	wait_queue_head_t	wait;
+} BG_CONSOLE;
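+
+/* The outbox carries a single byte stream: ordinary bytes are console
+ * output, while BG_RAS_MAGIC_CHAR followed by a type byte
+ * (BG_RAS_TYPE_BINARY or BG_RAS_TYPE_ASCII) introduces an in-band
+ * bg_ras record.  __bgFlushOutboxMsgs() demultiplexes the stream when
+ * draining it to the mailbox. */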
+
+
+static BG_CONSOLE bgc = {
+	.tty = NULL,
+	.ttyLock = SPIN_LOCK_UNLOCKED,
+	.ttyDriver = NULL,
+	.outboxLock = SPIN_LOCK_UNLOCKED,
+	.outboxHead = 0,
+	.outboxTail = 0,
+	.outboxMsgAge = 0,
+	.outboxRetry = 0,
+	.outboxMsgSent = 0,
+	.kmboxdTask = NULL,
+};
+
+
+#define BG_OUTBOX_HEAD_INCREMENT(i) bgc.outboxHead = (bgc.outboxHead + (i)) % BG_OUTBOX_BUFF_SIZE
+#define BG_OUTBOX_TAIL_INCREMENT(i) bgc.outboxTail = (bgc.outboxTail + (i)) % BG_OUTBOX_BUFF_SIZE
+
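+/* The outbox is a circular byte buffer: outboxHead is the next byte to
+ * drain, outboxTail the next free slot, and head == tail means empty
+ * (one byte is sacrificed to distinguish full from empty). */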
+
+/*  How many bytes of outbox buffer space are in use.  The caller must be */
+/*  holding the outbox lock. */
+static inline int __bgOutboxBufferUsed(void)
+{
+        int rc = 0;
+
+        if (bgc.outboxHead <= bgc.outboxTail)
+                rc = bgc.outboxTail - bgc.outboxHead;
+        else
+                rc = BG_OUTBOX_BUFF_SIZE - bgc.outboxHead + bgc.outboxTail;
+
+        return rc;
+}
+
+
+/*  How many bytes of buffer space are in use. */
+static inline int bgOutboxBufferUsed(struct tty_struct* tty)
+{
+        unsigned long flags;
+        int rc;
+
+        spin_lock_irqsave(&bgc.outboxLock, flags);
+        rc = __bgOutboxBufferUsed();
+        spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+        return rc;
+}
+
+
+/*  How many bytes of outbox buffer space are unused.  The caller must be */
+/*  holding the outbox lock. */
+static inline int __bgOutboxBufferFree(void)
+{
+        int rc;
+
+        if (bgc.outboxHead > bgc.outboxTail)
+                rc = bgc.outboxHead - bgc.outboxTail;
+        else
+                rc = BG_OUTBOX_BUFF_SIZE - bgc.outboxTail + bgc.outboxHead;
+
+        return rc;
+}
+
+
+/*  How many bytes of buffer space are free. */
+static inline int bgOutboxBufferFree(struct tty_struct* tty)
+{
+        int rc;
+        unsigned long flags;
+
+        spin_lock_irqsave(&bgc.outboxLock, flags);
+        rc = __bgOutboxBufferFree();
+        spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+        return rc;
+}
+
+
+/*  Append the specified data to the outbox buffer. */
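+/*  A dataLen of zero means 'data' is a NUL-terminated string; appending */
+/*  stops early once the buffer fills.  The caller must hold the outbox lock. */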
+static inline int __bgOutboxBufferAppend(unsigned char* data,
+                                        unsigned int dataLen)
+{
+        int i = 0;
+
+        while ((!dataLen && data[i]) || i < dataLen) {
+                bgc.outboxBuff[bgc.outboxTail] = data[i++];
+                if ((bgc.outboxTail + 1) % BG_OUTBOX_BUFF_SIZE != bgc.outboxHead)
+                        bgc.outboxTail = (bgc.outboxTail + 1) % BG_OUTBOX_BUFF_SIZE;
+                else
+                        break;
+        }
+
+        return i;
+}
+
+
+/*  Remove the specified number of bytes from the outbox buffer. */
+static inline int __bgOutboxBufferRemove(unsigned char* data,
+                                         unsigned int dataLen)
+{
+        int i = 0;
+
+        while (bgc.outboxHead != bgc.outboxTail && i < dataLen) {
+                data[i++] = bgc.outboxBuff[bgc.outboxHead];
+                bgc.outboxHead = (bgc.outboxHead + 1) % BG_OUTBOX_BUFF_SIZE;
+        }
+
+        return i;
+}
+
+
+/*  Search for the end of the line, starting at the specified index for the specified maximum length. */
+/*  The end of a line is defined by the presence of a newline character or the RAS magic character or */
+/*  the end of the buffer.  The number of bytes in the line is returned and 'index' is set to the */
+/*  buffer index of the last character in the line.  If no line can be found zero is returned and */
+/*  'index' is set to the buffer index of the last character examined.  The caller must ensure that */
+/*  the outbox is locked. */
+inline static int __bgOutboxBuffFindEOL(unsigned int* index, unsigned int maxLen) {
+        int rc;
+        int i = *index;
+        int limit;
+	int foundRAS = 0;
+
+	 /*  Determine the limit of the search. */
+	limit = (*index + maxLen - 1 < BG_OUTBOX_BUFF_SIZE - 1 ? *index + maxLen - 1 : BG_OUTBOX_BUFF_SIZE - 1);
+	if (bgc.outboxTail > *index && limit > bgc.outboxTail -1)
+		limit = bgc.outboxTail - 1;
+
+         /*  Search for a newline. */
+        while (i < limit && bgc.outboxBuff[i] != '\n') {
+		if (bgc.outboxBuff[i] == BG_RAS_MAGIC_CHAR) {
+			unsigned char nextChar = bgc.outboxBuff[(i+1) % BG_OUTBOX_BUFF_SIZE];
+
+			if ((nextChar == BG_RAS_TYPE_BINARY || nextChar == BG_RAS_TYPE_ASCII) &&
+			    (i+1) % BG_OUTBOX_BUFF_SIZE != bgc.outboxTail) {
+				foundRAS = 1;
+				break;
+			}
+		}
+		i++;
+	}
+	if (bgc.outboxBuff[i] == '\n') {
+		 /*  Found the end of a line. */
+		rc = i - *index + 1;
+		*index = i;
+	} else if (foundRAS) {
+		 /*  Ran into a RAS message so end the line. */
+		rc = i - *index;
+		*index = i - 1;
+	} else {
+		 /*  Reached the search limit. */
+                rc = 0;
+                *index = i;
+        }
+
+        return rc;
+}
+
+
+/*  Send any buffered messages so long as the outbox is ready.  This function assumes that the caller is */
+/*  holding the outbox buffer lock. */
+int __bgFlushOutboxMsgs(void)
+{
+        int rc = 0;
+
+         /*  Send buffered outbox messages as long as there is something to send and the mailbox is ready. */
+        while (bgc.outboxHead != bgc.outboxTail && !bluegene_testForOutboxCompletion()) {
+		unsigned char nextChar = bgc.outboxBuff[(bgc.outboxHead + 1) % BG_OUTBOX_BUFF_SIZE];
+
+                 /*  We have a message to send.  Is it RAS or a console message? */
+                if (bgc.outboxBuff[bgc.outboxHead] == BG_RAS_MAGIC_CHAR &&
+		    (nextChar == BG_RAS_TYPE_BINARY || nextChar == BG_RAS_TYPE_ASCII) &&
+		    (__bgOutboxBufferUsed() >= sizeof(bg_ras) + 2)) {
+                         /*  Send a RAS message to the outbox. */
+                        bg_ras ras;
+                        int rc;
+
+                         /*  Copy the RAS information out of the buffer into a form we can easily deal with. */
+                        BG_OUTBOX_HEAD_INCREMENT(2);
+                        rc = __bgOutboxBufferRemove((unsigned char*) &ras, sizeof(ras));
+
+                         /*  Send the RAS. */
+                        do {
+                                if (nextChar == BG_RAS_TYPE_BINARY) {
+                                         /*  Send binary RAS to the outbox. */
+                                        bgc.outboxMsgSent = !bluegene_writeRASEvent_nonBlocking(ras.comp, ras.subcomp, ras.code,
+                                                                      ras.length / sizeof(int), (int*) ras.data);
+                                } else if (nextChar == BG_RAS_TYPE_ASCII) {
+                                         /*  Send ASCII RAS. */
+                                        int sent = bluegene_writeRASString_nonBlocking(ras.comp, ras.subcomp, ras.code, ras.data);
+
+                                        bgc.outboxMsgSent = (sent == 0 || sent == -2);
+                                } else {
+                                        bgprintf("Unknown RAS msg type %d\n", nextChar);
+					break;
+				}
+                        } while (!bgc.outboxMsgSent && bgc.outboxRetry++ < BG_OUTBOX_MAX_RETRY);
+                        if (!bgc.outboxMsgSent) {
+                                bgprintf("Unable to send RAS (0x%02x 0x%02x 0x%02x)\n", ras.comp, ras.subcomp, ras.code);
+                                rc = -EIO;
+                        }
+                        bgc.outboxRetry = 0;
+                } else {
+			 /*  Send console messages. */
+                        unsigned int EOL = bgc.outboxHead;
+                        unsigned int msgLen = 0;
+			unsigned int len;
+
+                         /*  Group lines into an outbox-sized block of lines. */
+			while (EOL != bgc.outboxTail && msgLen < BG_OUTBOX_MSG_SIZE &&
+				(len = __bgOutboxBuffFindEOL(&EOL, BG_OUTBOX_MSG_SIZE - msgLen)) > 0) {
+				 /*  Found another line.  Append it to the outbox message. */
+				EOL = (EOL+1) % BG_OUTBOX_BUFF_SIZE;
+				msgLen += len;
+			}
+
+                         /*  Determine if there are complete lines to print or if we should print a partial line. */
+                        if (!msgLen) {
+				unsigned int bytesAvailable = EOL - bgc.outboxHead + 1;
+
+                                if (bytesAvailable == BG_OUTBOX_MSG_SIZE || bgc.outboxMsgAge++ >= BG_OUTBOX_MAX_AGE) {
+                                         /*  Either we have a full outbox message or output is too old.  Send it now. */
+                                        msgLen = bytesAvailable;
+                                } else {
+                                        rc = -EAGAIN;  // wait for more output
+                                        break;
+                                }
+                        }
+
+			 /*  Send any outbox message data. */
+                        if (msgLen) {
+                                bgc.outboxMsgSent = !bluegene_writeToMailboxConsole_nonBlocking(bgc.outboxBuff+bgc.outboxHead, msgLen);
+                                if (bgc.outboxMsgSent || bgc.outboxRetry++ > BG_OUTBOX_MAX_RETRY) {
+                                       	BG_OUTBOX_HEAD_INCREMENT(msgLen);
+                                        bgc.outboxMsgAge = bgc.outboxRetry = 0;
+                                        rc = (bgc.outboxMsgSent ? rc + 1 : -EIO);
+                                } else {
+                                        rc = -EAGAIN;
+                                }
+                        }
+                }
+        }
+
+         /*  If a message was sent (now or during a past call) then check to see if the message has been */
+         /*  taken so that we lower outbox attention ASAP. */
+        if (bgc.outboxMsgSent && !bluegene_testForOutboxCompletion())
+                bgc.outboxMsgSent = 0;
+
+         /*  If there is something to send but the outbox wasn't ready then return -EWOULDBLOCK. */
+        if (!rc && bgc.outboxHead != bgc.outboxTail)
+                rc = -EWOULDBLOCK;
+
+        return rc;
+}
+
+
+/*  Send any buffered messages so long as the outbox is ready.  This function locks the outbox before accessing it. */
+inline int bgFlushOutboxMsgs(void)
+{
+        int rc;
+        unsigned long flags;
+
+        spin_lock_irqsave(&bgc.outboxLock, flags);
+        rc = __bgFlushOutboxMsgs();
+        spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+        return rc;
+}
+
+
+/*  Add a console message to the outbox buffer. */
+int bgWriteConsoleMsg(struct tty_struct* tty,
+		      const unsigned char* msg,
+                      int msgLen)
+{
+        int rc = 0;
+
+	if (msgLen > 0) {
+		unsigned long flags;
+
+	         /*  Lock the outbox. */
+        	spin_lock_irqsave(&bgc.outboxLock, flags);
+
+	         /*  Copy the message to the buffer, wrapping around if necessary. */
+        	rc = __bgOutboxBufferAppend((unsigned char*) msg, (unsigned int) msgLen);
+
+	         /*  Unlock outbox. */
+        	spin_unlock_irqrestore(&bgc.outboxLock, flags);
+	}
+
+        return rc;
+}
+
+static DEFINE_MUTEX(bgWriteConsoleBlockDirect_mutex);
+
+/* Write a console msg in block mode. This function can be called from other kernel code. */
+int bgWriteConsoleBlockDirect(const char* fmt,...)
+{
+    int rc = 0;
+    va_list  args;
+    static char buf[256];
+    int len;
+
+    mutex_lock(&bgWriteConsoleBlockDirect_mutex);
+    va_start(args,fmt);
+    vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+    len = strlen(buf);
+
+    if(len>0) bgWriteConsoleMsg(bgc.tty, buf,len);
+    mutex_unlock(&bgWriteConsoleBlockDirect_mutex);
+
+    return rc;
+}
+
+
+/*  Add a binary RAS event to the outbox buffer.  If the buffer is full this function flushes */
+/*  outbox messages to free buffer space. */
+int bgWriteRasEvent(unsigned int component,
+                    unsigned int subcomponent,
+                    unsigned int errCode,
+                    unsigned int data[],
+                    unsigned int dataLen)
+{
+        int rc = 1;
+        unsigned long flags;
+        bg_ras ras;
+
+         /*  Lock the outbox buffer. */
+        spin_lock_irqsave(&bgc.outboxLock, flags);
+
+         /*  If insufficient buffer space exists then flush outbox messages until we free enough space. */
+        while (__bgOutboxBufferFree() < sizeof(ras) + 2)
+                __bgFlushOutboxMsgs();
+
+         /*  Initialize the RAS structure. */
+        ras.comp = component;
+        ras.subcomp = subcomponent;
+        ras.code = errCode;
+        ras.length = (dataLen <= sizeof(ras.data) ? dataLen : sizeof(ras.data));
+        memcpy(ras.data, (char*) data, ras.length);
+
+         /*  Copy the RAS information to the outbox buffer. */
+        bgc.outboxBuff[bgc.outboxTail] = BG_RAS_MAGIC_CHAR;
+        BG_OUTBOX_TAIL_INCREMENT(1);
+	bgc.outboxBuff[bgc.outboxTail] = BG_RAS_TYPE_BINARY;
+	BG_OUTBOX_TAIL_INCREMENT(1);
+        rc = __bgOutboxBufferAppend((unsigned char*) &ras, sizeof(ras));
+
+         /*  Unlock the outbox buffer. */
+        spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+        return rc;
+}
+
+
+/*  Add an ASCII RAS event to the outbox buffer.  If the buffer is full this function flushes */
+/*  outbox messages to free buffer space. */
+int bgWriteRasStr(unsigned int component,
+                  unsigned int subcomponent,
+                  unsigned int errCode,
+                  char*        str,
+                  unsigned int strLen)
+{
+        int rc = 1;
+        unsigned long flags;
+        bg_ras ras;
+
+         /*  Lock the outbox buffer. */
+        spin_lock_irqsave(&bgc.outboxLock, flags);
+
+         /*  If insufficient buffer space exists then flush outbox messages until we free enough space. */
+        while (__bgOutboxBufferFree() < sizeof(ras) + 2)
+                __bgFlushOutboxMsgs();
+
+         /*  Initialize the RAS structure. */
+        ras.comp = component;
+        ras.subcomp = subcomponent;
+        ras.code = errCode;
+        if (!strLen || strLen > sizeof(ras.data))
+                strLen = sizeof(ras.data)-1;
+        for (ras.length = 0; *str && ras.length < strLen; str++, ras.length++)
+                ras.data[ras.length] = *str;
+	ras.data[ras.length] = '\0';
+
+         /*  Copy the RAS information to the outbox buffer. */
+        bgc.outboxBuff[bgc.outboxTail] = BG_RAS_MAGIC_CHAR;
+        BG_OUTBOX_TAIL_INCREMENT(1);
+        bgc.outboxBuff[bgc.outboxTail] = BG_RAS_TYPE_ASCII;
+        BG_OUTBOX_TAIL_INCREMENT(1);
+        rc = __bgOutboxBufferAppend((unsigned char*) &ras, sizeof(ras));
+
+         /*  Unlock the outbox buffer. */
+        spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+        return rc;
+}
+
+
+static int bluegenecons_open(struct tty_struct *tty, struct file * filp)
+{
+	if (tty->count == 1) {
+		bgc.tty = tty;
+		tty->driver_data = &bgc;
+	}
+
+	return 0;
+}
+
+static void bluegenecons_close(struct tty_struct *tty, struct file * filp)
+{
+	if (tty && tty->count == 1) {
+		bgc.tty = NULL;
+	}
+
+	return;
+}
+
+
+#define BLUEGENECONS_MAGIC_SYSRQ_KEY (15)       /* ^O */
+
+static void bluegenecons_rcv(char *msg, int msglen)
+{
+	struct tty_struct *tty;
+	unsigned long flags;
+	static int sysrq_mode;
+
+	spin_lock_irqsave(&bgc.ttyLock, flags);
+	tty = bgc.tty;
+	if (tty) {
+		while (msglen) {
+			int i;
+			int count = tty_buffer_request_room(tty, msglen);
+
+			for (i = 0; i < count; i++) {
+				if (sysrq_mode) {
+					handle_sysrq(msg[i], tty);
+					sysrq_mode = 0;
+				} else if (msg[i] == BLUEGENECONS_MAGIC_SYSRQ_KEY)
+					sysrq_mode = 1;
+				else
+					tty_insert_flip_char(tty, msg[i], 0);
+			}
+			msglen -= count;
+			msg += count;
+			tty_flip_buffer_push(tty);
+		}
+	}
+	spin_unlock_irqrestore(&bgc.ttyLock, flags);
+
+	return;
+}
+
+
+/*
+ * Mailbox polling kernel thread.
+ *
+ * This thread wakes up at intervals to check for inbound mailbox messages
+ * and it will send waiting outbound messages if the outbound box is free.
+ */
+int kmboxd(void *arg)
+{
+	__set_current_state(TASK_RUNNING);
+	do {
+		int rc;
+
+		 /*  If there is anything in the inbox read it now. */
+		if (bluegene_testInboxAttention()) {
+			static char buffer[512];
+			int len;
+
+			/* Fetch any input */
+			len = bluegene_readFromMailboxConsole(buffer, sizeof(buffer));
+			if (len > 0)
+				bluegenecons_rcv(buffer, len);
+		}
+
+		 /*  Flush any console output that is buffered. */
+		rc = bgFlushOutboxMsgs();
+
+		 /*  If outbox buffer data was written then wake any TTY writer */
+		 /*  that is waiting. */
+                if (rc > 0 && bgc.tty) {
+                        if ((bgc.tty->flags & (1 << TTY_DO_WRITE_WAKEUP))
+                            && bgc.tty->ldisc.ops->write_wakeup)
+                                (bgc.tty->ldisc.ops->write_wakeup)(bgc.tty);
+                 	wake_up_interruptible(&bgc.tty->write_wait);
+                }
+
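+		/* the condition is constant 0, so this always sleeps the
+		 * full 10 ms: a pure polling loop */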
+		wait_event_interruptible_timeout(bgc.wait, 0, msecs_to_jiffies(10));
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+
+#ifdef CONFIG_MAGIC_SYSRQ
+
+extern void ctrl_alt_del(void);
+
+static int bluegene_do_sysrq(void* data)
+{
+	int key = (int) data;
+	static char* env[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+                               "LD_LIBRARY_PATH=/lib:/usr/lib", NULL };
+
+	switch(key) {
+		case 'h' :
+		{
+			static char* argv[] = { "/etc/rc.shutdown", NULL };
+
+			kernel_execve(argv[0], argv, env);
+			printk(KERN_EMERG "Failure halting I/O node.  Attempting secondary method.\n");
+			ctrl_alt_del();
+			break;
+		}
+
+		case 'x' :
+		{
+			static char* argv[] = { "/etc/rc.reboot", NULL };
+
+			kernel_execve(argv[0], argv, env);
+			printk(KERN_EMERG "Failure rebooting I/O node.\n");
+			break;
+		}
+
+		default :
+			printk(KERN_EMERG "Unknown sysrq '%c'\n", key);
+	}
+
+ 	return 0;
+}
+
+
+static void bluegene_handle_sysrq(int key, struct tty_struct *tty)
+{
+	struct task_struct* t = kthread_run(bluegene_do_sysrq, (void*) key, "Process System Request");
+
+	if (IS_ERR(t)) {
+		printk(KERN_EMERG "Failure creating sysrq '%c' thread.\n", (char) key);
+		bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_sysrq_thread_create_failure,
+				"Failure creating sysrq thread.", 0);
+		if (key == 'h')
+			ctrl_alt_del();
+	}
+
+        return;
+}
+
+static struct sysrq_key_op bg_sysrq_halt_op = {
+        .handler =        bluegene_handle_sysrq,
+        .help_msg =       "Halt",
+        .action_msg =     "Halt node"
+};
+
+static struct sysrq_key_op bg_sysrq_reboot_op = {
+        .handler =        bluegene_handle_sysrq,
+        .help_msg =       "Reboot",
+        .action_msg =     "Reboot node"
+};
+#endif
+
+
+static struct tty_operations bgcons_ops = {
+	.open = bluegenecons_open,
+	.close = bluegenecons_close,
+	.write = bgWriteConsoleMsg,
+	.write_room = bgOutboxBufferFree,
+	.chars_in_buffer = bgOutboxBufferUsed,
+};
+
+
+/* Read interface not defined so we just return EOF */
+static int bluegene_rasevent_read(char *page, char **start, off_t off,
+                             int count, int *eof, void *data)
+{
+        return 0;
+}
+
+
+/* Write the event.  The user provides the payload...we provide the rest.
+ */
+static int bluegene_rasevent_write(struct file *file, const char *buffer,
+                           unsigned long len, void *data)
+{
+        bg_ras ras;
+
+         /*  Truncate the message if it is too large. */
+        if (len > sizeof(ras))
+                len = sizeof(ras);
+        else if (len < ((unsigned long) &ras.data - (unsigned long) &ras))
+                return -EIO;
+
+        if (copy_from_user(&ras, buffer, len))
+                return -EFAULT;
+        else {
+                if (!data)
+                        bgWriteRasEvent(ras.comp, ras.subcomp, ras.code,
+                                        (unsigned int*) ras.data, ras.length);
+                else {
+                         /*  ASCII detail data was written. */
+                        if (!ras.length)
+                                ras.data[0] = '\0';
+                        bgWriteRasStr(ras.comp, ras.subcomp, ras.code,
+                                        ras.data, ras.length);
+                }
+        }
+
+        return len;
+}
+
+
+static inline char* entryName(char* path)
+{
+        char* lastSlash = NULL;
+
+        while (*path) {
+                if (*path == '/')
+                        lastSlash = path + 1;
+                path++;
+        }
+
+        return lastSlash;
+}
+
+
+static int __init bluegenecons_init(void)
+{
+
+	bgc.ttyDriver = alloc_tty_driver(1);
+	if (!bgc.ttyDriver) {
+		char* msg = "Failure allocating BlueGene console driver.";
+
+		bgprintf(msg);
+		bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_tty_alloc_failure, msg);
+		return -EIO;
+	}
+
+	bgc.ttyDriver->owner = THIS_MODULE;
+	bgc.ttyDriver->name = "bgcons";
+	bgc.ttyDriver->name_base = 1;
+	bgc.ttyDriver->major = BLUEGENE_MAJOR;
+	bgc.ttyDriver->minor_start = BLUEGENE_MINOR;
+	bgc.ttyDriver->type = TTY_DRIVER_TYPE_SYSTEM;
+	bgc.ttyDriver->init_termios = tty_std_termios;
+	bgc.ttyDriver->flags = TTY_DRIVER_REAL_RAW;
+	tty_set_operations(bgc.ttyDriver, &bgcons_ops);
+
+	if (tty_register_driver(bgc.ttyDriver)) {
+		char* msg = "Failure registering BlueGene console driver";
+
+		bgprintf(msg);
+		bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_tty_reg_failure, msg);
+		return -EIO;
+	}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+        /* Sysrq h is sent by the control system to halt an ION during free_block */
+        register_sysrq_key('h', &bg_sysrq_halt_op);
+
+        /* Sysrq x is sent by the control system when ION reboot is requested. */
+        register_sysrq_key('x', &bg_sysrq_reboot_op);
+#endif
+
+	/* Kick off the kernel mailbox poll thread. */
+	init_waitqueue_head(&bgc.wait);
+	bgc.kmboxdTask = kthread_run(kmboxd, NULL, "kmboxd");
+	if (IS_ERR(bgc.kmboxdTask)) {
+		char* msg = "Failure creating mailbox processing thread.";
+
+		bgprintf(msg);
+		bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_mbox_thread_create_failure, msg);
+		put_tty_driver(bgc.ttyDriver);
+		return -EIO;
+	}
+
+	 /*  Create /proc RAS interfaces. */
+	proc_ras = create_proc_entry(entryName(BG_RAS_FILE), S_IFREG | S_IRWXUGO, NULL);
+	if (proc_ras) {
+		proc_ras->nlink = 1;
+		proc_ras->read_proc = (void*) bluegene_rasevent_read;
+		proc_ras->write_proc = (void*) bluegene_rasevent_write;
+		proc_ras->data = (void*) 0; // not ASCII message
+	}
+	proc_ras_ascii = create_proc_entry(entryName(BG_RAS_ASCII_FILE), S_IFREG | S_IRWXUGO, NULL);
+	if (proc_ras_ascii) {
+		proc_ras_ascii->nlink = 1;
+		proc_ras_ascii->read_proc = (void*) bluegene_rasevent_read;
+		proc_ras_ascii->write_proc = (void*) bluegene_rasevent_write;
+		proc_ras_ascii->data = (void*) 1; // is ASCII message
+	}
+
+	return 0;
+}
+
+static void __exit bluegenecons_exit(void)
+{
+	if (proc_ras) {
+		remove_proc_entry(proc_ras->name, NULL);
+		proc_ras = NULL;
+	}
+	if (proc_ras_ascii) {
+		remove_proc_entry(proc_ras_ascii->name, NULL);
+		proc_ras_ascii = NULL;
+	}
+
+	return;
+}
+
+/*
+ * Console write.
+ */
+static void bluegene_console_write(struct console *co, const char *b, unsigned count)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+	if( !enable_console ) return;
+#endif
+	if (count > 0)
+		bgWriteConsoleMsg(bgc.tty, b, count);
+}
+
+
+static struct tty_driver *bluegene_console_device(struct console *c, int *ip)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+	if( !enable_console ) return NULL;
+#endif
+
+	*ip = 0;
+	return bgc.ttyDriver;
+}
+
+
+static struct console bgcons = {
+        .name   = "bgcons",
+        .write  = bluegene_console_write,
+        .device = bluegene_console_device,
+        .flags  = CON_PRINTBUFFER,
+        .index  = 0,
+};
+
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+void zepto_enable_console(int i) { 
+    enable_console = i; 
+}
+
+static int  zepto_enable_console_write(struct file *file, const char *buffer,
+				       unsigned long len, void *data)
+{
+    char tmp[2];
+
+    if( len > 2 ) len = 2;
+
+    if(copy_from_user(tmp, buffer,len) == 0 ) {
+	if( tmp[0] == '1' )  zepto_enable_console(1);
+	else                 zepto_enable_console(0);
+    } else {
+	return -EFAULT;
+    }
+
+    return len;
+}
+
+int __init  zepto_enable_console_proc_init(void)
+{
+    struct proc_dir_entry *p_zepto_enable_console;
+    p_zepto_enable_console = create_proc_entry("zepto_enable_console", S_IFREG|S_IRUGO|S_IWUGO, NULL );
+    if( p_zepto_enable_console ) {
+	p_zepto_enable_console->nlink = 1;
+	p_zepto_enable_console->write_proc = zepto_enable_console_write; 
+    } else {
+	printk("Failed to register /proc/zepto_enable_console\n");
+    }
+    return 0;
+}
+__initcall(zepto_enable_console_proc_init);
+
+#endif
+
+int __init bluegene_console_init(void)
+{
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    char* optstr = "zepto_console_output=";
+    int  zepto_console_output = 1; /* 0=disable 1=onenode 2=all */
+
+
+    /* zepto_debug(1, "'%s' '%s'\n",saved_command_line,optstr); */
+    /* FIXME: cmd_line appears to be truncated for some reason, so we
+            parse saved_command_line instead; it is not clear whether
+            that is the right solution. */
+    if(strstr(saved_command_line, optstr) ) {
+        char* p;
+        p = strstr( saved_command_line, optstr );
+        if( p && (strlen(p)-strlen(optstr))>0 ) {
+            p=p+strlen(optstr);
+            zepto_console_output=simple_strtoul(p,&p,0);
+        }
+    }
+
+    enable_console = 0;
+    if( zepto_console_output==1) {
+        BGP_Personality_t bgpers;
+        bluegene_getPersonality(&bgpers, sizeof(bgpers));
+        if( bgpers.Network_Config.Rank == 0 ) enable_console = 1;
+    } else if( zepto_console_output>=2) {
+        enable_console = 1;
+    }
+
+
+/* #else */
+/*     enable_console = 1; */
+#endif
+    register_console(&bgcons);
+
+    return 0;
+}
+
+
+module_init(bluegenecons_init);
+module_exit(bluegenecons_exit);
+console_initcall(bluegene_console_init);
diff --git a/drivers/char/bluegene_networks.c b/drivers/char/bluegene_networks.c
new file mode 100644
index 0000000..20289b1
--- /dev/null
+++ b/drivers/char/bluegene_networks.c
@@ -0,0 +1,201 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+
+static int bgpnet_add_device(int major, int minor, const char* name, unsigned long long base);
+static int bgpnet_device_open(struct inode *inode, struct file *filp);
+static int bgpnet_device_mmap(struct file *filp,  struct vm_area_struct *);
+static int bgpnet_device_release(struct inode *inode, struct file * filp);
+static int bgpnet_device_ioctl(struct inode *inode, struct file * filp,
+                               unsigned int  cmd,   unsigned long arg);
+
+
+#define BGP_COL_MAJOR_NUM  120
+#define BGP_TORUS_MAJOR_NUM 121
+#define BGP_GI_MAJOR_NUM    122
+#define BGP_COL_MINOR_NUMS  2
+#define BGP_TORUS_MINOR_NUMS 2
+#define BGP_GI_MINOR_NUMS   4
+#define _BGP_UA_COL0  (0x6)
+#define _BGP_PA_COL0  (0x10000000)
+#define _BGP_UA_COL1  (0x6)
+#define _BGP_PA_COL1  (0x11000000)
+#define _BGP_UA_TORUS0 (0x6)
+#define _BGP_PA_TORUS0 (0x01140000)
+#define _BGP_UA_TORUS1 (0x6)
+#define _BGP_PA_TORUS1 (0x01150000)
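+
+/* Blue Gene/P physical addresses are wider than 32 bits: each device
+ * base is assembled as ((UA << 32) | PA); see bgpnet_module_init(). */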
+
+struct bgpnet_dev
+{
+  int                  major,minor;        /* device major, minor */
+  unsigned long long   physaddr;           /* physical address */
+  struct task_struct*  task;               /* process holding device (renamed from 'current' to avoid the kernel symbol) */
+  int                  signum;             /* signal to send holding process */
+  wait_queue_head_t    read_wq;
+  int                  read_complete;
+  void                 *regs;              /* mapped regs (only used with col) */
+  struct semaphore     sem;                /* interruptible semaphore */ 
+  struct cdev          cdev;               /* embedded character device */
+};
+
+
+#define BGP_MAX_DEVICES 8
+static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES];
+static unsigned int bgpnet_num_devices = 0;
+
+
+static struct file_operations bgpnet_device_fops =
+{
+  .owner=   THIS_MODULE,
+  .open=    bgpnet_device_open,
+  .read =   NULL,
+  .write=   NULL,
+  .poll=    NULL, 
+  .ioctl=   bgpnet_device_ioctl,
+  .release= bgpnet_device_release,
+  .mmap=    bgpnet_device_mmap,
+};
+
+
+static int bgpnet_add_device(int major,
+                             int minor,
+                             const char* devname,
+                             unsigned long long physaddr)
+{
+  int ret;
+  dev_t devno;
+  struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices];
+
+  /* initialize the structure */
+  init_MUTEX (&dev->sem);
+  dev->major  = major;
+  dev->minor  = minor; 
+  dev->physaddr = physaddr;
+  init_waitqueue_head(&dev->read_wq);
+  dev->read_complete = 0;
+  if (physaddr) {
+          dev->regs = ioremap(physaddr, 4096);
+  }
+  devno=MKDEV(major,minor);
+
+  /* register the device number region (shows up in /proc/devices) */
+  ret=register_chrdev_region(devno,1,(char *)devname);
+
+  if (ret) {
+	printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) err=%d\n",
+              major,minor,ret);
+	return ret;
+  }
+    
+  /* add cdev */
+  cdev_init(&dev->cdev,&bgpnet_device_fops);
+  dev->cdev.owner=THIS_MODULE;
+  dev->cdev.ops=&bgpnet_device_fops;
+  ret=cdev_add(&dev->cdev,devno,1);
+  if (ret) {
+      printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d), err=%d\n",
+             major,minor,ret);
+      return ret;
+  }
+
+  /* signal to send to the owning process; should be altered using ioctl */
+  dev->signum=-1;
+
+  bgpnet_num_devices++;
+
+  return 0;
+}
+
+
+static int bgpnet_device_open (struct inode *inode, struct file *filp)
+{
+  struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev);
+
+  if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+  up(&dev->sem);
+
+  dev->task = current;
+  filp->private_data = (void*) dev;
+
+  return 0;
+}
+
+
+
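+/* Map the device's MMIO window into user space as non-cached I/O memory. */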
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+  unsigned long vsize = vma->vm_end - vma->vm_start;
+  struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data;
+  int ret = -1;
+
+  vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+  vma->vm_flags     |= VM_IO;
+  vma->vm_flags     |= VM_RESERVED;
+
+  if (device->physaddr != 0)
+    ret = remap_pfn_range(vma,
+                          vma->vm_start,
+                          device->physaddr >> PAGE_SHIFT,
+                          vsize,
+                          vma->vm_page_prot);
+
+  if (ret) 
+      printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n",
+                   device->major, device->minor);
+  
+  return ret? -EAGAIN :0;
+}
+
+
+static int bgpnet_device_release (struct inode *inode, struct file * filp)
+{
+  struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data;
+
+  /*Ensure exclusive access*/
+  if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
+
+  dev->task = NULL;
+  up(&dev->sem);
+
+  return 0;
+}
+
+
+static int bgpnet_device_ioctl (struct inode *inode,
+                                struct file * filp,
+                                unsigned int cmd,
+                                unsigned long arg)
+{
+  return 0;
+}
+
+
+static int __init bgpnet_module_init(void)
+{
+	int rc = 0;
+	unsigned long long tr0, tr1, ts0, ts1;
+
+	tr0=((unsigned long long) _BGP_UA_COL0 << 32)  + _BGP_PA_COL0;
+	tr1=((unsigned long long) _BGP_UA_COL1 << 32)  + _BGP_PA_COL1;
+	ts0=((unsigned long long) _BGP_UA_TORUS0 << 32) + _BGP_PA_TORUS0;
+	ts1=((unsigned long long) _BGP_UA_TORUS1 << 32) + _BGP_PA_TORUS1;
+
+/* 	bgpnet_add_device(BGP_COL_MAJOR_NUM,  0,"bgptree_vc0", tr0); */
+/* 	bgpnet_add_device(BGP_COL_MAJOR_NUM,  1, "bgptree_vc1", tr1); */
+	bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0);
+	bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, "bgptorus_g1", ts1);
+	return rc;
+}
+
+
+module_init(bgpnet_module_init);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index dd0db67..2559add 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -1,16 +1,26 @@
-menuconfig INFINIBAND
-	tristate "InfiniBand support"
-	depends on PCI || BROKEN
-	depends on HAS_IOMEM
-	---help---
-	  Core support for InfiniBand (IB).  Make sure to also select
-	  any protocols you wish to use as well as drivers for your
-	  InfiniBand hardware.
+#menuconfig INFINIBAND
+#	tristate "InfiniBand support"
+#	default m
+#	depends on PCI || BROKEN
+#	depends on HAS_IOMEM
+#	---help---
+#	  Core support for InfiniBand (IB).  Make sure to also select
+#	  any protocols you wish to use as well as drivers for your
+#	  InfiniBand hardware.
 
-if INFINIBAND
+#if INFINIBAND
 
+menu "InfiniBand support"
+
+config INFINIBAND
+	tristate "InfiniBand support"
+	---help---
+	  Core support for InfiniBand (IB).  Make sure to also select
+	  any protocols you wish to use as well as drivers for your
+	  InfiniBand hardware.
+
 config INFINIBAND_USER_MAD
 	tristate "InfiniBand userspace MAD support"
+	default m
 	depends on INFINIBAND
 	---help---
 	  Userspace InfiniBand Management Datagram (MAD) support.  This
@@ -20,6 +30,7 @@
 
 config INFINIBAND_USER_ACCESS
 	tristate "InfiniBand userspace access (verbs and CM)"
+	default m
 	---help---
 	  Userspace InfiniBand access support.  This enables the
 	  kernel side of userspace verbs and the userspace
@@ -37,16 +48,16 @@
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INET
-	depends on !(INFINIBAND = y && IPV6 = m)
 	default y
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
 source "drivers/infiniband/hw/ehca/Kconfig"
 source "drivers/infiniband/hw/amso1100/Kconfig"
-source "drivers/infiniband/hw/cxgb3/Kconfig"
-source "drivers/infiniband/hw/mlx4/Kconfig"
-source "drivers/infiniband/hw/nes/Kconfig"
+# source "drivers/infiniband/hw/cxgb3/Kconfig"
+source "drivers/infiniband/hw/softiwarp/Kconfig"
+
+# source "drivers/infiniband/hw/mlx4/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
@@ -54,4 +65,6 @@
 
 source "drivers/infiniband/ulp/iser/Kconfig"
 
-endif # INFINIBAND
+#endif # INFINIBAND
+endmenu
+
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index ed35e44..69217d7 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -2,10 +2,9 @@
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
 obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
 obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
+obj-$(CONFIG_INFINIBAND_SOFTRDMA)	+= hw/softiwarp/
+obj-$(CONFIG_INFINIBAND_SOFTIWARP)	+= hw/softiwarp/
 obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
-obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/
-obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
-obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2a2e508..2382662 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1289,15 +1289,20 @@
 	int ret;
 	struct ib_device_attr attr;
 
 	listen_id = cm_id->context;
 	if (cma_disable_callback(listen_id, CMA_LISTEN))
+	{
+		printk(KERN_INFO "(<) iw_conn_req_handler ECONNABORTED\n");
 		return -ECONNABORTED;
+	}
 
 	/* Create a new RDMA id for the new IW CM ID */
 	new_cm_id = rdma_create_id(listen_id->id.event_handler,
 				   listen_id->id.context,
 				   RDMA_PS_TCP);
 	if (IS_ERR(new_cm_id)) {
+		printk(KERN_INFO "(E) iw_conn_req_handler rdma_create_id returned %p, ENOMEM\n", new_cm_id);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1307,6 +1312,7 @@
 
 	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
 	if (!dev) {
+		printk(KERN_INFO "(E) iw_conn_req_handler !dev, EADDRNOTAVAIL\n");
 		ret = -EADDRNOTAVAIL;
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
@@ -1314,6 +1320,7 @@
 	}
 	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
 	if (ret) {
+		printk(KERN_INFO "(E) iw_conn_req_handler rdma_copy_addr returned %d\n", ret);
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
 		goto out;
@@ -1323,6 +1330,7 @@
 	ret = cma_acquire_dev(conn_id);
 	mutex_unlock(&lock);
 	if (ret) {
+		printk(KERN_INFO "(E) iw_conn_req_handler cma_acquire_dev returned %d\n", ret);
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
 		goto out;
@@ -1339,6 +1347,7 @@
 
 	ret = ib_query_device(conn_id->id.device, &attr);
 	if (ret) {
+		printk(KERN_INFO "(E) iw_conn_req_handler ib_query_device returned %d\n", ret);
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
 		goto out;
@@ -1350,8 +1359,10 @@
 	event.param.conn.private_data_len = iw_event->private_data_len;
 	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
 	event.param.conn.responder_resources = attr.max_qp_rd_atom;
 	ret = conn_id->id.event_handler(&conn_id->id, &event);
 	if (ret) {
+		printk(KERN_INFO "(E) iw_conn_req_handler event_handler (%p) returned %d\n", conn_id->id.event_handler, ret);
 		/* User wants to destroy the CM ID */
 		conn_id->cm_id.iw = NULL;
 		cma_exch(conn_id, CMA_DESTROYING);
@@ -1366,6 +1377,7 @@
 	if (dev)
 		dev_put(dev);
 	mutex_unlock(&listen_id->handler_mutex);
 	return ret;
 }
 
@@ -1429,6 +1441,7 @@
 
 	id->context = id_priv->id.context;
 	id->event_handler = id_priv->id.event_handler;
 	return id_priv->id.event_handler(id, event);
 }
 
@@ -1960,11 +1973,18 @@
 	return ret;
 }
 
+#if defined(CONFIG_BLUEGENE)
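+/* When nonzero (the default), dynamic port allocation in
+ * cma_alloc_any_port() is skipped; tunable at runtime through the
+ * bgp.cma.siw_hack sysctl registered by register_cma_sysctl() below. */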
+static int siw_hack = 1 ;
+#endif
 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
 {
 	struct rdma_bind_list *bind_list;
 	int port, ret, low, high;
 
+#if defined(CONFIG_BLUEGENE)
+	if( siw_hack ) return 0;
+#endif
+
 	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
 	if (!bind_list)
 		return -ENOMEM;
@@ -2942,6 +2962,31 @@
 	cma_process_remove(cma_dev);
 	kfree(cma_dev);
 }
+#if defined(CONFIG_BLUEGENE)
+static struct ctl_path bgp_cma_ctl_path[] = {
+  { .procname = "bgp", .ctl_name = 0, },
+  { .procname = "cma", .ctl_name = 0, },
+  { },
+};
+
+static struct ctl_table bgp_cma_ctl_table[] = {
+    {
+            .ctl_name       = CTL_UNNUMBERED,
+            .procname       = "siw_hack" ,
+            .data           = &siw_hack,
+            .maxlen         = sizeof(int),
+            .mode           = 0644,
+            .proc_handler   = &proc_dointvec
+    } ,
+
+    { 0 }
+};
+static void register_cma_sysctl(void)
+{
+	struct ctl_table_header *sysctl_table_header =
+		register_sysctl_paths(bgp_cma_ctl_path, bgp_cma_ctl_table);
+	printk(KERN_INFO "cma_init register_cma_sysctl: sysctl_table_header=%p\n",
+	       sysctl_table_header);
+}
+#endif
 
 static int cma_init(void)
 {
@@ -2963,6 +3008,10 @@
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
+
+#if defined(CONFIG_BLUEGENE)
+	register_cma_sysctl();
+#endif
 	return 0;
 
 err:
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 4346a24..56fb679 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -48,7 +48,7 @@
 MODULE_LICENSE("Dual BSD/GPL");
 
 enum {
-	UCMA_MAX_BACKLOG	= 128
+	UCMA_MAX_BACKLOG	= 8192
 };
 
 struct ucma_file {
@@ -265,6 +265,7 @@
 	mutex_lock(&ctx->file->mut);
 	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
 		if (!ctx->backlog) {
+			printk(KERN_INFO "(E) ucma_event_handler RDMA_CM_EVENT_CONNECT_REQUEST but no backlog, ENOMEM\n");
 			ret = -ENOMEM;
 			kfree(uevent);
 			goto out;
diff --git a/drivers/infiniband/hw/softiwarp/Kconfig b/drivers/infiniband/hw/softiwarp/Kconfig
new file mode 100644
index 0000000..06ddf8f
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_SOFTIWARP
+	tristate "Software iWARP Stack (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
+	---help---
+	Kernel Software Implementation of the iWARP protocol stack
+
+	This driver implements the iWARP protocol stack in software
+	and interfaces with in-kernel TCP/IP as well as the OFED
+	verbs interfaces.
+
+	Please send feedback to <bmt@zurich.ibm.com>.
+
+	To compile this driver as a module, choose M here: the module
+	will be called siw.
+
diff --git a/drivers/infiniband/hw/softiwarp/Makefile b/drivers/infiniband/hw/softiwarp/Makefile
new file mode 100644
index 0000000..6c5b75c
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/Makefile
@@ -0,0 +1,10 @@
+#EXTRA_CFLAGS += -DOFA_VERSION=141 -DCHECK_DMA_CAPABILITIES -DSIW_TX_FULLSEGS
+EXTRA_CFLAGS += -DOFA_VERSION=141 
+ifdef CONFIG_BLUEGENE
+EXTRA_CFLAGS += -DSIW_ON_BGP
+endif
+
+obj-$(CONFIG_INFINIBAND_SOFTIWARP) += siw.o
+
+siw-y :=  siw_main.o siw_cm.o siw_verbs.o siw_obj.o siw_qp.o siw_qp_tx.o siw_qp_rx.o siw_cq.o siw_debug.o siw_ae.o
+
diff --git a/drivers/infiniband/hw/softiwarp/iwarp.h b/drivers/infiniband/hw/softiwarp/iwarp.h
new file mode 100644
index 0000000..762c1d3
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/iwarp.h
@@ -0,0 +1,324 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *          Fredy Neeser <nfd@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _IWARP_H
+#define _IWARP_H
+
+#include <rdma/rdma_user_cm.h>	/* RDMA_MAX_PRIVATE_DATA */
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+
+#define RDMAP_VERSION		1
+#define DDP_VERSION		1
+#define MPA_REVISION_1		1
+#define MPA_MAX_PRIVDATA	RDMA_MAX_PRIVATE_DATA
+#define MPA_KEY_REQ		"MPA ID Req Frame"
+#define MPA_KEY_REP		"MPA ID Rep Frame"
+
+struct mpa_rr_params {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16	res:5,
+		r:1,
+		c:1,
+		m:1,
+		rev:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u16	m:1,
+		c:1,
+		r:1,
+		res:5,
+		rev:8;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+	__u16	pd_len;
+};
+
+/*
+ * MPA request/reply header
+ */
+struct mpa_rr {
+	__u8	key[16];
+	struct mpa_rr_params params;
+};
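+
+/*
+ * Example (illustrative): a connect request with CRC enabled and no
+ * private data carries the 16-byte MPA_KEY_REQ followed by
+ * m = 0, c = 1, r = 0, rev = MPA_REVISION_1 and pd_len = 0.
+ */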
+
+/*
+ * Don't change the layout/size of this struct!
+ */
+struct mpa_marker {
+	__u16	rsvd;
+	__u16	fpdu_hmd; /* FPDU header-marker distance (= MPA's FPDUPTR) */
+};
+
+#define MPA_MARKER_SPACING	512
+#define MPA_HDR_SIZE		2
+
+/*
+ * MPA marker size:
+ * - Standards-compliant marker insertion: Use sizeof(struct mpa_marker)
+ * - "Invisible markers" for testing sender's marker insertion
+ *   without affecting receiver: Use 0
+ */
+#define MPA_MARKER_SIZE		sizeof(struct mpa_marker)
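+
+/*
+ * Example (illustrative): with a spacing of 512, a sender that has
+ * emitted 1500 bytes of MPA stream has inserted markers at stream
+ * offsets 512 and 1024; each marker carries the 16-bit distance back
+ * to the header of the FPDU it interrupts (fpdu_hmd above).
+ */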
+
+
+/*
+ * maximum MPA trailer
+ */
+struct mpa_trailer {
+	char	pad[4];
+	__u32	crc;
+};
+
+#define MPA_CRC_SIZE	4
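+
+/*
+ * Note (illustrative): MPA aligns each FPDU's CRC on a 4-byte
+ * boundary, so a receiver consumes mpa_len payload bytes, then any
+ * pad bytes, then MPA_CRC_SIZE bytes of checksum.
+ */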
+
+
+/*
+ * Common portion of iWARP headers (MPA, DDP, RDMAP)
+ * for any FPDU
+ */
+struct iwarp_ctrl {
+	__u16	mpa_len;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16	dv:2,		/* DDP Version */
+		rsvd:4,		/* DDP reserved, MBZ */
+		l:1,		/* DDP Last flag */
+		t:1,		/* DDP Tagged flag */
+		opcode:4,	/* RDMAP opcode */
+		rsv:2,		/* RDMAP reserved, MBZ */
+		rv:2;		/* RDMAP Version, 01 for IETF, 00 for RDMAC */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u16	t:1,		/* DDP Tagged flag */
+		l:1,		/* DDP Last flag */
+		rsvd:4,		/* DDP reserved, MBZ */
+		dv:2,		/* DDP Version */
+		rv:2,		/* RDMAP Version, 01 for IETF, 00 for RDMAC */
+		rsv:2,		/* RDMAP reserved, MBZ */
+		opcode:4;	/* RDMAP opcode */
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+};
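+
+/*
+ * Example (illustrative): the final DDP segment of a SEND carries
+ * t = 0 (untagged), l = 1 (last), dv = 1, rv = 1 (IETF) and
+ * opcode = RDMAP_SEND.
+ */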
+
+
+struct rdmap_terminate_ctrl {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u32	etype:4,
+		layer:4,
+		ecode:8,
+		rsvd1:5,
+		r:1,
+		d:1,
+		m:1,
+		rsvd2:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u32	layer:4,
+		etype:4,
+		ecode:8,
+		m:1,
+		d:1,
+		r:1,
+		rsvd1:5,
+		rsvd2:8;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+};
+
+
+struct iwarp_rdma_write {
+	struct iwarp_ctrl	ctrl;
+	__u32			sink_stag;
+	__u64			sink_to;
+} __attribute__((__packed__));
+
+struct iwarp_rdma_rreq {
+	struct iwarp_ctrl	ctrl;
+	__u32			rsvd;
+	__u32			ddp_qn;
+	__u32			ddp_msn;
+	__u32			ddp_mo;
+	__u32			sink_stag;
+	__u64			sink_to;
+	__u32			read_size;
+	__u32			source_stag;
+	__u64			source_to;
+} __attribute__((__packed__));
+
+struct iwarp_rdma_rresp {
+	struct iwarp_ctrl	ctrl;
+	__u32			sink_stag;
+	__u64			sink_to;
+} __attribute__((__packed__));
+
+struct iwarp_send {
+	struct iwarp_ctrl	ctrl;
+	__u32			rsvd;
+	__u32			ddp_qn;
+	__u32			ddp_msn;
+	__u32			ddp_mo;
+} __attribute__((__packed__));
+
+struct iwarp_send_inv {
+	struct iwarp_ctrl	ctrl;
+	__u32			inval_stag;
+	__u32			ddp_qn;
+	__u32			ddp_msn;
+	__u32			ddp_mo;
+} __attribute__((__packed__));
+
+struct iwarp_terminate {
+	struct iwarp_ctrl	ctrl;
+	__u32				rsvd;
+	__u32				ddp_qn;
+	__u32				ddp_msn;
+	__u32				ddp_mo;
+	struct rdmap_terminate_ctrl	term_ctrl;
+} __attribute__((__packed__));
+
+
+/*
+ * Common portion of iWARP headers (MPA, DDP, RDMAP)
+ * for an FPDU carrying an untagged DDP segment
+ */
+struct iwarp_ctrl_untagged {
+	struct iwarp_ctrl	ctrl;
+	__u32			rsvd;
+	__u32			ddp_qn;
+	__u32			ddp_msn;
+	__u32			ddp_mo;
+} __attribute__((__packed__));
+
+/*
+ * Common portion of iWARP headers (MPA, DDP, RDMAP)
+ * for an FPDU carrying a tagged DDP segment
+ */
+struct iwarp_ctrl_tagged {
+	struct iwarp_ctrl	ctrl;
+	__u32			ddp_stag;
+	__u64			ddp_to;
+} __attribute__((__packed__));
+
+union iwarp_hdrs {
+	struct iwarp_ctrl		ctrl;
+	struct iwarp_ctrl_untagged	c_untagged;
+	struct iwarp_ctrl_tagged	c_tagged;
+	struct iwarp_rdma_write		rwrite;
+	struct iwarp_rdma_rreq		rreq;
+	struct iwarp_rdma_rresp		rresp;
+	struct iwarp_terminate		terminate;
+	struct iwarp_send		send;
+	struct iwarp_send_inv		send_inv;
+};
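+
+/*
+ * Note (illustrative): the RX path first gathers sizeof(struct
+ * iwarp_ctrl) bytes into 'ctrl'; ctrl.opcode then determines how
+ * many further header bytes to expect (see iwarp_pktinfo[].hdr_len
+ * in siw.h).
+ */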
+
+
+#define MPA_MIN_FRAG ((sizeof(union iwarp_hdrs) + MPA_CRC_SIZE))
+
+enum ddp_etype {
+	DDP_ETYPE_CATASTROPHIC	= 0x0,
+	DDP_ETYPE_TAGGED_BUF	= 0x1,
+	DDP_ETYPE_UNTAGGED_BUF	= 0x2,
+	DDP_ETYPE_RSVD		= 0x3
+};
+
+enum ddp_ecode {
+	DDP_ECODE_CATASTROPHIC		= 0x00,
+	/* Tagged Buffer Errors */
+	DDP_ECODE_T_INVALID_STAG	= 0x00,
+	DDP_ECODE_T_BASE_BOUNDS		= 0x01,
+	DDP_ECODE_T_STAG_NOT_ASSOC	= 0x02,
+	DDP_ECODE_T_TO_WRAP		= 0x03,
+	DDP_ECODE_T_DDP_VERSION		= 0x04,
+	/* Untagged Buffer Errors */
+	DDP_ECODE_UT_INVALID_QN		= 0x01,
+	DDP_ECODE_UT_INVALID_MSN_NOBUF	= 0x02,
+	DDP_ECODE_UT_INVALID_MSN_RANGE	= 0x03,
+	DDP_ECODE_UT_INVALID_MO		= 0x04,
+	DDP_ECODE_UT_MSG_TOOLONG	= 0x05,
+	DDP_ECODE_UT_DDP_VERSION	= 0x06
+};
+
+
+enum rdmap_untagged_qn {
+	RDMAP_UNTAGGED_QN_SEND		= 0,
+	RDMAP_UNTAGGED_QN_RDMA_READ	= 1,
+	RDMAP_UNTAGGED_QN_TERMINATE	= 2,
+	RDMAP_UNTAGGED_QN_COUNT		= 3
+};
+
+enum rdmap_etype {
+	RDMAP_ETYPE_CATASTROPHIC	= 0x0,
+	RDMAP_ETYPE_REMOTE_PROTECTION	= 0x1,
+	RDMAP_ETYPE_REMOTE_OPERATION	= 0x2
+};
+
+enum rdmap_ecode {
+	RDMAP_ECODE_INVALID_STAG	= 0x00,
+	RDMAP_ECODE_BASE_BOUNDS		= 0x01,
+	RDMAP_ECODE_ACCESS_RIGHTS	= 0x02,
+	RDMAP_ECODE_STAG_NOT_ASSOC	= 0x03,
+	RDMAP_ECODE_TO_WRAP		= 0x04,
+	RDMAP_ECODE_RDMAP_VERSION	= 0x05,
+	RDMAP_ECODE_UNEXPECTED_OPCODE	= 0x06,
+	RDMAP_ECODE_CATASTROPHIC_STREAM	= 0x07,
+	RDMAP_ECODE_CATASTROPHIC_GLOBAL	= 0x08,
+	RDMAP_ECODE_STAG_NOT_INVALIDATE	= 0x09,
+	RDMAP_ECODE_UNSPECIFIED		= 0xff
+};
+
+enum rdmap_elayer {
+	RDMAP_ERROR_LAYER_RDMA	= 0x00,
+	RDMAP_ERROR_LAYER_DDP	= 0x01,
+	RDMAP_ERROR_LAYER_LLP	= 0x02	/* eg., MPA */
+};
+
+enum rdma_opcode {
+	RDMAP_RDMA_WRITE	= 0x0,
+	RDMAP_RDMA_READ_REQ	= 0x1,
+	RDMAP_RDMA_READ_RESP	= 0x2,
+	RDMAP_SEND		= 0x3,
+	RDMAP_SEND_INVAL	= 0x4,
+	RDMAP_SEND_SE		= 0x5,
+	RDMAP_SEND_SE_INVAL	= 0x6,
+	RDMAP_TERMINATE		= 0x7,
+	RDMAP_NOT_SUPPORTED	= RDMAP_TERMINATE + 1
+};
+
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw.h b/drivers/infiniband/hw/softiwarp/siw.h
new file mode 100644
index 0000000..b38ae20
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw.h
@@ -0,0 +1,823 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_H
+#define _SIW_H
+
+#include <linux/idr.h>
+#include <rdma/ib_verbs.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/netdevice.h>
+#include <linux/crypto.h>
+#include <linux/resource.h>	/* MLOCK_LIMIT */
+
+#include <rdma/ib_umem.h>	/* struct ib_umem_chunk */
+
+#include "siw_user.h"
+#include "iwarp.h"
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
+#define KERNEL_VERSION_PRE_2_6_26
+#endif
+
+enum siw_if_type {
+	SIW_IF_OFED = 0,	/* only via standard ofed syscall if */
+	SIW_IF_MAPPED = 1	/* private qp and cq mapping */
+};
+
+#define DEVICE_ID_SOFTIWARP	0x0815
+#define VERSION_ID_SOFTIWARP	0x0001
+#define SIW_VENDOR_ID		0
+#define SIW_VENDORT_PART_ID	0
+#define SIW_SW_VERSION		1
+#define SIW_MAX_QP		(1024 * 100)
+#define SIW_MAX_QP_WR		(1024 * 32)
+#define SIW_MAX_ORD		128
+#define SIW_MAX_IRD		128
+#define SIW_MAX_SGE		10
+#define SIW_MAX_SGE_RD		1	/* iwarp limitation. we could relax */
+#define SIW_MAX_INLINE		PAGE_SIZE
+#define SIW_MAX_CQ		(1024 * 100)
+#define SIW_MAX_CQE		(SIW_MAX_QP_WR * 100)
+#define SIW_MAX_MR	\
+	(current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT)
+#define SIW_MAX_MR_SIZE		(1024 * 1024 * 1024)
+#define SIW_MAX_PD		SIW_MAX_QP
+#define SIW_MAX_MW		0	/* to be set if MW's are supported */
+#define SIW_MAX_FMR		0
+#define SIW_MAX_SRQ		SIW_MAX_QP
+#define SIW_MAX_SRQ_WR		(SIW_MAX_QP_WR * 10)
+
+#define SENDPAGE_THRESH		256	/* min bytes for using sendpage() */
+#define SOCKBUFSIZE		(PAGE_SIZE * 40)
+#define SQ_USER_MAXBURST	10
+
+#define	SIW_NODE_DESC		"Software iWARP stack"
+
+
+/*
+ * Softiwarp TX/RX configuration options
+ */
+
+#define CONFIG_RDMA_SIW_CRC_ENFORCED	0
+
+
+struct siw_devinfo {
+	unsigned		device;
+	unsigned		version;
+
+	/* close match to ib_device_attr where appropriate */
+	u32			vendor_id;
+	u32			vendor_part_id;
+	u32			sw_version;
+	int			max_qp;
+	int			max_qp_wr;
+	int			max_ord; /* max. outbound read queue depth */
+	int			max_ird; /* max. inbound read queue depth */
+
+	enum ib_device_cap_flags	cap_flags;
+	int			max_sge;
+	int			max_sge_rd;
+	int			max_cq;
+	int			max_cqe;
+	u64			max_mr_size;
+	int			max_mr;
+	int			max_pd;
+	int			max_mw;
+	int			max_fmr;
+	int			max_srq;
+	int			max_srq_wr;
+	int			max_srq_sge;
+	/* end ib_device_attr */
+
+	enum siw_if_type	iftype;
+};
+
+struct siw_dev {
+	struct ib_device	ofa_dev;
+	struct siw_dev		*next;
+	struct net_device	*l2dev;
+	struct siw_devinfo	attrs;
+	/* object management */
+	spinlock_t		idr_lock;
+	struct idr		qp_idr;
+	struct idr		cq_idr;
+	struct idr		pd_idr;
+	struct idr		mem_idr;	/* MRs & MWs */
+	/* active objects statistics */
+	atomic_t		num_qp;
+	atomic_t		num_cq;
+	atomic_t		num_pd;
+	atomic_t		num_mem;
+	atomic_t		num_srq;
+};
+
+struct siw_objhdr {
+	u32			id;	/* for idr based object lookup */
+	struct kref		ref;
+	struct siw_dev 		*dev;
+};
+
+
+struct siw_ucontext {
+	struct ib_ucontext	ib_ucontext;
+};
+
+struct siw_pd {
+	struct siw_objhdr	hdr;
+	struct ib_pd		ofa_pd;
+};
+
+enum siw_access_flags {
+	SR_MEM_LREAD	= (1<<0),
+	SR_MEM_LWRITE	= (1<<1),
+	SR_MEM_RREAD	= (1<<2),
+	SR_MEM_RWRITE	= (1<<3),
+
+	SR_MEM_FLAGS_LOCAL =
+		(SR_MEM_LREAD | SR_MEM_LWRITE),
+	SR_MEM_FLAGS_REMOTE =
+		(SR_MEM_RWRITE | SR_MEM_RREAD)
+};
+
+
+
+#define STAG_VALID 	1
+#define STAG_INVALID	0
+#define SIW_STAG_MAX	0xffffffff
+
+struct siw_mr;
+
+/*
+ * generic memory representation for registered siw memory.
+ * memory lookup always via higher 24 bit of stag (stag index).
+ * the stag is stored as part of the siw object header (id).
+ * object relates to memory window if embedded mr pointer is valid
+ */
+struct siw_mem {
+	struct siw_objhdr	hdr;
+
+	struct siw_mr	*mr;		/* assoc. MR if MW, NULL if MR */
+
+	__u32	stag_state:1,		/* VALID or INVALID */
+		is_zbva:1,		/* zero based virt. addr. */
+		mw_bind_enabled:1,	/* check only if MR */
+		remote_inval_enabled:1,	/* remote invalidation enabled */
+		consumer_owns_key:1,	/* key/index split ? */
+		rsvd:27;
+
+	enum siw_access_flags	perms;	/* local/remote READ & WRITE */
+
+	u64	va;		/* VA of memory */
+	u32	len;		/* amount of memory bytes */
+	u32	fbo;		/* first byte offset */
+};
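+
+/*
+ * Example (illustrative): for STag 0x12345678 the 24-bit index used
+ * for object lookup (hdr.id) is 0x123456, while the low 8 bits
+ * (0x78) form the consumer-owned key (consumer_owns_key above).
+ */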
+
+#define SIW_MEM_IS_MW(m)	((m)->mr != NULL)
+#define SIW_INLINED_DATA(w)	((w)->wr.hdr.flags & IB_SEND_INLINE)
+
+/*
+ * MR and MW definition.
+ * Used OFA structs ib_mr/ib_mw holding:
+ * lkey, rkey, MW reference count on MR
+ */
+struct siw_mr {
+	struct ib_mr	ofa_mr;
+	struct siw_mem	mem;
+	struct ib_umem	*umem;
+	struct siw_pd	*pd;
+};
+
+struct siw_mw {
+	struct ib_mw	ofa_mw;
+	struct siw_mem	mem;
+};
+
+/********** WR definitions  ****************/
+
+enum siw_wr_opcode {
+	SIW_WR_RDMA_WRITE		= IB_WR_RDMA_WRITE,
+	SIW_WR_RDMA_WRITE_WITH_IMM	= IB_WR_RDMA_WRITE_WITH_IMM,
+	SIW_WR_SEND			= IB_WR_SEND,
+	SIW_WR_SEND_WITH_IMM		= IB_WR_SEND_WITH_IMM,
+	SIW_WR_RDMA_READ_REQ		= IB_WR_RDMA_READ,
+	SIW_WR_ATOMIC_CMP_AND_SWP	= IB_WR_ATOMIC_CMP_AND_SWP,
+	SIW_WR_ATOMIC_FETCH_AND_ADD	= IB_WR_ATOMIC_FETCH_AND_ADD,
+#if (OFA_VERSION >= 140)
+	SIW_WR_FASTREG			= IB_WR_FAST_REG_MR, /* unsupported */
+	SIW_WR_INVAL_STAG		= IB_WR_LOCAL_INV, /* unsupported */
+#endif
+	SIW_WR_RECEIVE,
+	SIW_WR_BIND_MW, /* unsupported */
+	SIW_WR_RDMA_READ_RESP,		/* pseudo WQE */
+	SIW_WR_NUM			/* last entry! */
+};
+
+#define SIW_WQE_IS_TX(wqe)	1	/* add BIND/FASTREG/INVAL_STAG */
+
+struct siw_sge {
+	u64		addr;	/* HBO */
+	unsigned int	len;	/* HBO */
+	u32		lkey;	/* HBO */
+	union {
+		struct siw_mem	*obj; /* reference to registered memory */
+		char 		*buf; /* linear kernel buffer */
+	} mem;
+};
+
+struct siw_wr_common {
+	enum siw_wr_opcode	type;
+	enum ib_send_flags	flags;
+	u64			id;
+};
+
+/*
+ * All WRs below that carry an SGL (with one or more SGEs) must start
+ * with the layout given by struct siw_wr_with_sgl!
+ */
+struct siw_wr_with_sgl {
+	struct siw_wr_common	hdr;
+	int                     num_sge;
+	struct siw_sge		sge[0]; /* Start of source or dest. SGL */
+};
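+
+/*
+ * Example (illustrative): struct siw_wr_send below starts with the
+ * same { hdr, num_sge, sge[] } prefix, so generic SGL walkers may
+ * view it through struct siw_wr_with_sgl.
+ */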
+
+struct siw_wr_send {
+	struct siw_wr_common	hdr;
+	int			num_sge;
+	struct siw_sge		sge[SIW_MAX_SGE];
+};
+
+struct siw_wr_rmda_write {
+	struct	siw_wr_common	hdr;
+	int			num_sge;
+	struct siw_sge		sge[SIW_MAX_SGE];
+	u64			raddr;
+	u32			rtag;
+};
+
+struct siw_wr_rdma_rread {
+	struct	siw_wr_common	hdr;
+	int			num_sge;
+	struct siw_sge		sge[SIW_MAX_SGE];
+	u64			raddr;
+	u32			rtag;
+};
+
+struct siw_wr_rdma_rresp {
+	struct	siw_wr_common	hdr;
+	int			num_sge; /* must be 1 */
+	struct siw_sge		sge;
+	u64			raddr;
+	u32			rtag;  /* uninterpreted, NBO */
+};
+
+struct siw_wr_bind {
+	struct	siw_wr_common	hdr;
+	u32			rtag;
+	u32			ltag;
+	struct siw_mr		*mr;
+	u64			addr;
+	u32			len;
+	enum siw_access_flags	perms;
+};
+
+struct siw_wr_recv {
+	struct	siw_wr_common	hdr;
+	int			num_sge;
+	struct siw_sge		sge[SIW_MAX_SGE];
+};
+
+enum siw_wr_state {
+	SR_WR_QUEUED            = 0,	/* processing has not started yet */
+	SR_WR_INPROGRESS	= 1,	/* initiated processing of the WR */
+	SR_WR_DONE		= 2,
+};
+
+/* better name it siw_qe? */
+struct siw_wqe {
+	struct list_head	list;
+	union {
+		struct siw_wr_common		hdr;
+		struct siw_wr_with_sgl		sgl;
+		struct siw_wr_send		send;
+		struct siw_wr_rmda_write	write;
+		struct siw_wr_rdma_rread	rread;
+		struct siw_wr_rdma_rresp	rresp;
+		struct siw_wr_bind		bind;
+		struct siw_wr_recv		recv;
+	} wr;
+	struct siw_qp		*qp;
+	enum siw_wr_state	wr_status;
+	enum ib_wc_status	wc_status;
+	u32			bytes;		/* # bytes to be processed */
+	u32			processed;	/* # bytes successfully proc'd */
+	int			error;
+};
+
+enum siw_cq_armed {
+	SIW_CQ_NOTIFY_NOT = 0,
+	SIW_CQ_NOTIFY_SOLICITED,
+	SIW_CQ_NOTIFY_ALL
+};
+
+struct siw_cq {
+	struct ib_cq		ofa_cq;
+	struct siw_objhdr	hdr;
+	enum siw_cq_armed	notify;
+	spinlock_t		lock;
+	struct list_head	queue;		/* simple list of cqe's */
+	atomic_t		qlen;		/* number of elements */
+};
+
+enum siw_qp_state {
+	SIW_QP_STATE_IDLE 	= 0,
+	SIW_QP_STATE_RTR 	= 1,
+	SIW_QP_STATE_RTS 	= 2,
+	SIW_QP_STATE_CLOSING 	= 3,
+	SIW_QP_STATE_TERMINATE 	= 4,
+	SIW_QP_STATE_ERROR 	= 5,
+	SIW_QP_STATE_MORIBUND 	= 6, /* destroy called but still referenced */
+	SIW_QP_STATE_UNDEF 	= 7,
+	SIW_QP_STATE_COUNT 	= 8
+};
+
+enum siw_qp_flags {
+	SIW_RDMA_BIND_ENABLED	= (1 << 0),
+	SIW_RDMA_WRITE_ENABLED	= (1 << 1),
+	SIW_RDMA_READ_ENABLED	= (1 << 2),
+	SIW_TERMINATE_LOCAL	= (1 << 3),
+	SIW_RECVQ_ARMED		= (1 << 4),
+	/*
+	 * QP currently being destroyed
+	 */
+	SIW_QP_IN_DESTROY	= (1 << 8)
+};
+
+enum siw_qp_attr_mask {
+	SIW_QP_ATTR_STATE		= (1 << 0),
+	SIW_QP_ATTR_ACCESS_FLAGS	= (1 << 1),
+	SIW_QP_ATTR_LLP_HANDLE		= (1 << 2),
+	SIW_QP_ATTR_ORD			= (1 << 3),
+	SIW_QP_ATTR_IRD			= (1 << 4),
+	SIW_QP_ATTR_SQ_SIZE		= (1 << 5),
+	SIW_QP_ATTR_RQ_SIZE		= (1 << 6),
+	SIW_QP_ATTR_MPA			= (1 << 7)
+};
+
+struct siw_mpa_attrs {
+	__u8	marker_rcv;
+	__u8	marker_snd;
+	__u8	crc;
+	__u8	version;
+};
+
+struct siw_sk_upcalls {
+	void    (*sk_state_change)(struct sock *sk);
+	void    (*sk_data_ready)(struct sock *sk, int bytes);
+	void    (*sk_write_space)(struct sock *sk);
+	void    (*sk_error_report)(struct sock *sk);
+};
+
+struct siw_sq_work {
+	struct work_struct	work;
+};
+
+struct siw_srq {
+	struct ib_srq		ofa_srq;
+	struct siw_pd		*pd;
+	struct list_head	rq;
+	spinlock_t		lock;
+	u32			max_sge;
+	atomic_t		space;	/* current space for posting wqe's */
+	u32			limit;	/* low watermark for async event */
+	u32			max_wr;	/* max # of wqe's allowed */
+	char			armed;	/* inform user if limit hit */
+};
+
+struct siw_qp_attrs {
+	enum siw_qp_state	state;
+	char                    terminate_buffer[52];
+	u32			terminate_msg_length;
+	u32			ddp_rdmap_version; /* 0 or 1 */
+	char                    *stream_msg_buf;
+	u32			stream_msg_buf_length;
+	u32			rq_hiwat;
+	u32			sq_size;
+	u32			rq_size;
+	u32			sq_max_sges;
+	u32			sq_max_sges_rdmaw;
+	u32			rq_max_sges;
+	u32			ord;
+	u32			ird;
+	struct siw_mpa_attrs	mpa;
+	enum siw_qp_flags	flags;
+
+	struct socket		*llp_stream_handle;
+};
+
+enum siw_tx_ctx {
+	SIW_SEND_HDR = 0,	/* start or continue sending HDR */
+	SIW_SEND_DATA = 1,	/* start or continue sending DDP payload */
+	SIW_SEND_TRAILER = 2,	/* start or continue sending TRAILER */
+	SIW_SEND_SHORT_FPDU = 3 /* send whole FPDU hdr|data|trailer at once */
+};
+
+enum siw_rx_state {
+	SIW_GET_HDR = 0,	/* await new hdr or within hdr */
+	SIW_GET_DATA_START = 1,	/* start of inbound DDP payload */
+	SIW_GET_DATA_MORE = 2,	/* continuation of (misaligned) DDP payload */
+	SIW_GET_TRAILER	= 3	/* await new trailer or within trailer (+pad) */
+};
+
+
+struct siw_iwarp_rx {
+	struct sk_buff		*skb;
+	union iwarp_hdrs	hdr;
+	struct mpa_trailer	trailer;
+	/*
+	 * local destination memory of inbound iwarp operation.
+	 * valid, if already resolved, NULL otherwise.
+	 */
+	union {
+		struct siw_wqe	*wqe; /* SEND, RRESP */
+		struct siw_mem	*mem; /* WRITE */
+	} dest;
+
+	struct hash_desc	mpa_crc_hd;
+	/*
+	 * Next expected DDP MSN for each QN +
+	 * expected steering tag +
+	 * expected DDP tagged offset (all HBO)
+	 */
+	u32			ddp_msn[RDMAP_UNTAGGED_QN_COUNT];
+	u32			ddp_stag;
+	u64			ddp_to;
+
+	/*
+	 * For each FPDU, main RX loop runs through 3 stages:
+	 * Receiving protocol headers, placing DDP payload and receiving
+	 * trailer information (CRC + eventual padding).
+	 * Next two variables keep state on receive status of the
+	 * current FPDU part (hdr, data, trailer).
+	 */
+	int			fpdu_part_rcvd;/* bytes in pkt part copied */
+	int			fpdu_part_rem; /* bytes in pkt part not seen */
+
+	int			skb_new;      /* pending unread bytes in skb */
+	int			skb_offset;   /* offset in skb */
+	int			skb_copied;   /* processed bytes in skb */
+
+	int			sge_idx;	/* current sge in rx */
+	unsigned int		sge_off; 	/* already rcvd in curr. sge */
+	struct ib_umem_chunk	*umem_chunk;	/* chunk used by sge and off */
+	int			pg_idx;		/* page used in chunk */
+	unsigned int		pg_off;		/* offset within that page */
+
+	enum siw_rx_state	state;
+
+	u8			crc_enabled:1,
+				first_ddp_seg:1,   /* receiving first DDP seg */
+				more_ddp_segs:1,   /* more DDP segs expected */
+				rx_suspend:1,	   /* stop rcv DDP segs. */
+				prev_ddp_opcode:4; /* opcode of prev DDP msg */
+	char			pad;		/* # of pad bytes expected */
+};
+
+#define siw_rx_data(qp, rctx)	\
+	(iwarp_pktinfo[(rctx)->hdr.ctrl.opcode].proc_data(qp, rctx))
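+
+/*
+ * Example (illustrative): once a complete header sits in rctx->hdr,
+ * the RX loop places payload via
+ *
+ *	rv = siw_rx_data(qp, rctx);
+ *
+ * which dispatches to the per-opcode handler, e.g. siw_proc_send().
+ */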
+
+/*
+ * Shorthands for short packets w/o payload,
+ * to be transmitted more efficiently.
+ */
+struct siw_send_pkt {
+	struct iwarp_send	send;
+	__u32			crc;
+} __attribute__((__packed__));
+
+struct siw_write_pkt {
+	struct iwarp_rdma_write	write;
+	__u32			crc;
+} __attribute__((__packed__));
+
+struct siw_rreq_pkt {
+	struct iwarp_rdma_rreq	rreq;
+	__u32			crc;
+} __attribute__((__packed__));
+
+struct siw_rresp_pkt {
+	struct iwarp_rdma_rresp	rresp;
+	__u32			crc;
+} __attribute__((__packed__));
+
+struct siw_iwarp_tx {
+	union {
+		union iwarp_hdrs		hdr;
+
+		/* Generic part of FPDU header */
+		struct iwarp_ctrl		ctrl;
+		struct iwarp_ctrl_untagged	c_untagged;
+		struct iwarp_ctrl_tagged	c_tagged;
+
+		/* FPDU headers */
+		struct iwarp_rdma_write		rwrite;
+		struct iwarp_rdma_rreq		rreq;
+		struct iwarp_rdma_rresp		rresp;
+		struct iwarp_terminate		terminate;
+		struct iwarp_send		send;
+		struct iwarp_send_inv		send_inv;
+
+		/* complete short FPDUs */
+		struct siw_send_pkt		send_pkt;
+		struct siw_write_pkt		write_pkt;
+		struct siw_rreq_pkt		rreq_pkt;
+		struct siw_rresp_pkt		rresp_pkt;
+	} pkt;
+
+	struct mpa_trailer			trailer;
+	/* DDP MSN for untagged messages */
+	u32			ddp_msn[RDMAP_UNTAGGED_QN_COUNT];
+
+	enum siw_tx_ctx	state;
+	wait_queue_head_t	waitq;
+
+	u16			ctrl_len;	/* ddp+rdmap hdr */
+	u16			ctrl_sent;
+	int			bytes_unsent;	/* ddp payload bytes */
+
+	struct hash_desc	mpa_crc_hd;
+
+	atomic_t		in_use;		/* tx currently under way */
+
+	char			pad;		/* # pad in current fpdu */
+	u8			crc_enabled:1,	/* compute and ship crc */
+				do_crc:1,	/* do crc for segment */
+				use_sendpage:1,	/* send w/o copy */
+				new_tcpseg:1,	/* start new tcp segment */
+				wspace_update:1,/* new write space indicated */
+				tx_suspend:1,	/* stop sending DDP segs. */
+				rsvd:3;
+
+	u16			fpdu_len;	/* len of FPDU to tx */
+
+	int			tcp_seglen;	/* remaining tcp seg space */
+	struct siw_wqe		*wqe;
+
+	int			sge_idx;	/* current sge in tx */
+	u32			sge_off; 	/* already sent in curr. sge */
+	struct ib_umem_chunk	*umem_chunk;	/* chunk used by sge and off */
+	int			pg_idx;		/* page used in mem chunk */
+};
+
+struct siw_qp {
+	struct ib_qp		ofa_qp;
+	struct siw_objhdr	hdr;
+	int			cpu;
+	struct siw_iwarp_rx	rx_ctx;
+	struct siw_iwarp_tx	tx_ctx;
+
+	struct siw_cep		*cep;
+	struct rw_semaphore	state_lock;
+
+	struct siw_pd		*pd;
+	struct siw_cq		*scq;
+	struct siw_cq		*rcq;
+
+	struct siw_qp_attrs	attrs;
+
+	struct list_head	wqe_freelist;
+	spinlock_t		freelist_lock;
+	struct list_head	sq;
+	struct list_head	irq;
+	spinlock_t		sq_lock;
+	atomic_t		sq_space;
+	struct siw_srq		*srq;
+	struct list_head	rq;
+	spinlock_t		rq_lock;
+	atomic_t		rq_space;
+	struct list_head	orq;
+	atomic_t		orq_space;
+	spinlock_t		orq_lock;
+	/*
+	 * workqueue interface:
+	 *
+	 * we must allow for two works since during work
+	 * execution we may have to schedule another work item
+	 */
+	struct siw_sq_work	sq_work;
+};
+
+#define lock_sq(qp)	spin_lock(&qp->sq_lock)
+#define unlock_sq(qp)	spin_unlock(&qp->sq_lock)
+
+#define lock_sq_rxsave(qp, flags) spin_lock_irqsave(&qp->sq_lock, flags)
+#define unlock_sq_rxsave(qp, flags) spin_unlock_irqrestore(&qp->sq_lock, flags)
+
+#define lock_rq(qp)	spin_lock(&qp->rq_lock)
+#define unlock_rq(qp)	spin_unlock(&qp->rq_lock)
+
+#define lock_rq_rxsave(qp, flags) spin_lock_irqsave(&qp->rq_lock, flags)
+#define unlock_rq_rxsave(qp, flags) spin_unlock_irqrestore(&qp->rq_lock, flags)
+
+#define lock_srq(srq)	spin_lock(&srq->lock)
+#define unlock_srq(srq)	spin_unlock(&srq->lock)
+
+#define lock_srq_rxsave(srq, flags) spin_lock_irqsave(&srq->lock, flags)
+#define unlock_srq_rxsave(srq, flags) spin_unlock_irqrestore(&srq->lock, flags)
+
+#define lock_cq(cq)	spin_lock(&cq->lock)
+#define unlock_cq(cq)	spin_unlock(&cq->lock)
+
+#define lock_cq_rxsave(cq, flags)	spin_lock_irqsave(&cq->lock, flags)
+#define unlock_cq_rxsave(cq, flags)	spin_unlock_irqrestore(&cq->lock, flags)
+
+#define lock_orq(qp)	spin_lock(&qp->orq_lock)
+#define unlock_orq(qp)	spin_unlock(&qp->orq_lock)
+
+#define lock_orq_rxsave(qp, flags)	spin_lock_irqsave(&qp->orq_lock, flags)
+#define unlock_orq_rxsave(qp, flags)\
+	spin_unlock_irqrestore(&qp->orq_lock, flags)
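+
+/*
+ * Example (illustrative): posting to the SQ from process context
+ * while the softirq RX path may contend:
+ *
+ *	unsigned long flags;
+ *
+ *	lock_sq_rxsave(qp, flags);
+ *	list_add_tail(&wqe->list, &qp->sq);
+ *	unlock_sq_rxsave(qp, flags);
+ */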
+
+#define RX_QP(rx)		container_of(rx, struct siw_qp, rx_ctx)
+#define TX_QP(tx)		container_of(tx, struct siw_qp, tx_ctx)
+#define QP_ID(qp)		((qp)->hdr.id)
+#define OBJ_ID(obj)		((obj)->hdr.id)
+#define RX_QPID(rx)		QP_ID(RX_QP(rx))
+#define TX_QPID(tx)		QP_ID(TX_QP(tx))
+
+/* helper macros */
+#define tx_wqe(qp)		((qp)->tx_ctx.wqe)
+#define rx_wqe(qp)		((qp)->rx_ctx.dest.wqe)
+#define rx_mem(qp)		((qp)->rx_ctx.dest.mem)
+#define wr_id(wqe)		((wqe)->wr.hdr.id)
+#define wr_type(wqe)		((wqe)->wr.hdr.type)
+#define wr_flags(wqe)		((wqe)->wr.hdr.flags)
+#define list_entry_wqe(pos)	list_entry(pos, struct siw_wqe, list)
+#define list_first_wqe(pos)	list_first_entry(pos, struct siw_wqe, list)
+
+#define ORD_SUSPEND_SQ(qp) 	(!atomic_read(&(qp)->orq_space))
+#define TX_ACTIVE(qp)		(tx_wqe(qp) != NULL)
+#define SQ_EMPTY(qp)		list_empty(&((qp)->sq))
+#define ORQ_EMPTY(qp)		list_empty(&((qp)->orq))
+#define IRQ_EMPTY(qp)		list_empty(&((qp)->irq))
+#define TX_ACTIVE_RRESP(qp)	(TX_ACTIVE(qp) &&\
+			wr_type(tx_wqe(qp)) == SIW_WR_RDMA_READ_RESP)
+
+#define TX_IDLE(qp)		(!TX_ACTIVE(qp) && SQ_EMPTY(qp) && \
+				IRQ_EMPTY(qp) && ORQ_EMPTY(qp))
+
+#define TX_MORE_WQE(qp)		(!SQ_EMPTY(qp) || !IRQ_EMPTY(qp))
+
+struct iwarp_msg_info {
+	int			hdr_len;
+	struct iwarp_ctrl	ctrl;
+	int (*proc_data)	(struct siw_qp *, struct siw_iwarp_rx *);
+};
+
+extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1];
+
+
+extern struct siw_dev *siw;
+
+
+/* QP general functions */
+int siw_qp_modify(struct siw_qp *, struct siw_qp_attrs *,
+		  enum siw_qp_attr_mask);
+
+void siw_qp_llp_close(struct siw_qp *);
+void siw_qp_cm_drop(struct siw_qp *, int);
+
+
+struct ib_qp *siw_get_ofaqp(struct ib_device *, int);
+void siw_qp_get_ref(struct ib_qp *);
+void siw_qp_put_ref(struct ib_qp *);
+
+int siw_no_mad(struct ib_device *, int, u8, struct ib_wc *, struct ib_grh *,
+	       struct ib_mad *, struct ib_mad *);
+
+enum siw_qp_state siw_map_ibstate(enum ib_qp_state);
+
+int siw_check_mem(struct siw_pd *, struct siw_mem *, u64,
+		  enum siw_access_flags, int);
+int siw_check_sge(struct siw_pd *, struct siw_sge *,
+		  enum siw_access_flags, u32, int);
+int siw_check_sgl(struct siw_pd *, struct siw_sge *,
+		  enum siw_access_flags, u32, int);
+
+void siw_rq_complete(struct siw_wqe *, struct siw_qp *);
+void siw_sq_complete(struct list_head *, struct siw_qp *, int,
+		     enum ib_send_flags);
+
+
+/* QP TX path functions */
+int siw_qp_sq_process(struct siw_qp *, int);
+int siw_sq_worker_init(void);
+void siw_sq_worker_exit(void);
+int siw_sq_queue_work(struct siw_qp *qp);
+
+/* QP RX path functions */
+int siw_proc_send(struct siw_qp *, struct siw_iwarp_rx *);
+int siw_init_rresp(struct siw_qp *, struct siw_iwarp_rx *);
+int siw_proc_rreq(struct siw_qp *, struct siw_iwarp_rx *);
+int siw_proc_rresp(struct siw_qp *, struct siw_iwarp_rx *);
+int siw_proc_write(struct siw_qp *, struct siw_iwarp_rx *);
+int siw_proc_terminate(struct siw_qp*, struct siw_iwarp_rx *);
+int siw_proc_unsupp(struct siw_qp *, struct siw_iwarp_rx *);
+
+int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
+		    unsigned int off, size_t len);
+
+/* MPA utilities */
+int siw_crc_array(struct hash_desc *, u8 *, size_t);
+int siw_crc_sg(struct hash_desc *, struct scatterlist *, int, int);
+
+
+/* Varia */
+void siw_cq_flush(struct siw_cq *);
+void siw_sq_flush(struct siw_qp *);
+void siw_rq_flush(struct siw_qp *);
+void siw_qp_freeq_flush(struct siw_qp *);
+int siw_reap_cqe(struct siw_cq *, struct ib_wc *);
+
+void siw_async_ev(struct siw_qp *, struct siw_cq *, enum ib_event_type);
+void siw_async_srq_ev(struct siw_srq *, enum ib_event_type);
+
+static inline struct siw_wqe *
+siw_next_tx_wqe(struct siw_qp *qp) {
+	struct siw_wqe *wqe;
+
+	if (!list_empty(&qp->irq))
+		wqe = list_first_entry(&qp->irq, struct siw_wqe, list);
+	else if (!list_empty(&qp->sq))
+		wqe = list_first_entry(&qp->sq, struct siw_wqe, list);
+	else
+		wqe = NULL;
+	return wqe;
+}
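+
+/*
+ * Note: siw_next_tx_wqe() serves the inbound read request queue
+ * (qp->irq) before the send queue, giving RDMA Read responses
+ * precedence over new application work.
+ */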
+
+static inline void
+siw_rreq_queue(struct siw_wqe *wqe, struct siw_qp *qp)
+{
+	unsigned long	flags;
+
+	lock_orq_rxsave(qp, flags);
+	list_move_tail(&wqe->list, &qp->orq);
+	atomic_dec(&qp->orq_space);
+	unlock_orq_rxsave(qp, flags);
+}
+
+
+static inline struct ib_umem_chunk *
+mem_chunk_next(struct ib_umem_chunk *chunk)
+{
+	return list_entry(chunk->list.next, struct ib_umem_chunk, list);
+}
+
+
+static inline struct siw_mr *siw_mem2mr(struct siw_mem *m)
+{
+	if (!SIW_MEM_IS_MW(m))
+		return container_of(m, struct siw_mr, mem);
+	return m->mr;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw_ae.c b/drivers/infiniband/hw/softiwarp/siw_ae.c
new file mode 100644
index 0000000..7e9ab3f
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_ae.c
@@ -0,0 +1,96 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+/*
+ * siw_async_ev()
+ *
+ * Report asynchronous event to user.
+ */
+void siw_async_ev(struct siw_qp *qp, struct siw_cq *cq,
+		  enum ib_event_type etype)
+{
+	struct ib_event	event;
+
+	dprint(DBG_EH, "(QP%d): AE type %d\n", QP_ID(qp), etype);
+
+	event.event = etype;
+	event.device = qp->ofa_qp.device;
+	if (cq)
+		event.element.cq = &cq->ofa_cq;
+	else
+		event.element.qp = &qp->ofa_qp;
+
+	if (!(qp->attrs.flags & SIW_QP_IN_DESTROY) &&
+	    qp->ofa_qp.event_handler) {
+		dprint(DBG_EH, "(QP%d): Call AEH\n", QP_ID(qp));
+		(*qp->ofa_qp.event_handler)(&event, qp->ofa_qp.qp_context);
+	}
+}
+
+void siw_async_srq_ev(struct siw_srq *srq, enum ib_event_type etype)
+{
+	struct ib_event 	event;
+
+	dprint(DBG_EH, "(SRQ%p): AE type %d\n", srq, etype);
+
+	event.event = etype;
+	event.device = srq->ofa_srq.device;
+	event.element.srq = &srq->ofa_srq;
+
+	if (srq->ofa_srq.event_handler)
+		(*srq->ofa_srq.event_handler)(&event, srq->ofa_srq.srq_context);
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_cm.c b/drivers/infiniband/hw/softiwarp/siw_cm.c
new file mode 100644
index 0000000..0ec5021
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_cm.c
@@ -0,0 +1,1964 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *          Fredy Neeser <nfd@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+#include <linux/tcp.h>
+
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_cm.h"
+#include "siw_obj.h"
+
+static int mpa_crc_enabled;
+module_param(mpa_crc_enabled, int, 0644);
+MODULE_PARM_DESC(mpa_crc_enabled, "MPA CRC enabled");
+
+static int mpa_revision = 1;
+
+
+/*
+ * siw_sock_nodelay() - Disable Nagle algorithm
+ *
+ * See also fs/ocfs2/cluster/tcp.c, o2net_set_nodelay()
+ */
+static int siw_sock_nodelay(struct socket *sock)
+{
+	int ret, val = 1;
+	mm_segment_t oldfs;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+
+	/*
+	 * Don't use sock_setsockopt() for SOL_TCP. It doesn't check its level
+	 * argument and assumes SOL_SOCKET so, say, your TCP_NODELAY will
+	 * silently turn into SO_DEBUG.
+	 */
+	ret = sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY,
+				    (char __user *)&val, sizeof(val));
+	set_fs(oldfs);
+	return ret;
+}
+
+static void siw_cm_llp_state_change(struct sock *);
+static void siw_cm_llp_data_ready(struct sock *, int);
+static void siw_cm_llp_write_space(struct sock *);
+static void siw_cm_llp_error_report(struct sock *);
+
+static void siw_sk_assign_cm_upcalls(struct sock *sk)
+{
+	write_lock_bh(&sk->sk_callback_lock);
+	sk->sk_state_change = siw_cm_llp_state_change;
+	sk->sk_data_ready   = siw_cm_llp_data_ready;
+	sk->sk_write_space  = siw_cm_llp_write_space;
+	sk->sk_error_report = siw_cm_llp_error_report;
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void siw_sk_save_upcalls(struct sock *sk)
+{
+	struct siw_cep *cep = sk_to_cep(sk);
+	BUG_ON(!cep);
+
+	write_lock_bh(&sk->sk_callback_lock);
+	cep->sk_state_change = sk->sk_state_change;
+	cep->sk_data_ready   = sk->sk_data_ready;
+	cep->sk_write_space  = sk->sk_write_space;
+	cep->sk_error_report = sk->sk_error_report;
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep)
+{
+	sk->sk_state_change	= cep->sk_state_change;
+	sk->sk_data_ready	= cep->sk_data_ready;
+	sk->sk_write_space	= cep->sk_write_space;
+	sk->sk_error_report	= cep->sk_error_report;
+	sk->sk_user_data 	= NULL;
+	sk->sk_no_check 	= 0;
+}
+
+static void siw_socket_disassoc(struct socket *s)
+{
+	struct sock	*sk = s->sk;
+	struct siw_cep	*cep;
+
+	if (sk) {
+		write_lock_bh(&sk->sk_callback_lock);
+		cep = sk_to_cep(sk);
+		if (cep) {
+			siw_sk_restore_upcalls(sk, cep);
+			siw_cep_put(cep);
+		}
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+}
+
+
+static inline int kernel_peername(struct socket *s, struct sockaddr_in *addr)
+{
+	int unused;
+	return s->ops->getname(s, (struct sockaddr *)addr, &unused, 1);
+}
+
+static inline int kernel_localname(struct socket *s, struct sockaddr_in *addr)
+{
+	int unused;
+	return s->ops->getname(s, (struct sockaddr *)addr, &unused, 0);
+}
+
+static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s)
+{
+	cep->llp.sock = s;
+	siw_cep_get(cep);
+	s->sk->sk_user_data = cep;
+
+	siw_sk_save_upcalls(s->sk);
+	siw_sk_assign_cm_upcalls(s->sk);
+}
+
+
+static struct siw_cep *siw_cep_alloc(void)
+{
+	struct siw_cep *cep = kzalloc(sizeof *cep, GFP_KERNEL);
+	if (cep) {
+		INIT_LIST_HEAD(&cep->list);
+		INIT_LIST_HEAD(&cep->work_freelist);
+
+		cep->mpa.hdr.params.c = mpa_crc_enabled ? 1 : 0;
+		cep->mpa.hdr.params.m = 0;
+		cep->mpa.hdr.params.rev = mpa_revision ? 1 : 0;
+		kref_init(&cep->ref);
+		cep->state = SIW_EPSTATE_IDLE;
+		init_waitqueue_head(&cep->waitq);
+		spin_lock_init(&cep->lock);
+		dprint(DBG_OBJ|DBG_CM, "(CEP 0x%p): New Object\n", cep);
+	}
+	return cep;
+}
+
+static void siw_cm_free_work(struct siw_cep *cep)
+{
+	struct list_head	*w, *tmp;
+	struct siw_cm_work	*work;
+
+	list_for_each_safe(w, tmp, &cep->work_freelist) {
+		work = list_entry(w, struct siw_cm_work, list);
+		list_del(&work->list);
+		kfree(work);
+	}
+}
+
+static void siw_put_work(struct siw_cm_work *work)
+{
+	INIT_LIST_HEAD(&work->list);
+	spin_lock_bh(&work->cep->lock);
+	list_add(&work->list, &work->cep->work_freelist);
+	spin_unlock_bh(&work->cep->lock);
+}
+
+
+static void __siw_cep_dealloc(struct kref *ref)
+{
+	struct siw_cep *cep = container_of(ref, struct siw_cep, ref);
+
+	dprint(DBG_OBJ|DBG_CM, "(CEP 0x%p): Free Object\n", cep);
+
+	if (cep->listen_cep)
+		siw_cep_put(cep->listen_cep);
+
+	/* kfree(NULL) is safe */
+	kfree(cep->mpa.pdata);
+	spin_lock_bh(&cep->lock);
+	if (!list_empty(&cep->work_freelist))
+		siw_cm_free_work(cep);
+	spin_unlock_bh(&cep->lock);
+
+	kfree(cep);
+}
+
+static struct siw_cm_work *siw_get_work(struct siw_cep *cep)
+{
+	struct siw_cm_work	*work = NULL;
+
+	spin_lock_bh(&cep->lock);
+	if (!list_empty(&cep->work_freelist)) {
+		work = list_entry(cep->work_freelist.next, struct siw_cm_work,
+				  list);
+		list_del_init(&work->list);
+	}
+	spin_unlock_bh(&cep->lock);
+	return work;
+}
+
+static int siw_cm_alloc_work(struct siw_cep *cep, int num)
+{
+	struct siw_cm_work	*work;
+
+	BUG_ON(!list_empty(&cep->work_freelist));
+
+	while (num--) {
+		work = kmalloc(sizeof *work, GFP_KERNEL);
+		if (!work) {
+			if (!(list_empty(&cep->work_freelist)))
+				siw_cm_free_work(cep);
+			dprint(DBG_ON, " Failed\n");
+			return -ENOMEM;
+		}
+		work->cep = cep;
+		INIT_LIST_HEAD(&work->list);
+		list_add(&work->list, &cep->work_freelist);
+	}
+	return 0;
+}
+
+static void siw_cm_release(struct siw_cep *cep)
+{
+	if (cep->llp.sock) {
+		siw_socket_disassoc(cep->llp.sock);
+		sock_release(cep->llp.sock);
+		cep->llp.sock = NULL;
+	}
+	if (cep->qp) {
+		struct siw_qp *qp = cep->qp;
+		cep->qp = NULL;
+		siw_qp_put(qp);
+	}
+	if (cep->cm_id) {
+		cep->cm_id->rem_ref(cep->cm_id);
+		cep->cm_id = NULL;
+		siw_cep_put(cep);
+	}
+	cep->state = SIW_EPSTATE_CLOSED;
+}
+
+/*
+ * Test and set CEP into CLOSE pending. After calling
+ * this function, the CEP conn_close flag is set. Returns:
+ *
+ *  1, if CEP is currently in use,
+ *  0, if CEP is not in use and not already in CLOSE,
+ * -1, if CEP is not in use and already in CLOSE.
+ */
+int siw_cep_in_close(struct siw_cep *cep)
+{
+	int rv;
+
+	spin_lock_bh(&cep->lock);
+
+	dprint(DBG_CM, " (CEP 0x%p): close %d, use %d\n",
+		cep, cep->conn_close, cep->in_use);
+
+	rv = cep->in_use ? 1 : (cep->conn_close ? -1 : 0);
+	cep->conn_close = 1; /* may be redundant */
+
+	spin_unlock_bh(&cep->lock);
+
+	return rv;
+}
+
+/*
+ * siw_qp_cm_drop()
+ *
+ * Drops established LLP connection if present and not already
+ * scheduled for dropping. Called from user context, SQ workqueue
+ * or receive IRQ. Caller signals if socket can be immediately
+ * closed (basically, if not in IRQ) and if IWCM should get
+ * informed of LLP state change.
+ */
+void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
+{
+	struct siw_cep *cep = qp->cep;
+
+	qp->rx_ctx.rx_suspend = 1;
+	qp->tx_ctx.tx_suspend = 1;
+
+	if (cep && !siw_cep_in_close(cep)) {
+		if (schedule) {
+			siw_cm_queue_work(cep, SIW_CM_WORK_CLOSE_LLP);
+			return;
+		}
+		/*
+		 * Immediately close socket
+		 */
+		dprint(DBG_CM, "(): immediate close, cep->state=%d\n",
+			cep->state);
+
+		if (cep->cm_id) {
+			switch (cep->state) {
+
+			case SIW_EPSTATE_AWAIT_MPAREP:
+				siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+					      IW_CM_EVENT_STATUS_EINVAL);
+				break;
+
+			case SIW_EPSTATE_RDMA_MODE:
+				siw_cm_upcall(cep, IW_CM_EVENT_CLOSE,
+					      IW_CM_EVENT_STATUS_OK);
+
+				break;
+
+			case SIW_EPSTATE_IDLE:
+			case SIW_EPSTATE_LISTENING:
+			case SIW_EPSTATE_CONNECTING:
+			case SIW_EPSTATE_AWAIT_MPAREQ:
+			case SIW_EPSTATE_RECVD_MPAREQ:
+			case SIW_EPSTATE_CLOSED:
+			default:
+
+				break;
+			}
+			cep->cm_id->rem_ref(cep->cm_id);
+			cep->cm_id = NULL;
+			siw_cep_put(cep);
+		}
+		cep->state = SIW_EPSTATE_CLOSED;
+
+		if (cep->llp.sock) {
+			siw_socket_disassoc(cep->llp.sock);
+			sock_release(cep->llp.sock);
+			cep->llp.sock = NULL;
+		}
+		cep->qp = NULL;
+		siw_qp_put(qp);
+	}
+}
+
+
+/*
+ * Set CEP in_use flag. Returns:
+ *
+ *  1, if CEP was not in use and not scheduled for closing,
+ *  0, if CEP was not in use but scheduled for closing,
+ * -1, if CEP is currently in use.
+ */
+static int siw_cep_set_inuse(struct siw_cep *cep)
+{
+	int rv;
+
+	spin_lock_bh(&cep->lock);
+
+	dprint(DBG_CM, " (CEP 0x%p): close %d, use %d\n",
+		cep, cep->conn_close, cep->in_use);
+
+	rv = cep->in_use ? -1 : (cep->conn_close ? 0 : 1);
+	cep->in_use = 1; /* may be redundant */
+
+	spin_unlock_bh(&cep->lock);
+
+	return rv;
+}
+
+/*
+ * Clear CEP in_use flag. Returns:
+ *
+ *  1, if CEP is not scheduled for closing,
+ *  0, else.
+ */
+static int siw_cep_set_free(struct siw_cep *cep)
+{
+	int rv;
+
+	spin_lock_bh(&cep->lock);
+
+	dprint(DBG_CM, " (CEP 0x%p): close %d, use %d\n",
+		cep, cep->conn_close, cep->in_use);
+
+	cep->in_use = 0;
+	rv = cep->conn_close ? 0 : 1;
+
+	spin_unlock_bh(&cep->lock);
+
+	wake_up(&cep->waitq);
+
+	return rv;
+}
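+
+/*
+ * Example (illustrative): the work handler brackets CEP processing as
+ *
+ *	if (siw_cep_set_inuse(cep) > 0) {
+ *		... handle event ...
+ *		if (!siw_cep_set_free(cep))
+ *			siw_cm_release(cep);
+ *	}
+ */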
+
+
+void siw_cep_put(struct siw_cep *cep)
+{
+	dprint(DBG_OBJ|DBG_CM, "(CEP 0x%p): New refcount: %d\n",
+		cep, atomic_read(&cep->ref.refcount) - 1);
+
+	if (!kref_put(&cep->ref, __siw_cep_dealloc))
+		wake_up(&cep->waitq);
+}
+
+void siw_cep_get(struct siw_cep *cep)
+{
+	kref_get(&cep->ref);
+	dprint(DBG_OBJ|DBG_CM, "(CEP 0x%p): New refcount: %d\n",
+		cep, atomic_read(&cep->ref.refcount));
+}
+
+
+
+static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
+			     int flags)
+{
+	struct kvec iov = {buf, size};
+	struct msghdr msg = {.msg_name = NULL, .msg_flags = flags};
+
+	return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
+}
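+
+/*
+ * Note (illustrative): siw_recv_mpa_rr() below uses ksock_recv()
+ * blocking (flags == 0) for the fixed-size header and with
+ * MSG_DONTWAIT when probing for additional bytes.
+ */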
+
+/*
+ * Receive MPA Request/Reply header.
+ *
+ * Returns 0 if the complete MPA Request/Reply header, including
+ * any private data, was received. Returns -EAGAIN if the
+ * header was partially received, or a negative error code otherwise.
+ *
+ * Context: May be called in process context only
+ */
+static int siw_recv_mpa_rr(struct siw_cep *cep)
+{
+	struct mpa_rr	*hdr = &cep->mpa.hdr;
+	struct socket	*s = cep->llp.sock;
+	int		rcvd, to_rcv;
+
+	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
+
+		rcvd = ksock_recv(s, (char *)hdr + cep->mpa.bytes_rcvd,
+				  sizeof(struct mpa_rr) -
+				  cep->mpa.bytes_rcvd, 0);
+
+		if (rcvd <= 0)
+			return -ECONNABORTED;
+
+		cep->mpa.bytes_rcvd += rcvd;
+
+		if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr))
+			return -EAGAIN;
+
+		hdr->params.pd_len = ntohs(hdr->params.pd_len);
+
+		if (hdr->params.pd_len > MPA_MAX_PRIVDATA)
+			return -EPROTO;
+	}
+
+	/*
+	 * At least the MPA Request/Reply header (frame not including
+	 * private data) has been received.
+	 * Receive (or continue receiving) any private data.
+	 */
+	to_rcv = hdr->params.pd_len -
+		 (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr));
+
+	if (!to_rcv) {
+		/*
+		 * We must have hdr->params.pd_len == 0 and thus received a
+		 * complete MPA Request/Reply frame.
+		 * Check against peer protocol violation.
+		 */
+		__u32 word;
+
+		rcvd = ksock_recv(s, (char *)&word, sizeof word, MSG_DONTWAIT);
+		if (rcvd == -EAGAIN)
+			return 0;
+
+		if (rcvd == 0) {
+			dprint(DBG_CM, " peer EOF\n");
+			return -EPIPE;
+		}
+		if (rcvd < 0) {
+			dprint(DBG_CM, " ERROR: %d\n", rcvd);
+			return rcvd;
+		}
+		dprint(DBG_CM, " peer sent extra data: %d\n", rcvd);
+		return -EPROTO;
+	}
+
+	/*
+	 * At this point, we must have hdr->params.pd_len != 0.
+	 * A private data buffer gets allocated iff hdr->params.pd_len != 0.
+	 * Ownership of this buffer will be transferred to the IWCM
+	 * when calling siw_cm_upcall().
+	 */
+	if (!cep->mpa.pdata &&
+	    !(cep->mpa.pdata = kmalloc(hdr->params.pd_len + 4, GFP_KERNEL)))
+		return -ENOMEM;
+
+	rcvd = ksock_recv(s, cep->mpa.pdata + cep->mpa.bytes_rcvd
+			  - sizeof(struct mpa_rr), to_rcv + 4, MSG_DONTWAIT);
+
+	if (rcvd < 0)
+		return rcvd;
+
+	if (rcvd > to_rcv)
+		return -EPROTO;
+
+	cep->mpa.bytes_rcvd += rcvd;
+
+	if (to_rcv == rcvd) {
+		dprint(DBG_CM, "%d bytes private_data received\n",
+			hdr->params.pd_len);
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+
+static void siw_proc_mpareq(struct siw_cep *cep)
+{
+	int err = siw_recv_mpa_rr(cep);
+
+	if (err)
+		goto out;
+
+	if (cep->mpa.hdr.params.rev > MPA_REVISION_1) {
+		/* allow for 0 and 1 only */
+		err = -EPROTO;
+		goto out;
+	}
+
+	if (memcmp(cep->mpa.hdr.key, MPA_KEY_REQ, sizeof cep->mpa.hdr.key)) {
+		err = -EPROTO;
+		goto out;
+	}
+	cep->state = SIW_EPSTATE_RECVD_MPAREQ;
+
+	if (cep->listen_cep->state == SIW_EPSTATE_LISTENING) {
+		/*
+		 * Since siw_cm_upcall() returned success, the iwcm must
+		 * hold a reference to the CEP until the
+		 * IW_CM_EVENT_CONNECT_REQUEST has been accepted or rejected.
+		 * NOTE: If the iwcm never calls back with accept/reject
+		 * (e.g., the user types ^C instead), the CEP can never be
+		 * freed. The resulting memory leak should be fixed by
+		 * calling siw_reject() in case of application
+		 * termination.
+		 */
+		siw_cep_get(cep);
+
+		err = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST,
+				    IW_CM_EVENT_STATUS_OK);
+		if (err)
+			siw_cep_put(cep);
+	} else {
+		/*
+		 * listener lost: new connection cannot be signalled
+		 */
+		dprint(DBG_CM|DBG_ON, "(cep=0x%p): Listener lost!\n", cep);
+		err = -EINVAL;
+	}
+out:
+	if (err) {
+		dprint(DBG_CM|DBG_ON, "(cep=0x%p): error %d\n", cep, err);
+
+		if (!siw_cep_in_close(cep)) {
+			/*
+			 * remove reference from listening cep and clear
+			 * information on related listener.
+			 */
+			siw_cep_put(cep->listen_cep);
+			cep->listen_cep = NULL;
+
+			siw_socket_disassoc(cep->llp.sock);
+			sock_release(cep->llp.sock);
+			cep->llp.sock = NULL;
+
+			cep->state = SIW_EPSTATE_CLOSED;
+			siw_cep_put(cep);
+		}
+	}
+}
+
+
+static void siw_proc_mpareply(struct siw_cep *cep)
+{
+	struct siw_qp_attrs	qp_attrs;
+	struct siw_qp		*qp = cep->qp;
+	int			rv;
+
+	rv = siw_recv_mpa_rr(cep);
+	if (rv == -EAGAIN)
+		/* incomplete mpa reply */
+		return;
+
+	if (rv)
+		goto error;
+
+	if (cep->mpa.hdr.params.rev > MPA_REVISION_1) {
+		/* allow for 0 and 1 only */
+		rv = -EPROTO;
+		goto error;
+	}
+	if (memcmp(cep->mpa.hdr.key, MPA_KEY_REP, sizeof cep->mpa.hdr.key)) {
+		rv = -EPROTO;
+		goto error;
+	}
+	/*
+	 * TODO: 1. handle eventual MPA reject (upcall with ECONNREFUSED)
+	 *       2. finish mpa parameter check/negotiation
+	 */
+	memset(&qp_attrs, 0, sizeof qp_attrs);
+	qp_attrs.mpa.marker_rcv = 0;
+	qp_attrs.mpa.marker_snd = 0;
+	qp_attrs.mpa.crc = CONFIG_RDMA_SIW_CRC_ENFORCED;
+	qp_attrs.mpa.version = 1;
+	qp_attrs.ird = cep->ird;
+	qp_attrs.ord = cep->ord;
+	qp_attrs.llp_stream_handle = cep->llp.sock;
+	qp_attrs.state = SIW_QP_STATE_RTS;
+
+	/* Move socket RX/TX under QP control */
+	down_write(&qp->state_lock);
+	if (qp->attrs.state > SIW_QP_STATE_RTR) {
+		rv = -EINVAL;
+		up_write(&qp->state_lock);
+		goto error;
+	}
+	rv = siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE|
+					       SIW_QP_ATTR_LLP_HANDLE|
+					       SIW_QP_ATTR_ORD|
+					       SIW_QP_ATTR_IRD|
+					       SIW_QP_ATTR_MPA);
+
+	if (!rv) {
+		cep->state = SIW_EPSTATE_RDMA_MODE;
+		siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+			      IW_CM_EVENT_STATUS_OK);
+
+		up_write(&qp->state_lock);
+		return;
+	}
+	up_write(&qp->state_lock);
+error:
+	/*
+	 * failed socket handover returns responsibility:
+	 * inform iwcm and drop connection
+	 * TODO: 1. send MPA reject for MPA rev==1
+	 *	    if rv != ECONNREFUSED
+	 */
+	siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, rv);
+
+	if (!siw_cep_in_close(cep)) {
+
+		cep->cm_id->rem_ref(cep->cm_id);
+		cep->cm_id = NULL;
+		siw_cep_put(cep);
+
+		siw_socket_disassoc(cep->llp.sock);
+		sock_release(cep->llp.sock);
+		cep->llp.sock = NULL;
+
+		cep->qp = NULL;
+		siw_qp_put(qp);
+	}
+	cep->state = SIW_EPSTATE_CLOSED;
+}
+
+/*
+ * siw_accept_newconn - accept an incoming pending connection
+ *
+ */
+static void siw_accept_newconn(struct siw_cep *cep)
+{
+	struct socket		*s = cep->llp.sock;
+	struct socket		*new_s = NULL;
+	struct siw_cep		*new_cep = NULL;
+	int			rv = 0; /* debug only. should disappear */
+
+	new_cep = siw_cep_alloc();
+	if (!new_cep)
+		goto error;
+
+	if (siw_cm_alloc_work(new_cep, 4) != 0)
+		goto error;
+
+	/*
+	 * Copy saved socket callbacks from listening CEP
+	 * and assign new socket with new CEP
+	 */
+	new_cep->sk_state_change = cep->sk_state_change;
+	new_cep->sk_data_ready   = cep->sk_data_ready;
+	new_cep->sk_write_space  = cep->sk_write_space;
+	new_cep->sk_error_report = cep->sk_error_report;
+
+	rv = kernel_accept(s, &new_s, O_NONBLOCK);
+	if (rv != 0) {
+		/*
+		 * TODO: Already aborted by peer?
+		 * Is there anything we should do?
+		 */
+		dprint(DBG_CM|DBG_ON, "(cep=0x%p): ERROR: "
+			"kernel_accept(): rv=%d\n", cep, rv);
+		goto error;
+	}
+	new_cep->llp.sock = new_s;
+	siw_cep_get(new_cep);
+	new_s->sk->sk_user_data = new_cep;
+
+	dprint(DBG_CM, "(cep=0x%p, s=0x%p, new_s=0x%p): "
+		"New LLP connection accepted\n", cep, s, new_s);
+
+	rv = siw_sock_nodelay(new_s);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(cep=0x%p): ERROR: "
+			"siw_sock_nodelay(): rv=%d\n", cep, rv);
+		goto error;
+	}
+
+	rv = kernel_peername(new_s, &new_cep->llp.raddr);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(cep=0x%p): ERROR: "
+			"kernel_peername(): rv=%d\n", cep, rv);
+		goto error;
+	}
+	rv = kernel_localname(new_s, &new_cep->llp.laddr);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(cep=0x%p): ERROR: "
+			"kernel_localname(): rv=%d\n", cep, rv);
+		goto error;
+	}
+
+	/*
+	 * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep.
+	 */
+	new_cep->listen_cep = cep;
+	siw_cep_get(cep);
+
+	new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
+
+	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
+		/*
+		 * MPA REQ already queued
+		 */
+		dprint(DBG_CM, "(cep=0x%p): Immediate MPA req.\n", cep);
+
+		siw_proc_mpareq(new_cep);
+	}
+	return;
+
+error:
+	if (new_cep)
+		siw_cep_put(new_cep);
+
+	if (new_s) {
+		siw_socket_disassoc(new_s);
+		sock_release(new_s);
+	}
+	dprint(DBG_CM|DBG_ON, "(cep=0x%p): ERROR: rv=%d\n", cep, rv);
+}
+
+/*
+ * Expects params->pd_len in host byte order
+ *
+ * TODO: We might want to combine the arguments params and pdata to a single
+ * pointer to a struct siw_mpa_info as defined in siw_cm.h.
+ * This way, all private data parameters would be in a common struct.
+ */
+static int siw_send_mpareqrep(struct socket *s, struct mpa_rr_params *params,
+				char *key, char *pdata)
+{
+	struct mpa_rr	hdr;
+	struct kvec	iov[2];
+	struct msghdr	msg;
+
+	int		rv;
+	unsigned short 	pd_len = params->pd_len;
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&hdr, 0, sizeof hdr);
+	memcpy(hdr.key, key, 16);
+
+	/*
+	 * TODO: By adding a union to struct mpa_rr_params, it should be
+	 * possible to replace the next 4 statements by one
+	 */
+	hdr.params.r = params->r;
+	hdr.params.c = params->c;
+	hdr.params.m = params->m;
+	hdr.params.rev = params->rev;
+
+	if (pd_len > MPA_MAX_PRIVDATA)
+		return -EINVAL;
+
+	hdr.params.pd_len = htons(pd_len);
+
+	iov[0].iov_base = &hdr;
+	iov[0].iov_len = sizeof hdr;
+
+	if (pd_len) {
+		iov[1].iov_base = pdata;
+		iov[1].iov_len = pd_len;
+
+		rv =  kernel_sendmsg(s, &msg, iov, 2, pd_len + sizeof hdr);
+	} else
+		rv =  kernel_sendmsg(s, &msg, iov, 1, sizeof hdr);
+
+	return rv < 0 ? rv : 0;
+}
+
+/*
+ * siw_cm_upcall()
+ *
+ * Upcall to IWCM to inform about async connection events
+ */
+int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
+			    enum iw_cm_event_status status)
+{
+	struct iw_cm_event	event;
+	struct iw_cm_id 	*cm_id;
+
+	memset(&event, 0, sizeof event);
+	event.status = status;
+	event.event = reason;
+
+	if (cep->mpa.hdr.params.pd_len != 0) {
+		/*
+		 * hand over MPA private data
+		 */
+		event.private_data_len = cep->mpa.hdr.params.pd_len;
+		event.private_data = cep->mpa.pdata;
+		cep->mpa.hdr.params.pd_len = 0;
+
+#ifdef OFED_PRIVATE_DATA_BY_REFERENCE
+		/*
+		 * The cm_id->event_handler() is called in process
+		 * context below. Since we allocated a private data
+		 * buffer already, it would make sense to transfer the
+		 * ownership of this buffer to cm_id->event_handler()
+		 * instead of doing another copy at the iwcm.
+		 * This would require a change to
+		 * infiniband/drivers/core/iwcm.c::cm_event_handler().
+		 */
+		cep->mpa.pdata = NULL;
+#endif /* OFED_PRIVATE_DATA_BY_REFERENCE */
+	}
+	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
+	    reason == IW_CM_EVENT_CONNECT_REPLY) {
+		event.local_addr = cep->llp.laddr;
+		event.remote_addr = cep->llp.raddr;
+	}
+	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
+		event.provider_data = cep;
+		cm_id = cep->listen_cep->cm_id;
+	} else
+		cm_id = cep->cm_id;
+
+	dprint(DBG_CM, " (QP%d): cep=0x%p, id=0x%p, dev(id)=%s, "
+		"reason=%d, status=%d\n",
+		cep->qp ? QP_ID(cep->qp) : -1, cep, cm_id,
+		cm_id->device->name, reason, status);
+
+	return cm_id->event_handler(cm_id, &event);
+}
+
+static void siw_cm_work_handler(struct work_struct *w)
+{
+	struct siw_cm_work	*work;
+	struct siw_cep		*cep;
+	int rv;
+
+	work = container_of(w, struct siw_cm_work, work);
+	cep = work->cep;
+
+	dprint(DBG_CM, " (QP%d): WORK type: %d, CEP: 0x%p\n",
+		cep->qp ? QP_ID(cep->qp) : -1, work->type, cep);
+
+	switch (work->type) {
+
+	case SIW_CM_WORK_ACCEPT:
+
+		rv = siw_cep_set_inuse(cep);
+		if (rv > 0) {
+			if (cep->state == SIW_EPSTATE_LISTENING)
+				siw_accept_newconn(cep);
+
+			if (!siw_cep_set_free(cep)) {
+				siw_cm_release(cep);
+				siw_cep_put(cep);
+			}
+			break;
+		}
+		/*
+		 * CEP already scheduled for closing
+		 */
+		if (!rv) {
+			siw_cm_release(cep);
+			(void) siw_cep_set_free(cep);
+		}
+		break;
+
+	case SIW_CM_WORK_READ_MPAHDR:
+
+		rv = siw_cep_set_inuse(cep);
+		if (rv > 0) {
+			switch (cep->state) {
+
+			case SIW_EPSTATE_AWAIT_MPAREQ:
+
+				siw_proc_mpareq(cep);
+				break;
+
+			case SIW_EPSTATE_AWAIT_MPAREP:
+
+				siw_proc_mpareply(cep);
+				break;
+
+			default:
+				/*
+				 * CEP already moved out of MPA handshake.
+				 * Any connection management work is
+				 * already done. Silently ignore the
+				 * MPA packet.
+				 */
+				dprint(DBG_CM, "(): CEP not in MPA "
+					"handshake state: %d\n", cep->state);
+			}
+			if (!siw_cep_set_free(cep))
+				siw_cm_release(cep);
+
+			break;
+		}
+		/*
+		 * CEP already scheduled for closing
+		 */
+		if (!rv) {
+			siw_cm_release(cep);
+			(void) siw_cep_set_free(cep);
+		}
+		break;
+
+	case SIW_CM_WORK_CLOSE_LLP:
+		/*
+		 * QP scheduled LLP close
+		 */
+		dprint(DBG_CM, "(): SIW_CM_WORK_CLOSE_LLP, cep->state=%d\n",
+			cep->state);
+
+		cep->state = SIW_EPSTATE_CLOSED;
+
+		if (cep->llp.sock) {
+			siw_socket_disassoc(cep->llp.sock);
+			sock_release(cep->llp.sock);
+			cep->llp.sock = NULL;
+		}
+		if (cep->qp) {
+			siw_qp_llp_close(cep->qp);
+			siw_qp_put(cep->qp);
+			cep->qp = NULL;
+		}
+		if (cep->cm_id) {
+			siw_cm_upcall(cep, IW_CM_EVENT_CLOSE,
+				      IW_CM_EVENT_STATUS_OK);
+
+			cep->cm_id->rem_ref(cep->cm_id);
+			cep->cm_id = NULL;
+			siw_cep_put(cep);
+		}
+		break;
+
+	case SIW_CM_WORK_PEER_CLOSE:
+
+		dprint(DBG_CM, "(): SIW_CM_WORK_PEER_CLOSE, "
+			"cep->state=%d\n", cep->state);
+
+		if (cep->cm_id) {
+			switch (cep->state) {
+
+			case SIW_EPSTATE_AWAIT_MPAREP:
+				/*
+				 * MPA reply not received, but connection drop
+				 */
+				siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+						-ECONNRESET);
+				break;
+
+			case SIW_EPSTATE_RDMA_MODE:
+				/*
+				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
+				 *       to transition IWCM into CLOSING.
+				 *       FIXME: is that needed?
+				 */
+				siw_cm_upcall(cep, IW_CM_EVENT_DISCONNECT,
+					      IW_CM_EVENT_STATUS_OK);
+				siw_cm_upcall(cep, IW_CM_EVENT_CLOSE,
+					      IW_CM_EVENT_STATUS_OK);
+
+				break;
+
+			default:
+				/*
+				 * For these states there is no connection
+				 * known to the IWCM, not even for
+				 * SIW_EPSTATE_RECVD_MPAREQ.
+				 */
+				break;
+			}
+			cep->cm_id->rem_ref(cep->cm_id);
+			cep->cm_id = NULL;
+			siw_cep_put(cep);
+		}
+		if (cep->qp) {
+			siw_qp_llp_close(cep->qp);
+			siw_qp_put(cep->qp);
+			cep->qp = NULL;
+		}
+		if (cep->state != SIW_EPSTATE_CLOSED) {
+			cep->state = SIW_EPSTATE_CLOSED;
+			siw_socket_disassoc(cep->llp.sock);
+			sock_release(cep->llp.sock);
+			cep->llp.sock = NULL;
+		}
+
+		break;
+
+	default:
+		BUG();
+	}
+	dprint(DBG_CM, " (Exit): WORK type: %d, CEP: 0x%p\n", work->type, cep);
+	siw_put_work(work);
+	siw_cep_put(cep);
+}
+
+static struct workqueue_struct *siw_cm_wq;
+
+int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type)
+{
+	struct siw_cm_work *work = siw_get_work(cep);
+
+	dprint(DBG_CM, " (QP%d): WORK type: %d, CEP: 0x%p\n",
+		cep->qp ? QP_ID(cep->qp) : -1, type, cep);
+
+	if (!work) {
+		dprint(DBG_ON, " Failed\n");
+		return -ENOMEM;
+	}
+	work->type = type;
+	work->cep = cep;
+
+	siw_cep_get(cep);
+
+	INIT_WORK(&work->work, siw_cm_work_handler);
+	queue_work(siw_cm_wq, &work->work);
+
+	return 0;
+}
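+
+/*
+ * Example (taken from the data_ready upcall below): deferring MPA
+ * header processing to the CM workqueue is a single call,
+ *
+ *	siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR);
+ *
+ * which siw_cm_work_handler() above dispatches on work->type.
+ */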
+
+
+static void siw_cm_llp_data_ready(struct sock *sk, int flags)
+{
+	struct siw_cep	*cep;
+
+	read_lock(&sk->sk_callback_lock);
+
+	cep = sk_to_cep(sk);
+	if (!cep) {
+		WARN_ON(1);
+		goto out;
+	}
+
+	if (cep->conn_close)
+		goto out;
+
+	dprint(DBG_CM, "(): cep 0x%p, state: %d, flags %x\n", cep,
+		cep->state, flags);
+
+	switch (cep->state) {
+
+	case SIW_EPSTATE_RDMA_MODE:
+	case SIW_EPSTATE_LISTENING:
+
+		break;
+
+	case SIW_EPSTATE_AWAIT_MPAREQ:
+	case SIW_EPSTATE_AWAIT_MPAREP:
+
+		siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR);
+		break;
+
+	default:
+		dprint(DBG_CM, "(): Unexpected DATA, state %d\n", cep->state);
+		break;
+	}
+out:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+static void siw_cm_llp_write_space(struct sock *sk)
+{
+	struct siw_cep	*cep = sk_to_cep(sk);
+
+	if (cep)
+		dprint(DBG_CM, "(): cep: 0x%p, state: %d\n", cep, cep->state);
+}
+
+static void siw_cm_llp_error_report(struct sock *sk)
+{
+	struct siw_cep	*cep = sk_to_cep(sk);
+
+	dprint(DBG_CM, "(): error: %d, state: %d\n", sk->sk_err, sk->sk_state);
+
+	if (cep) {
+		cep->sk_error = sk->sk_err;
+		dprint(DBG_CM, "(): cep->state: %d\n", cep->state);
+		cep->sk_error_report(sk);
+	}
+}
+
+static void siw_cm_llp_state_change(struct sock *sk)
+{
+	struct siw_cep	*cep;
+	struct socket 	*s;
+	void (*orig_state_change)(struct sock *);
+
+
+	read_lock(&sk->sk_callback_lock);
+
+	cep = sk_to_cep(sk);
+	if (!cep) {
+		WARN_ON(1);
+		read_unlock(&sk->sk_callback_lock);
+		return;
+	}
+	orig_state_change = cep->sk_state_change;
+
+	s = sk->sk_socket;
+
+	dprint(DBG_CM, "(): cep: 0x%p, state: %d\n", cep, cep->state);
+
+	switch (sk->sk_state) {
+
+	case TCP_ESTABLISHED:
+		/*
+		 * Handle the accepting socket as a special case: the
+		 * only event possible here is a new connection.
+		 */
+		if (cep->conn_close)
+			break;
+
+		if (cep->state == SIW_EPSTATE_LISTENING &&
+		    siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT) != 0)
+			dprint(DBG_ON, "Cannot accept\n");
+		break;
+
+	case TCP_CLOSE:
+	case TCP_CLOSE_WAIT:
+		if (cep->state <= SIW_EPSTATE_LISTENING) {
+			dprint(DBG_CM, "() Close before accept()\n");
+			break;
+		}
+		if (cep->qp)
+			cep->qp->tx_ctx.tx_suspend = 1;
+
+		if (!siw_cep_in_close(cep))
+			siw_cm_queue_work(cep, SIW_CM_WORK_PEER_CLOSE);
+
+		break;
+
+	default:
+		dprint(DBG_CM, "Unexpected sock state %d\n", sk->sk_state);
+	}
+	read_unlock(&sk->sk_callback_lock);
+	orig_state_change(sk);
+}
+
+
+static int kernel_bindconnect(struct socket *s,
+			      struct sockaddr *laddr, int laddrlen,
+			      struct sockaddr *raddr, int raddrlen, int flags)
+{
+	int err, s_val = 1;
+	/*
+	 * XXX
+	 * Tentative fix. Should not be needed, but sometimes the iwcm
+	 * chooses ports that are already in use.
+	 */
+	err = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
+				sizeof s_val);
+	if (err < 0)
+		goto done;
+
+	err = s->ops->bind(s, laddr, laddrlen);
+	if (err < 0)
+		goto done;
+
+	err = s->ops->connect(s, raddr, raddrlen, flags);
+	if (err < 0)
+		goto done;
+
+	err = s->ops->getname(s, laddr, &s_val, 0);
+
+done:
+	return err;
+}
+
+
+int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+	struct siw_dev	*dev = siw_dev_ofa2siw(id->device);
+	struct siw_qp	*qp;
+	struct siw_cep	*cep = NULL;
+	struct socket 	*s = NULL;
+	struct sockaddr	*laddr, *raddr;
+
+	u16		pd_len = params->private_data_len;
+	int 		rv, size;
+
+	if (pd_len > MPA_MAX_PRIVDATA)
+		return -EINVAL;
+
+	qp = siw_qp_id2obj(dev, params->qpn);
+	BUG_ON(!qp);
+
+	dprint(DBG_CM, "(id=0x%p, QP%d): dev(id)=%s, l2dev=%s\n",
+		id, QP_ID(qp), dev->ofa_dev.name, dev->l2dev->name);
+	dprint(DBG_CM, "(id=0x%p, QP%d): laddr=(0x%x,%d), raddr=(0x%x,%d)\n",
+		id, QP_ID(qp),
+		ntohl(id->local_addr.sin_addr.s_addr),
+		ntohs(id->local_addr.sin_port),
+		ntohl(id->remote_addr.sin_addr.s_addr),
+		ntohs(id->remote_addr.sin_port));
+
+	down_write(&qp->state_lock);
+	if (qp->attrs.state > SIW_QP_STATE_RTR) {
+		rv = -EINVAL;
+		goto error;
+	}
+
+	laddr = (struct sockaddr *)&id->local_addr;
+	raddr = (struct sockaddr *)&id->remote_addr;
+
+	rv = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
+	if (rv < 0)
+		goto error;
+
+	size = SOCKBUFSIZE;
+	rv = kernel_setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&size,
+			       sizeof size);
+	if (rv < 0)
+		goto error;
+
+	rv = kernel_setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&size,
+			       sizeof size);
+	if (rv < 0)
+		goto error;
+
+	/*
+	 * NOTE: For simplicity, connect() is called in blocking
+	 * mode. This might be reconsidered for asynchronous connection
+	 * setup at the TCP level.
+	 */
+	rv = kernel_bindconnect(s, laddr, sizeof *laddr, raddr,
+				sizeof *raddr, 0);
+	if (rv != 0) {
+		dprint(DBG_CM, "(id=0x%p, QP%d): kernel_bindconnect: rv=%d\n",
+			id, QP_ID(qp), rv);
+		goto error;
+	}
+	rv = siw_sock_nodelay(s);
+	if (rv != 0) {
+		dprint(DBG_CM, "(id=0x%p, QP%d): siw_sock_nodelay(): rv=%d\n",
+			id, QP_ID(qp), rv);
+		goto error;
+	}
+	cep = siw_cep_alloc();
+	if (!cep) {
+		rv =  -ENOMEM;
+		goto error;
+	}
+
+	/* Associate QP with CEP */
+	siw_cep_get(cep);
+	qp->cep = cep;
+
+	/* siw_qp_get(qp) already done by QP lookup */
+	cep->qp = qp;
+
+	id->add_ref(id);
+	cep->cm_id = id;
+
+	rv = siw_cm_alloc_work(cep, 4);
+	if (rv != 0) {
+		rv = -ENOMEM;
+		goto error;
+	}
+	cep->mpa.hdr.params.pd_len = pd_len;
+	cep->ird = params->ird;
+	cep->ord = params->ord;
+	cep->state = SIW_EPSTATE_CONNECTING;
+
+	rv = kernel_peername(s, &cep->llp.raddr);
+	if (rv)
+		goto error;
+
+	rv = kernel_localname(s, &cep->llp.laddr);
+	if (rv)
+		goto error;
+
+	dprint(DBG_CM, "(id=0x%p, QP%d): pd_len = %u\n", id, QP_ID(qp), pd_len);
+	if (pd_len)
+		dprint(DBG_CM, "%d bytes private_data\n", pd_len);
+	/*
+	 * Associate CEP with socket
+	 */
+	siw_cep_socket_assoc(cep, s);
+
+	cep->state = SIW_EPSTATE_AWAIT_MPAREP;
+
+	rv = siw_send_mpareqrep(cep->llp.sock, &cep->mpa.hdr.params,
+				MPA_KEY_REQ, (char *)params->private_data);
+
+	/*
+	 * Reset the private data length: if the connection drops without
+	 * the peer sending an MPA reply, we would otherwise report a
+	 * stale data pointer with IW_CM_EVENT_CONNECT_REPLY.
+	 */
+	cep->mpa.hdr.params.pd_len = 0;
+
+	if (rv >= 0) {
+		dprint(DBG_CM, "(id=0x%p, QP%d): Exit\n", id, QP_ID(qp));
+		up_write(&qp->state_lock);
+		return 0;
+	}
+error:
+	up_write(&qp->state_lock);
+
+	dprint(DBG_ON, " Failed: %d\n", rv);
+
+	if (cep && !siw_cep_in_close(cep)) {
+
+		siw_socket_disassoc(s);
+		sock_release(s);
+		cep->llp.sock = NULL;
+
+		cep->qp = NULL;
+
+		cep->cm_id = NULL;
+		id->rem_ref(id);
+		siw_cep_put(cep);
+
+		qp->cep = NULL;
+		siw_cep_put(cep);
+
+		cep->state = SIW_EPSTATE_CLOSED;
+	} else if (!cep && s)
+		sock_release(s);
+
+	siw_qp_put(qp);
+
+	return rv;
+}
+
+/*
+ * siw_accept - Let SoftiWARP accept an RDMA connection request
+ *
+ * @id:		New connection management id to be used for accepted
+ *		connection request
+ * @params:	Connection parameters provided by ULP for accepting connection
+ *
+ * Transition QP to RTS state, associate new CM id @id with accepted CEP
+ * and get prepared for TCP input by installing socket callbacks.
+ * Then send MPA Reply and generate the "connection established" event.
+ * Socket callbacks must be installed before sending MPA Reply, because
+ * the latter may cause a first RDMA message to arrive from the RDMA Initiator
+ * side very quickly, at which time the socket callbacks must be ready.
+ */
+int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+	struct siw_dev		*dev = siw_dev_ofa2siw(id->device);
+	struct siw_cep		*cep = (struct siw_cep *)id->provider_data;
+	struct siw_qp		*qp;
+	struct siw_qp_attrs	qp_attrs;
+	char			*pdata = NULL;
+	int 			rv;
+
+retry:
+	rv = siw_cep_set_inuse(cep);
+	if (rv < 0) {
+		dprint(DBG_CM, "(id=0x%p, cep=0x%p): CEP in use\n", id, cep);
+		wait_event(cep->waitq, !cep->in_use);
+		goto retry;
+	}
+	if (!rv) {
+		dprint(DBG_CM, "(id=0x%p, cep=0x%p): CEP in close\n", id, cep);
+		(void) siw_cep_set_free(cep);
+		return -EINVAL;
+	}
+	if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
+		if (cep->state == SIW_EPSTATE_CLOSED) {
+
+			dprint(DBG_CM, "(id=0x%p): Out of State\n", id);
+			(void) siw_cep_set_free(cep);
+
+			siw_cep_put(cep);
+			return -ECONNRESET;
+		}
+		BUG();
+	}
+	/* clear iwcm reference to CEP from IW_CM_EVENT_CONNECT_REQUEST */
+	siw_cep_put(cep);
+
+	qp = siw_qp_id2obj(dev, params->qpn);
+	BUG_ON(!qp); /* The OFA core should prevent this */
+
+	down_write(&qp->state_lock);
+	if (qp->attrs.state > SIW_QP_STATE_RTR) {
+		rv = -EINVAL;
+		goto unlock;
+	}
+
+	dprint(DBG_CM, "(id=0x%p, QP%d): dev(id)=%s\n",
+		id, QP_ID(qp), dev->ofa_dev.name);
+
+	if (params->ord > qp->attrs.ord || params->ird > qp->attrs.ird) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p, QP%d): "
+			"ORD: %d (max: %d), IRD: %d (max: %d)\n",
+			id, QP_ID(qp),
+			params->ord, qp->attrs.ord,
+			params->ird, qp->attrs.ird);
+		rv = -EINVAL;
+		goto unlock;
+	}
+	if (params->private_data_len > MPA_MAX_PRIVDATA) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p, QP%d): "
+			"Private data too long: %d (max: %d)\n",
+			id, QP_ID(qp),
+			params->private_data_len, MPA_MAX_PRIVDATA);
+		rv =  -EINVAL;
+		goto unlock;
+	}
+	cep->cm_id = id;
+	id->add_ref(id);
+
+	memset(&qp_attrs, 0, sizeof qp_attrs);
+	qp_attrs.ord = params->ord;
+	qp_attrs.ird = params->ird;
+	qp_attrs.llp_stream_handle = cep->llp.sock;
+
+	/*
+	 * TODO: Add MPA negotiation
+	 */
+	qp_attrs.mpa.marker_rcv = 0;
+	qp_attrs.mpa.marker_snd = 0;
+	qp_attrs.mpa.crc = CONFIG_RDMA_SIW_CRC_ENFORCED;
+	qp_attrs.mpa.version = 0;
+	qp_attrs.state = SIW_QP_STATE_RTS;
+
+	dprint(DBG_CM, "(id=0x%p, QP%d): Moving to RTS\n", id, QP_ID(qp));
+
+	/* Associate QP with CEP */
+	siw_cep_get(cep);
+	qp->cep = cep;
+
+	/* siw_qp_get(qp) already done by QP lookup */
+	cep->qp = qp;
+
+	cep->state = SIW_EPSTATE_RDMA_MODE;
+
+	/* Move socket RX/TX under QP control */
+	rv = siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE|
+					  SIW_QP_ATTR_LLP_HANDLE|
+					  SIW_QP_ATTR_ORD|
+					  SIW_QP_ATTR_IRD|
+					  SIW_QP_ATTR_MPA);
+	up_write(&qp->state_lock);
+
+	if (rv)
+		goto error;
+
+
+	/*
+	 * TODO: It might be more elegant and concise to check the
+	 * private data length cep->mpa.hdr.params.pd_len
+	 * inside siw_send_mpareqrep().
+	 */
+	if (params->private_data_len) {
+		pdata = (char *)params->private_data;
+
+		dprint(DBG_CM, "(id=0x%p, QP%d): %d bytes private_data\n",
+				id, QP_ID(qp), params->private_data_len);
+	}
+	cep->mpa.hdr.params.pd_len = params->private_data_len;
+
+	dprint(DBG_CM, "(id=0x%p, QP%d): Sending MPA Reply\n", id, QP_ID(qp));
+
+	rv = siw_send_mpareqrep(cep->llp.sock, &cep->mpa.hdr.params,
+				MPA_KEY_REP, pdata);
+	if (!rv) {
+		/*
+		 * FIXME: In order to ensure that the first FPDU will be sent
+		 * from the RDMA Initiator side, the "connection established"
+		 * event should be delayed until Softiwarp has received the
+		 * first FPDU from the RDMA Initiator side.
+		 * Alternatively, Softiwarp could prevent this side from
+		 * sending a first FPDU until one has been received.
+		 *
+		 * The two alternatives above will work if
+		 * (1) the RDMA application is iWARP standards compliant
+		 *     by sending its first RDMA payload from the
+		 *     RDMA Initiator side, or
+		 * (2) the RDMA Initiator side RNIC inserts an under-cover
+		 *     zero-length RDMA operation (negotiated through an
+		 *     extended MPA Request/Reply handshake) such as a
+		 *     zero-length RDMA Write or Read.
+		 * Note that (2) would require an extension of the MPA RFC.
+		 *
+		 * A third alternative (which may be the easiest for now) is to
+		 * return an error to an RDMA application that attempts to send
+		 * the first RDMA payload from the RDMA Responder side.
+		 */
+		siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED,
+				IW_CM_EVENT_STATUS_OK);
+
+		if (!siw_cep_set_free(cep))
+			siw_cm_release(cep);
+
+		dprint(DBG_CM, "(id=0x%p, QP%d): Exit\n", id, QP_ID(qp));
+		return 0;
+	}
+
+error:
+	if (siw_cep_set_free(cep)) {
+
+		siw_socket_disassoc(cep->llp.sock);
+		sock_release(cep->llp.sock);
+		cep->llp.sock = NULL;
+
+		cep->state = SIW_EPSTATE_CLOSED;
+
+		cep->cm_id->rem_ref(id);
+		cep->cm_id = NULL;
+
+		if (qp->cep) {
+			siw_cep_put(cep);
+			qp->cep = NULL;
+		}
+		cep->qp = NULL;
+		siw_qp_put(qp);
+	}
+	return rv;
+unlock:
+	up_write(&qp->state_lock);
+	goto error;
+}
+
+/*
+ * siw_reject()
+ *
+ * Local connection reject case. Send private data back to peer,
+ * close connection and dereference connection id.
+ */
+int siw_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
+{
+	struct siw_cep	*cep = (struct siw_cep *)id->provider_data;
+
+	dprint(DBG_CM, "(id=0x%p): cep->state=%d\n", id, cep->state);
+	dprint(DBG_CM, " Reject: %s\n", plen ? (char *)pdata:"(no data)");
+
+	if (!siw_cep_in_close(cep)) {
+
+		dprint(DBG_ON, " Sending REJECT not yet implemented\n");
+
+		siw_socket_disassoc(cep->llp.sock);
+		sock_release(cep->llp.sock);
+		cep->llp.sock = NULL;
+
+		siw_cep_put(cep);
+		cep->state = SIW_EPSTATE_CLOSED;
+	} else {
+		dprint(DBG_CM, " (id=0x%p): Connection lost\n", id);
+	}
+
+	/*
+	 * clear iwcm reference to CEP from
+	 * IW_CM_EVENT_CONNECT_REQUEST
+	 */
+	siw_cep_put(cep);
+
+	return 0;
+}
+
+int siw_listen_address(struct iw_cm_id *id, int backlog, struct sockaddr *laddr)
+{
+	struct socket 		*s;
+	struct siw_cep		*cep = NULL;
+	int 			rv = 0, s_val;
+
+	rv = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
+	if (rv < 0) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p): ERROR: "
+			"sock_create(): rv=%d\n", id, rv);
+		return rv;
+	}
+#ifdef SIW_ON_BGP
+	if (backlog >= 100 && backlog < 4096)
+		backlog = 4096;
+#endif
+
+	s_val = SOCKBUFSIZE;
+	rv = kernel_setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&s_val,
+			       sizeof s_val);
+	if (rv)
+		goto error;
+
+	rv = kernel_setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&s_val,
+			       sizeof s_val);
+	if (rv)
+		goto error;
+
+	/*
+	 * Probably to be removed later. Allows binding a local port
+	 * that is still in TIME_WAIT from the last close.
+	 */
+	s_val = 1;
+	rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
+			       sizeof s_val);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p): ERROR: "
+			"kernel_setsockopt(): rv=%d\n", id, rv);
+		goto error;
+	}
+
+	rv = s->ops->bind(s, laddr, sizeof *laddr);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p): ERROR: bind(): rv=%d\n",
+			id, rv);
+		goto error;
+	}
+
+	cep = siw_cep_alloc();
+	if (!cep) {
+		rv = -ENOMEM;
+		goto error;
+	}
+	siw_cep_socket_assoc(cep, s);
+
+	rv = siw_cm_alloc_work(cep, backlog);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p): ERROR: "
+			"siw_cm_alloc_work(backlog=%d): rv=%d\n",
+			id, backlog, rv);
+		goto error;
+	}
+
+	rv = s->ops->listen(s, backlog);
+	if (rv != 0) {
+		dprint(DBG_CM|DBG_ON, "(id=0x%p): ERROR: listen() rv=%d\n",
+			id, rv);
+		goto error;
+	}
+
+	/*
+	 * TODO: Do we really need the copies of local_addr and remote_addr
+	 *	 in CEP ???
+	 */
+	memcpy(&cep->llp.laddr, &id->local_addr, sizeof cep->llp.laddr);
+	memcpy(&cep->llp.raddr, &id->remote_addr, sizeof cep->llp.raddr);
+
+	cep->cm_id = id;
+	id->add_ref(id);
+
+	/*
+	 * In case of a wildcard rdma_listen on a multi-homed device,
+	 * a listener's IWCM id is associated with more than one listening CEP.
+	 *
+	 * We currently use id->provider_data in three different ways:
+	 *
+	 * o For a listener's IWCM id, id->provider_data points to
+	 *   the list_head of the list of listening CEPs.
+	 *   Uses: siw_create_listen(), siw_destroy_listen()
+	 *
+	 * o For a passive-side IWCM id, id->provider_data points to
+	 *   the CEP itself. This is a consequence of
+	 *   - siw_cm_upcall() setting event.provider_data = cep and
+	 *   - the IWCM's cm_conn_req_handler() setting provider_data of the
+	 *     new passive-side IWCM id equal to event.provider_data
+	 *   Uses: siw_accept(), siw_reject()
+	 *
+	 * o For an active-side IWCM id, id->provider_data is not used at all.
+	 *
+	 */
+	if (!id->provider_data) {
+		id->provider_data = kmalloc(sizeof(struct list_head),
+					    GFP_KERNEL);
+		if (!id->provider_data) {
+			rv = -ENOMEM;
+			goto error;
+		}
+		INIT_LIST_HEAD((struct list_head *)id->provider_data);
+	}
+
+	dprint(DBG_CM, "(id=0x%p): dev(id)=%s, l2dev=%s, "
+		"id->provider_data=0x%p, cep=0x%p\n",
+		id, id->device->name,
+		siw_dev_ofa2siw(id->device)->l2dev->name,
+		id->provider_data, cep);
+
+	list_add_tail(&cep->list, (struct list_head *)id->provider_data);
+	cep->state = SIW_EPSTATE_LISTENING;
+	return 0;
+
+error:
+	dprint(DBG_ON, " Failed: %d\n", rv);
+
+	if (cep) {
+		cep->llp.sock = NULL;
+		siw_socket_disassoc(s);
+		cep->state = SIW_EPSTATE_CLOSED;
+		siw_cep_put(cep);
+	}
+	sock_release(s);
+	return rv;
+}
+
+
+/*
+ * siw_create_listen - Create resources for a listener's IWCM ID @id
+ *
+ * Listens on the socket addresses id->local_addr and id->remote_addr.
+ * We support listening on multi-homed devices, i.e., Softiwarp devices
+ * whose underlying net_device is associated with multiple IP addresses.
+ * Wildcard listening (listening with zero IP address) is also supported.
+ *
+ * There are three design options for Softiwarp device management supporting
+ * - multiple physical Ethernet ports, i.e., multiple net_device instances, and
+ * - multiple IP addresses associated with net_device,
+ * as follows:
+ *
+ *    Option 1: One Softiwarp device per net_device and
+ *              IP address associated with the net_device
+ *    Option 2: One Softiwarp device per net_device
+ *              (and all IP addresses associated with the net_device)
+ *    Option 3: Single Softiwarp device for all net_device instances
+ *              (and all IP addresses associated with these instances)
+ *
+ * We currently use Option 2, registering a separate siw_dev for
+ * each net_device. Consequently, siw_create_listen() (called separately
+ * by the IWCM for each Softiwarp device) handles the associated IP address(es)
+ * as follows:
+ *
+ * - If the listener's @id provides a specific local IP address, at most one
+ *   listening socket is created and associated with @id.
+ *
+ * - If the listener's @id provides the wildcard (zero) local IP address,
+ *   a separate listen is performed for each local IP address of the device
+ *   by creating a listening socket and binding to that local IP address.
+ *   This avoids attempts to bind to the wildcard (zero) IP address
+ *   on multiple devices, which fails with -EADDRINUSE on the second and
+ *   all subsequent devices.
+ *
+ *   For the given IWCM and Option 2 above, the alternative approach of doing
+ *   a single wildcard listen by creating one listening socket and binding it
+ *   to the wildcard IP address is not a good idea if
+ *   - there is more than one Softiwarp device (e.g., for lo and eth0), or
+ *   - there are non-Softiwarp iWARP devices that cannot cooperate.
+ */
+int siw_create_listen(struct iw_cm_id *id, int backlog)
+{
+	struct ib_device	*ofa_dev = id->device;
+	struct siw_dev		*dev = siw_dev_ofa2siw(ofa_dev);
+	int			rv = 0;
+
+	dprint(DBG_CM, "(id=0x%p): dev(id)=%s, l2dev=%s backlog=%d\n",
+		id, ofa_dev->name, dev->l2dev->name, backlog);
+
+#ifdef SIW_ON_BGP
+	if (backlog >= 100 && backlog < 8192)
+		backlog = 8192;
+#endif
+	/*
+	 * IPv4/v6 design differences regarding multi-homing
+	 * propagate up to iWARP:
+	 * o For IPv4, use dev->l2dev->ip_ptr
+	 * o For IPv6, use dev->l2dev->ipv6_ptr
+	 */
+	if (id->local_addr.sin_family == AF_INET) {
+		/* IPv4 */
+		struct sockaddr_in	laddr = id->local_addr;
+		u8			*l_ip, *r_ip;
+		struct in_device 	*in_dev;
+
+		l_ip = (u8 *) &id->local_addr.sin_addr.s_addr;
+		r_ip = (u8 *) &id->remote_addr.sin_addr.s_addr;
+		dprint(DBG_CM, "(id=0x%p): "
+			"laddr(id)  : ipv4=%d.%d.%d.%d, port=%d; "
+			"raddr(id)  : ipv4=%d.%d.%d.%d, port=%d\n",
+			id,
+			l_ip[0], l_ip[1], l_ip[2], l_ip[3],
+			ntohs(id->local_addr.sin_port),
+			r_ip[0], r_ip[1], r_ip[2], r_ip[3],
+			ntohs(id->remote_addr.sin_port));
+
+		in_dev = in_dev_get(dev->l2dev);
+		if (!in_dev) {
+			dprint(DBG_CM|DBG_ON, "(id=0x%p): "
+				"l2dev has no in_device\n", id);
+			return -ENODEV;
+		}
+
+		/*
+		 * If in_dev is not configured, in_dev->ifa_list may be empty
+		 */
+		for_ifa(in_dev) {
+			/*
+			 * Create a listening socket if id->local_addr
+			 * contains the wildcard IP address OR
+			 * the IP address of the interface.
+			 */
+#ifdef KERNEL_VERSION_PRE_2_6_26
+			if (ZERONET(id->local_addr.sin_addr.s_addr) ||
+#else
+			if (ipv4_is_zeronet(id->local_addr.sin_addr.s_addr) ||
+#endif
+					id->local_addr.sin_addr.s_addr ==
+					ifa->ifa_address) {
+				laddr.sin_addr.s_addr = ifa->ifa_address;
+
+				l_ip = (u8 *) &laddr.sin_addr.s_addr;
+				dprint(DBG_CM, "(id=0x%p): "
+					"laddr(bind): ipv4=%d.%d.%d.%d,"
+					" port=%d\n", id,
+					l_ip[0], l_ip[1], l_ip[2],
+					l_ip[3], ntohs(laddr.sin_port));
+
+				rv = siw_listen_address(id, backlog,
+						(struct sockaddr *)&laddr);
+				if (rv)
+					break;
+			}
+		}
+		endfor_ifa(in_dev);
+		in_dev_put(in_dev);
+
+		if (rv) {
+			/*
+			 * TODO: Cleanup resources already associated with
+			 *	 id->provider_data
+			 */
+			dprint(DBG_CM|DBG_ON, "(id=0x%p): "
+				"TODO: Cleanup resources\n", id);
+		}
+
+	} else {
+		/* IPv6 */
+		dprint(DBG_CM|DBG_ON, "(id=0x%p): TODO: IPv6 support\n", id);
+	}
+	if (!rv)
+		dprint(DBG_CM, "(id=0x%p): Success\n", id);
+
+	return rv;
+}
+
+
+int siw_destroy_listen(struct iw_cm_id *id)
+{
+	struct list_head	*p, *tmp;
+	struct siw_cep		*cep;
+
+	dprint(DBG_CM, "(id=0x%p): dev(id)=%s, l2dev=%s\n",
+		id, id->device->name,
+		siw_dev_ofa2siw(id->device)->l2dev->name);
+
+	if (!id->provider_data) {
+		/*
+		 * TODO: See if there's a way to avoid getting any
+		 *       listener ids without a list of CEPs
+		 */
+		dprint(DBG_CM, "(id=0x%p): Listener id: no CEP(s)\n", id);
+		return 0;
+	}
+
+	/*
+	 * In case of a wildcard rdma_listen on a multi-homed device,
+	 * a listener's IWCM id is associated with more than one listening CEP.
+	 */
+	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
+
+		cep = list_entry(p, struct siw_cep, list);
+		list_del(p);
+
+		if (siw_cep_set_inuse(cep) > 0) {
+
+			cep->conn_close = 1;
+
+			siw_socket_disassoc(cep->llp.sock);
+			sock_release(cep->llp.sock);
+			cep->llp.sock = NULL;
+			id->rem_ref(id);
+
+			cep->state = SIW_EPSTATE_CLOSED;
+			/*
+			 * Do not set the CEP free again. The CEP is dead.
+			 * (void) siw_cep_set_free(cep);
+			 */
+		} else
+			cep->state = SIW_EPSTATE_CLOSED;
+
+		siw_cep_put(cep);
+	}
+	kfree(id->provider_data);
+	id->provider_data = NULL;
+
+	return 0;
+}
+
+int __init siw_cm_init(void)
+{
+	/*
+	 * Use a single-threaded workqueue to guarantee strict ordering
+	 */
+	siw_cm_wq = create_singlethread_workqueue("siw_cm_wq");
+	if (!siw_cm_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void __exit siw_cm_exit(void)
+{
+	if (siw_cm_wq) {
+		flush_workqueue(siw_cm_wq);
+		destroy_workqueue(siw_cm_wq);
+	}
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_cm.h b/drivers/infiniband/hw/softiwarp/siw_cm.h
new file mode 100644
index 0000000..c6cd36d
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_cm.h
@@ -0,0 +1,161 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_CM_H
+#define _SIW_CM_H
+
+#include <net/sock.h>
+#include <linux/tcp.h>
+
+#include <rdma/iw_cm.h>
+
+
+enum siw_cep_state {
+	SIW_EPSTATE_IDLE = 1,
+	SIW_EPSTATE_LISTENING,
+	SIW_EPSTATE_CONNECTING,
+	SIW_EPSTATE_AWAIT_MPAREQ,
+	SIW_EPSTATE_RECVD_MPAREQ,
+	SIW_EPSTATE_AWAIT_MPAREP,
+	SIW_EPSTATE_RDMA_MODE,
+	SIW_EPSTATE_CLOSED
+};
+
+struct siw_mpa_info {
+	struct mpa_rr	hdr; 	/* peer mpa hdr in host byte order */
+	char		*pdata;
+	int		bytes_rcvd;
+};
+
+struct siw_llp_info {
+	struct socket		*sock;
+	struct sockaddr_in	laddr;	/* redundant with socket info above */
+	struct sockaddr_in	raddr;	/* ditto, consider removal */
+	struct siw_sk_upcalls	sk_def_upcalls;
+};
+
+struct siw_cep {
+	struct iw_cm_id		*cm_id;
+
+	/*
+	 * The provider_data element of a listener IWCM ID
+	 * refers to a list of one or more listener CEPs
+	 */
+	struct list_head	list;
+
+	struct siw_cep		*listen_cep;
+	struct siw_qp		*qp;
+	spinlock_t		lock;
+	wait_queue_head_t	waitq;
+	struct kref		ref;
+	enum siw_cep_state	state;
+	short			conn_close; /* sched. for closing or closed */
+	short			in_use;
+	struct siw_cm_work	*mpa_timer;
+	struct list_head	work_freelist;
+	struct siw_llp_info	llp;
+	struct siw_mpa_info	mpa;
+	int			ord;
+	int			ird;
+	int			sk_error; /* not (yet) used XXX */
+
+	/* Saved upcalls of socket llp.sock */
+	void    (*sk_state_change)(struct sock *sk);
+	void    (*sk_data_ready)(struct sock *sk, int bytes);
+	void    (*sk_write_space)(struct sock *sk);
+	void    (*sk_error_report)(struct sock *sk);
+};
+
+enum siw_work_type {
+	SIW_CM_WORK_ACCEPT 	= 1,
+	SIW_CM_WORK_READ_MPAHDR,
+	SIW_CM_WORK_CLOSE_LLP,		/* close socket */
+	SIW_CM_WORK_PEER_CLOSE,		/* socket indicated peer close */
+	SIW_CM_WORK_MPATIMEOUT		/* to be done ! */
+};
+
+struct siw_cm_work {
+	struct work_struct	work;
+	struct list_head	list;
+	enum siw_work_type	type;
+	struct siw_cep	*cep;
+};
+
+extern int siw_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
+extern int siw_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
+extern int siw_reject(struct iw_cm_id *, const void *, u8);
+extern int siw_create_listen(struct iw_cm_id *, int);
+extern int siw_destroy_listen(struct iw_cm_id *);
+
+extern int siw_cm_upcall(struct siw_cep *, enum iw_cm_event_type,
+			    enum iw_cm_event_status);
+
+extern void siw_cep_upcall(struct siw_cep *, enum iw_cm_event_type);
+
+extern void siw_cep_put(struct siw_cep *);
+extern void siw_cep_get(struct siw_cep *);
+extern int siw_cep_in_close(struct siw_cep *);
+
+extern int siw_cm_queue_work(struct siw_cep *, enum siw_work_type);
+
+extern int siw_cm_init(void);
+extern void siw_cm_exit(void);
+
+/*
+ * TCP socket interface
+ */
+#define sk_to_qp(sk)	(((struct siw_cep *)((sk)->sk_user_data))->qp)
+#define sk_to_cep(sk)	((struct siw_cep *)((sk)->sk_user_data))
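+
+/*
+ * Sketch of the intended use, as in siw_cm_llp_data_ready() in
+ * siw_cm.c: socket upcalls take sk_callback_lock before dereferencing
+ * sk_user_data via these macros:
+ *
+ *	read_lock(&sk->sk_callback_lock);
+ *	cep = sk_to_cep(sk);
+ *	...
+ *	read_unlock(&sk->sk_callback_lock);
+ */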
+
+/*
+ * Should we use tcp_current_mss()?
+ * But it is not exported by the kernel.
+ */
+static inline unsigned int get_tcp_mss(struct sock *sk)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
+	return ((struct tcp_sock *)sk)->xmit_size_goal;
+#else
+	return ((struct tcp_sock *)sk)->xmit_size_goal_segs *
+			((struct tcp_sock *)sk)->mss_cache;
+#endif
+
+}
+
+
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw_cq.c b/drivers/infiniband/hw/softiwarp/siw_cq.c
new file mode 100644
index 0000000..5e642bc
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_cq.c
@@ -0,0 +1,245 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+static int siw_wc_op_siw2ofa[SIW_WR_NUM] = {
+	[SIW_WR_RDMA_WRITE]		= IB_WC_RDMA_WRITE,
+	[SIW_WR_RDMA_WRITE_WITH_IMM]	= IB_WC_RDMA_WRITE,
+	[SIW_WR_SEND]			= IB_WC_SEND,
+	[SIW_WR_SEND_WITH_IMM]		= IB_WC_SEND,
+	[SIW_WR_RDMA_READ_REQ]		= IB_WC_RDMA_READ,
+	[SIW_WR_ATOMIC_CMP_AND_SWP]	= IB_WC_COMP_SWAP,
+	[SIW_WR_ATOMIC_FETCH_AND_ADD]	= IB_WC_FETCH_ADD,
+	[SIW_WR_BIND_MW]		= IB_WC_BIND_MW,
+#if (OFA_VERSION >= 140)
+	[SIW_WR_FASTREG]		= IB_WC_FAST_REG_MR,
+	[SIW_WR_INVAL_STAG]		= IB_WC_LOCAL_INV,
+#endif
+	[SIW_WR_RECEIVE]		= IB_WC_RECV,
+	[SIW_WR_RDMA_READ_RESP]		= 0 /* not used */
+};
+
+/*
+ * translate wc into ofa syntax
+ */
+static void siw_wc_siw2ofa(struct siw_wqe *siw_wc, struct ib_wc *ofa_wc)
+{
+	memset(ofa_wc, 0, sizeof *ofa_wc);
+
+	ofa_wc->wr_id = wr_id(siw_wc);
+	ofa_wc->status = siw_wc->wc_status;
+	ofa_wc->byte_len = siw_wc->processed;
+	ofa_wc->qp = &siw_wc->qp->ofa_qp;
+
+	BUG_ON(wr_type(siw_wc) >= SIW_WR_NUM);
+	ofa_wc->opcode = siw_wc_op_siw2ofa[wr_type(siw_wc)];
+	/*
+	 * ofa_wc->imm_data = 0;
+	 * ofa_wc->vendor_err = 0;
+	 * ofa_wc->src_qp = 0;
+	 * ofa_wc->wc_flags = 0;	TODO: add immediate data support
+	 * ofa_wc->pkey_index = 0;
+	 * ofa_wc->slid = 0;
+	 * ofa_wc->sl = 0;
+	 * ofa_wc->dlid_path_bits = 0;
+	 * ofa_wc->port_num = 0;
+	 */
+}
+
+/*
+ * Reap one CQE from the CQ.
+ *
+ * Caller must hold qp read lock
+ *
+ * TODO: Provide routine which can read more than one CQE
+ */
+int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *ofa_wc)
+{
+	struct siw_wqe	*cqe = NULL;
+	unsigned long flags;
+
+	lock_cq_rxsave(cq, flags);
+
+	if (!list_empty(&cq->queue)) {
+		cqe = list_first_wqe(&cq->queue);
+		list_del(&cqe->list);
+		atomic_dec(&cq->qlen);
+	}
+	unlock_cq_rxsave(cq, flags);
+
+	if (cqe) {
+		siw_wc_siw2ofa(cqe, ofa_wc);
+
+		dprint(DBG_WR, " QP%d, CQ%d: Reap WQE type: %d, p: %p\n",
+			  QP_ID(cqe->qp), OBJ_ID(cq), wr_type(cqe), cqe);
+
+		siw_wqe_put(cqe);
+		return 1;
+	}
+	return 0;
+}
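+
+/*
+ * A poll-style consumer would loop over siw_reap_cqe(); a minimal
+ * sketch, assuming the OFA poll_cq entry point (siw_poll_cq(),
+ * registered in siw_main.c) follows this pattern, with wc and
+ * num_entries supplied by the caller:
+ *
+ *	for (i = 0; i < num_entries; i++)
+ *		if (siw_reap_cqe(cq, wc + i) == 0)
+ *			break;
+ */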
+
+/*
+ * siw_cq_flush()
+ *
+ * Flush all CQ elements. No CQ lock is taken.
+ */
+void siw_cq_flush(struct siw_cq *cq)
+{
+	struct list_head	*pos, *n;
+	struct siw_wqe		*cqe;
+
+	dprint(DBG_CM|DBG_OBJ, "(CQ%d:) Enter\n", OBJ_ID(cq));
+
+	if (list_empty(&cq->queue))
+		return;
+
+	list_for_each_safe(pos, n, &cq->queue) {
+		cqe = list_entry_wqe(pos);
+		list_del(&cqe->list);
+
+		dprint(DBG_OBJ|DBG_WR, " WQE: 0x%llu:, type: %d, p: %p\n",
+			(unsigned long long)wr_id(cqe),
+			wr_type(cqe), cqe);
+
+		siw_wqe_put(cqe);
+	}
+	atomic_set(&cq->qlen, 0);
+}
+
+
+
+/*
+ * siw_rq_complete()
+ *
+ * Appends RQ/SRQ WQE to CQ, if assigned.
+ * Must be called with qp state read locked
+ */
+void siw_rq_complete(struct siw_wqe *wqe, struct siw_qp *qp)
+{
+	struct siw_cq	*cq = qp->rcq;
+	unsigned long flags;
+
+	dprint(DBG_OBJ|DBG_WR, " QP%d WQE: 0x%llu:, type: %d, p: %p\n",
+		QP_ID(qp),
+		(unsigned long long)wr_id(wqe), wr_type(wqe), wqe);
+
+	if (cq) {
+		lock_cq_rxsave(cq, flags);
+
+		list_add_tail(&wqe->list, &cq->queue);
+		atomic_inc(&cq->qlen); /* FIXME: test overflow */
+
+		unlock_cq_rxsave(cq, flags);
+
+		/*
+		 * SRQ space was already incremented when WQE was fetched
+		 * by some QP
+		 */
+		if (!qp->srq)	/* XXX to be deferred to reaping ? */
+			atomic_inc(&qp->rq_space);
+
+		if (cq->ofa_cq.comp_handler != NULL &&
+			((cq->notify & SIW_CQ_NOTIFY_ALL) ||
+			 (cq->notify == SIW_CQ_NOTIFY_SOLICITED &&
+			  wr_flags(wqe) & IB_SEND_SOLICITED))) {
+				cq->notify = SIW_CQ_NOTIFY_NOT;
+				(*cq->ofa_cq.comp_handler)
+					(&cq->ofa_cq, cq->ofa_cq.cq_context);
+		}
+	} else {
+		if (!qp->srq)
+			atomic_inc(&qp->rq_space);
+		siw_wqe_put(wqe);
+	}
+}
+
+/*
+ * siw_sq_complete()
+ * Appends a list of completed SQ WQEs to the CQ, if assigned.
+ * Must be called with qp state read locked
+ */
+void siw_sq_complete(struct list_head *c_list, struct siw_qp *qp, int num,
+		     enum ib_send_flags send_flags)
+{
+	struct siw_cq		*cq = qp->scq;
+	unsigned long flags;
+
+	if (cq) {
+		lock_cq_rxsave(cq, flags);
+
+		list_splice_tail(c_list, &cq->queue);
+		atomic_add(num, &cq->qlen); /* FIXME: test overflow */
+
+
+		dprint(DBG_WR, " CQ%d: add %d from QP%d, CQ len %d\n",
+			OBJ_ID(cq), num, QP_ID(qp), atomic_read(&cq->qlen));
+
+		/* XXX to be deferred to reaping */
+		atomic_add(num, &qp->sq_space);
+
+		if (cq->ofa_cq.comp_handler != NULL &&
+			((cq->notify & SIW_CQ_NOTIFY_ALL) ||
+			 (cq->notify == SIW_CQ_NOTIFY_SOLICITED &&
+			  send_flags & IB_SEND_SOLICITED))) {
+				cq->notify = SIW_CQ_NOTIFY_NOT;
+				(*cq->ofa_cq.comp_handler)
+					(&cq->ofa_cq, cq->ofa_cq.cq_context);
+		}
+		unlock_cq_rxsave(cq, flags);
+	} else {
+		struct list_head *pos;
+
+		list_for_each(pos, c_list)
+			siw_wqe_put(list_entry_wqe(pos));
+
+		atomic_add(num, &qp->sq_space);
+	}
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_debug.c b/drivers/infiniband/hw/softiwarp/siw_debug.c
new file mode 100644
index 0000000..6340272
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_debug.c
@@ -0,0 +1,198 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *          Fredy Neeser <nfd@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+
+
+void siw_print_qp_attr_mask(enum ib_qp_attr_mask attr_mask, char *msg)
+{
+	printk(KERN_INFO "-------- %s -------\n", msg);
+	if (IB_QP_STATE & attr_mask)
+		printk(KERN_INFO "IB_QP_STATE\n");
+	if (IB_QP_CUR_STATE & attr_mask)
+		printk(KERN_INFO "IB_QP_CUR_STATE\n");
+	if (IB_QP_EN_SQD_ASYNC_NOTIFY & attr_mask)
+		printk(KERN_INFO "IB_QP_EN_SQD_ASYNC_NOTIFY\n");
+	if (IB_QP_ACCESS_FLAGS & attr_mask)
+		printk(KERN_INFO "IB_QP_ACCESS_FLAGS\n");
+	if (IB_QP_PKEY_INDEX & attr_mask)
+		printk(KERN_INFO "IB_QP_PKEY_INDEX\n");
+	if (IB_QP_PORT & attr_mask)
+		printk(KERN_INFO "IB_QP_PORT\n");
+	if (IB_QP_QKEY & attr_mask)
+		printk(KERN_INFO "IB_QP_QKEY\n");
+	if (IB_QP_AV & attr_mask)
+		printk(KERN_INFO "IB_QP_AV\n");
+	if (IB_QP_PATH_MTU & attr_mask)
+		printk(KERN_INFO "IB_QP_PATH_MTU\n");
+	if (IB_QP_TIMEOUT & attr_mask)
+		printk(KERN_INFO "IB_QP_TIMEOUT\n");
+	if (IB_QP_RETRY_CNT & attr_mask)
+		printk(KERN_INFO "IB_QP_RETRY_CNT\n");
+	if (IB_QP_RNR_RETRY & attr_mask)
+		printk(KERN_INFO "IB_QP_RNR_RETRY\n");
+	if (IB_QP_RQ_PSN & attr_mask)
+		printk(KERN_INFO "IB_QP_RQ_PSN\n");
+	if (IB_QP_MAX_QP_RD_ATOMIC & attr_mask)
+		printk(KERN_INFO "IB_QP_MAX_QP_RD_ATOMIC\n");
+	if (IB_QP_ALT_PATH & attr_mask)
+		printk(KERN_INFO "IB_QP_ALT_PATH\n");
+	if (IB_QP_MIN_RNR_TIMER & attr_mask)
+		printk(KERN_INFO "IB_QP_MIN_RNR_TIMER\n");
+	if (IB_QP_SQ_PSN & attr_mask)
+		printk(KERN_INFO "IB_QP_SQ_PSN\n");
+	if (IB_QP_MAX_DEST_RD_ATOMIC & attr_mask)
+		printk(KERN_INFO "IB_QP_MAX_DEST_RD_ATOMIC\n");
+	if (IB_QP_PATH_MIG_STATE & attr_mask)
+		printk(KERN_INFO "IB_QP_PATH_MIG_STATE\n");
+	if (IB_QP_CAP & attr_mask)
+		printk(KERN_INFO "IB_QP_CAP\n");
+	if (IB_QP_DEST_QPN & attr_mask)
+		printk(KERN_INFO "IB_QP_DEST_QPN\n");
+	printk(KERN_INFO "-------- %s -(end)-\n", msg);
+}
+
+
+void siw_print_hdr(union iwarp_hdrs *hdr, int qp_id, char *msg)
+{
+	switch (hdr->ctrl.opcode) {
+
+	case RDMAP_RDMA_WRITE:
+		printk(KERN_INFO "QP%04d %s(WRITE, MPA len %d): %08x %016llx\n",
+			qp_id, msg, ntohs(hdr->ctrl.mpa_len),
+			hdr->rwrite.sink_stag, hdr->rwrite.sink_to);
+		break;
+
+	case RDMAP_RDMA_READ_REQ:
+		printk(KERN_INFO "QP%04d %s(RREQ, MPA len %d): %08x %08x "
+			"%08x %08x %016llx %08x %08x %016llx\n", qp_id, msg,
+			ntohs(hdr->ctrl.mpa_len),
+			hdr->rreq.ddp_qn, hdr->rreq.ddp_msn,
+			hdr->rreq.ddp_mo, hdr->rreq.sink_stag,
+			hdr->rreq.sink_to, hdr->rreq.read_size,
+			hdr->rreq.source_stag, hdr->rreq.source_to);
+
+		break;
+	case RDMAP_RDMA_READ_RESP:
+		printk(KERN_INFO "QP%04d %s(RRESP, MPA len %d): %08x %016llx\n",
+			qp_id, msg, ntohs(hdr->ctrl.mpa_len),
+			hdr->rresp.sink_stag, hdr->rresp.sink_to);
+		break;
+
+	case RDMAP_SEND:
+		printk(KERN_INFO "QP%04d %s(SEND, MPA len %d): %08x %08x "
+			"%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len),
+			hdr->send.ddp_qn, hdr->send.ddp_msn, hdr->send.ddp_mo);
+		break;
+
+	case RDMAP_SEND_INVAL:
+		printk(KERN_INFO "QP%04d %s(S_INV, MPA len %d): %08x %08x "
+			"%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len),
+			hdr->send.ddp_qn, hdr->send.ddp_msn,
+			hdr->send.ddp_mo);
+		break;
+
+	case RDMAP_SEND_SE:
+		printk(KERN_INFO "QP%04d %s(S_SE, MPA len %d): %08x %08x "
+			"%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len),
+			hdr->send.ddp_qn, hdr->send.ddp_msn,
+			hdr->send.ddp_mo);
+		break;
+
+	case RDMAP_SEND_SE_INVAL:
+		printk(KERN_INFO "QP%04d %s(S_SE_INV, MPA len %d): %08x %08x "
+			"%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len),
+			hdr->send.ddp_qn, hdr->send.ddp_msn,
+			hdr->send.ddp_mo);
+		break;
+
+	case RDMAP_TERMINATE:
+		printk(KERN_INFO "QP%04d %s(TERM, MPA len %d):\n", qp_id, msg,
+			ntohs(hdr->ctrl.mpa_len));
+		break;
+
+	default:
+		printk(KERN_INFO "QP%04d %s ?????\n", qp_id, msg);
+		break;
+	}
+}
+
+void siw_print_rctx(struct siw_iwarp_rx *rctx)
+{
+	printk(KERN_INFO "---RX Context-->\n");
+	siw_print_hdr(&rctx->hdr, RX_QPID(rctx), "\nCurrent Pkt:\t");
+	printk(KERN_INFO "Skbuf State:\tp:0x%p, new:%d, off:%d, copied:%d\n",
+		rctx->skb, rctx->skb_new, rctx->skb_offset, rctx->skb_copied);
+	printk(KERN_INFO "FPDU State:\trx_state:%d,\n\t\trcvd:%d, rem:%d, "
+		"pad:%d\n", rctx->state, rctx->fpdu_part_rcvd,
+		rctx->fpdu_part_rem, rctx->pad);
+	printk(KERN_INFO "Rx Mem:\t\tp:0x%p, chunk:0x%p,\n\t\tp_ix:%d, "
+		"p_off:%d, stag:0x%08x, mem_id:%d\n",
+		rctx->dest.wqe, rctx->umem_chunk, rctx->pg_idx, rctx->pg_off,
+		rctx->ddp_stag, rctx->ddp_stag >> 8);
+	printk(KERN_INFO "DDP State:\tprev_op:%d, first_seg:%d, "
+		"more_segs:%d\n", rctx->prev_ddp_opcode, rctx->first_ddp_seg,
+		rctx->more_ddp_segs);
+	printk(KERN_INFO "MPA State:\tlen:%d, crc_enabled:%d, crc:0x%x\n",
+		rctx->hdr.ctrl.mpa_len, rctx->crc_enabled, rctx->trailer.crc);
+	printk(KERN_INFO "<---------------\n");
+}
+
+#if DPRINT_MASK > 0
+char ib_qp_state_to_string[IB_QPS_ERR+1][sizeof "RESET"] = {
+	[IB_QPS_RESET]	= "RESET",
+	[IB_QPS_INIT]	= "INIT",
+	[IB_QPS_RTR]	= "RTR",
+	[IB_QPS_RTS]	= "RTS",
+	[IB_QPS_SQD]	= "SQD",
+	[IB_QPS_SQE]	= "SQE",
+	[IB_QPS_ERR]	= "ERR"
+};
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw_debug.h b/drivers/infiniband/hw/softiwarp/siw_debug.h
new file mode 100644
index 0000000..58615fd
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_debug.h
@@ -0,0 +1,159 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Fredy Neeser <nfd@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_DEBUG_H
+#define _SIW_DEBUG_H
+
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>	/* in_interrupt() */
+
+/*
+ * dprint: Selective debug printing
+ *
+ * Use an OR combination of DBG_* as dbgcat in dprint*(dbgcat,...)
+ * to assign debug messages to categories:
+ *
+ * dbgcat	Debug message belongs to category
+ * -----------------------------------------------------------------------------
+ * DBG_ON	Always on, for really important events or error conditions
+ * DBG_TMP	Temporarily on for fine-grained debugging
+ * DBG_OBJ	Object management (object construction/destruction/refcounting)
+ * DBG_MM	Memory management
+ * DBG_EH	Event handling (completion events and asynchronous events)
+ * DBG_CM	Connection management, QP states
+ * DBG_WR	Work requests
+ * DBG_TX	iWARP TX path
+ * DBG_RX	iWARP RX path
+ * DBG_SK	Socket operations
+ * DBG_KT	Kernel threads
+ * DBG_IRQ	Interrupt context (SoftIRQ or HardIRQ)
+ * DBG_DM	Device management
+ * DBG_HDR	Packet HDRs
+ * DBG_ALL	All categories above
+ */
+#define DBG_ON		0x00000001
+#define DBG_TMP		0x00000002
+#define DBG_OBJ		0x00000004
+#define DBG_MM		0x00000008
+#define DBG_EH		0x00000010
+#define DBG_CM		0x00000020
+#define DBG_WR		0x00000040
+#define DBG_TX		0x00000080
+#define DBG_RX		0x00000100
+#define DBG_SK		0x00000200
+#define DBG_KT		0x00000400
+#define DBG_IRQ		0x00000800
+#define DBG_DM		0x00001000
+#define DBG_HDR		0x00002000
+#define DBG_ALL		(DBG_IRQ|DBG_KT|DBG_SK|DBG_RX|DBG_TX|DBG_WR|\
+			 DBG_CM|DBG_EH|DBG_MM|DBG_OBJ|DBG_TMP|DBG_DM|\
+			 DBG_ON|DBG_HDR)
+#define DBG_ALL_NOHDR	(DBG_IRQ|DBG_KT|DBG_SK|DBG_RX|DBG_TX|DBG_WR|\
+			 DBG_CM|DBG_EH|DBG_MM|DBG_OBJ|DBG_TMP|DBG_DM|DBG_ON)
+#define DBG_CTRL	(DBG_ON|DBG_CM|DBG_DM)
+
+/*
+ * Set DPRINT_MASK to suit your debugging needs:
+ *
+ * DPRINT_MASK value		Enables debug messages for
+ * ---------------------------------------------------------------------
+ * DBG_ON			Important events / error conditions only
+ *				(minimum number of debug messages)
+ * OR-ed combination of DBG_*	Selective debugging
+ * DBG_KT|DBG_ON		Kernel threads
+ * DBG_ALL			All categories
+ */
+#define DPRINT_MASK	0
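+
+/*
+ * Example (a hypothetical build-time setting, not enabled here):
+ * to trace connection management and the TX path only, one would set
+ *
+ *	#define DPRINT_MASK	(DBG_ON|DBG_CM|DBG_TX)
+ */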
+
+extern void siw_print_hdr(union iwarp_hdrs *, int, char *);
+extern void siw_print_rctx(struct siw_iwarp_rx *);
+extern void siw_print_qp_attr_mask(enum ib_qp_attr_mask, char *);
+
+#if DPRINT_MASK > 0
+
+/**
+ * dprint - Selective debug print for process, SoftIRQ or HardIRQ context
+ *
+ * Debug print with selectable debug categories,
+ * starting with header
+ * - "( pid /cpu) __func__" for process context
+ * - "( irq /cpu) __func__" for IRQ context
+ *
+ * @dbgcat	: Set of debug categories (OR-ed combination of DBG_* above),
+ *		  to which this debug message is assigned.
+ * @fmt		: printf compliant format string
+ * @args	: printf compliant argument list
+ */
+#define dprint(dbgcat, fmt, args...)					\
+	do {								\
+		if ((dbgcat) & DPRINT_MASK) {				\
+			if (!in_interrupt())				\
+				printk(KERN_INFO "(%5d/%1d) %s" fmt,	\
+					current->pid,			\
+					current_thread_info()->cpu,	\
+					__func__, ## args);		\
+			else						\
+				printk(KERN_INFO "( irq /%1d) %s" fmt,	\
+					current_thread_info()->cpu,	\
+					__func__, ## args);		\
+		}							\
+	} while (0)
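+
+/*
+ * Usage example, as found throughout siw_cm.c:
+ *
+ *	dprint(DBG_CM, "(): cep 0x%p, state: %d\n", cep, cep->state);
+ */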
+
+
+#define siw_dprint_rctx(r)	siw_print_rctx(r)
+extern char ib_qp_state_to_string[IB_QPS_ERR+1][sizeof "RESET"];
+
+#else
+#define dprint(dbgcat, fmt, args...)	do { } while (0)
+#define siw_dprint_rctx(r)	do { } while (0)
+#endif
+
+
+#if DPRINT_MASK & DBG_HDR
+#define siw_dprint_hdr(h, i, m)	siw_print_hdr(h, i, m)
+#else
+#define siw_dprint_hdr(h, i, m)	do { } while (0)
+#endif
+
+#if DPRINT_MASK & DBG_CM
+#define siw_dprint_qp_attr_mask(mask)\
+		siw_print_qp_attr_mask(mask, (char *)__func__)
+#else
+#define siw_dprint_qp_attr_mask(mask)	do { } while (0)
+#endif
+
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw_main.c b/drivers/infiniband/hw/softiwarp/siw_main.c
new file mode 100644
index 0000000..c97adee
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_main.c
@@ -0,0 +1,482 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_arp.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+#include "siw_verbs.h"
+
+
+MODULE_DESCRIPTION("Software iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+
+static int loopback_enabled;
+module_param(loopback_enabled, int, 0644);
+MODULE_PARM_DESC(loopback_enabled, "enable_loopback");
+
+struct siw_dev *siw_device;
+
+#if defined(KERNEL_VERSION_PRE_2_6_26) && (OFA_VERSION < 140)
+static ssize_t show_sw_version(struct class_device *class_dev, char *buf)
+{
+	struct siw_dev *siw_dev = container_of(class_dev, struct siw_dev,
+					       ofa_dev.class_dev);
+
+	return sprintf(buf, "%x\n", siw_dev->attrs.version);
+}
+
+static ssize_t show_if_type(struct class_device *class_dev, char *buf)
+{
+	struct siw_dev *siw_dev = container_of(class_dev, struct siw_dev,
+					       ofa_dev.class_dev);
+
+	return sprintf(buf, "%d\n", siw_dev->attrs.iftype);
+}
+#else
+static ssize_t show_sw_version(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct siw_dev *siw_dev = container_of(dev, struct siw_dev,
+						 ofa_dev.dev);
+
+	return sprintf(buf, "%x\n", siw_dev->attrs.version);
+}
+
+static ssize_t show_if_type(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct siw_dev *siw_dev = container_of(dev, struct siw_dev,
+					       ofa_dev.dev);
+
+	return sprintf(buf, "%d\n", siw_dev->attrs.iftype);
+}
+#endif
+
+#if defined(KERNEL_VERSION_PRE_2_6_26) && (OFA_VERSION < 140)
+static CLASS_DEVICE_ATTR(sw_version, S_IRUGO, show_sw_version, NULL);
+static CLASS_DEVICE_ATTR(if_type, S_IRUGO, show_if_type, NULL);
+
+static struct class_device_attribute *siw_dev_attributes[] = {
+	&class_device_attr_sw_version,
+	&class_device_attr_if_type
+};
+#else
+static DEVICE_ATTR(sw_version, S_IRUGO, show_sw_version, NULL);
+static DEVICE_ATTR(if_type, S_IRUGO, show_if_type, NULL);
+
+static struct device_attribute *siw_dev_attributes[] = {
+	&dev_attr_sw_version,
+	&dev_attr_if_type
+};
+#endif
+
+int siw_register_device(struct siw_dev *dev)
+{
+	struct ib_device *ibdev = &dev->ofa_dev;
+	int rv, i;
+
+	if (dev->l2dev->type != ARPHRD_LOOPBACK)
+		strlcpy(ibdev->name, "siw%d", IB_DEVICE_NAME_MAX);
+	else
+		strlcpy(ibdev->name, "siw_lo%d", IB_DEVICE_NAME_MAX);
+	memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
+	memcpy(&ibdev->node_guid, dev->l2dev->dev_addr, 6);
+
+	ibdev->owner = THIS_MODULE;
+
+	ibdev->uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+
+	ibdev->node_type = RDMA_NODE_RNIC;
+	memcpy(ibdev->node_desc, SIW_NODE_DESC, sizeof(SIW_NODE_DESC));
+
+	/*
+	 * Current model (one-to-one device association):
+	 * One Softiwarp device per net_device or, equivalently,
+	 * per physical port.
+	 */
+	ibdev->phys_port_cnt = 1;
+
+	ibdev->num_comp_vectors = 1;
+	/*
+	 * While DMA addresses are not used, a device must be provided
+	 * as long as the code relies on OFA's ib_umem_get() function for
+	 * memory pinning: calling ib_umem_get() includes a (for the siw
+	 * case useless) translation of memory to DMA addresses for that
+	 * device.
+	 */
+	ibdev->dma_device = dev->l2dev->dev.parent;
+	ibdev->query_device = siw_query_device;
+	ibdev->query_port = siw_query_port;
+	ibdev->query_qp = siw_query_qp;
+	ibdev->modify_port = NULL;
+	ibdev->query_pkey = siw_query_pkey;
+	ibdev->query_gid = siw_query_gid;
+	ibdev->alloc_ucontext = siw_alloc_ucontext;
+	ibdev->dealloc_ucontext = siw_dealloc_ucontext;
+	ibdev->mmap = siw_mmap;
+	ibdev->alloc_pd = siw_alloc_pd;
+	ibdev->dealloc_pd = siw_dealloc_pd;
+	ibdev->create_ah = siw_create_ah;
+	ibdev->destroy_ah = siw_destroy_ah;
+	ibdev->create_qp = siw_create_qp;
+	ibdev->modify_qp = siw_ofed_modify_qp;
+	ibdev->destroy_qp = siw_destroy_qp;
+	ibdev->create_cq = siw_create_cq;
+	ibdev->destroy_cq = siw_destroy_cq;
+	ibdev->resize_cq = NULL;
+	ibdev->poll_cq = siw_poll_cq;
+	ibdev->get_dma_mr = siw_get_dma_mr;
+	ibdev->reg_phys_mr = NULL;
+	ibdev->rereg_phys_mr = NULL;
+	ibdev->reg_user_mr = siw_reg_user_mr;
+	ibdev->dereg_mr = siw_dereg_mr;
+	ibdev->alloc_mw = NULL;
+	ibdev->bind_mw = NULL;
+	ibdev->dealloc_mw = NULL;
+
+	ibdev->create_srq = siw_create_srq;
+	ibdev->modify_srq = siw_modify_srq;
+	ibdev->query_srq = siw_query_srq;
+	ibdev->destroy_srq = siw_destroy_srq;
+	ibdev->post_srq_recv = siw_post_srq_recv;
+
+	ibdev->attach_mcast = NULL;
+	ibdev->detach_mcast = NULL;
+	ibdev->process_mad = siw_no_mad;
+
+	ibdev->req_notify_cq = siw_req_notify_cq;
+	ibdev->post_send = siw_post_send;
+	ibdev->post_recv = siw_post_receive;
+
+
+	ibdev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+	if (!ibdev->iwcm)
+		return -ENOMEM;
+
+	ibdev->iwcm->connect = siw_connect;
+	ibdev->iwcm->accept = siw_accept;
+	ibdev->iwcm->reject = siw_reject;
+	ibdev->iwcm->create_listen = siw_create_listen;
+	ibdev->iwcm->destroy_listen = siw_destroy_listen;
+	ibdev->iwcm->add_ref = siw_qp_get_ref;
+	ibdev->iwcm->rem_ref = siw_qp_put_ref;
+	ibdev->iwcm->get_qp = siw_get_ofaqp;
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 34)
+	rv = ib_register_device(ibdev, NULL);
+#else
+	rv = ib_register_device(ibdev);
+#endif
+	if (rv) {
+		dprint(DBG_DM|DBG_ON, "(dev=%s): "
+			"ib_register_device failed: rv=%d\n", ibdev->name, rv);
+		return rv;
+	}
+
+	/*
+	 * set and register sw version + user if type
+	 */
+	dev->attrs.version = VERSION_ID_SOFTIWARP;
+	dev->attrs.iftype  = SIW_IF_OFED;
+
+	dev->attrs.vendor_id = SIW_VENDOR_ID;
+	dev->attrs.vendor_part_id = SIW_VENDORT_PART_ID;
+	dev->attrs.sw_version = SIW_SW_VERSION;
+	dev->attrs.max_qp = SIW_MAX_QP;
+	dev->attrs.max_qp_wr = SIW_MAX_QP_WR;
+	dev->attrs.max_ord = SIW_MAX_ORD;
+	dev->attrs.max_ird = SIW_MAX_IRD;
+	dev->attrs.cap_flags = 0;
+	dev->attrs.max_sge = SIW_MAX_SGE;
+	dev->attrs.max_sge_rd = SIW_MAX_SGE_RD;
+	dev->attrs.max_cq = SIW_MAX_CQ;
+	dev->attrs.max_cqe = SIW_MAX_CQE;
+	dev->attrs.max_mr = SIW_MAX_MR;
+	dev->attrs.max_mr_size = SIW_MAX_MR_SIZE;
+	dev->attrs.max_pd = SIW_MAX_PD;
+	dev->attrs.max_mw = SIW_MAX_MW;
+	dev->attrs.max_fmr = SIW_MAX_FMR;
+	dev->attrs.max_srq = SIW_MAX_SRQ;
+	dev->attrs.max_srq_wr = SIW_MAX_SRQ_WR;
+	dev->attrs.max_srq_sge = SIW_MAX_SGE;
+
+	siw_idr_init(dev);
+
+	atomic_set(&dev->num_srq, 0);
+	atomic_set(&dev->num_qp, 0);
+	atomic_set(&dev->num_cq, 0);
+	atomic_set(&dev->num_mem, 0);
+	atomic_set(&dev->num_pd, 0);
+
+	for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) {
+#if defined(KERNEL_VERSION_PRE_2_6_26) && (OFA_VERSION < 140)
+		rv = class_device_create_file(&ibdev->class_dev,
+					      siw_dev_attributes[i]);
+#else
+		rv = device_create_file(&ibdev->dev, siw_dev_attributes[i]);
+#endif
+		if (rv) {
+			dprint(DBG_DM|DBG_ON, "(dev=%s): "
+				"device_create_file failed: i=%d, rv=%d\n",
+				ibdev->name, i, rv);
+			ib_unregister_device(ibdev);
+			return rv;
+		}
+	}
+
+	dprint(DBG_DM, ": Registered '%s' for interface '%s', "
+		"HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n",
+		ibdev->name, dev->l2dev->name,
+		*(u8 *)dev->l2dev->dev_addr,
+		*((u8 *)dev->l2dev->dev_addr + 1),
+		*((u8 *)dev->l2dev->dev_addr + 2),
+		*((u8 *)dev->l2dev->dev_addr + 3),
+		*((u8 *)dev->l2dev->dev_addr + 4),
+		*((u8 *)dev->l2dev->dev_addr + 5));
+	return 0;
+}
+
+void siw_deregister_device(struct siw_dev *dev)
+{
+	int i;
+
+	siw_idr_release(dev);
+
+	WARN_ON(atomic_read(&dev->num_srq) || atomic_read(&dev->num_qp) ||
+		atomic_read(&dev->num_cq) || atomic_read(&dev->num_mem) ||
+		atomic_read(&dev->num_pd));
+
+	for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i)
+#if defined(KERNEL_VERSION_PRE_2_6_26) && (OFA_VERSION < 140)
+		class_device_remove_file(&dev->ofa_dev.class_dev,
+					 siw_dev_attributes[i]);
+#else
+		device_remove_file(&dev->ofa_dev.dev, siw_dev_attributes[i]);
+#endif
+
+	dprint(DBG_OBJ, ": Unregister '%s' for interface '%s'\n",
+		dev->ofa_dev.name, dev->l2dev->name);
+
+	ib_unregister_device(&dev->ofa_dev);
+}
+
+
+/*
+ * siw_init_module - Initialize Softiwarp module and create Softiwarp devices
+ *
+ * There are three design options for Softiwarp device management supporting
+ * - multiple physical Ethernet ports, i.e., multiple net_device instances
+ * - and multi-homing, i.e., multiple IP addresses associated with net_device,
+ * as follows:
+ *
+ *    Option 1: One Softiwarp device per net_device and
+ *              IP address associated with the net_device
+ *    Option 2: One Softiwarp device per net_device
+ *              (and all IP addresses associated with the net_device)
+ *    Option 3: Single Softiwarp device for all net_device instances
+ *              (and all IP addresses associated with these instances)
+ *
+ * We currently use Option 2, registering a separate siw_dev for
+ * each net_device.
+ *
+ * TODO: Dynamic device management (network device registration/removal).
+ *       IPv6 support.
+ */
+static __init int siw_init_module(void)
+{
+	struct net_device	*dev;
+	struct siw_dev		*siw_p;
+	int rv = 0;
+
+	/*
+	 * Identify all net_device instances and create a
+	 * Softiwarp device for each net_device supporting IPv4
+	 *
+	 * TODO:
+	 * - Do we have to generalize for IPv6?
+	 * - Exclude IPoIB-based devices, if any
+	 * - Consider excluding Ethernet devices with an
+	 *   associated iWARP hardware device
+	 */
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		struct in_device *in_dev;
+
+		in_dev = in_dev_get(dev);
+		if (!in_dev) {
+			dprint(DBG_DM, ": Skipped %s (no in_dev)\n", dev->name);
+			continue;
+		}
+		if (!in_dev->ifa_list) {
+			dprint(DBG_DM, ": Skipped %s (no ifa)\n", dev->name);
+			in_dev_put(in_dev);
+			continue;
+		}
+		/*
+		 * This device has an in_device attached. Attach to it
+		 * if it is a LOOPBACK, ETHER or IEEE802 device.
+		 *
+		 * Additional hardware support can be added here
+		 * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see
+		 * <linux/if_arp.h> for type identifiers.
+		 *
+		 * NOTE: ARPHRD_TUNNEL/6 are excluded.
+		 */
+		if (dev->type == ARPHRD_ETHER ||
+		    dev->type == ARPHRD_IEEE802 ||
+		    (dev->type == ARPHRD_LOOPBACK && loopback_enabled)) {
+#ifdef CHECK_DMA_CAPABILITIES
+			if (!dev->dev.parent || !get_dma_ops(dev->dev.parent)) {
+				dprint(DBG_DM|DBG_ON,
+					": No DMA capabilities: %s (skipped)\n",
+					dev->name);
+				in_dev_put(in_dev);
+				continue;
+			}
+#endif
+			siw_p =
+			      (struct siw_dev *)ib_alloc_device(sizeof *siw_p);
+
+			if (!siw_p) {
+				in_dev_put(in_dev);
+				rv = -ENOMEM;
+				break;
+			}
+			if (!siw_device) {
+				siw_device = siw_p;
+				siw_p->next = NULL;
+			} else {
+				siw_p->next = siw_device->next;
+				siw_device->next = siw_p;
+			}
+			siw_p->l2dev = dev;
+
+			rv = siw_register_device(siw_p);
+			if (rv) {
+				if (siw_device != siw_p)
+					siw_device->next = siw_p->next;
+				else
+					siw_device = NULL;
+
+				in_dev_put(in_dev);
+				ib_dealloc_device(&siw_p->ofa_dev);
+
+				break;
+			}
+		} else {
+			dprint(DBG_DM, ": Skipped %s (type %d)\n",
+				dev->name, dev->type);
+			in_dev_put(in_dev);
+		}
+	}
+	rtnl_unlock();
+
+	if (!siw_device)
+		return -ENODEV;
+
+	if (rv)
+		return rv;
+	/*
+	 * FIXME: In case of error, we leave devices allocated.
+	 *        Is this correct?
+	 */
+	rv = siw_cm_init();
+	if (rv)
+		return rv;
+
+	rv = siw_sq_worker_init();
+
+	printk(KERN_INFO "SoftIWARP attached\n");
+	return rv;
+}
+
+static void __exit siw_exit_module(void)
+{
+	struct siw_dev	*siw_p;
+
+	siw_sq_worker_exit();
+	siw_cm_exit();
+
+	while (siw_device) {
+		siw_p = siw_device->next;
+		siw_deregister_device(siw_device);
+		in_dev_put(siw_device->l2dev->ip_ptr);
+		ib_dealloc_device(&siw_device->ofa_dev);
+		siw_device = siw_p;
+	}
+	printk(KERN_INFO "SoftIWARP detached\n");
+}
+
+module_init(siw_init_module);
+module_exit(siw_exit_module);
diff --git a/drivers/infiniband/hw/softiwarp/siw_obj.c b/drivers/infiniband/hw/softiwarp/siw_obj.c
new file mode 100644
index 0000000..b5a1a3d
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_obj.c
@@ -0,0 +1,499 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+
+void siw_objhdr_init(struct siw_objhdr *hdr)
+{
+	kref_init(&hdr->ref);
+}
+
+void siw_idr_init(struct siw_dev *dev)
+{
+	spin_lock_init(&dev->idr_lock);
+
+	idr_init(&dev->qp_idr);
+	idr_init(&dev->cq_idr);
+	idr_init(&dev->pd_idr);
+	idr_init(&dev->mem_idr);
+}
+
+void siw_idr_release(struct siw_dev *dev)
+{
+	idr_destroy(&dev->qp_idr);
+	idr_destroy(&dev->cq_idr);
+	idr_destroy(&dev->pd_idr);
+	idr_destroy(&dev->mem_idr);
+}
+
+static inline int siw_add_obj(spinlock_t *lock, struct idr *idr,
+			      struct siw_objhdr *obj)
+{
+	u32		pre_id, id;
+	unsigned long	flags;
+	int		rv;
+
+	get_random_bytes(&pre_id, sizeof pre_id);
+	pre_id &= 0xffff;
+again:
+	do {
+		if (!(idr_pre_get(idr, GFP_KERNEL)))
+			return -ENOMEM;
+
+		spin_lock_irqsave(lock, flags);
+		rv = idr_get_new_above(idr, obj, pre_id, &id);
+		spin_unlock_irqrestore(lock, flags);
+
+	} while  (rv == -EAGAIN);
+
+	if (rv == 0) {
+		siw_objhdr_init(obj);
+		obj->id = id;
+		dprint(DBG_OBJ, "(OBJ%d): IDR New Object\n", id);
+	} else if (rv == -ENOSPC && pre_id != 1) {
+		pre_id = 1;
+		goto again;
+	} else {
+		dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n");
+	}
+	return rv;
+}
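+
+/*
+ * Note on the allocation pattern above (pre-3.9 IDR API): idr_pre_get()
+ * only preallocates and may sleep, so it runs outside the spinlock;
+ * idr_get_new_above() consumes the preallocation under the lock and
+ * returns -EAGAIN if the preallocation was used up (e.g. by a
+ * concurrent allocator), hence the retry loop. The randomized start id
+ * makes recycled object ids harder to guess; on -ENOSPC the search
+ * restarts once from id 1.
+ */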
+
+static inline struct siw_objhdr *siw_get_obj(struct idr *idr, int id)
+{
+	struct siw_objhdr *obj;
+
+	obj = idr_find(idr, id);
+	if (obj)
+		kref_get(&obj->ref);
+
+	return obj;
+}
+
+struct siw_cq *siw_cq_id2obj(struct siw_dev *dev, int id)
+{
+	struct siw_objhdr *obj = siw_get_obj(&dev->cq_idr, id);
+	if (obj)
+		return container_of(obj, struct siw_cq, hdr);
+
+	return NULL;
+}
+
+struct siw_qp *siw_qp_id2obj(struct siw_dev *dev, int id)
+{
+	struct siw_objhdr *obj = siw_get_obj(&dev->qp_idr, id);
+	if (obj)
+		return container_of(obj, struct siw_qp, hdr);
+
+	return NULL;
+}
+
+/*
+ * siw_mem_id2obj()
+ *
+ * Resolves the memory object behind an STag, given the STag's index
+ * part as @id. May be called from:
+ * o process context, before sending out of an sgl
+ * o softirq context, when resolving target memory
+ */
+struct siw_mem *siw_mem_id2obj(struct siw_dev *dev, int id)
+{
+	struct siw_objhdr *obj;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->idr_lock, flags);
+	obj = siw_get_obj(&dev->mem_idr, id);
+	spin_unlock_irqrestore(&dev->idr_lock, flags);
+
+	if (obj) {
+		dprint(DBG_MM|DBG_OBJ, "(MEM%d): New refcount: %d\n",
+		       obj->id, obj->ref.refcount.counter);
+
+		return container_of(obj, struct siw_mem, hdr);
+	}
+	dprint(DBG_MM|DBG_OBJ|DBG_ON, "(MEM%d): not found!\n", id);
+
+	return NULL;
+}
+
+int siw_qp_add(struct siw_dev *dev, struct siw_qp *qp)
+{
+	int rv = siw_add_obj(&dev->idr_lock, &dev->qp_idr, &qp->hdr);
+	if (!rv) {
+		dprint(DBG_OBJ, "(QP%d): New Object\n", QP_ID(qp));
+		qp->hdr.dev = dev;
+	}
+	return rv;
+}
+
+int siw_cq_add(struct siw_dev *dev, struct siw_cq *cq)
+{
+	int rv = siw_add_obj(&dev->idr_lock, &dev->cq_idr, &cq->hdr);
+	if (!rv) {
+		dprint(DBG_OBJ, "(CQ%d): New Object\n", cq->hdr.id);
+		cq->hdr.dev = dev;
+	}
+	return rv;
+}
+
+int siw_pd_add(struct siw_dev *dev, struct siw_pd *pd)
+{
+	int rv = siw_add_obj(&dev->idr_lock, &dev->pd_idr, &pd->hdr);
+	if (!rv) {
+		dprint(DBG_OBJ, "(PD%d): New Object\n", pd->hdr.id);
+		pd->hdr.dev = dev;
+	}
+	return rv;
+}
+
+/*
+ * STag lookup is based on the STag's index part only (24 bits).
+ * It is assumed that the idr_get_new_above(,,1,) function will
+ * always return a new id within this range (0x1...0xffffff),
+ * if one is available.
+ * The code avoids the special STag of zero and tries to randomize
+ * STag values.
+ */
+int siw_mem_add(struct siw_dev *dev, struct siw_mem *m)
+{
+	u32		id, pre_id;
+	unsigned long	flags;
+	int		rv;
+
+	do {
+		get_random_bytes(&pre_id, sizeof pre_id);
+		pre_id &= 0xffff;
+	} while (pre_id == 0);
+again:
+	do {
+		if (!(idr_pre_get(&dev->mem_idr, GFP_KERNEL)))
+			return -ENOMEM;
+
+		spin_lock_irqsave(&dev->idr_lock, flags);
+		rv = idr_get_new_above(&dev->mem_idr, m, pre_id, &id);
+		spin_unlock_irqrestore(&dev->idr_lock, flags);
+
+	} while (rv == -EAGAIN);
+
+	if (rv == -ENOSPC || (rv == 0 && id > SIW_STAG_MAX)) {
+		if (rv == 0) {
+			spin_lock_irqsave(&dev->idr_lock, flags);
+			idr_remove(&dev->mem_idr, id);
+			spin_unlock_irqrestore(&dev->idr_lock, flags);
+		}
+		if (pre_id == 1) {
+			dprint(DBG_OBJ|DBG_MM|DBG_ON,
+				"(IDR): New Object failed: %d\n", pre_id);
+			return -ENOSPC;
+		}
+		pre_id = 1;
+		goto again;
+	} else if (rv) {
+		dprint(DBG_OBJ|DBG_MM|DBG_ON,
+			"(IDR%d): New Object failed: rv %d\n", id, rv);
+		return rv;
+	}
+	siw_objhdr_init(&m->hdr);
+	m->hdr.id = id;
+	m->hdr.dev = dev;
+	dprint(DBG_OBJ|DBG_MM, "(IDR%d): New Object\n", id);
+
+	return 0;
+}
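+
+/*
+ * Illustration (sketch): an iWARP STag is 32 bits wide, composed of a
+ * 24-bit index and an 8-bit key. With the index allocated above, a
+ * consumer-visible STag and the reverse lookup would relate as
+ *
+ *	stag = (m->hdr.id << 8) | key8;	// compose STag from index
+ *	idx  = stag >> 8;		// recover index for idr lookup
+ *
+ * ('key8' is a hypothetical name), matching the 'sge->lkey >> 8'
+ * resolution done in siw_check_sge().
+ */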
+
+void siw_remove_obj(spinlock_t *lock, struct idr *idr,
+		      struct siw_objhdr *hdr)
+{
+	unsigned long	flags;
+
+	dprint(DBG_OBJ, "(OBJ%d): IDR Remove Object\n", hdr->id);
+
+	spin_lock_irqsave(lock, flags);
+	idr_remove(idr, hdr->id);
+	spin_unlock_irqrestore(lock, flags);
+}
+
+
+/********** routines to put objs back and free if no ref left *****/
+
+static void siw_free_cq(struct kref *ref)
+{
+	struct siw_cq *cq =
+		(container_of(container_of(ref, struct siw_objhdr, ref),
+			      struct siw_cq, hdr));
+
+	dprint(DBG_OBJ, "(CQ%d): Free Object\n", cq->hdr.id);
+
+	kfree(cq);
+}
+
+static void siw_free_qp(struct kref *ref)
+{
+	struct siw_qp	*qp =
+		container_of(container_of(ref, struct siw_objhdr, ref),
+			     struct siw_qp, hdr);
+
+	dprint(DBG_OBJ|DBG_CM, "(QP%d): Free Object\n", QP_ID(qp));
+
+	if (qp->cep)
+		siw_cep_put(qp->cep);
+
+	kfree(qp);
+}
+
+static void siw_free_pd(struct kref *ref)
+{
+	struct siw_pd	*pd =
+		container_of(container_of(ref, struct siw_objhdr, ref),
+			     struct siw_pd, hdr);
+
+	dprint(DBG_OBJ, "(PD%d): Free Object\n", pd->hdr.id);
+
+	kfree(pd);
+}
+
+static void siw_free_mem(struct kref *ref)
+{
+	struct siw_mem *m;
+
+	m = container_of(container_of(ref, struct siw_objhdr, ref),
+			 struct siw_mem, hdr);
+
+	dprint(DBG_MM|DBG_OBJ, "(MEM%d): Free Object\n", OBJ_ID(m));
+
+	if (SIW_MEM_IS_MW(m)) {
+		struct siw_mw *mw = container_of(m, struct siw_mw, mem);
+		kfree(mw);
+	} else {
+		struct siw_mr *mr = container_of(m, struct siw_mr, mem);
+		dprint(DBG_MM|DBG_OBJ, "(MEM%d): Release UMem\n", OBJ_ID(m));
+		ib_umem_release(mr->umem);
+		kfree(mr);
+	}
+}
+
+
+void siw_cq_put(struct siw_cq *cq)
+{
+	dprint(DBG_OBJ, "(CQ%d): Old refcount: %d\n",
+		OBJ_ID(cq), atomic_read(&cq->hdr.ref.refcount));
+	kref_put(&cq->hdr.ref, siw_free_cq);
+}
+
+void siw_qp_put(struct siw_qp *qp)
+{
+	dprint(DBG_OBJ, "(QP%d): Old refcount: %d\n",
+		QP_ID(qp), atomic_read(&qp->hdr.ref.refcount));
+	kref_put(&qp->hdr.ref, siw_free_qp);
+}
+
+void siw_pd_put(struct siw_pd *pd)
+{
+	dprint(DBG_OBJ, "(PD%d): Old refcount: %d\n",
+		OBJ_ID(pd), atomic_read(&pd->hdr.ref.refcount));
+	kref_put(&pd->hdr.ref, siw_free_pd);
+}
+
+void siw_mem_put(struct siw_mem *m)
+{
+	dprint(DBG_MM|DBG_OBJ, "(MEM%d): Old refcount: %d\n",
+		OBJ_ID(m), atomic_read(&m->hdr.ref.refcount));
+	kref_put(&m->hdr.ref, siw_free_mem);
+}
+
+
+/***** routines for WQE handling ***/
+
+/*
+ * siw_wqe_get()
+ *
+ * Get a new WQE. For a READ RESPONSE, take it from the free list,
+ * which has a maximum size of the maximum number of inbound READs.
+ * All other WQEs are malloc'ed, which creates some overhead.
+ * Consider changing to
+ *
+ * 1. malloc a WR only if it cannot be synchronously completed, or
+ * 2. operate an own cache of reusable WQEs.
+ *
+ * The current code relies on malloc efficiency.
+ */
+inline struct siw_wqe *siw_wqe_get(struct siw_qp *qp, enum siw_wr_opcode op)
+{
+	struct siw_wqe *wqe;
+
+	if (op == SIW_WR_RDMA_READ_RESP) {
+		spin_lock(&qp->freelist_lock);
+		if (!(list_empty(&qp->wqe_freelist))) {
+			wqe = list_entry(qp->wqe_freelist.next,
+					 struct siw_wqe, list);
+			list_del(&wqe->list);
+			spin_unlock(&qp->freelist_lock);
+			wqe->processed = 0;
+			dprint(DBG_OBJ|DBG_WR,
+				"(QP%d): WQE from FreeList p: %p\n",
+				QP_ID(qp), wqe);
+		} else {
+			spin_unlock(&qp->freelist_lock);
+			wqe = NULL;
+			dprint(DBG_ON|DBG_OBJ|DBG_WR,
+				"(QP%d): FreeList empty!\n", QP_ID(qp));
+		}
+	} else {
+		wqe = kzalloc(sizeof(struct siw_wqe), GFP_KERNEL);
+		dprint(DBG_OBJ|DBG_WR, "(QP%d): New WQE p: %p\n",
+			QP_ID(qp), wqe);
+	}
+	if (wqe) {
+		INIT_LIST_HEAD(&wqe->list);
+		siw_qp_get(qp);
+		wqe->qp = qp;
+	}
+	return wqe;
+}
+
+inline struct siw_wqe *siw_srq_wqe_get(struct siw_srq *srq)
+{
+	struct siw_wqe *wqe = kzalloc(sizeof(struct siw_wqe), GFP_KERNEL);
+
+	dprint(DBG_OBJ|DBG_WR, "(SRQ%p): New WQE p: %p\n", srq, wqe);
+	if (wqe) {
+		INIT_LIST_HEAD(&wqe->list);
+		wqe->qp = NULL;	/* not QP-bound until fetched */
+	}
+	return wqe;
+}
+
+/*
+ * siw_srq_fetch_wqe()
+ *
+ * fetch one RQ wqe from the SRQ and inform user
+ * if SRQ lower watermark reached
+ */
+inline struct siw_wqe *siw_srq_fetch_wqe(struct siw_qp *qp)
+{
+	struct siw_wqe *wqe;
+	struct siw_srq *srq = qp->srq;
+	int qlen;
+
+	lock_srq(srq);
+	if (!list_empty(&srq->rq)) {
+		wqe = list_first_wqe(&srq->rq);
+		list_del_init(&wqe->list);
+		qlen = srq->max_wr - atomic_inc_return(&srq->space);
+		unlock_srq(srq);
+		wqe->qp = qp;
+		if (srq->armed && qlen < srq->limit) {
+			srq->armed = 0;
+			siw_async_srq_ev(srq, IB_EVENT_SRQ_LIMIT_REACHED);
+		}
+		return wqe;
+	}
+	unlock_srq(srq);
+	return NULL;
+}
+
+inline void siw_free_inline_sgl(struct siw_sge *sge, int num_sge)
+{
+	while (num_sge--) {
+		kfree(sge->mem.buf); /* kfree handles NULL pointers */
+		sge++;
+	}
+}
+
+inline void siw_unref_mem_sgl(struct siw_sge *sge, int num_sge)
+{
+	while (num_sge--) {
+		if (sge->mem.obj != NULL)
+			siw_mem_put(sge->mem.obj);
+		sge++;
+	}
+}
+
+
+void siw_wqe_put(struct siw_wqe *wqe)
+{
+	struct siw_qp *qp = wqe->qp;
+	unsigned long flags;
+
+	dprint(DBG_OBJ|DBG_WR, " WQE: %llu:, type: %d, p: %p\n",
+		(unsigned long long)wr_id(wqe), wr_type(wqe), wqe);
+
+	switch (wr_type(wqe)) {
+
+	case SIW_WR_SEND:
+	case SIW_WR_RDMA_WRITE:
+		if (likely(!SIW_INLINED_DATA(wqe)))
+			siw_unref_mem_sgl(wqe->wr.sgl.sge,
+					  wqe->wr.sgl.num_sge);
+		else
+			siw_free_inline_sgl(wqe->wr.sgl.sge,
+					    wqe->wr.sgl.num_sge);
+		/* fall through */
+	case SIW_WR_RDMA_WRITE_WITH_IMM:
+	case SIW_WR_SEND_WITH_IMM:
+		kfree(wqe);
+		break;
+
+	case SIW_WR_RECEIVE:
+	case SIW_WR_RDMA_READ_REQ:
+		siw_unref_mem_sgl(wqe->wr.sgl.sge, wqe->wr.sgl.num_sge);
+		kfree(wqe);
+		break;
+
+	case SIW_WR_RDMA_READ_RESP:
+		siw_unref_mem_sgl(wqe->wr.sgl.sge, 1);
+		wqe->wr.sgl.sge[0].mem.obj = NULL;
+		/*
+		 * freelist can be accessed by tx processing (rresp done)
+		 * and rx softirq (get new wqe for rresponse scheduling)
+		 */
+		INIT_LIST_HEAD(&wqe->list);
+		spin_lock_irqsave(&wqe->qp->freelist_lock, flags);
+		list_add_tail(&wqe->list, &wqe->qp->wqe_freelist);
+		spin_unlock_irqrestore(&wqe->qp->freelist_lock, flags);
+		break;
+
+	default:
+		WARN_ON(1);
+	}
+	siw_qp_put(qp);
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_obj.h b/drivers/infiniband/hw/softiwarp/siw_obj.h
new file mode 100644
index 0000000..6fa8084
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_obj.h
@@ -0,0 +1,114 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_OBJ_H
+#define _SIW_OBJ_H
+
+#include <linux/idr.h>
+#include <linux/rwsem.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
+#include <asm/semaphore.h>
+#else
+#include <linux/semaphore.h>
+#endif
+
+#include <rdma/ib_verbs.h>
+
+#include "siw_debug.h"
+
+
+static inline struct siw_dev *siw_dev_ofa2siw(struct ib_device *ofa_dev)
+{
+	return container_of(ofa_dev, struct siw_dev, ofa_dev);
+}
+
+static inline void siw_cq_get(struct siw_cq *cq)
+{
+	kref_get(&cq->hdr.ref);
+	dprint(DBG_OBJ, "(CQ%d): New refcount: %d\n",
+		OBJ_ID(cq), atomic_read(&cq->hdr.ref.refcount));
+}
+static inline void siw_qp_get(struct siw_qp *qp)
+{
+	kref_get(&qp->hdr.ref);
+	dprint(DBG_OBJ, "(QP%d): New refcount: %d\n",
+		OBJ_ID(qp), atomic_read(&qp->hdr.ref.refcount));
+}
+static inline void siw_pd_get(struct siw_pd *pd)
+{
+	kref_get(&pd->hdr.ref);
+	dprint(DBG_OBJ, "(PD%d): New refcount: %d\n",
+		OBJ_ID(pd), atomic_read(&pd->hdr.ref.refcount));
+}
+static inline void siw_mem_get(struct siw_mem *mem)
+{
+	kref_get(&mem->hdr.ref);
+	dprint(DBG_OBJ|DBG_MM, "(MEM%d): New refcount: %d\n",
+		OBJ_ID(mem), atomic_read(&mem->hdr.ref.refcount));
+}
+
+extern void siw_remove_obj(spinlock_t *lock, struct idr *idr,
+				struct siw_objhdr *hdr);
+
+extern void siw_objhdr_init(struct siw_objhdr *);
+extern void siw_idr_init(struct siw_dev *);
+extern void siw_idr_release(struct siw_dev *);
+
+extern struct siw_cq *siw_cq_id2obj(struct siw_dev *, int);
+extern struct siw_qp *siw_qp_id2obj(struct siw_dev *, int);
+extern struct siw_mem *siw_mem_id2obj(struct siw_dev *, int);
+
+extern int siw_qp_add(struct siw_dev *, struct siw_qp *);
+extern int siw_cq_add(struct siw_dev *, struct siw_cq *);
+extern int siw_pd_add(struct siw_dev *, struct siw_pd *);
+extern int siw_mem_add(struct siw_dev *, struct siw_mem *m);
+
+extern struct siw_wqe *siw_wqe_get(struct siw_qp *, enum siw_wr_opcode);
+extern struct siw_wqe *siw_srq_wqe_get(struct siw_srq *);
+extern struct siw_wqe *siw_srq_fetch_wqe(struct siw_qp *);
+
+extern void siw_cq_put(struct siw_cq *);
+extern void siw_qp_put(struct siw_qp *);
+extern void siw_pd_put(struct siw_pd *);
+extern void siw_mem_put(struct siw_mem *);
+extern void siw_wqe_put(struct siw_wqe *);
+
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw_qp.c b/drivers/infiniband/hw/softiwarp/siw_qp.c
new file mode 100644
index 0000000..42bc143
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_qp.c
@@ -0,0 +1,989 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *          Fredy Neeser <nfd@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/file.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+
+char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
+	[SIW_QP_STATE_IDLE]		= "IDLE",
+	[SIW_QP_STATE_RTR]		= "RTR",
+	[SIW_QP_STATE_RTS]		= "RTS",
+	[SIW_QP_STATE_CLOSING]		= "CLOSING",
+	[SIW_QP_STATE_TERMINATE]	= "TERMINATE",
+	[SIW_QP_STATE_ERROR]		= "ERROR",
+	[SIW_QP_STATE_MORIBUND]		= "MORIBUND",
+	[SIW_QP_STATE_UNDEF]		= "UNDEF"
+};
+
+
+/*
+ * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
+ * per-RDMAP message basis. Please keep the order of initializers: the array
+ * is indexed by RDMAP opcode. Each MPA length is initialized to the minimum
+ * packet size, i.e. the header length not counting the 2-byte MPA length
+ * field itself.
+ */
+struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] =
+{ {
+	.hdr_len = sizeof(struct iwarp_rdma_write),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_RDMA_WRITE,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 1,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_write
+},
+{
+	.hdr_len = sizeof(struct iwarp_rdma_rreq),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_RDMA_READ_REQ,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 0,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_rreq
+},
+{
+	.hdr_len = sizeof(struct iwarp_rdma_rresp),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_RDMA_READ_RESP,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 1,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_rresp
+},
+{
+	.hdr_len = sizeof(struct iwarp_send),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_SEND,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 0,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_send
+},
+{
+	.hdr_len = sizeof(struct iwarp_send_inv),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_SEND_INVAL,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 0,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_unsupp
+},
+{
+	.hdr_len = sizeof(struct iwarp_send),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_SEND_SE,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 0,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_send
+},
+{
+	.hdr_len = sizeof(struct iwarp_send_inv),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_SEND_SE_INVAL,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 0,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_unsupp
+},
+{
+	.hdr_len = sizeof(struct iwarp_terminate),
+	.ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
+	.ctrl.dv = DDP_VERSION,
+	.ctrl.opcode = RDMAP_TERMINATE,
+	.ctrl.rv = RDMAP_VERSION,
+	.ctrl.t = 0,
+	.ctrl.l = 1,
+	.proc_data = siw_proc_terminate
+} };
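+
+/*
+ * Dispatch sketch (illustration only): the array above is indexed by
+ * the RDMAP opcode of a received header, so the receive path can
+ * select the per-message handler roughly as
+ *
+ *	rv = iwarp_pktinfo[rctx->hdr.ctrl.opcode].proc_data(qp, rctx);
+ *
+ * with the actual dispatch (and the exact proc_data signature) living
+ * in the receive code (siw_qp_rx.c).
+ */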
+
+
+static void siw_qp_llp_data_ready(struct sock *sk, int flags)
+{
+	struct siw_qp		*qp;
+
+	read_lock(&sk->sk_callback_lock);
+
+	if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) {
+		dprint(DBG_ON, " No QP: %p\n", sk->sk_user_data);
+		goto done;
+	}
+	qp = sk_to_qp(sk);
+
+	if (down_read_trylock(&qp->state_lock)) {
+		read_descriptor_t	rd_desc = {.arg.data = qp, .count = 1};
+
+		dprint(DBG_SK|DBG_RX, "(QP%d): "
+			"state (before tcp_read_sock)=%d, flags=%x\n",
+			QP_ID(qp), qp->attrs.state, flags);
+
+		if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
+			/*
+			 * Implements data receive operation during
+			 * socket callback. TCP gracefully catches
+			 * the case where there is nothing to receive
+			 * (not calling siw_tcp_rx_data() then).
+			 */
+			tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
+
+		dprint(DBG_SK|DBG_RX, "(QP%d): "
+			"state (after tcp_read_sock)=%d, flags=%x\n",
+			QP_ID(qp), qp->attrs.state, flags);
+
+		up_read(&qp->state_lock);
+	} else {
+		dprint(DBG_SK|DBG_RX, "(QP%d): "
+			"Unable to acquire state_lock\n", QP_ID(qp));
+	}
+done:
+	read_unlock(&sk->sk_callback_lock);
+}
+
+
+void siw_qp_llp_close(struct siw_qp *qp)
+{
+	dprint(DBG_CM, "(QP%d): Enter: SIW QP state = %s, cep=0x%p\n",
+		QP_ID(qp), siw_qp_state_to_string[qp->attrs.state],
+		qp->cep);
+
+	down_write(&qp->state_lock);
+
+	qp->rx_ctx.rx_suspend = 1;
+	qp->tx_ctx.tx_suspend = 1;
+	qp->attrs.llp_stream_handle = NULL;
+
+	switch (qp->attrs.state) {
+
+	case SIW_QP_STATE_RTS:
+	case SIW_QP_STATE_RTR:
+	case SIW_QP_STATE_IDLE:
+	case SIW_QP_STATE_TERMINATE:
+
+		qp->attrs.state = SIW_QP_STATE_ERROR;
+
+		break;
+	/*
+	 * SIW_QP_STATE_CLOSING:
+	 *
+	 * This is a forced close. Shall the QP be moved to
+	 * ERROR or IDLE?
+	 */
+	case SIW_QP_STATE_CLOSING:
+		if (!TX_IDLE(qp))
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+		else
+			qp->attrs.state = SIW_QP_STATE_IDLE;
+
+		break;
+
+	default:
+		dprint(DBG_CM, " No state transition needed: %d\n",
+			qp->attrs.state);
+		break;
+	}
+	siw_sq_flush(qp);
+	siw_rq_flush(qp);
+
+	up_write(&qp->state_lock);
+
+	dprint(DBG_CM, "(QP%d): Exit: SIW QP state = %s\n",
+		QP_ID(qp), siw_qp_state_to_string[qp->attrs.state]);
+}
+
+
+/*
+ * socket callback routine informing about newly available send space.
+ * Function schedules SQ work for processing SQ items.
+ */
+static void siw_qp_llp_write_space(struct sock *sk)
+{
+	struct siw_qp	*qp = sk_to_qp(sk);
+
+	/*
+	 * TODO:
+	 * Resemble sk_stream_write_space() logic for iWARP constraints:
+	 * Clear SOCK_NOSPACE only if sendspace may hold some reasonable
+	 * sized FPDU.
+	 */
+#ifdef SIW_TX_FULLSEGS
+	struct socket *sock = sk->sk_socket;
+	if (sk_stream_wspace(sk) >= (int)qp->tx_ctx.fpdu_len && sock) {
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+		siw_sq_queue_work(qp);
+	}
+#else
+	sk_stream_write_space(sk);
+
+	if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+		siw_sq_queue_work(qp);
+#endif
+}
+
+static void siw_qp_socket_assoc(struct socket *s, struct siw_qp *qp)
+{
+	struct sock *sk = s->sk;
+
+	write_lock_bh(&sk->sk_callback_lock);
+
+	qp->attrs.llp_stream_handle = s;
+	sk->sk_data_ready = siw_qp_llp_data_ready;
+	sk->sk_write_space = siw_qp_llp_write_space;
+
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+
+static int siw_qp_irq_init(struct siw_qp *qp, int i)
+{
+	struct siw_wqe *wqe;
+
+	dprint(DBG_CM|DBG_WR, "(QP%d): irq size: %d\n", QP_ID(qp), i);
+
+	INIT_LIST_HEAD(&qp->wqe_freelist);
+
+	/*
+	 * Give the IRD one extra entry: after sending an RResponse,
+	 * the peer may issue another RRequest before that RResponse's
+	 * WQE goes back to the freelist.
+	 */
+	i++;
+
+	while (i--) {
+		wqe = kzalloc(sizeof(struct siw_wqe), GFP_KERNEL);
+		if (!wqe) {
+			siw_qp_freeq_flush(qp);
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(&wqe->list);
+		wr_type(wqe) = SIW_WR_RDMA_READ_RESP;
+		list_add(&wqe->list, &qp->wqe_freelist);
+	}
+	return 0;
+}
+
+
+static void siw_send_terminate(struct siw_qp *qp)
+{
+	struct iwarp_terminate	pkt;
+
+	memset(&pkt, 0, sizeof pkt);
+	/*
+	 * TODO: send TERMINATE
+	 */
+	dprint(DBG_CM, "(QP%d): Todo\n", QP_ID(qp));
+}
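+
+/*
+ * Overview of the verbs-driven QP state transitions handled by
+ * siw_qp_modify() below (transitions not listed are ignored or
+ * rejected):
+ *
+ *	IDLE/RTR  -> RTS       : connection goes live (needs LLP handle
+ *	                         and MPA attributes)
+ *	IDLE/RTR  -> ERROR     : flush RQ, drop connection
+ *	RTS       -> CLOSING   : graceful close; ERROR instead if TX
+ *	                         is not idle
+ *	RTS       -> TERMINATE : send TERMINATE, drop connection
+ *	RTS       -> ERROR     : emergency close, flush SQ and RQ
+ *	TERMINATE -> ERROR     : flush, final state
+ *	CLOSING   -> IDLE      : clean close completed (TX idle)
+ *	CLOSING   -> ERROR     : LLP close not yet seen by user
+ */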
+
+
+/*
+ * caller holds qp->state_lock
+ */
+int
+siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
+	      enum siw_qp_attr_mask mask)
+{
+	int	drop_conn, rv;
+
+	if (!mask)
+		return 0;
+
+	dprint(DBG_CM, "(QP%d)\n", QP_ID(qp));
+
+	if (mask != SIW_QP_ATTR_STATE) {
+		/*
+		 * changes of qp attributes (maybe state, too)
+		 */
+		if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
+
+			if (attrs->flags & SIW_RDMA_BIND_ENABLED)
+				qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
+			else
+				qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
+
+			if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
+				qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
+			else
+				qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
+
+			if (attrs->flags & SIW_RDMA_READ_ENABLED)
+				qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
+			else
+				qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
+
+		}
+		/*
+		 * TODO: what else ??
+		 */
+	}
+	if (!(mask & SIW_QP_ATTR_STATE))
+		return 0;
+
+	dprint(DBG_CM, "(QP%d): SIW QP state: %s => %s\n", QP_ID(qp),
+		siw_qp_state_to_string[qp->attrs.state],
+		   siw_qp_state_to_string[attrs->state]);
+
+	drop_conn = 0;
+
+	switch (qp->attrs.state) {
+
+	case SIW_QP_STATE_IDLE:
+	case SIW_QP_STATE_RTR:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_RTS:
+
+			if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
+				dprint(DBG_ON, "(QP%d): socket?\n", QP_ID(qp));
+				return -EINVAL;
+			}
+			if (!(mask & SIW_QP_ATTR_MPA)) {
+				dprint(DBG_ON, "(QP%d): MPA?\n", QP_ID(qp));
+				return -EINVAL;
+			}
+			dprint(DBG_CM, "(QP%d): Enter RTS: "
+				"peer 0x%08x, local 0x%08x\n", QP_ID(qp),
+				qp->cep->llp.raddr.sin_addr.s_addr,
+				qp->cep->llp.laddr.sin_addr.s_addr);
+			/*
+			 * Initialize global iWARP TX state
+			 */
+			qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
+			qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
+			qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
+
+			/*
+			 * Initialize global iWARP RX state
+			 */
+			qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
+			qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
+			qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
+
+			/*
+			 * init IRD freequeue, caller has already checked
+			 * limits
+			 */
+			rv = siw_qp_irq_init(qp, attrs->ird);
+			if (rv)
+				return rv;
+
+			atomic_set(&qp->orq_space, attrs->ord);
+
+			qp->attrs.ord = attrs->ord;
+			qp->attrs.ird = attrs->ird;
+			qp->attrs.mpa = attrs->mpa;
+			/*
+			 * move socket rx and tx under qp's control
+			 */
+			siw_qp_socket_assoc(attrs->llp_stream_handle, qp);
+
+			qp->attrs.state = SIW_QP_STATE_RTS;
+			/*
+			 * set initial mss
+			 */
+			qp->tx_ctx.tcp_seglen =
+				get_tcp_mss(attrs->llp_stream_handle->sk);
+
+			break;
+
+		case SIW_QP_STATE_ERROR:
+			siw_rq_flush(qp);
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+			drop_conn = 1;
+			break;
+
+		case SIW_QP_STATE_RTR:
+			/* ignore */
+			break;
+
+		default:
+			dprint(DBG_CM,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+			break;
+		}
+		break;
+
+	case SIW_QP_STATE_RTS:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_CLOSING:
+			/*
+			 * Verbs: move to IDLE if SQ and ORQ are empty.
+			 * Move to ERROR otherwise. But first of all we must
+			 * close the connection. So we keep CLOSING or ERROR
+			 * as a transient state, schedule connection drop work
+			 * and wait for the socket state change upcall to
+			 * come back closed.
+			 */
+			if (TX_IDLE(qp))
+				qp->attrs.state = SIW_QP_STATE_CLOSING;
+			else {
+				qp->attrs.state = SIW_QP_STATE_ERROR;
+				siw_sq_flush(qp);
+			}
+			siw_rq_flush(qp);
+
+			drop_conn = 1;
+			break;
+
+		case SIW_QP_STATE_TERMINATE:
+			qp->attrs.state = SIW_QP_STATE_TERMINATE;
+			siw_send_terminate(qp);
+			drop_conn = 1;
+
+			break;
+
+		case SIW_QP_STATE_ERROR:
+			/*
+			 * This is an emergency close.
+			 *
+			 * Any in-progress transmit operation will get
+			 * cancelled. This will likely result in a
+			 * protocol failure if a TX operation is in
+			 * transit. The caller could unconditionally wait
+			 * to give the current operation a chance to
+			 * complete.
+			 * Esp., how to handle the non-empty IRQ case?
+			 * The peer was asking for data transfer at a valid
+			 * point in time.
+			 */
+			siw_sq_flush(qp);
+			siw_rq_flush(qp);
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+			drop_conn = 1;
+
+			break;
+
+		default:
+			dprint(DBG_ON,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+			break;
+		}
+		break;
+
+	case SIW_QP_STATE_TERMINATE:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_ERROR:
+			siw_rq_flush(qp);
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+
+			if (!TX_IDLE(qp))
+				siw_sq_flush(qp);
+
+			break;
+
+		default:
+			dprint(DBG_ON,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+		}
+		break;
+
+	case SIW_QP_STATE_CLOSING:
+
+		switch (attrs->state) {
+
+		case SIW_QP_STATE_IDLE:
+			BUG_ON(!TX_IDLE(qp));
+			qp->attrs.state = SIW_QP_STATE_IDLE;
+
+			break;
+
+		case SIW_QP_STATE_CLOSING:
+			/*
+			 * The LLP may already have moved the QP to CLOSING
+			 * due to a graceful close initiated by the peer.
+			 */
+			break;
+
+		case SIW_QP_STATE_ERROR:
+			/*
+			 * QP was moved to CLOSING by LLP event
+			 * not yet seen by user.
+			 */
+			qp->attrs.state = SIW_QP_STATE_ERROR;
+
+			if (!TX_IDLE(qp))
+				siw_sq_flush(qp);
+
+			siw_rq_flush(qp);
+
+			break;
+
+		default:
+			dprint(DBG_CM,
+				" QP state transition undefined: %s => %s\n",
+				siw_qp_state_to_string[qp->attrs.state],
+				siw_qp_state_to_string[attrs->state]);
+			return -ECONNABORTED;
+		}
+		break;
+
+	default:
+		dprint(DBG_CM, " NOP: State: %d\n", qp->attrs.state);
+		break;
+	}
+	if (drop_conn)
+		siw_qp_cm_drop(qp, 0);
+
+	return 0;
+}
+
+struct ib_qp *siw_get_ofaqp(struct ib_device *dev, int id)
+{
+	struct siw_qp *qp =  siw_qp_id2obj(siw_dev_ofa2siw(dev), id);
+
+	dprint(DBG_OBJ, ": dev_name: %s, OFA QPID: %d, QP: %p\n",
+		dev->name, id, qp);
+	if (qp) {
+		/*
+		 * siw_qp_id2obj() increments object reference count
+		 */
+		siw_qp_put(qp);
+		dprint(DBG_OBJ, " QPID: %d\n", QP_ID(qp));
+		return &qp->ofa_qp;
+	}
+	return NULL;
+}
+
+/*
+ * siw_check_mem()
+ *
+ * Check protection domain, STAG state, access permissions and
+ * address range for memory object.
+ *
+ * @pd:		Protection Domain memory should belong to
+ * @mem:	memory to be checked
+ * @addr:	starting addr of mem
+ * @perms:	requested access permissions
+ * @len:	len of memory interval to be checked
+ *
+ */
+int siw_check_mem(struct siw_pd *pd, struct siw_mem *mem, u64 addr,
+		  enum siw_access_flags perms, int len)
+{
+	if (siw_mem2mr(mem)->pd != pd) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): PD mismatch %p : %p\n",
+			OBJ_ID(pd),
+			siw_mem2mr(mem)->pd, pd);
+
+		return -EINVAL;
+	}
+	if (mem->stag_state == STAG_INVALID) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): STAG 0x%08x invalid\n",
+			OBJ_ID(pd), OBJ_ID(mem));
+		return -EPERM;
+	}
+	/*
+	 * check access permissions
+	 */
+	if ((mem->perms & perms) < perms) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): "
+			"INSUFFICIENT permissions 0x%08x : 0x%08x\n",
+			OBJ_ID(pd), mem->perms, perms);
+		return -EPERM;
+	}
+	/*
+	 * Check address interval: the check is relaxed to allow memory
+	 * shrunk from the start address _after_ placing or fetching len
+	 * bytes.
+	 * TODO: this relaxation is probably overdone
+	 */
+	if (addr < mem->va || addr + len > mem->va + mem->len) {
+		dprint(DBG_WR|DBG_ON, "(PD%d): MEM interval len %d "
+			"[0x%016llx, 0x%016llx) out of bounds "
+			"[0x%016llx, 0x%016llx) for LKey=0x%08x\n",
+			OBJ_ID(pd), len, (unsigned long long)addr,
+			(unsigned long long)(addr + len),
+			(unsigned long long)mem->va,
+			(unsigned long long)(mem->va + mem->len),
+			OBJ_ID(mem));
+
+		return -EINVAL;
+	}
+	return 0;
+}
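+
+/*
+ * Worked example for the bounds check above (illustration): with
+ * mem->va = 0x10000 and mem->len = 0x2000, an access of addr = 0x11f00,
+ * len = 0x100 passes, since [0x11f00, 0x12000) lies within
+ * [0x10000, 0x12000); addr = 0x11f01 with the same len fails the
+ * upper bound.
+ */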
+
+/*
+ * siw_check_sge()
+ *
+ * Check SGE for access rights in given interval
+ *
+ * @pd:		Protection Domain memory should belong to
+ * @sge:	SGE to be checked
+ * @perms:	requested access permissions
+ * @off:	starting offset in SGE
+ * @len:	len of memory interval to be checked
+ *
+ * NOTE: The function references each SGE's memory object (sge->mem)
+ * if not yet done. The new reference is kept if the check succeeds
+ * and released if it fails. If sge->mem is already valid, no new
+ * lookup is done, and mem is not released if the check fails.
+ */
+int
+siw_check_sge(struct siw_pd *pd, struct siw_sge *sge,
+	      enum siw_access_flags perms, u32 off, int len)
+{
+	struct siw_dev	*dev = pd->hdr.dev;
+	struct siw_mem	*mem;
+	int		new_ref = 0, rv = 0;
+
+	if (len + off > sge->len) {
+		rv = -EPERM;
+		goto fail;
+	}
+	if (sge->mem.obj == NULL) {
+		mem = siw_mem_id2obj(dev, sge->lkey >> 8);
+		if (!mem) {
+			rv = -EINVAL;
+			goto fail;
+		}
+		sge->mem.obj = mem;
+		new_ref = 1;
+	} else {
+		mem = sge->mem.obj;
+		new_ref = 0;
+	}
+	rv = siw_check_mem(pd, mem, sge->addr + off, perms, len);
+	if (rv)
+		goto fail;
+
+	return 0;
+
+fail:
+	if (new_ref) {
+		siw_mem_put(mem);
+		sge->mem.obj = NULL;
+	}
+	return rv;
+}
+
+
+/*
+ * siw_check_sgl()
+ *
+ * Check permissions for a list of SGE's (SGL)
+ *
+ * @pd:		Protection Domain SGL should belong to
+ * @sge:	List of SGE to be checked
+ * @perms:	requested access permissions
+ * @off:	starting offset in SGL
+ * @len:	len of memory interval to be checked
+ *
+ * Function checks only subinterval of SGL described by bytelen @len,
+ * check starts with byte offset @off which must be within
+ * the length of the first SGE.
+ *
+ * The caller is responsible for keeping @len + @off within
+ * the total byte len of the SGL.
+ */
+
+int siw_check_sgl(struct siw_pd *pd, struct siw_sge *sge,
+		  enum siw_access_flags perms, u32 off, int len)
+{
+	int	rv = 0;
+
+	dprint(DBG_WR, "(PD%d): Enter\n", OBJ_ID(pd));
+
+	BUG_ON(off >= sge->len);
+
+	while (len > 0) {
+		dprint(DBG_WR, "(PD%d): sge=%p, perms=0x%x, "
+			"len=%d, off=%u, sge->len=%d\n",
+			OBJ_ID(pd), sge, perms, len, off, sge->len);
+		/*
+		 * rdma verbs: do not check stag for a zero length sge
+		 */
+		if (sge->len == 0) {
+			sge++;
+			continue;
+		}
+
+		rv = siw_check_sge(pd, sge, perms, off, sge->len - off);
+		if (rv)
+			break;
+
+		len -= sge->len - off;
+		off = 0;
+		sge++;
+	}
+	return rv;
+}
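+
+/*
+ * Worked example (illustration): for an SGL of three SGEs with
+ * lengths 100, 50 and 200 bytes, a call with off = 80 and len = 120
+ * checks bytes [80, 100) of SGE 0, then all of SGE 1 and all of
+ * SGE 2: each SGE is validated from the current offset to its end,
+ * which is slightly stricter than the requested subinterval.
+ */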
+
+int siw_crc_array(struct hash_desc *desc, u8 *start, size_t len)
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, start, len);
+	return crypto_hash_update(desc, &sg, len);
+}
+
+int siw_crc_sg(struct hash_desc *desc, struct scatterlist *sg,
+	       int off, int len)
+{
+	int rv;
+
+	if (off == 0)
+		rv = crypto_hash_update(desc, sg, len);
+	else {
+		struct scatterlist t_sg;
+
+		sg_init_table(&t_sg, 1);
+		sg_set_page(&t_sg, sg_page(sg), len, off);
+		rv = crypto_hash_update(desc, &t_sg, len);
+	}
+	return rv;
+}
+
+/*
+ * siw_qp_freeq_flush()
+ *
+ * Flush any WQE on the QP's free list
+ */
+void siw_qp_freeq_flush(struct siw_qp *qp)
+{
+	struct list_head	*pos, *n;
+	struct siw_wqe		*wqe;
+
+	dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+	if (list_empty(&qp->wqe_freelist))
+		return;
+
+	list_for_each_safe(pos, n, &qp->wqe_freelist) {
+		wqe = list_entry_wqe(pos);
+		list_del(&wqe->list);
+		kfree(wqe);
+	}
+}
+
+
+/*
+ * siw_sq_flush()
+ *
+ * Flush SQ and ORRQ entries to CQ.
+ * IRRQ entries are silently dropped.
+ *
+ * TODO: Add termination code for in-progress WQE.
+ * TODO: An in-progress WQE may have been partially
+ *       processed. It should be enforced that transmission
+ *       of a started DDP segment is completed whenever
+ *       possible.
+ *
+ * Must be called with qp state write lock held.
+ * Therefore, SQ and ORQ lock must not be taken.
+ */
+void siw_sq_flush(struct siw_qp *qp)
+{
+	struct list_head	*pos, *n;
+	struct siw_wqe		*wqe = tx_wqe(qp);
+	struct siw_cq		*cq = qp->scq;
+	int			async_event = 0;
+
+	dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+	/*
+	 * flush the in-progress wqe, if there.
+	 */
+	if (wqe) {
+		/*
+		 * TODO: Add iWARP Termination code
+		 */
+		tx_wqe(qp) = NULL;
+
+		dprint(DBG_WR,
+			" (QP%d): Flush current WQE %p, type %d\n",
+			QP_ID(qp), wqe, wr_type(wqe));
+
+		if (wr_type(wqe) == SIW_WR_RDMA_READ_RESP) {
+			siw_wqe_put(wqe);
+			wqe = NULL;
+		} else if (wr_type(wqe) != SIW_WR_RDMA_READ_REQ)
+			/*
+			 *  A RREQUEST is already on the ORRQ
+			 */
+			list_add_tail(&wqe->list, &qp->orq);
+	}
+	if (!list_empty(&qp->irq))
+		list_for_each_safe(pos, n, &qp->irq) {
+			wqe = list_entry_wqe(pos);
+			dprint(DBG_WR,
+				" (QP%d): Flush IRQ WQE %p, status %d\n",
+				QP_ID(qp), wqe, wqe->wr_status);
+			list_del(&wqe->list);
+			siw_wqe_put(wqe);
+		}
+
+	if (!list_empty(&qp->orq))
+		list_for_each_safe(pos, n, &qp->orq) {
+			wqe = list_entry_wqe(pos);
+			dprint(DBG_WR,
+				" (QP%d): Flush ORQ WQE %p, type %d,"
+				" status %d\n", QP_ID(qp), wqe, wr_type(wqe),
+				wqe->wr_status);
+			if (wqe->wr_status != SR_WR_DONE) {
+				async_event = 1;
+				wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+				wqe->wr_status = SR_WR_DONE;
+			}
+			if (cq) {
+				lock_cq(cq);
+				list_move_tail(&wqe->list, &cq->queue);
+				/* TODO: enforce CQ limits */
+				atomic_inc(&cq->qlen);
+				unlock_cq(cq);
+			} else {
+				list_del(&wqe->list);
+				siw_wqe_put(wqe);
+			}
+		}
+	if (!list_empty(&qp->sq)) {
+		async_event = 1;
+		list_for_each_safe(pos, n, &qp->sq) {
+			wqe = list_entry_wqe(pos);
+			dprint(DBG_WR,
+				" (QP%d): Flush SQ WQE %p, type %d\n",
+				QP_ID(qp), wqe, wr_type(wqe));
+			if (cq) {
+				wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+				wqe->wr_status = SR_WR_DONE;
+				lock_cq(cq);
+				list_move_tail(&wqe->list, &cq->queue);
+				/* TODO: enforce CQ limits */
+				atomic_inc(&cq->qlen);
+				unlock_cq(cq);
+			} else  {
+				list_del(&wqe->list);
+				siw_wqe_put(wqe);
+			}
+		}
+	}
+	atomic_set(&qp->sq_space, qp->attrs.sq_size);
+
+	if (wqe != NULL && cq != NULL && cq->ofa_cq.comp_handler != NULL)
+		(*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
+
+	if (async_event)
+		siw_async_ev(qp, NULL, IB_EVENT_SQ_DRAINED);
+}
+
+/*
+ * siw_rq_flush()
+ *
+ * Flush recv queue entries to cq. An in-progress WQE may have some bytes
+ * processed (wqe->processed).
+ *
+ * Must be called with qp state write lock held.
+ * Therefore, RQ lock must not be taken.
+ */
+void siw_rq_flush(struct siw_qp *qp)
+{
+	struct list_head	*pos, *n;
+	struct siw_wqe		*wqe;
+	struct siw_cq		*cq;
+
+	dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
+
+	/*
+	 * Flush an in-progress WQE, if present
+	 */
+	if (rx_wqe(qp)) {
+		if (qp->rx_ctx.hdr.ctrl.opcode != RDMAP_RDMA_WRITE)
+			list_add(&rx_wqe(qp)->list, &qp->rq);
+		else
+			siw_mem_put(rx_mem(qp));
+
+		rx_wqe(qp) = NULL;
+	}
+	if (list_empty(&qp->rq))
+		return;
+
+	cq = qp->rcq;
+
+	list_for_each_safe(pos, n, &qp->rq) {
+		wqe = list_entry_wqe(pos);
+		list_del_init(&wqe->list);
+		if (cq) {
+			wqe->wc_status = IB_WC_WR_FLUSH_ERR;
+			lock_cq(cq);
+			list_add_tail(&wqe->list, &cq->queue);
+			/* TODO: enforce CQ limits */
+			atomic_inc(&cq->qlen);
+			unlock_cq(cq);
+		} else
+			siw_wqe_put(wqe);
+
+		if (!qp->srq)
+			atomic_inc(&qp->rq_space);
+		else
+			atomic_inc(&qp->srq->space);
+
+	}
+	if (cq != NULL && cq->ofa_cq.comp_handler != NULL)
+		(*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_qp_rx.c b/drivers/infiniband/hw/softiwarp/siw_qp_rx.c
new file mode 100644
index 0000000..dd9edd4
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_qp_rx.c
@@ -0,0 +1,1493 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *          Fredy Neeser <nfd@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+
+/*
+ * ----------------------------
+ * DDP reassembly for Softiwarp
+ * ----------------------------
+ * For the ordering of transmitted DDP segments, the relevant iWARP ordering
+ * rules are as follows:
+ *
+ * - RDMAP (RFC 5040): Section 7.5, Rule 17:
+ *   "RDMA Read Response Message processing at the Remote Peer (reading
+ *    the specified Tagged Buffer) MUST be started only after the RDMA
+ *    Read Request Message has been Delivered by the DDP layer (thus,
+ *    all previous RDMA Messages have been properly submitted for
+ *    ordered Placement)."
+ *
+ * - DDP (RFC 5041): Section 5.3:
+ *   "At the Data Source, DDP:
+ *    o MUST transmit DDP Messages in the order they were submitted to
+ *      the DDP layer,
+ *    o SHOULD transmit DDP Segments within a DDP Message in increasing
+ *      MO order for Untagged DDP Messages, and in increasing TO order
+ *      for Tagged DDP Messages."
+ *
+ * Combining these rules implies that, although RDMAP does not provide
+ * ordering between operations that are generated from the two ends of an
+ * RDMAP stream, DDP *must not* transmit an RDMA Read Response Message before
+ * it has finished transmitting SQ operations that were already submitted
+ * to the DDP layer. It follows that an iWARP transmitter must fully
+ * serialize RDMAP messages belonging to the same QP.
+ *
+ * Given that a TCP socket receives DDP segments in peer transmit order,
+ * we obtain the following ordering of received DDP segments:
+ *
+ * (i)  the received DDP segments of RDMAP messages for the same QP
+ *      cannot be interleaved
+ * (ii) the received DDP segments of a single RDMAP message *should*
+ *      arrive in order.
+ *
+ * The Softiwarp transmitter obeys rule #2 in DDP Section 5.3.
+ * With this property, the "should" becomes a "must" in (ii) above,
+ * which simplifies DDP reassembly considerably.
+ * The Softiwarp receiver currently relies on this property
+ * and reports an error if DDP segments of the same RDMAP message
+ * do not arrive in sequence.
+ */
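+
+/*
+ * Worked ordering example (illustration): with SQ messages W1 (WRITE)
+ * and S1 (SEND) submitted to DDP before a peer RRequest R1 is served,
+ * the transmitter emits all DDP segments as
+ *
+ *	W1.1 ... W1.n, S1.1 ... S1.m, RResp(R1).1 ...
+ *
+ * i.e. segments of different RDMAP messages never interleave on the
+ * stream, and segments within one message carry increasing MO/TO.
+ * The receive path below relies on exactly these two properties.
+ */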
+
+static inline int siw_crc_rxhdr(struct siw_iwarp_rx *ctx)
+{
+	crypto_hash_init(&ctx->mpa_crc_hd);
+
+	return siw_crc_array(&ctx->mpa_crc_hd, (u8 *)&ctx->hdr,
+			     ctx->fpdu_part_rcvd);
+}
+
+
+/*
+ * siw_rx_umem_init()
+ *
+ * Given memory region @mr and tagged offset @t_off within @mr,
+ * resolve the corresponding ib_umem_chunk memory chunk pointer
+ * and update receive context variables to point at the receive
+ * position. Returns 0 on success, a negative value otherwise.
+ *
+ * NOTE: This function expects virtual addresses.
+ * TODO: Function needs generalization to support relative addressing,
+ *       aka "ZBVA".
+ *
+ * @rctx:	Receive Context to be updated
+ * @mr:		Memory Region
+ * @t_off:	Offset within Memory Region
+ *
+ */
+static int siw_rx_umem_init(struct siw_iwarp_rx *rctx, struct siw_mr *mr,
+			    u64 t_off)
+{
+	struct ib_umem_chunk	*chunk;
+	u64			off_mr;   /* offset into MR */
+	int			psge_idx; /* Index of PSGE */
+
+	off_mr = t_off - (mr->mem.va & PAGE_MASK);
+	/*
+	 * Equivalent to
+	 * off_mr = t_off - mr->mem.va;
+	 * off_mr += mr->umem->offset;
+	 */
+
+	/* Skip pages not referenced by t_off */
+	psge_idx = off_mr >> PAGE_SHIFT;
+
+	list_for_each_entry(chunk, &mr->umem->chunk_list, list) {
+		if (psge_idx < chunk->nents)
+			break;
+		psge_idx -= chunk->nents;
+	}
+	if (psge_idx >= chunk->nents) {
+		dprint(DBG_MM|DBG_ON, "(QP%d): Short chunk list\n",
+			RX_QPID(rctx));
+		return -EINVAL;
+	}
+	rctx->pg_idx = psge_idx;
+	rctx->pg_off = off_mr & ~PAGE_MASK;
+	rctx->umem_chunk = chunk;
+
+	dprint(DBG_MM, "(QP%d): New chunk, idx %d\n", RX_QPID(rctx), psge_idx);
+	return 0;
+}
+
+
+/*
+ * siw_rx_umem()
+ *
+ * Receive data of @len into the target referenced by @rctx.
+ * This function does not check whether the umem covers the
+ * requested @len; the caller must ensure that. @umem_ends
+ * indicates whether the routine should stop updating the chunk
+ * position pointers once the current receive has been placed.
+ *
+ * @rctx:	Receive Context
+ * @len:	Number of bytes to place
+ * @umem_ends:	1, if the rctx chunk pointer must not be advanced past @len.
+ */
+static int siw_rx_umem(struct siw_iwarp_rx *rctx, int len, int umem_ends)
+{
+	struct scatterlist	*p_list;
+	void			*dest;
+	struct ib_umem_chunk    *chunk = rctx->umem_chunk;
+	int			pg_off = rctx->pg_off,
+				copied = 0,
+				bytes,
+				rv;
+
+	while (len) {
+		bytes  = min(len, (int)PAGE_SIZE - pg_off);
+		p_list = &chunk->page_list[rctx->pg_idx];
+
+		dest = kmap_atomic(sg_page(p_list), KM_SOFTIRQ0);
+
+		rv = skb_copy_bits(rctx->skb, rctx->skb_offset, dest + pg_off,
+				   bytes);
+
+		dprint(DBG_RX, "(QP%d): Page #%d, "
+			"bytes=%u, rv=%d returned by skb_copy_bits()\n",
+			RX_QPID(rctx), rctx->pg_idx, bytes, rv);
+
+		if (likely(!rv)) {
+			if (rctx->crc_enabled)
+				rv = siw_crc_sg(&rctx->mpa_crc_hd, p_list,
+						pg_off, bytes);
+
+			rctx->skb_offset += bytes;
+			copied += bytes;
+			len -= bytes;
+			pg_off += bytes;
+		}
+
+		kunmap_atomic(dest, KM_SOFTIRQ0);
+
+		if (unlikely(rv)) {
+			rctx->skb_copied += copied;
+			rctx->skb_new -= copied;
+			copied = -EFAULT;
+
+			dprint(DBG_RX|DBG_ON, "(QP%d): failed with %d\n",
+				RX_QPID(rctx), rv);
+
+			goto out;
+		}
+		if (pg_off == PAGE_SIZE) {
+			/*
+			 * end of page
+			 */
+			pg_off = 0;
+			/*
+			 * reference next page chunk if
+			 * - all pages in chunk used AND
+			 * - current loop fills more into this umem
+			 *   OR the next receive will go into this umem
+			 *   starting at the position where we are leaving
+			 *   the routine.
+			 */
+			if (++rctx->pg_idx == chunk->nents &&
+				(len > 0 || !umem_ends)) {
+
+				rctx->pg_idx = 0;
+				chunk = mem_chunk_next(chunk);
+			}
+		}
+	}
+	/*
+	 * store chunk position for resume
+	 */
+	rctx->umem_chunk = chunk;
+	rctx->pg_off = pg_off;
+
+	rctx->skb_copied += copied;
+	rctx->skb_new -= copied;
+out:
+	return copied;
+}
+
+
+/*
+ * siw_rresp_check_ntoh()
+ *
+ * Check incoming RRESP fragment header against expected
+ * header values and update expected values for potential next
+ * fragment.
+ *
+ * NOTE: This function must be called only if an RRESP DDP segment
+ *       starts but not for fragmented consecutive pieces of an
+ *       already started DDP segment.
+ */
+static inline int siw_rresp_check_ntoh(struct siw_iwarp_rx *rctx)
+{
+	struct iwarp_rdma_rresp	*rresp = &rctx->hdr.rresp;
+	struct siw_wqe		*wqe = rctx->dest.wqe;
+
+	rresp->sink_stag = be32_to_cpu(rresp->sink_stag);
+	rresp->sink_to   = be64_to_cpu(rresp->sink_to);
+
+	if (rctx->first_ddp_seg) {
+		rctx->ddp_stag = wqe->wr.rread.sge[0].lkey;
+		rctx->ddp_to   = wqe->wr.rread.sge[0].addr;
+	}
+	if (rctx->ddp_stag != rresp->sink_stag) {
+		dprint(DBG_RX|DBG_ON,
+			" received STAG=%08x, expected STAG=%08x\n",
+			rresp->sink_stag, rctx->ddp_stag);
+		/*
+		 * Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
+		 */
+		return -EINVAL;
+	}
+	if (rctx->ddp_to != rresp->sink_to) {
+		dprint(DBG_RX|DBG_ON,
+			" received TO=%016llx, expected TO=%016llx\n",
+			(unsigned long long)rresp->sink_to,
+			(unsigned long long)rctx->ddp_to);
+		/*
+		 * Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
+		 */
+		return -EINVAL;
+	}
+	if (rctx->more_ddp_segs)
+		rctx->ddp_to += rctx->fpdu_part_rem;
+
+	else if (wqe->processed + rctx->fpdu_part_rem != wqe->bytes) {
+		dprint(DBG_RX|DBG_ON,
+			" RRESP length does not match RREQ, "
+			"peer sent=%d, expected %d\n",
+			wqe->processed + rctx->fpdu_part_rem, wqe->bytes);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * siw_write_check_ntoh()
+ *
+ * Check incoming WRITE fragment header against expected
+ * header values and update expected values for potential next
+ * fragment
+ *
+ * NOTE: This function must be called only if a WRITE DDP segment
+ *       starts but not for fragmented consecutive pieces of an
+ *       already started DDP segment.
+ */
+static inline int siw_write_check_ntoh(struct siw_iwarp_rx *rctx)
+{
+	struct iwarp_rdma_write	*write = &rctx->hdr.rwrite;
+
+	write->sink_stag = be32_to_cpu(write->sink_stag);
+	write->sink_to   = be64_to_cpu(write->sink_to);
+
+	if (rctx->first_ddp_seg) {
+		rctx->ddp_stag = write->sink_stag;
+		rctx->ddp_to   = write->sink_to;
+	} else {
+		if (rctx->ddp_stag != write->sink_stag) {
+			dprint(DBG_RX|DBG_ON,
+				" received STAG=%08x, expected STAG=%08x\n",
+				write->sink_stag, rctx->ddp_stag);
+			/*
+			 * Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
+			 */
+			return -EINVAL;
+		}
+		if (rctx->ddp_to !=  write->sink_to) {
+			dprint(DBG_RX|DBG_ON,
+				" received TO=%016llx, expected TO=%016llx\n",
+				(unsigned long long)write->sink_to,
+				(unsigned long long)rctx->ddp_to);
+			/*
+			 * Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
+			 */
+			return -EINVAL;
+		}
+	}
+	/*
+	 * Update expected target offset for next incoming DDP segment
+	 */
+	if (rctx->more_ddp_segs != 0)
+		rctx->ddp_to += rctx->fpdu_part_rem;
+
+	return 0;
+}
+
+/*
+ * siw_send_check_ntoh()
+ *
+ * Check incoming SEND fragment header against expected
+ * header values and update expected MSN if no next
+ * fragment expected
+ *
+ * NOTE: This function must be called only if a SEND DDP segment
+ *       starts but not for fragmented consecutive pieces of an
+ *       already started DDP segment.
+ */
+static inline int siw_send_check_ntoh(struct siw_iwarp_rx *rctx)
+{
+	struct iwarp_send	*send = &rctx->hdr.send;
+	struct siw_wqe		*wqe = rctx->dest.wqe;
+
+	send->ddp_msn = be32_to_cpu(send->ddp_msn);
+	send->ddp_mo  = be32_to_cpu(send->ddp_mo);
+	send->ddp_qn  = be32_to_cpu(send->ddp_qn);
+
+	if (send->ddp_qn != RDMAP_UNTAGGED_QN_SEND) {
+		dprint(DBG_RX|DBG_ON, " Invalid DDP QN %d for SEND\n",
+			send->ddp_qn);
+		return -EINVAL;
+	}
+	if (send->ddp_msn != rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]) {
+		dprint(DBG_RX|DBG_ON, " received MSN=%d, expected MSN=%d\n",
+			send->ddp_msn, rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
+		/*
+		 * TODO: Error handling
+		 * async_event= RI_EVENT_QP_RQ_PROTECTION_ERROR_MSN_GAP;
+		 * cmpl_status= RI_WC_STATUS_LOCAL_QP_CATASTROPHIC;
+		 */
+		return -EINVAL;
+	}
+	if (send->ddp_mo != wqe->processed) {
+		dprint(DBG_RX|DBG_ON, " Received MO=%u, expected MO=%u\n",
+			send->ddp_mo, wqe->processed);
+		/*
+		 * Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
+		 */
+		return -EINVAL;
+	}
+	if (rctx->first_ddp_seg) {
+		/* initialize user memory write position */
+		rctx->sge_idx = 0;
+		rctx->sge_off = 0;
+	}
+	if (wqe->bytes < wqe->processed + rctx->fpdu_part_rem) {
+		dprint(DBG_RX|DBG_ON, " Receive space short: %d < %d\n",
+			wqe->bytes - wqe->processed, rctx->fpdu_part_rem);
+		wqe->wc_status = IB_WC_LOC_LEN_ERR;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static inline struct siw_wqe *siw_get_rqe(struct siw_qp *qp)
+{
+	struct siw_wqe	*wqe = NULL;
+
+	if (!qp->srq) {
+		lock_rq(qp);
+		if (!list_empty(&qp->rq)) {
+			wqe = list_first_wqe(&qp->rq);
+			list_del_init(&wqe->list);
+			unlock_rq(qp);
+		} else {
+			unlock_rq(qp);
+			dprint(DBG_RX, " QP(%d): RQ empty!\n", QP_ID(qp));
+		}
+	} else {
+		wqe = siw_srq_fetch_wqe(qp);
+		if (!wqe)
+			dprint(DBG_RX, " QP(%d): SRQ empty!\n", QP_ID(qp));
+	}
+	return wqe;
+}
+
+
+/*
+ * siw_proc_send:
+ *
+ * Process one incoming SEND and place data into memory referenced by
+ * receive wqe.
+ *
+ * Function supports partially received sends (suspending/resuming
+ * current receive wqe processing)
+ *
+ * return value:
+ *	0:       reached the end of a DDP segment
+ *	-EAGAIN: to be called again to finish the DDP segment
+ */
+int siw_proc_send(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	struct siw_wqe	*wqe;
+	struct siw_sge	*sge;
+	struct siw_mr	*mr;
+	u32		data_bytes,	/* all data bytes available */
+			rcvd_bytes;	/* sum of data bytes rcvd */
+	int		rv = 0;
+
+	if (rctx->first_ddp_seg) {
+		WARN_ON(rx_wqe(qp) != NULL);
+
+		wqe = siw_get_rqe(qp);
+		if (!wqe)
+			return -ENOENT;
+
+		rx_wqe(qp) = wqe;
+		wqe->wr_status = SR_WR_INPROGRESS;
+	} else  {
+		wqe = rx_wqe(qp);
+		if (!wqe) {
+			/*
+			 * this is a siw bug!
+			 */
+			dprint(DBG_ON, "QP(%d): RQ failure\n", QP_ID(qp));
+			return -EPROTO;
+		}
+	}
+	if (rctx->state == SIW_GET_DATA_START) {
+		rv = siw_send_check_ntoh(rctx);
+		if (rv) {
+			siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
+			return rv;
+		}
+		if (!rctx->fpdu_part_rem) /* zero length SEND */
+			return 0;
+	}
+	data_bytes = min(rctx->fpdu_part_rem, rctx->skb_new);
+	rcvd_bytes = 0;
+
+	while (data_bytes) {
+		struct siw_pd	*pd;
+		u32	sge_bytes;	/* data bytes avail for SGE */
+		int	umem_ends;	/* 1 if umem ends with current rcv */
+
+		sge = &wqe->wr.sgl.sge[rctx->sge_idx];
+
+		if (!sge->len) {
+			/* just skip empty sge's */
+			rctx->sge_idx++;
+			rctx->sge_off = 0;
+			continue;
+		}
+		sge_bytes = min(data_bytes, sge->len - rctx->sge_off);
+
+		/*
+		 * check with QP's PD if no SRQ present, SRQ's PD otherwise
+		 */
+		pd = qp->srq == NULL ? qp->pd : qp->srq->pd;
+
+		rv = siw_check_sge(pd, sge, SR_MEM_LWRITE, rctx->sge_off,
+				   sge_bytes);
+		if (rv) {
+			siw_async_ev(qp, NULL, IB_EVENT_QP_ACCESS_ERR);
+			break;
+		}
+		mr = siw_mem2mr(sge->mem.obj);
+
+		if (rctx->sge_off == 0) {
+			/*
+			 * started a new sge: update receive pointers
+			 */
+			rv = siw_rx_umem_init(rctx, mr, sge->addr);
+			if (rv)
+				break;
+		}
+		/*
+		 * Are we going to finish placing
+		 * - the last fragment of the current SGE or
+		 * - the last DDP segment (L=1) of the current RDMAP message?
+		 *
+		 * siw_rx_umem() must advance the umem page/chunk position
+		 * after a successful receive only if receiving into the
+		 * current umem continues. The umem ends, if:
+		 * - current SGE gets completely filled, OR
+		 * - current MPA FPDU is last AND gets consumed now
+		 */
+		umem_ends = ((sge_bytes + rctx->sge_off == sge->len) ||
+			      (!rctx->more_ddp_segs &&
+			       rctx->fpdu_part_rcvd + sge_bytes ==
+					rctx->fpdu_part_rem)) ? 1 : 0;
+
+		rv = siw_rx_umem(rctx, sge_bytes, umem_ends);
+		if (rv != sge_bytes) {
+			/*
+			 * siw_rx_umem() must have updated
+			 * skb_new and skb_copied
+			 */
+			wqe->processed += rcvd_bytes;
+			return -EINVAL;
+		}
+		rctx->sge_off += rv;
+
+		if (rctx->sge_off == sge->len) {
+			rctx->sge_idx++;
+			rctx->sge_off = 0;
+		}
+		data_bytes -= rv;
+		rcvd_bytes += rv;
+
+		rctx->fpdu_part_rem -= rv;
+		rctx->fpdu_part_rcvd += rv;
+	}
+	wqe->processed += rcvd_bytes;
+
+	if (!rctx->fpdu_part_rem)
+		return 0;
+
+	return (rv < 0) ? rv : -EAGAIN;
+}
+
+/*
+ * siw_proc_write:
+ *
+ * Place incoming WRITE after referencing and checking target buffer.
+ *
+ * Function supports partially received WRITEs (suspending/resuming
+ * current receive processing)
+ *
+ * return value:
+ *	0:       reached the end of a DDP segment
+ *	-EAGAIN: to be called again to finish the DDP segment
+ */
+
+int siw_proc_write(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	struct siw_dev		*dev = qp->hdr.dev;
+	struct iwarp_rdma_write	*write = &rctx->hdr.rwrite;
+	struct siw_mem		*mem;
+	int			bytes,
+				last_write,
+				rv;
+
+	if (rctx->state == SIW_GET_DATA_START) {
+
+		if (!rctx->fpdu_part_rem) /* zero length WRITE */
+			return 0;
+
+		rv = siw_write_check_ntoh(rctx);
+		if (rv) {
+			siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
+			return rv;
+		}
+	}
+	bytes = min(rctx->fpdu_part_rem, rctx->skb_new);
+
+	/*
+	 * NOTE: bytes > 0 is always true, since this routine is
+	 * only called if data is available.
+	 */
+	if (rctx->first_ddp_seg) {
+		/* DEBUG Code, to be removed */
+		if (rx_mem(qp) != 0) {
+			dprint(DBG_RX|DBG_ON, "(QP%d): Stale rctx state!\n",
+				QP_ID(qp));
+			return -EFAULT;
+		}
+		rx_mem(qp) = siw_mem_id2obj(dev, rctx->ddp_stag >> 8);
+	}
+	if (rx_mem(qp) == NULL) {
+		dprint(DBG_RX|DBG_ON, "(QP%d): "
+			"Sink STag not found or invalid,  STag=0x%08x\n",
+			QP_ID(qp), rctx->ddp_stag);
+		return -EINVAL;
+	}
+	mem = rx_mem(qp);
+	/*
+	 * Rtag not checked against mem's tag again because
+	 * hdr check guarantees same tag as before if fragmented
+	 */
+	rv = siw_check_mem(qp->pd, mem, write->sink_to + rctx->fpdu_part_rcvd,
+			   SR_MEM_RWRITE, bytes);
+	if (rv) {
+		siw_async_ev(qp, NULL, IB_EVENT_QP_ACCESS_ERR);
+		return rv;
+	}
+	if (rctx->first_ddp_seg) {
+		rv = siw_rx_umem_init(rctx, siw_mem2mr(mem), write->sink_to);
+		if (rv)
+			return -EINVAL;
+
+	} else if (!rctx->umem_chunk) {
+		/*
+		 * This should never happen.
+		 *
+		 * TODO: Remove tentative debug aid.
+		 */
+		dprint(DBG_RX|DBG_ON, "(QP%d): "
+			"Umem chunk not resolved!\n", QP_ID(qp));
+		return -EINVAL;
+	}
+	/*
+	 * Are we going to place the last piece of the last
+	 * DDP segment of the current RDMAP message?
+	 *
+	 * It is last if:
+	 * - rctx->fpdu_part_rem <= rctx->skb_new AND
+	 * - payload_rem (of current DDP segment) <= rctx->skb_new
+	 */
+	last_write = ((rctx->fpdu_part_rem <= rctx->skb_new) &&
+		      !rctx->more_ddp_segs) ? 1 : 0;
+
+	rv = siw_rx_umem(rctx, bytes, last_write);
+	if (rv != bytes)
+		return -EINVAL;
+
+	rctx->fpdu_part_rem -= rv;
+	rctx->fpdu_part_rcvd += rv;
+
+	if (!rctx->fpdu_part_rem)
+		return 0;
+
+	return (rv < 0) ? rv : -EAGAIN;
+}
+
+/*
+ * Inbound RREQs cannot carry user data.
+ */
+int siw_proc_rreq(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	if (!rctx->fpdu_part_rem)
+		return 0;
+
+	dprint(DBG_ON|DBG_RX, "(QP%d): RREQ with MPA len %d\n", QP_ID(qp),
+		rctx->hdr.ctrl.mpa_len);
+
+	return -EPROTO;
+}
+
+/*
+ * siw_init_rresp:
+ *
+ * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
+ * Put it at the tail of the IRQ, if there is another WQE currently in
+ * transmit processing. If not, make it the current WQE to be processed
+ * and schedule transmit processing.
+ *
+ * Can be called from softirq context and from process
+ * context (RREAD socket loopback case!)
+ *
+ * return value:
+ *	0:      success,
+ *		failure code otherwise
+ */
+
+int siw_init_rresp(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	struct siw_wqe 	*rsp;
+
+	rsp = siw_wqe_get(qp, SIW_WR_RDMA_READ_RESP);
+	if (rsp) {
+		rsp->wr.rresp.sge.len = be32_to_cpu(rctx->hdr.rreq.read_size);
+		rsp->bytes = rsp->wr.rresp.sge.len;	/* redundant */
+		rsp->processed = 0;
+
+		rsp->wr.rresp.sge.addr = be64_to_cpu(rctx->hdr.rreq.source_to);
+		rsp->wr.rresp.num_sge = rsp->bytes ? 1 : 0;
+
+		rsp->wr.rresp.sge.mem.obj = NULL;	/* defer lookup */
+		rsp->wr.rresp.sge.lkey =
+			be32_to_cpu(rctx->hdr.rreq.source_stag);
+
+		rsp->wr.rresp.raddr = be64_to_cpu(rctx->hdr.rreq.sink_to);
+		rsp->wr.rresp.rtag = rctx->hdr.rreq.sink_stag; /* NBO */
+
+	} else {
+		dprint(DBG_RX|DBG_ON, "(QP%d): IRD exceeded!\n", QP_ID(qp));
+		return -EPROTO;
+	}
+	rsp->wr_status = SR_WR_QUEUED;
+
+	/*
+	 * Insert into IRQ
+	 *
+	 * TODO: Revisit ordering of genuine SQ WRs and Read Response
+	 * pseudo-WRs. RDMAP specifies that there is no ordering among
+	 * the two directions of transmission, so there is a degree of
+	 * freedom.
+	 *
+	 * The current logic favours Read Responses over SQ work requests
+	 * that are queued but not already in progress.
+	 */
+	lock_sq(qp);
+	if (!tx_wqe(qp)) {
+		tx_wqe(qp) = rsp;
+		unlock_sq(qp);
+		/*
+		 * schedule TX work even if the SQ was suspended due to the
+		 * ORD limit: sending RRESPONSEs is always OK (and may even
+		 * prevent the peer application from blocking)
+		 */
+		siw_sq_queue_work(qp);
+	} else {
+		list_add_tail(&rsp->list, &qp->irq);
+		unlock_sq(qp);
+	}
+	return 0;
+}
+
+/*
+ * siw_proc_rresp:
+ *
+ * Place incoming RRESP data into memory referenced by RREQ WQE.
+ *
+ * Function supports partially received RRESP's (suspending/resuming
+ * current receive processing)
+ */
+int siw_proc_rresp(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	struct siw_wqe	*wqe;
+	struct siw_mr	*mr;
+	struct siw_sge	*sge;
+	int		bytes,
+			is_last,
+			rv;
+
+	if (rctx->first_ddp_seg) {
+		WARN_ON(rx_wqe(qp) != NULL);
+		/*
+		 * fetch pending RREQ from orq
+		 */
+		lock_orq(qp);
+		if (!list_empty(&qp->orq)) {
+			wqe = list_first_entry(&qp->orq, struct siw_wqe, list);
+			list_del_init(&wqe->list);
+		} else {
+			unlock_orq(qp);
+			dprint(DBG_RX|DBG_ON, "(QP%d): ORQ empty\n",
+				QP_ID(qp));
+			/*
+			 * TODO: Should generate an async error
+			 */
+			rv = -ENODATA; /* or -ENOENT ? */
+			goto done;
+		}
+		unlock_orq(qp);
+
+		rx_wqe(qp) = wqe;
+
+		if (wr_type(wqe) != SIW_WR_RDMA_READ_REQ || wqe->processed) {
+			WARN_ON(wqe->processed);
+			WARN_ON(wr_type(wqe) != SIW_WR_RDMA_READ_REQ);
+			rv = -EINVAL;
+			goto done;
+		}
+
+		wqe->wr_status = SR_WR_INPROGRESS;
+
+		rv = siw_rresp_check_ntoh(rctx);
+		if (rv) {
+			siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
+			goto done;
+		}
+	} else {
+		wqe = rx_wqe(qp);
+		if (!wqe) {
+			WARN_ON(1);
+			rv = -ENODATA;
+			goto done;
+		}
+	}
+	if (!rctx->fpdu_part_rem) /* zero length RRESPONSE */
+		return 0;
+
+	bytes = min(rctx->fpdu_part_rem, rctx->skb_new);
+	sge = wqe->wr.rread.sge; /* there is only one */
+
+	/*
+	 * check target memory which resolves memory on first fragment
+	 */
+	rv = siw_check_sge(qp->pd, sge, SR_MEM_LWRITE, wqe->processed, bytes);
+	if (rv) {
+		dprint(DBG_RX|DBG_ON, "(QP%d): siw_check_sge failed: %d\n",
+			QP_ID(qp), rv);
+		wqe->wc_status = IB_WC_LOC_PROT_ERR;
+		siw_async_ev(qp, NULL, IB_EVENT_QP_ACCESS_ERR);
+		goto done;
+	}
+	mr = siw_mem2mr(sge->mem.obj);
+
+	if (rctx->first_ddp_seg) {
+		rv = siw_rx_umem_init(rctx, mr, sge->addr);
+		if (rv) {
+			wqe->wc_status = IB_WC_LOC_PROT_ERR;
+			goto done;
+		}
+	} else if (!rctx->umem_chunk) {
+		/*
+		 * This should never happen.
+		 *
+		 * TODO: Remove tentative debug aid.
+		 */
+		dprint(DBG_RX|DBG_ON, "(QP%d): No target mem!\n", QP_ID(qp));
+		wqe->wc_status = IB_WC_GENERAL_ERR;
+		rv = -EPROTO;
+		goto done;
+	}
+	/*
+	 * Are we going to finish placing the last DDP segment (L=1)
+	 * of the current RDMAP message?
+	 *
+	 * NOTE: siw_rresp_check_ntoh() guarantees that the
+	 * last inbound RDMAP Read Response message exactly matches
+	 * with the RREQ WR.
+	 */
+	is_last = (bytes + wqe->processed == wqe->bytes) ? 1 : 0;
+
+	rv = siw_rx_umem(rctx,  bytes, is_last);
+	if (rv != bytes) {
+		wqe->wc_status = IB_WC_GENERAL_ERR;
+		rv = -EINVAL;
+		goto done;
+	}
+	rctx->fpdu_part_rem -= rv;
+	rctx->fpdu_part_rcvd += rv;
+
+	wqe->processed += rv;
+
+	if (!rctx->fpdu_part_rem)
+		return 0;
+done:
+	return (rv < 0) ? rv : -EAGAIN;
+}
+
+static void siw_drain_pkt(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	char	buf[4096];
+	int	len;
+
+	dprint(DBG_ON|DBG_RX, " (QP%d): drain %d bytes\n",
+		QP_ID(qp), rctx->fpdu_part_rem);
+
+	while (rctx->fpdu_part_rem) {
+		len = min(rctx->fpdu_part_rem, 4096);
+
+		skb_copy_bits(rctx->skb, rctx->skb_offset, buf, len);
+
+		rctx->skb_copied += len;
+		rctx->skb_offset += len;
+		rctx->skb_new -= len;
+		rctx->fpdu_part_rem -= len;
+	}
+}
+
+int siw_proc_unsupp(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	WARN_ON(1);
+	siw_drain_pkt(qp, rctx);
+	return 0;
+}
+
+
+int siw_proc_terminate(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	struct iwarp_terminate	*term = &rctx->hdr.terminate;
+
+	printk(KERN_INFO "(QP%d): RX Terminate: etype=%d, layer=%d, ecode=%d\n",
+		QP_ID(qp), term->term_ctrl.etype, term->term_ctrl.layer,
+		term->term_ctrl.ecode);
+
+	siw_drain_pkt(qp, rctx);
+	return 0;
+}
+
+
+static int siw_get_trailer(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
+{
+	struct sk_buff	*skb = rctx->skb;
+	u8		*tbuf = (u8 *)&rctx->trailer.crc - rctx->pad;
+	int		avail;
+
+	avail = min(rctx->skb_new, rctx->fpdu_part_rem);
+
+	skb_copy_bits(skb, rctx->skb_offset,
+		      tbuf + rctx->fpdu_part_rcvd, avail);
+
+	rctx->fpdu_part_rcvd += avail;
+	rctx->fpdu_part_rem -= avail;
+
+	rctx->skb_new -= avail;
+	rctx->skb_offset += avail;
+	rctx->skb_copied += avail;
+
+	dprint(DBG_RX, " (QP%d): %d remaining (%d)\n", QP_ID(qp),
+		rctx->fpdu_part_rem, avail);
+
+	if (!rctx->fpdu_part_rem) {
+		u32	crc_in, crc_own = 0;
+		/*
+		 * check crc if required
+		 */
+		if (!rctx->crc_enabled)
+			return 0;
+
+		if (rctx->pad && siw_crc_array(&rctx->mpa_crc_hd,
+					       tbuf, rctx->pad) != 0)
+			return -EINVAL;
+
+		crypto_hash_final(&rctx->mpa_crc_hd, (u8 *)&crc_own);
+
+		/*
+		 * CRC32 is computed, transmitted and received directly in NBO,
+		 * so there's never a reason to convert byte order.
+		 */
+		crc_in = rctx->trailer.crc;
+
+		if (crc_in != crc_own) {
+			dprint(DBG_RX|DBG_ON,
+				" (QP%d): CRC ERROR in:=%08x, own=%08x\n",
+				QP_ID(qp), crc_in, crc_own);
+			return -EINVAL;
+		}
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+
+static int siw_get_hdr(struct siw_iwarp_rx *rctx)
+{
+	struct sk_buff		*skb = rctx->skb;
+	struct iwarp_ctrl	*c_hdr = &rctx->hdr.ctrl;
+
+	int bytes;
+
+	if (rctx->fpdu_part_rcvd < sizeof(struct iwarp_ctrl)) {
+		/*
+		 * copy the fixed first part of the iwarp hdr
+		 */
+		bytes = min_t(int, rctx->skb_new,
+			      sizeof(struct iwarp_ctrl) - rctx->fpdu_part_rcvd);
+
+		skb_copy_bits(skb, rctx->skb_offset,
+			      (char *)c_hdr + rctx->fpdu_part_rcvd, bytes);
+
+		rctx->fpdu_part_rcvd += bytes;
+
+		rctx->skb_new -= bytes;
+		rctx->skb_offset += bytes;
+		rctx->skb_copied += bytes;
+
+		if (!rctx->skb_new ||
+			rctx->fpdu_part_rcvd < sizeof(struct iwarp_ctrl)) {
+			return -EAGAIN;
+		}
+
+		if (c_hdr->opcode > RDMAP_TERMINATE) {
+			dprint(DBG_RX|DBG_ON, " opcode %d\n", c_hdr->opcode);
+			return -EINVAL;
+		}
+		if (c_hdr->dv != DDP_VERSION) {
+			dprint(DBG_RX|DBG_ON, " dversion %d\n", c_hdr->dv);
+			return -EINVAL;
+		}
+		if (c_hdr->rv != RDMAP_VERSION) {
+			dprint(DBG_RX|DBG_ON, " rversion %d\n", c_hdr->rv);
+			return -EINVAL;
+		}
+		dprint(DBG_RX, "(QP%d): New Header, opcode:%d\n",
+			RX_QPID(rctx), c_hdr->opcode);
+	}
+	/*
+	 * figure out len of current hdr: variable length of
+	 * iwarp hdr forces us to copy hdr information
+	 */
+	bytes = min(rctx->skb_new,
+		  iwarp_pktinfo[c_hdr->opcode].hdr_len - rctx->fpdu_part_rcvd);
+
+	skb_copy_bits(skb, rctx->skb_offset,
+		      (char *)c_hdr + rctx->fpdu_part_rcvd, bytes);
+
+	rctx->fpdu_part_rcvd += bytes;
+
+	rctx->skb_new -= bytes;
+	rctx->skb_offset += bytes;
+	rctx->skb_copied += bytes;
+
+	if (rctx->fpdu_part_rcvd == iwarp_pktinfo[c_hdr->opcode].hdr_len) {
+		/*
+		 * HDR receive completed. Check if the current DDP segment
+		 * starts a new RDMAP message or continues a previously
+		 * started RDMAP message.
+		 *
+		 * Note well from the comments on DDP reassembly:
+		 * - Support for unordered reception of DDP segments
+		 *   (or FPDUs) from different RDMAP messages is not needed.
+		 * - Unordered reception of DDP segments of the same
+		 *   RDMAP message is not supported. It is probably not
+		 *   needed with most peers.
+		 */
+		siw_dprint_hdr(&rctx->hdr, RX_QPID(rctx), "HDR received");
+
+		if (rctx->more_ddp_segs != 0) {
+			rctx->first_ddp_seg = 0;
+			if (rctx->prev_ddp_opcode != c_hdr->opcode) {
+				dprint(DBG_ON,
+					"packet intersection: %d <> %d\n",
+					rctx->prev_ddp_opcode, c_hdr->opcode);
+				return -EPROTO;
+			}
+		} else {
+			rctx->prev_ddp_opcode = c_hdr->opcode;
+			rctx->first_ddp_seg = 1;
+		}
+		rctx->more_ddp_segs = (c_hdr->l == 0) ? 1 : 0;
+
+		return 0;
+	}
+	return -EAGAIN;
+}
+
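+/*
+ * The MPA length field counts the DDP segment (header plus payload),
+ * but neither the 2-byte MPA length field itself nor pad and CRC.
+ * At call time, fpdu_part_rcvd holds the received header length
+ * including those 2 bytes, hence the MPA_HDR_SIZE correction.
+ */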
+static inline int siw_fpdu_payload_len(struct siw_iwarp_rx *rctx)
+{
+	return ((int)(rctx->hdr.ctrl.mpa_len) - rctx->fpdu_part_rcvd)
+		+ MPA_HDR_SIZE;
+}
+
+static inline int siw_fpdu_trailer_len(struct siw_iwarp_rx *rctx)
+{
+	int mpa_len = (int)rctx->hdr.ctrl.mpa_len + MPA_HDR_SIZE;
+
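+	/* -mpa_len & 0x3 == number of pad bytes (0..3) to a 4-byte boundary */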
+	return MPA_CRC_SIZE + (-mpa_len & 0x3);
+}
+
+/*
+ * siw_rreq_complete()
+ *
+ * Complete the current READ REQUEST after READ RESPONSE processing.
+ * It may complete consecutive WQE's which were already SQ
+ * processed before but are awaiting completion due to completion
+ * ordering (see verbs 8.2.2.2).
+ * The READ RESPONSE may also resume SQ processing if it was stalled
+ * due to ORD exhaustion (see verbs 8.2.2.18).
+ * Completion stops at the next READ REQUEST or when the ORQ is empty.
+ */
+static void siw_rreq_complete(struct siw_wqe *wqe, int error)
+{
+	struct siw_qp		*qp = wqe->qp;
+	int			num_wc = 1;
+	enum ib_send_flags	flags;
+	LIST_HEAD(c_list);
+
+	flags = wr_flags(wqe);
+
+	if (flags & IB_SEND_SIGNALED)
+		list_add(&wqe->list, &c_list);
+	else {
+		atomic_inc(&qp->sq_space);
+		siw_wqe_put(wqe);
+		num_wc = 0;
+	}
+
+	lock_orq(qp);
+
+	/* More WQE's to complete following this RREQ? */
+	if (!list_empty(&qp->orq)) {
+		struct list_head *pos, *n;
+		list_for_each_safe(pos, n, &qp->orq) {
+			wqe = list_entry_wqe(pos);
+			if (wr_type(wqe) == SIW_WR_RDMA_READ_REQ)
+				break;
+			flags |= wr_flags(wqe);
+			num_wc++;
+			dprint(DBG_WR|DBG_ON,
+				"(QP%d): Resume completion, wr_type %d\n",
+				QP_ID(qp), wr_type(wqe));
+			list_move_tail(pos, &c_list);
+		}
+	}
+	unlock_orq(qp);
+
+	if (num_wc)
+		siw_sq_complete(&c_list, qp, num_wc, flags);
+
+	/*
+	 * Check if SQ processing was stalled due to ORD limit
+	 */
+	if (ORD_SUSPEND_SQ(qp)) {
+		lock_sq(qp);
+
+		wqe = siw_next_tx_wqe(qp);
+
+		if (wqe && !tx_wqe(qp)) {
+			WARN_ON(wr_type(wqe) != SIW_WR_RDMA_READ_REQ);
+			list_del_init(&wqe->list);
+			tx_wqe(qp) = wqe;
+
+			list_add_tail(&wqe->list, &qp->orq);
+
+			unlock_sq(qp);
+
+			dprint(DBG_RX, "(QP%d): SQ resume (%d)\n",
+				QP_ID(qp), atomic_read(&qp->sq_space));
+
+			siw_sq_queue_work(qp);
+		} else {
+			/* only new ORQ space if not next RREQ queued */
+			atomic_inc(&qp->orq_space);
+			unlock_sq(qp);
+		}
+	} else
+		atomic_inc(&qp->orq_space);
+}
+
+/*
+ * siw_rdmap_complete()
+ *
+ * Complete processing of an RDMAP message after receiving all
+ * of its DDP segments:
+ *
+ *   o SENDs and RRESPs need receive completion,
+ *   o RREQs need READ RESPONSE initialization,
+ *   o WRITEs need memory dereferencing.
+ *
+ * TODO: Could siw_[s,r]_complete() fail? (CQ full)
+ */
+static inline int siw_rdmap_complete(struct siw_qp *qp,
+				     struct siw_iwarp_rx *rctx)
+{
+	struct siw_wqe	*wqe;
+	int rv = 0;
+
+	switch (rctx->hdr.ctrl.opcode) {
+
+	case RDMAP_SEND_SE:
+		wr_flags(rx_wqe(qp)) |= IB_SEND_SOLICITED;
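+		/* Fall through */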
+	case RDMAP_SEND:
+		rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
+
+		wqe = rx_wqe(qp);
+
+		wqe->wc_status = IB_WC_SUCCESS;
+		wqe->wr_status = SR_WR_DONE;
+
+		siw_rq_complete(wqe, qp);
+
+		break;
+
+	case RDMAP_RDMA_READ_RESP:
+		rctx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
+
+		wqe = rx_wqe(qp);
+
+		wqe->wc_status = IB_WC_SUCCESS;
+		wqe->wr_status = SR_WR_DONE;
+
+		siw_rreq_complete(wqe, 0);
+
+		break;
+
+	case RDMAP_RDMA_READ_REQ:
+		rv = siw_init_rresp(qp, rctx);
+
+		break;
+
+	case RDMAP_RDMA_WRITE:
+		/*
+		 * Free References from memory object if
+		 * attached to receive context (inbound WRITE)
+		 * While a zero-length WRITE is allowed, the
+		 * current implementation does not create
+		 * a memory reference (it is unclear if memory
+		 * rights should be checked in that case!).
+		 *
+		 * TODO: check zero length WRITE semantics
+		 */
+		if (rx_mem(qp))
+			siw_mem_put(rx_mem(qp));
+		break;
+
+	default:
+		break;
+
+	}
+	rctx->umem_chunk = NULL; /* DEBUG aid, tentatively */
+	rx_wqe(qp) = NULL;	/* also clears MEM object for WRITE */
+
+	return rv;
+}
+
+/*
+ * siw_rdmap_error()
+ *
+ * Abort processing of an RDMAP message after failure.
+ * SENDs and RRESPs need receive completion if processing
+ * was already started.
+ *
+ * TODO: WRITEs need a local error to be surfaced.
+ *
+ */
+static inline void
+siw_rdmap_error(struct siw_qp *qp, struct siw_iwarp_rx *rctx, int status)
+{
+	struct siw_wqe	*wqe;
+
+	switch (rctx->hdr.ctrl.opcode) {
+
+	case RDMAP_SEND_SE:
+	case RDMAP_SEND:
+		rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
+
+		wqe = rx_wqe(qp);
+		if (!wqe)
+			return;
+
+		if (rctx->hdr.ctrl.opcode == RDMAP_SEND_SE)
+			wr_flags(wqe) |= IB_SEND_SOLICITED;
+
+		if (!wqe->wc_status)
+			wqe->wc_status = IB_WC_GENERAL_ERR;
+
+		wqe->wr_status = SR_WR_DONE;
+		siw_rq_complete(wqe, qp);
+
+		break;
+
+	case RDMAP_RDMA_READ_RESP:
+		/*
+		 * A READ RESPONSE may flush consecutive WQE's
+		 * which were SQ processed before
+		 */
+		rctx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
+
+		if (rctx->state == SIW_GET_HDR || status == -ENODATA)
+			/* pending RREQ left untouched */
+			break;
+
+		wqe = rx_wqe(qp);
+		if (wqe) {
+			if (status)
+				wqe->wc_status = status;
+			else
+				wqe->wc_status = IB_WC_GENERAL_ERR;
+
+			wqe->wr_status = SR_WR_DONE;
+			/*
+			 * All errors turn the wqe into signalled.
+			 */
+			wr_flags(wqe) |= IB_SEND_SIGNALED;
+			siw_rreq_complete(wqe, status);
+		}
+		break;
+
+	case RDMAP_RDMA_WRITE:
+		/*
+		 * Free References from memory object if
+		 * attached to receive context (inbound WRITE)
+		 * While a zero-length WRITE is allowed, the
+		 * current implementation does not create
+		 * a memory reference (it is unclear if memory
+		 * rights should be checked in that case!).
+		 *
+		 * TODO: check zero length WRITE semantics
+		 */
+		if (rx_mem(qp))
+			siw_mem_put(rx_mem(qp));
+		break;
+
+	default:
+		break;
+	}
+	rctx->umem_chunk = NULL; /* DEBUG aid, tentatively */
+	rx_wqe(qp) = NULL;	/* also clears MEM object for WRITE */
+}
+
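+/*
+ * Receive states cycle per FPDU:
+ *
+ *   SIW_GET_HDR -> SIW_GET_DATA_START [-> SIW_GET_DATA_MORE ...]
+ *               -> SIW_GET_TRAILER -> SIW_GET_HDR (next FPDU)
+ */
+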
+/*
+ * siw_tcp_rx_data()
+ *
+ * Main routine to consume inbound TCP payload
+ *
+ * @rd_desc:	read descriptor
+ * @skb:	socket buffer
+ * @off:	offset in skb
+ * @len:	skb->len - offset : payload in skb
+ */
+int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
+		    unsigned int off, size_t len)
+{
+	struct siw_qp		*qp = rd_desc->arg.data;
+	struct siw_iwarp_rx	*rctx = &qp->rx_ctx;
+	int			rv;
+
+	rctx->skb = skb;
+	rctx->skb_new = skb->len - off;
+	rctx->skb_offset = off;
+	rctx->skb_copied = 0;
+
+	dprint(DBG_RX, "(QP%d): new data %d, rx-state %d\n", QP_ID(qp),
+		rctx->skb_new, rctx->state);
+
+	if (unlikely(rctx->rx_suspend == 1 ||
+		     qp->attrs.state != SIW_QP_STATE_RTS)) {
+		dprint(DBG_RX|DBG_ON, "(QP%d): failed. state rx:%d, qp:%d\n",
+			QP_ID(qp), qp->rx_ctx.state, qp->attrs.state);
+		return 0;
+	}
+	while (rctx->skb_new) {
+
+		switch (rctx->state) {
+
+		case SIW_GET_HDR:
+			rv = siw_get_hdr(rctx);
+			if (!rv) {
+				if (rctx->crc_enabled &&
+				    siw_crc_rxhdr(rctx) != 0) {
+					rv = -EINVAL;
+					break;
+				}
+				rctx->hdr.ctrl.mpa_len =
+					ntohs(rctx->hdr.ctrl.mpa_len);
+
+				rctx->fpdu_part_rem =
+					siw_fpdu_payload_len(rctx);
+
+				if (rctx->fpdu_part_rem)
+					rctx->pad = -rctx->fpdu_part_rem & 0x3;
+				else
+					rctx->pad = 0;
+
+				rctx->state = SIW_GET_DATA_START;
+				rctx->fpdu_part_rcvd = 0;
+			}
+			break;
+
+		case SIW_GET_DATA_MORE:
+			/*
+			 * Another data fragment of the same DDP segment.
+			 * Headers will not be checked again by the
+			 * opcode-specific data receive function below.
+			 * Setting first_ddp_seg = 0 avoids repeating
+			 * initializations that must be done only once per
+			 * DDP segment.
+			 */
+			rctx->first_ddp_seg = 0;
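+			/* Fall through */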
+
+		case SIW_GET_DATA_START:
+			/*
+			 * Headers will be checked by the opcode-specific
+			 * data receive function below.
+			 */
+			rv = siw_rx_data(qp, rctx);
+			if (!rv) {
+				rctx->fpdu_part_rem =
+					siw_fpdu_trailer_len(rctx);
+				rctx->fpdu_part_rcvd = 0;
+				rctx->state = SIW_GET_TRAILER;
+			} else
+				rctx->state = SIW_GET_DATA_MORE;
+
+			break;
+
+		case SIW_GET_TRAILER:
+			/*
+			 * read CRC + any padding
+			 */
+			rv = siw_get_trailer(qp, rctx);
+			if (!rv) {
+				/*
+				 * FPDU completed.
+				 * complete RDMAP message if last fragment
+				 */
+				rctx->state = SIW_GET_HDR;
+				rctx->fpdu_part_rcvd = 0;
+
+				if (!rctx->hdr.ctrl.l)
+					/* more frags */
+					break;
+
+				rv = siw_rdmap_complete(qp, rctx);
+				if (rv)
+					break;
+			}
+			break;
+
+		default:
+			WARN_ON(1);
+			rv = -EAGAIN;
+		}
+
+		if (unlikely(rv != 0 && rv != -EAGAIN)) {
+			/*
+			 * TODO: implement graceful error handling including
+			 *       generation (and processing) of TERMINATE
+			 *       messages.
+			 *
+			 *	 For now we are left with a bogus RX status,
+			 *	 unable to receive any further bytes.
+			 *	 BUT: the code must distinguish between
+			 *
+			 *	 o protocol syntax errors (FATAL, framing lost),
+			 *	 o CRC errors (FATAL, framing lost since we do
+			 *	   not trust the packet header (??)), and
+			 *	 o local resource errors (maybe non-fatal,
+			 *	   framing not lost).
+			 */
+			siw_rdmap_error(qp, rctx, rv);
+
+			dprint(DBG_RX|DBG_ON,
+				"(QP%d): RX ERROR %d at RX state %d\n",
+				QP_ID(qp), rv, rctx->state);
+
+			siw_dprint_rctx(rctx);
+			/*
+			 * Calling siw_cm_queue_work() is safe without
+			 * releasing qp->state_lock because the QP state
+			 * will be transitioned to SIW_QP_STATE_ERROR
+			 * by the siw_work_handler() workqueue handler
+			 * after we return from siw_qp_llp_data_ready().
+			 */
+			siw_qp_cm_drop(qp, 1);
+
+			break;
+		}
+		if (rv) {
+			dprint(DBG_RX, "(QP%d): "
+				"Misaligned FPDU: State: %d, missing: %d\n",
+				QP_ID(qp), rctx->state, rctx->fpdu_part_rem);
+			break;
+		}
+	}
+	return rctx->skb_copied;
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_qp_tx.c b/drivers/infiniband/hw/softiwarp/siw_qp_tx.c
new file mode 100644
index 0000000..ef774eb
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_qp_tx.c
@@ -0,0 +1,1309 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+static int zcopy_tx = 1;
+module_param(zcopy_tx, int, 0644);
+MODULE_PARM_DESC(zcopy_tx, "Zero copy user data transmit if possible");
+
+DEFINE_PER_CPU(atomic_t, siw_workq_len);
+
+static inline int siw_crc_txhdr(struct siw_iwarp_tx *ctx)
+{
+	crypto_hash_init(&ctx->mpa_crc_hd);
+	return siw_crc_array(&ctx->mpa_crc_hd, (u8 *)&ctx->pkt,
+			     ctx->ctrl_len);
+}
+
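+/*
+ * Results of siw_qp_prepare_tx(): PKT_COMPLETE if the FPDU was fully
+ * built (header-only packet or immediate data), PKT_FRAGMENTED if
+ * user data must still be appended.
+ */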
+#define PKT_FRAGMENTED 1
+#define PKT_COMPLETE 0
+
+/*
+ * siw_qp_prepare_tx()
+ *
+ * Prepare tx state for sending out one fpdu. Builds complete pkt
+ * if no user data or only immediate data are present.
+ *
+ * returns PKT_COMPLETE if complete pkt built, PKT_FRAGMENTED otherwise.
+ */
+static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
+{
+	struct siw_wqe		*wqe = c_tx->wqe;
+	u32			*crc = NULL;
+
+	dprint(DBG_TX, "(QP%d):\n", TX_QPID(c_tx));
+
+	switch (wr_type(wqe)) {
+
+	case SIW_WR_RDMA_READ_REQ:
+		memcpy(&c_tx->pkt.ctrl,
+		       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
+		       sizeof(struct iwarp_ctrl));
+
+		c_tx->pkt.rreq.rsvd = 0;
+		c_tx->pkt.rreq.ddp_qn = htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
+		c_tx->pkt.rreq.ddp_msn =
+			htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]);
+		c_tx->pkt.rreq.ddp_mo = 0;
+		c_tx->pkt.rreq.sink_stag = htonl(wqe->wr.rread.sge[0].lkey);
+		c_tx->pkt.rreq.sink_to =
+			cpu_to_be64(wqe->wr.rread.sge[0].addr); /* abs addr! */
+		c_tx->pkt.rreq.source_stag = htonl(wqe->wr.rread.rtag);
+		c_tx->pkt.rreq.source_to = cpu_to_be64(wqe->wr.rread.raddr);
+		c_tx->pkt.rreq.read_size = htonl(wqe->bytes);
+
+		dprint(DBG_TX, ": RREQ: Sink: %x, 0x%016llx\n",
+			wqe->wr.rread.sge[0].lkey, wqe->wr.rread.sge[0].addr);
+
+		c_tx->ctrl_len = sizeof(struct iwarp_rdma_rreq);
+		crc = &c_tx->pkt.rreq_pkt.crc;
+		break;
+
+	case SIW_WR_SEND:
+		if (wr_flags(wqe) & IB_SEND_SOLICITED)
+			memcpy(&c_tx->pkt.ctrl,
+			       &iwarp_pktinfo[RDMAP_SEND_SE].ctrl,
+			       sizeof(struct iwarp_ctrl));
+		else
+			memcpy(&c_tx->pkt.ctrl,
+			       &iwarp_pktinfo[RDMAP_SEND].ctrl,
+			       sizeof(struct iwarp_ctrl));
+
+		c_tx->pkt.send.ddp_qn = htonl(RDMAP_UNTAGGED_QN_SEND);
+		c_tx->pkt.send.ddp_msn =
+			htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
+		c_tx->pkt.send.ddp_mo = 0;
+		c_tx->pkt.send.rsvd = 0;
+
+		c_tx->ctrl_len = sizeof(struct iwarp_send);
+
+		if (!wqe->bytes)
+			crc = &c_tx->pkt.send_pkt.crc;
+		break;
+
+	case SIW_WR_RDMA_WRITE:
+		memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_RDMA_WRITE].ctrl,
+		       sizeof(struct iwarp_ctrl));
+
+		c_tx->pkt.rwrite.sink_stag = htonl(wqe->wr.write.rtag);
+		c_tx->pkt.rwrite.sink_to = cpu_to_be64(wqe->wr.write.raddr);
+		c_tx->ctrl_len = sizeof(struct iwarp_rdma_write);
+
+		if (!wqe->bytes)
+			crc = &c_tx->pkt.write_pkt.crc;
+		break;
+
+	case SIW_WR_RDMA_READ_RESP:
+		memcpy(&c_tx->pkt.ctrl,
+		       &iwarp_pktinfo[RDMAP_RDMA_READ_RESP].ctrl,
+		       sizeof(struct iwarp_ctrl));
+
+		/* NBO */
+		c_tx->pkt.rresp.sink_stag = wqe->wr.rresp.rtag;
+		c_tx->pkt.rresp.sink_to = cpu_to_be64(wqe->wr.rresp.raddr);
+
+		c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp);
+
+		dprint(DBG_TX, ": RRESP: Sink: %x, 0x%016llx\n",
+			wqe->wr.rresp.rtag, wqe->wr.rresp.raddr);
+
+		if (!wqe->bytes)
+			crc = &c_tx->pkt.rresp_pkt.crc;
+		break;
+
+	default:
+		dprint(DBG_ON, "Unsupported WQE type %d\n", wr_type(wqe));
+		BUG();
+		break;
+	}
+	c_tx->ctrl_sent = 0;
+	c_tx->sge_idx = 0;
+	c_tx->sge_off = 0;
+	c_tx->pg_idx = 0;
+	c_tx->umem_chunk = NULL;
+
+	/*
+	 * Do complete CRC if enabled and short packet
+	 */
+	if (crc) {
+		*crc = 0;
+		if (c_tx->crc_enabled) {
+			if (siw_crc_txhdr(c_tx) != 0)
+				return -EINVAL;
+			crypto_hash_final(&c_tx->mpa_crc_hd, (u8 *)crc);
+		}
+	}
+	c_tx->ctrl_len += MPA_CRC_SIZE;
+
+	/*
+	 * Allow direct sending out of user buffer if WR is non signalled
+	 * and payload is over threshold and no CRC is enabled.
+	 * Per RDMA verbs, the application should not change the send buffer
+	 * until the work has completed. In iWARP, work completion only
+	 * means local delivery to TCP. TCP may still reference the buffer
+	 * for retransmission or may not even have sent the data yet.
+	 * Changing unsent data also breaks the CRC, if applied.
+	 */
+	if (zcopy_tx &&
+	     !(wr_flags(wqe) & IB_SEND_SIGNALED) &&
+	     wqe->bytes > SENDPAGE_THRESH &&
+	     wr_type(wqe) != SIW_WR_RDMA_READ_REQ)
+		c_tx->use_sendpage = 1;
+	else
+		c_tx->use_sendpage = 0;
+
+	return crc == NULL ? PKT_FRAGMENTED : PKT_COMPLETE;
+}
+
+/*
+ * Send out one complete FPDU. Used for fixed-size packets like
+ * Read Requests or zero length SENDs, WRITEs, and READ Responses.
+ * Also used for pushing an FPDU hdr only.
+ */
+static inline int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
+			      int flags)
+{
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov = {
+		.iov_base = (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent,
+		.iov_len = c_tx->ctrl_len - c_tx->ctrl_sent};
+
+	int rv = kernel_sendmsg(s, &msg, &iov, 1,
+				c_tx->ctrl_len - c_tx->ctrl_sent);
+
+	dprint(DBG_TX, " (QP%d): op=%d, %d of %d sent (%d)\n",
+		TX_QPID(c_tx), c_tx->pkt.ctrl.opcode,
+		c_tx->ctrl_sent + rv, c_tx->ctrl_len, rv);
+
+	if (rv >= 0) {
+		c_tx->ctrl_sent += rv;
+
+		if (c_tx->ctrl_sent == c_tx->ctrl_len) {
+			siw_dprint_hdr(&c_tx->pkt.hdr, TX_QPID(c_tx),
+					"CTRL sent");
+			if (!(flags & MSG_MORE))
+				c_tx->new_tcpseg = 1;
+			rv = 0;
+		} else if (c_tx->ctrl_sent < c_tx->ctrl_len)
+			rv = -EAGAIN;
+		else
+			BUG();
+	}
+	return rv;
+}
+
+/*
+ * 0copy TCP transmit interface.
+ *
+ * Push page array page by page or in one shot.
+ * Pushing the whole page array requires the inner do_tcp_sendpages
+ * function to be exported by the kernel.
+ */
+static int siw_tcp_sendpages(struct socket *s, struct page **page,
+			     int offset, size_t size)
+{
+	int rv = 0;
+
+#ifdef SIW_SENDPAGES_EXPORT
+	struct sock *sk = s->sk;
+
+	if (!(sk->sk_route_caps & NETIF_F_SG) ||
+	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) {
+		/* FIXME:
+		 * This should also be handled in a
+		 * loop
+		 */
+		return -EFAULT;
+	}
+
+	lock_sock(sk);
+	TCP_CHECK_TIMER(sk);
+
+	/*
+	 * just return what do_tcp_sendpages() returned
+	 */
+	rv = do_tcp_sendpages(sk, page, offset, size, MSG_MORE|MSG_DONTWAIT);
+
+	TCP_CHECK_TIMER(sk);
+	release_sock(sk);
+	if (rv == -EAGAIN)
+		rv = 0;
+#else
+	/*
+	 * If do_tcp_sendpages() function is not exported
+	 * push page by page
+	 */
+	size_t todo = size;
+	int i;
+
+	for (i = 0; size > 0; i++) {
+		size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
+
+		rv = s->ops->sendpage(s, page[i], offset, bytes,
+				      MSG_MORE|MSG_DONTWAIT);
+		if (rv <= 0)
+			break;
+
+		size -= rv;
+
+		if (rv != bytes)
+			break;
+
+		offset = 0;
+	}
+	if (rv >= 0 || rv == -EAGAIN)
+		rv = todo - size;
+#endif
+	return rv;
+}
+
+/*
+ * siw_0copy_tx()
+ *
+ * Pushes a list of pages to the TCP socket. If the pages stem from
+ * multiple SGEs, all referenced pages of each SGE are pushed in one
+ * shot.
+ */
+static int siw_0copy_tx(struct socket *s, struct page **page,
+			struct siw_sge *sge, unsigned int offset,
+			unsigned int size)
+{
+	int i = 0, sent = 0, rv;
+	int sge_bytes = min(sge->len - offset, size);
+
+	offset  = (sge->addr + offset) & ~PAGE_MASK;
+
+	while (sent != size) {
+
+		rv = siw_tcp_sendpages(s, &page[i], offset, sge_bytes);
+		if (rv >= 0) {
+			sent += rv;
+			if (size == sent || sge_bytes > rv)
+				break;
+
+			i += PAGE_ALIGN(sge_bytes + offset) >> PAGE_SHIFT;
+			sge++;
+			sge_bytes = min(sge->len, size - sent);
+			offset = sge->addr & ~PAGE_MASK;
+		} else {
+			sent = rv;
+			break;
+		}
+	}
+	return sent;
+}
+
+/*
+ * siw_tx_umem_init()
+ *
+ * Resolve memory chunk and update page index pointer
+ *
+ * @chunk:	Umem Chunk to be updated
+ * @page_index:	Page index to be updated
+ * @mr:		Memory Region
+ * @va:		Virtual Address within MR
+ *
+ */
+static void siw_tx_umem_init(struct ib_umem_chunk **chunk, int *page_index,
+			     struct siw_mr *mr, u64 va)
+{
+	struct ib_umem_chunk *cp;
+	int p_ix;
+
+	BUG_ON(va < mr->mem.va);
+	va -= mr->mem.va & PAGE_MASK;
+	/*
+	 * Equivalent to
+	 * va -= mr->mem.va;
+	 * va += mr->umem->offset;
+	 */
+
+	p_ix = va >> PAGE_SHIFT;
+
+	list_for_each_entry(cp, &mr->umem->chunk_list, list) {
+		if (p_ix < cp->nents)
+			break;
+		p_ix -= cp->nents;
+	}
+	BUG_ON(p_ix >= cp->nents);
+
+	dprint(DBG_MM, "(): New chunk 0x%p: Page idx %d, nents %d\n",
+		cp, p_ix, cp->nents);
+
+	*chunk = cp;
+	*page_index = p_ix;
+
+	return;
+}
+
+/*
+ * update memory chunk and page index from given starting point
+ * before current transmit described by: c_tx->sge_off,
+ * sge->addr, c_tx->pg_idx, and c_tx->umem_chunk
+ */
+static inline void
+siw_umem_chunk_update(struct siw_iwarp_tx *c_tx, struct siw_mr *mr,
+		      struct siw_sge *sge, unsigned int off)
+{
+	struct ib_umem_chunk *chunk = c_tx->umem_chunk;
+	u64 va_start = sge->addr + c_tx->sge_off;
+
+	off += (unsigned int)(va_start & ~PAGE_MASK); /* + first page offset */
+	off >>= PAGE_SHIFT;	/* byte offset becomes page offset */
+
+	list_for_each_entry_from(chunk, &mr->umem->chunk_list, list) {
+		if (c_tx->pg_idx + off < chunk->nents)
+			break;
+		off -= chunk->nents - c_tx->pg_idx;
+		c_tx->pg_idx = 0;
+	}
+	c_tx->pg_idx += off;
+
+	c_tx->umem_chunk = chunk;
+}
+
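+/* Trailer buffer: up to 4 pad bytes followed by the 4-byte MPA CRC */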
+#define MAX_TRAILER 8
+#define MAX_ARRAY 130	/* Max number of kernel_sendmsg elements */
+
+static inline void
+siw_save_txstate(struct siw_iwarp_tx *c_tx, struct ib_umem_chunk *chunk,
+		 unsigned int pg_idx, unsigned int sge_idx,
+		 unsigned int sge_off)
+{
+	c_tx->umem_chunk = chunk;
+	c_tx->pg_idx = pg_idx;
+	c_tx->sge_idx = sge_idx;
+	c_tx->sge_off = sge_off;
+}
+/*
+ * Write out iov referencing hdr, data and trailer of current FPDU.
+ * Update transmit state depending on the send return status.
+ */
+static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
+{
+	struct siw_wqe		*wqe = c_tx->wqe;
+	struct siw_sge		*sge = &wqe->wr.sgl.sge[c_tx->sge_idx],
+				*first_sge = sge;
+	struct siw_mr		*mr = siw_mem2mr(sge->mem.obj);
+	struct ib_umem_chunk 	*chunk = c_tx->umem_chunk;
+
+	struct kvec		iov[MAX_ARRAY];
+	struct page 		*page_array[MAX_ARRAY];
+	struct msghdr		msg = {.msg_flags = MSG_DONTWAIT};
+
+	int			seg = 0, do_crc = c_tx->do_crc, kbuf = 0,
+				rv;
+	unsigned int		data_len = c_tx->bytes_unsent,
+				hdr_len = 0,
+				trl_len = 0,
+				sge_off = c_tx->sge_off,
+				sge_idx = c_tx->sge_idx,
+				pg_idx = c_tx->pg_idx;
+
+	if (SIW_INLINED_DATA(wqe)) {
+		kbuf = 1;
+		chunk = NULL;
+	}
+
+	if (c_tx->state == SIW_SEND_HDR) {
+		if (c_tx->use_sendpage) {
+			rv = siw_tx_ctrl(c_tx, s, MSG_DONTWAIT|MSG_MORE);
+			if (rv)
+				goto done;
+
+			c_tx->state = SIW_SEND_DATA;
+		} else {
+			iov[0].iov_base =
+				(char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent;
+			iov[0].iov_len = hdr_len =
+				c_tx->ctrl_len - c_tx->ctrl_sent;
+			seg = 1;
+			siw_dprint_hdr(&c_tx->pkt.hdr, TX_QPID(c_tx),
+					"HDR to send: ");
+		}
+	}
+
+	wqe->processed += data_len;
+
+	while (data_len) { /* walk the list of SGE's */
+		unsigned int sge_len = min(sge->len - sge_off, data_len);
+		unsigned int fp_off = (sge->addr + sge_off) & ~PAGE_MASK;
+
+		BUG_ON(!sge_len);
+
+		if (kbuf) {
+			/*
+			 * In kernel buffers to be tx'ed.
+			 */
+			iov[seg].iov_base =
+				(void *)(unsigned long)(sge->addr + sge_off);
+			iov[seg].iov_len = sge_len;
+			if (do_crc)
+				siw_crc_array(&c_tx->mpa_crc_hd,
+					      iov[seg].iov_base, sge_len);
+			sge_off += sge_len;
+			data_len -= sge_len;
+			seg++;
+			goto sge_done;
+		}
+		while (sge_len) {
+			struct scatterlist *sl;
+			size_t plen;
+
+			if (!chunk) {
+				mr = siw_mem2mr(sge->mem.obj);
+				siw_tx_umem_init(&chunk, &pg_idx, mr,
+						 sge->addr + sge_off);
+
+				if (!c_tx->umem_chunk)
+					/* Starting first tx for this WQE */
+					siw_save_txstate(c_tx, chunk, pg_idx,
+							 sge_idx, sge_off);
+			}
+			sl = &chunk->page_list[pg_idx];
+			plen = min((int)PAGE_SIZE - fp_off, sge_len);
+
+			BUG_ON(plen <= 0);
+
+			page_array[seg] = sg_page(sl);
+
+			if (!c_tx->use_sendpage) {
+				iov[seg].iov_base = kmap(sg_page(sl)) + fp_off;
+				iov[seg].iov_len = plen;
+			}
+			if (do_crc)
+				siw_crc_sg(&c_tx->mpa_crc_hd, sl, fp_off, plen);
+
+			sge_len -= plen;
+			sge_off += plen;
+			data_len -= plen;
+
+			if (plen + fp_off == PAGE_SIZE &&
+			    sge_off < sge->len && ++pg_idx == chunk->nents) {
+				chunk = mem_chunk_next(chunk);
+				pg_idx = 0;
+			}
+			fp_off = 0;
+			if (++seg >= MAX_ARRAY) {
+				dprint(DBG_ON, "(QP%d): Too many fragments\n",
+				       TX_QPID(c_tx));
+				if (!kbuf) {
+					int i = (hdr_len > 0) ? 1 : 0;
+					seg--;
+					while (i < seg)
+						kunmap(page_array[i++]);
+				}
+				wqe->processed = 0;
+				rv = -EINVAL;
+				goto done_crc;
+			}
+		}
+sge_done:
+		/* Update SGE variables at end of SGE */
+		if (sge_off == sge->len && wqe->processed < wqe->bytes) {
+			sge_idx++;
+			sge++;
+			sge_off = 0;
+			chunk = NULL;
+		}
+	}
+	/* trailer */
+	if (likely(c_tx->state != SIW_SEND_TRAILER)) {
+		iov[seg].iov_base = &c_tx->trailer.pad[4 - c_tx->pad];
+		iov[seg].iov_len = trl_len = MAX_TRAILER - (4 - c_tx->pad);
+	} else {
+		iov[seg].iov_base = &c_tx->trailer.pad[c_tx->ctrl_sent];
+		iov[seg].iov_len = trl_len = MAX_TRAILER - c_tx->ctrl_sent;
+	}
+
+	if (c_tx->pad) {
+		*(u32 *)c_tx->trailer.pad = 0;
+		if (do_crc)
+			siw_crc_array(&c_tx->mpa_crc_hd,
+				      (u8 *)&c_tx->trailer.crc - c_tx->pad,
+				      c_tx->pad);
+	}
+	if (!c_tx->crc_enabled)
+		c_tx->trailer.crc = 0;
+	else if (do_crc)
+		crypto_hash_final(&c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc);
+
+	data_len = c_tx->bytes_unsent;
+
+	if (c_tx->tcp_seglen >= (int)MPA_MIN_FRAG && TX_MORE_WQE(TX_QP(c_tx))) {
+		msg.msg_flags |= MSG_MORE;
+		c_tx->new_tcpseg = 0;
+	} else
+		c_tx->new_tcpseg = 1;
+
+	if (c_tx->use_sendpage) {
+		rv = siw_0copy_tx(s, page_array, first_sge, c_tx->sge_off,
+				  data_len);
+		if (rv == data_len) {
+			rv = kernel_sendmsg(s, &msg, &iov[seg], 1, trl_len);
+			if (rv > 0)
+				rv += data_len;
+			else
+				rv = data_len;
+		}
+	} else {
+		rv = kernel_sendmsg(s, &msg, iov, seg + 1,
+				    hdr_len + data_len + trl_len);
+		if (!kbuf) {
+			int i = (hdr_len > 0) ? 1 : 0;
+			while (i < seg)
+				kunmap(page_array[i++]);
+		}
+	}
+	if (rv < (int)hdr_len) {
+		/* Not even complete hdr pushed or negative rv */
+		wqe->processed -= data_len;
+		if (rv >= 0) {
+			c_tx->ctrl_sent += rv;
+			rv = -EAGAIN;
+		}
+		goto done_crc;
+	}
+
+	rv -= hdr_len;
+
+	if (rv >= (int)data_len) {
+		/* all user data pushed to TCP or no data to push */
+		if (data_len > 0 && wqe->processed < wqe->bytes)
+			/* Save the current state for next tx */
+			siw_save_txstate(c_tx, chunk, pg_idx, sge_idx, sge_off);
+
+		rv -= data_len;
+
+		if (rv == trl_len) /* all pushed */
+			rv = 0;
+		else {
+			c_tx->state = SIW_SEND_TRAILER;
+			c_tx->ctrl_len = MAX_TRAILER;
+			c_tx->ctrl_sent = rv + 4 - c_tx->pad;
+			c_tx->bytes_unsent = 0;
+			rv = -EAGAIN;
+		}
+
+	} else if (data_len > 0) {
+		/* Maybe some user data pushed to TCP */
+		c_tx->state = SIW_SEND_DATA;
+		wqe->processed -= data_len - rv;
+
+		if (rv) {
+			/*
+			 * Some bytes out. Recompute tx state based
+			 * on old state and bytes pushed
+			 */
+			c_tx->bytes_unsent -= rv;
+			sge = &wqe->wr.sgl.sge[c_tx->sge_idx];
+
+			if (c_tx->sge_idx == sge_idx && c_tx->umem_chunk)
+				/*
+				 * same SGE as starting SGE for this FPDU
+				 */
+				siw_umem_chunk_update(c_tx, mr, sge, rv);
+			else {
+				while (sge->len <= c_tx->sge_off + rv) {
+					rv -= sge->len - c_tx->sge_off;
+					sge = &wqe->wr.sgl.sge[++c_tx->sge_idx];
+					c_tx->sge_off = 0;
+				}
+				c_tx->umem_chunk = NULL;
+			}
+			c_tx->sge_off += rv;
+			BUG_ON(c_tx->sge_off >= sge->len);
+		}
+		rv = -EAGAIN;
+	}
+done_crc:
+	c_tx->do_crc = 0;
+done:
+	return rv;
+}
+
+static void siw_calculate_tcpseg(struct siw_iwarp_tx *c_tx, struct socket *s)
+{
+	/*
+	 * refresh TCP segment len if we start a new segment or
+	 * remaining segment len is less than MPA_MIN_FRAG or
+	 * the socket send buffer is empty.
+	 */
+	if (c_tx->new_tcpseg || c_tx->tcp_seglen < (int)MPA_MIN_FRAG ||
+	     !tcp_send_head(s->sk))
+
+		c_tx->tcp_seglen = get_tcp_mss(s->sk);
+}
+
+
+/*
+ * siw_unseg_txlen()
+ *
+ * Compute complete tcp payload len if packet would not
+ * get fragmented
+ */
+static inline int siw_unseg_txlen(struct siw_iwarp_tx *c_tx)
+{
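+	/* MPA pads the payload to a 4-byte boundary before the CRC */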
+	int pad = c_tx->bytes_unsent ? -c_tx->bytes_unsent & 0x3 : 0;
+
+	return c_tx->bytes_unsent + c_tx->ctrl_len + pad + MPA_CRC_SIZE;
+}
+
+
+/*
+ * siw_prepare_fpdu()
+ *
+ * Prepares transmit context to send out one FPDU if FPDU will contain
+ * user data and user data are not immediate data.
+ * Checks and locks involved memory segments of data to be sent.
+ * Computes maximum FPDU length to fill up TCP MSS if possible.
+ *
+ * @qp:		QP from which to transmit
+ * @wqe:	Current WQE causing transmission
+ *
+ * TODO: Take into account real available sendspace on socket
+ *       to avoid header misalignment due to send pausing within
+ *       fpdu transmission
+ */
+int siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe)
+{
+	struct siw_iwarp_tx	*c_tx  = &qp->tx_ctx;
+	int			rv = 0;
+
+	/*
+	 * TODO: TCP fragmentation dynamics need further investigation.
+	 *	 Resuming SQ processing may start with a full-sized packet
+	 *	 or a short packet which resets MSG_MORE and thus helps
+	 *	 to re-synchronize.
+	 *	 This version resumes with a short packet.
+	 */
+	c_tx->ctrl_len = iwarp_pktinfo[c_tx->pkt.ctrl.opcode].hdr_len;
+	c_tx->ctrl_sent = 0;
+
+	/*
+	 * Update target buffer offset if any
+	 */
+	if (!c_tx->pkt.ctrl.t) {
+		/* Untagged message */
+		c_tx->pkt.c_untagged.ddp_mo = cpu_to_be32(wqe->processed);
+	} else {
+		/* Tagged message */
+		if (wr_type(wqe) == SIW_WR_RDMA_READ_RESP) {
+			c_tx->pkt.c_tagged.ddp_to =
+			    cpu_to_be64(wqe->wr.rresp.raddr + wqe->processed);
+		} else {
+			c_tx->pkt.c_tagged.ddp_to =
+			    cpu_to_be64(wqe->wr.write.raddr + wqe->processed);
+		}
+	}
+
+	/* First guess: one big unsegmented DDP segment */
+	c_tx->bytes_unsent = wqe->bytes - wqe->processed;
+	c_tx->tcp_seglen -= siw_unseg_txlen(c_tx);
+
+	if (c_tx->tcp_seglen >= 0) {
+		/* Whole DDP segment fits into current TCP segment */
+		c_tx->pkt.ctrl.l = 1;
+		c_tx->pad = -c_tx->bytes_unsent & 0x3;
+	} else {
+		/* Trim DDP payload to fit into current TCP segment */
+		c_tx->bytes_unsent += c_tx->tcp_seglen;
+		c_tx->bytes_unsent &= ~0x3;
+		c_tx->pad = 0;
+		c_tx->pkt.ctrl.l = 0;
+	}
+	c_tx->pkt.ctrl.mpa_len =
+		htons(c_tx->ctrl_len + c_tx->bytes_unsent - MPA_HDR_SIZE);
+
+#ifdef SIW_TX_FULLSEGS
+	c_tx->fpdu_len =
+		c_tx->ctrl_len + c_tx->bytes_unsent + c_tx->pad + MPA_CRC_SIZE;
+#endif
+	/*
+	 * Init MPA CRC computation
+	 */
+	if (c_tx->crc_enabled) {
+		siw_crc_txhdr(c_tx);
+		c_tx->do_crc = 1;
+	}
+	if (c_tx->bytes_unsent && !SIW_INLINED_DATA(wqe)) {
+		struct siw_sge	*sge = &wqe->wr.sgl.sge[c_tx->sge_idx];
+		/*
+		 * Reference memory to be tx'd
+		 */
+		BUG_ON(c_tx->sge_idx > wqe->wr.sgl.num_sge - 1);
+
+		if (wr_type(wqe) != SIW_WR_RDMA_READ_RESP)
+			rv = siw_check_sgl(qp->pd, sge, SR_MEM_LREAD,
+					   c_tx->sge_off, c_tx->bytes_unsent);
+		else
+			rv = siw_check_sge(qp->pd, sge, SR_MEM_RREAD,
+					   c_tx->sge_off, c_tx->bytes_unsent);
+	}
+	return rv;
+}
+
+#ifdef SIW_TX_FULLSEGS
+static inline int siw_test_wspace(struct socket *s, struct siw_iwarp_tx *c_tx)
+{
+	struct sock *sk = s->sk;
+	int rv = 0;
+
+	lock_sock(sk);
+	if (sk_stream_wspace(sk) < (int)c_tx->fpdu_len) {
+		set_bit(SOCK_NOSPACE, &s->flags);
+		rv = -EAGAIN;
+	}
+	release_sock(sk);
+
+	return rv;
+}
+#endif
+/*
+ * siw_qp_sq_proc_tx()
+ *
+ * Process one WQE which needs transmission on the wire.
+ * Return with:
+ *	-EAGAIN, if handover to TCP remained incomplete
+ *	0,	 if handover to TCP completed
+ *	< 0,	 if other errors happened.
+ *
+ * @qp:		QP to send from
+ * @wqe:	WQE causing transmission
+ */
+static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe)
+{
+	struct siw_iwarp_tx	*c_tx = &qp->tx_ctx;
+	struct socket	 	*s = qp->attrs.llp_stream_handle;
+	int			rv = 0;
+
+
+	if (wqe->wr_status == SR_WR_QUEUED) {
+		wqe->wr_status = SR_WR_INPROGRESS;
+
+		siw_calculate_tcpseg(c_tx, s);
+
+		rv = siw_qp_prepare_tx(c_tx);
+		if (rv == PKT_FRAGMENTED) {
+			c_tx->state = SIW_SEND_HDR;
+			rv = siw_prepare_fpdu(qp, wqe);
+			if (rv)
+				return rv;
+		} else if (rv == PKT_COMPLETE)
+			c_tx->state = SIW_SEND_SHORT_FPDU;
+		else
+			goto tx_done;
+	}
+next_segment:
+#ifdef SIW_TX_FULLSEGS
+	rv = siw_test_wspace(s, c_tx);
+	if (rv < 0)
+		goto tx_done;
+#endif
+
+	if (c_tx->state == SIW_SEND_SHORT_FPDU) {
+		enum siw_wr_opcode tx_type = wr_type(wqe);
+
+		/*
+		 * Always end current TCP segment (no MSG_MORE flag):
+		 * trying to fill segment would result in excessive delay.
+		 */
+		rv = siw_tx_ctrl(c_tx, s, MSG_DONTWAIT);
+
+		if (!rv && tx_type != SIW_WR_RDMA_READ_REQ)
+			wqe->processed = wqe->bytes;
+
+		goto tx_done;
+
+	} else
+		rv = siw_tx_hdt(c_tx, s);
+
+	if (!rv) {
+		/* Verbs, 6.4.: Try to stop sending after a full DDP segment
+		 * if the connection goes down (== peer half-close)
+		 */
+		if (unlikely(c_tx->tx_suspend)) {
+			rv = -ECONNABORTED;
+			goto tx_done;
+		}
+		/*
+		 * One segment sent. Processing completed if last segment.
+		 * Do next segment otherwise. Stop if tx error.
+		 */
+		if (c_tx->pkt.ctrl.l == 1) {
+			dprint(DBG_TX, "(QP%d): WR completed\n", QP_ID(qp));
+			goto tx_done;
+		}
+		c_tx->state = SIW_SEND_HDR;
+
+		siw_calculate_tcpseg(c_tx, s);
+
+		rv = siw_prepare_fpdu(qp, wqe);
+		if (!rv)
+			goto next_segment;
+	}
+tx_done:
+	return rv;
+}
+
+
+/*
+ * siw_wqe_sq_processed()
+ *
+ * Called after WQE processing has completed.
+ * If the WQE is not of signalled type, it can be released.
+ * If the ORQ is empty, a signalled WQE is attached to the CQ.
+ * Otherwise, it is appended to the end of the ORQ for later
+ * completion. To keep WQE ordering, the ORQ is always consumed FIFO.
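+ *
+ * Example: if a signalled SEND completes while an earlier RREQ is
+ * still waiting in the ORQ for its RRESP, the SEND's completion is
+ * appended behind the RREQ and only reaped after the RREQ completed.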
+ */
+static void siw_wqe_sq_processed(struct siw_wqe *wqe, struct siw_qp *qp)
+{
+	unsigned long flags;
+	LIST_HEAD(c_list);
+
+	if (!(wr_flags(wqe) & IB_SEND_SIGNALED)) {
+		atomic_inc(&qp->sq_space);
+		siw_wqe_put(wqe);
+		return;
+	}
+	lock_orq_rxsave(qp, flags);
+
+	if (ORQ_EMPTY(qp)) {
+		unlock_orq_rxsave(qp, flags);
+		dprint(DBG_WR|DBG_TX,
+			"(QP%d): Immediate completion, wr_type %d\n",
+			QP_ID(qp), wr_type(wqe));
+		list_add_tail(&wqe->list, &c_list);
+		siw_sq_complete(&c_list, qp, 1, wr_flags(wqe));
+	} else {
+		list_add_tail(&wqe->list, &qp->orq);
+		dprint(DBG_WR|DBG_TX,
+			"(QP%d): Defer completion, wr_type %d\n",
+			QP_ID(qp), wr_type(wqe));
+	}
+}
+
+int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe)
+{
+	printk(KERN_ERR "local WR's not yet implemented\n");
+	BUG();
+	return 0;
+}
+
+
+/*
+ * siw_qp_sq_process()
+ *
+ * Core TX path routine for RDMAP/DDP/MPA using a TCP kernel socket.
+ * Sends RDMAP payload for the current SQ WR @wqe of @qp in one or more
+ * MPA FPDUs, each containing a DDP segment.
+ *
+ * SQ processing may occur in user context as a result of posting
+ * new WQE's or from siw_sq_work_handler() context.
+ *
+ * SQ processing may get paused anytime, possibly in the middle of a WR
+ * or FPDU, if insufficient send space is available. SQ processing
+ * gets resumed from siw_sq_work_handler(), if send space becomes
+ * available again.
+ *
+ * Must be called with the QP state read-locked.
+ *
+ * TODO:
+ * To be solved more seriously: an outbound RREQ can be satisfied
+ * by the corresponding RRESP _before_ it gets assigned to the ORQ.
+ * This happens regularly in the RDMA READ via loopback case. Since
+ * both the outbound RREQ and the inbound RRESP can be handled by the
+ * same CPU, locking the ORQ is deadlock-prone and thus not an option.
+ * Tentatively, the RREQ gets assigned to the ORQ _before_ being
+ * sent (and pulled back in case of send failure).
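+ *
+ * Burst illustration: when called from user context, at most
+ * SQ_USER_MAXBURST WQEs are transmitted inline from the posting
+ * thread; a longer backlog is handed off to the per-CPU workqueue
+ * via siw_sq_queue_work() so the user thread is not blocked for a
+ * complete SQ drain.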
+ */
+int siw_qp_sq_process(struct siw_qp *qp, int user_ctx)
+{
+	struct siw_wqe		*wqe;
+	enum siw_wr_opcode	tx_type;
+	unsigned long		flags;
+	int			rv = 0;
+	int			max_burst;
+
+	if (user_ctx)
+		max_burst = SQ_USER_MAXBURST;
+	else
+		max_burst = max(qp->attrs.sq_size, qp->attrs.ird);
+
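+	/*
+	 * Serialize SQ processing on this QP: each entrant bumps
+	 * in_use and sleeps until its own reference is the only one
+	 * left, so at most one context runs the TX state machine at
+	 * any time.
+	 */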
+	atomic_inc(&qp->tx_ctx.in_use);
+
+	wait_event(qp->tx_ctx.waitq, atomic_read(&qp->tx_ctx.in_use) == 1);
+
+	wqe = tx_wqe(qp);
+	BUG_ON(wqe == NULL);
+
+next_wqe:
+	/*
+	 * Stop QP processing if SQ state changed
+	 */
+	if (unlikely(qp->tx_ctx.tx_suspend)) {
+		dprint(DBG_WR|DBG_TX, "(QP%d): tx suspend\n", QP_ID(qp));
+		goto done;
+	}
+	tx_type = wr_type(wqe);
+
+	dprint(DBG_WR|DBG_TX,
+		" QP(%d): WR type %d, state %d, data %u, sent %u, id %llu\n",
+		QP_ID(qp), wr_type(wqe), wqe->wr_status, wqe->bytes,
+		wqe->processed, (unsigned long long)wr_id(wqe));
+
+	if (SIW_WQE_IS_TX(wqe))
+		rv = siw_qp_sq_proc_tx(qp, wqe);
+	else
+		rv = siw_qp_sq_proc_local(qp, wqe);
+
+	if (!rv) {
+		/*
+		 * WQE processing done
+		 */
+		switch (tx_type) {
+
+		case SIW_WR_SEND:
+		case SIW_WR_RDMA_WRITE:
+
+			wqe->wc_status = IB_WC_SUCCESS;
+			wqe->wr_status = SR_WR_DONE;
+			siw_wqe_sq_processed(wqe, qp);
+			break;
+
+		case SIW_WR_RDMA_READ_REQ:
+			/*
+			 * already enqueued to ORQ queue
+			 */
+			break;
+
+		case SIW_WR_RDMA_READ_RESP:
+			/*
+			 * silently recycle the wqe
+			 */
+			/* XXX DEBUG AID, please remove */
+			wqe->wr_status = SR_WR_DONE;
+			siw_wqe_put(wqe);
+			break;
+		default:
+			BUG();
+		}
+
+		lock_sq_rxsave(qp, flags);
+
+		wqe = siw_next_tx_wqe(qp);
+		if (!wqe) {
+			tx_wqe(qp) = NULL;
+			unlock_sq_rxsave(qp, flags);
+			goto done;
+		}
+		if (wr_type(wqe) == SIW_WR_RDMA_READ_REQ) {
+			if (ORD_SUSPEND_SQ(qp)) {
+				tx_wqe(qp) = NULL;
+				unlock_sq_rxsave(qp, flags);
+				dprint(DBG_WR|DBG_TX,
+					" QP%d PAUSE SQ: ORD limit\n",
+					QP_ID(qp));
+				goto done;
+			} else {
+				tx_wqe(qp) = wqe;
+				siw_rreq_queue(wqe, qp);
+			}
+		} else  {
+			list_del_init(&wqe->list);
+			tx_wqe(qp) = wqe;
+		}
+		unlock_sq_rxsave(qp, flags);
+
+		if (--max_burst == 0) {
+			if (user_ctx) {
+				/*
+				 * Avoid keeping the user sending from its
+				 * context for too long (this would block
+				 * the user thread)
+				 */
+				siw_sq_queue_work(qp);
+				goto done;
+			} else {
+				/*
+				 * Avoid starving other QPs' TX if the consumer
+				 * keeps posting new TX work for the current CPU.
+				 */
+				int workq_len =
+				    atomic_read(&get_cpu_var(siw_workq_len));
+
+				put_cpu_var(siw_workq_len);
+
+				if (workq_len) {
+					/* Another QP's work on same WQ */
+					siw_sq_queue_work(qp);
+					goto done;
+				}
+			}
+			max_burst = max(qp->attrs.sq_size, qp->attrs.ird);
+		}
+		goto next_wqe;
+
+	} else if (rv == -EAGAIN) {
+		dprint(DBG_WR|DBG_TX,
+			"(QP%d): SQ paused: hd/tr %d of %d, data %d\n",
+			QP_ID(qp), qp->tx_ctx.ctrl_sent, qp->tx_ctx.ctrl_len,
+			qp->tx_ctx.bytes_unsent);
+		rv = 0;
+		goto done;
+	} else {
+		/*
+		 * WQE processing failed.
+		 * Verbs 8.3.2:
+		 * o It turns any WQE into a signalled WQE.
+		 * o Local catastrophic error must be surfaced
+		 * o QP must be moved into Terminate state: done by code
+		 *   doing socket state change processing
+		 *
+		 * o TODO: Termination message must be sent.
+		 * o TODO: Implement more precise work completion errors,
+		 *         see enum ib_wc_status in ib_verbs.h
+		 */
+		dprint(DBG_ON, " (QP%d): WQE type %d processing failed: %d\n",
+				QP_ID(qp), wr_type(wqe), rv);
+
+		lock_sq_rxsave(qp, flags);
+		/*
+		 * RREQ may have already been completed by inbound RRESP!
+		 */
+		if (tx_type == SIW_WR_RDMA_READ_REQ) {
+			lock_orq(qp);
+			if (!ORQ_EMPTY(qp) &&
+			    wqe == list_entry_wqe(qp->orq.prev)) {
+				/*
+				 * wqe still on the ORQ
+				 * TODO: fix a potential race condition if the
+				 * rx path is currently referencing the wqe(!)
+				 */
+				dprint(DBG_ON, " (QP%d): Bad RREQ in ORQ\n",
+					QP_ID(qp));
+				list_del_init(&wqe->list);
+				unlock_orq(qp);
+			} else {
+				/*
+				 * already completed by inbound RRESP
+				 */
+				dprint(DBG_ON,
+					" (QP%d): Bad RREQ already completed\n",
+					QP_ID(qp));
+				unlock_orq(qp);
+				tx_wqe(qp) = NULL;
+				unlock_sq_rxsave(qp, flags);
+
+				goto done;
+			}
+		}
+		tx_wqe(qp) = NULL;
+		unlock_sq_rxsave(qp, flags);
+		/*
+		 * immediately suspends further TX processing
+		 */
+		if (!qp->tx_ctx.tx_suspend)
+			siw_qp_cm_drop(qp, 0);
+
+		switch (tx_type) {
+
+		case SIW_WR_SEND:
+		case SIW_WR_RDMA_WRITE:
+		case SIW_WR_RDMA_READ_REQ:
+			wqe->wr_status = SR_WR_DONE;
+			wqe->wc_status = IB_WC_LOC_QP_OP_ERR;
+			wqe->error = rv;
+			wr_flags(wqe) |= IB_SEND_SIGNALED;
+			if (tx_type != SIW_WR_RDMA_READ_REQ)
+				/*
+				 * RREQ already enqueued to ORQ queue
+				 */
+				siw_wqe_sq_processed(wqe, qp);
+
+			siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
+
+			break;
+
+		case SIW_WR_RDMA_READ_RESP:
+			/*
+			 * Recycle the wqe
+			 */
+			dprint(DBG_WR|DBG_TX|DBG_ON, "(QP%d): "
+				   "Processing RRESPONSE failed with %d\n",
+				    QP_ID(qp), rv);
+
+			siw_async_ev(qp, NULL, IB_EVENT_QP_REQ_ERR);
+
+			siw_wqe_put(wqe);
+			break;
+
+		default:
+			BUG();
+		}
+	}
+done:
+	atomic_dec(&qp->tx_ctx.in_use);
+	wake_up(&qp->tx_ctx.waitq);
+
+	return rv;
+}
+
+static struct workqueue_struct *siw_sq_wq;
+
+int __init siw_sq_worker_init(void)
+{
+	siw_sq_wq = create_workqueue("siw_sq_wq");
+	if (!siw_sq_wq)
+		return -ENOMEM;
+
+	dprint(DBG_TX|DBG_OBJ, " Init WQ\n");
+	return 0;
+}
+
+
+void __exit siw_sq_worker_exit(void)
+{
+	dprint(DBG_TX|DBG_OBJ, " Destroy WQ\n");
+	if (siw_sq_wq) {
+		flush_workqueue(siw_sq_wq);
+		destroy_workqueue(siw_sq_wq);
+	}
+}
+
+
+/*
+ * siw_sq_work_handler()
+ *
+ * Scheduled by siw_qp_llp_write_space() socket callback if socket
+ * send space became available again. This function resumes SQ
+ * processing.
+ */
+static void siw_sq_work_handler(struct work_struct *w)
+{
+	struct siw_sq_work	*this_work;
+	struct siw_qp		*qp;
+	int			rv;
+
+	atomic_dec(&get_cpu_var(siw_workq_len));
+	put_cpu_var(siw_workq_len);
+
+	this_work = container_of(w, struct siw_sq_work, work);
+	qp = container_of(this_work, struct siw_qp, sq_work);
+
+	dprint(DBG_TX|DBG_OBJ, "(QP%d)\n", QP_ID(qp));
+
+	if (down_read_trylock(&qp->state_lock)) {
+		if (likely(qp->attrs.state == SIW_QP_STATE_RTS &&
+			   !qp->tx_ctx.tx_suspend)) {
+
+			rv = siw_qp_sq_process(qp, 0);
+			up_read(&qp->state_lock);
+
+			if (rv < 0) {
+				dprint(DBG_TX, "(QP%d): failed: %d\n",
+					QP_ID(qp), rv);
+
+				if (!qp->tx_ctx.tx_suspend)
+					siw_qp_cm_drop(qp, 0);
+			}
+		} else {
+			dprint(DBG_ON|DBG_TX, "(QP%d): state: %d %d\n",
+				QP_ID(qp), qp->attrs.state,
+					qp->tx_ctx.tx_suspend);
+			up_read(&qp->state_lock);
+		}
+	} else {
+		dprint(DBG_ON|DBG_TX, "(QP%d): QP locked\n", QP_ID(qp));
+	}
+	siw_qp_put(qp);
+}
+
+
+int siw_sq_queue_work(struct siw_qp *qp)
+{
+	int cpu, rv;
+
+	dprint(DBG_TX|DBG_OBJ, "(QP%d)\n", QP_ID(qp));
+
+	siw_qp_get(qp);
+
+	INIT_WORK(&qp->sq_work.work, siw_sq_work_handler);
+
+	cpu = get_cpu();
+
+	if (in_softirq()) {
+		if (cpu == qp->cpu) {
+			/*
+			 * Try not to use the current CPU for tx traffic.
+			 */
+			for_each_online_cpu(cpu) {
+				if (cpu != qp->cpu)
+					break;
+			}
+		} else
+			cpu = qp->cpu;
+	}
+	atomic_inc(&per_cpu(siw_workq_len, cpu));
+	rv = queue_work_on(cpu, siw_sq_wq, &qp->sq_work.work);
+	/*
+	 * Remember CPU: Avoid spreading SQ work of QP over WQ's
+	 */
+	qp->cpu = cpu;
+
+	put_cpu();
+
+	return rv;
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_user.h b/drivers/infiniband/hw/softiwarp/siw_user.h
new file mode 100644
index 0000000..ce7857d
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_user.h
@@ -0,0 +1,66 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_USER_H
+#define _SIW_USER_H
+
+/*
+ * User commands and command responses must correlate with the siw_abi
+ * definitions in user land.
+ */
+
+struct siw_uresp_create_cq {
+	__u32	cq_id;
+};
+
+struct siw_uresp_create_qp {
+	__u32	qp_id;
+	__u32	sq_size;
+	__u32	rq_size;
+};
+
+struct siw_uresp_reg_mr {
+	__u32	stag;
+};
+
+struct siw_ureq_reg_mr {
+	__u8	stag_key;
+	__u8	reserved[3];
+};
+
+#endif
diff --git a/drivers/infiniband/hw/softiwarp/siw_verbs.c b/drivers/infiniband/hw/softiwarp/siw_verbs.c
new file mode 100644
index 0000000..238150e
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_verbs.c
@@ -0,0 +1,1569 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR+1] = {
+	[IB_QPS_RESET]	= SIW_QP_STATE_IDLE,
+	[IB_QPS_INIT]	= SIW_QP_STATE_IDLE,
+	[IB_QPS_RTR]	= SIW_QP_STATE_RTR,
+	[IB_QPS_RTS]	= SIW_QP_STATE_RTS,
+	[IB_QPS_SQD]	= SIW_QP_STATE_CLOSING,
+	[IB_QPS_SQE]	= SIW_QP_STATE_TERMINATE,
+	[IB_QPS_ERR]	= SIW_QP_STATE_ERROR
+};
+
+static inline struct siw_mr *siw_mr_ofa2siw(struct ib_mr *ofa_mr)
+{
+	return container_of(ofa_mr, struct siw_mr, ofa_mr);
+}
+
+static inline struct siw_pd *siw_pd_ofa2siw(struct ib_pd *ofa_pd)
+{
+	return container_of(ofa_pd, struct siw_pd, ofa_pd);
+}
+
+static inline struct siw_ucontext *siw_ctx_ofa2siw(
+	struct ib_ucontext *ofa_ctx)
+{
+	return container_of(ofa_ctx, struct siw_ucontext, ib_ucontext);
+}
+
+static inline struct siw_qp *siw_qp_ofa2siw(struct ib_qp *ofa_qp)
+{
+	return container_of(ofa_qp, struct siw_qp, ofa_qp);
+}
+
+static inline struct siw_cq *siw_cq_ofa2siw(struct ib_cq *ofa_cq)
+{
+	return container_of(ofa_cq, struct siw_cq, ofa_cq);
+}
+
+static inline struct siw_srq *siw_srq_ofa2siw(struct ib_srq *ofa_srq)
+{
+	return container_of(ofa_srq, struct siw_srq, ofa_srq);
+}
+
+struct ib_ucontext *siw_alloc_ucontext(struct ib_device *ofa_dev,
+				       struct ib_udata *udata)
+{
+	struct siw_ucontext *ctx;
+
+	dprint(DBG_CM, "(device=%s)\n", ofa_dev->name);
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		dprint(DBG_ON, " kzalloc\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	return &ctx->ib_ucontext;
+}
+
+int siw_dealloc_ucontext(struct ib_ucontext *ctx)
+{
+	struct siw_ucontext *ucontext;
+
+	ucontext = siw_ctx_ofa2siw(ctx);
+
+	kfree(ucontext);
+
+	return 0;
+}
+
+int siw_query_device(struct ib_device *ofa_dev, struct ib_device_attr *attr)
+{
+	struct siw_dev *dev = siw_dev_ofa2siw(ofa_dev);
+
+	memset(attr, 0, sizeof *attr);
+
+	attr->max_mr_size = dev->attrs.max_mr_size;
+	attr->vendor_id = dev->attrs.vendor_id;
+	attr->vendor_part_id = dev->attrs.vendor_part_id;
+	attr->max_qp = dev->attrs.max_qp;
+	attr->max_qp_wr = dev->attrs.max_qp_wr;
+
+	/*
+	 * RDMA Read parameters:
+	 * Max. ORD (Outbound Read queue Depth), a.k.a. max_initiator_depth
+	 * Max. IRD (Inbound Read queue Depth), a.k.a. max_responder_resources
+	 */
+	attr->max_qp_rd_atom = dev->attrs.max_ord;
+	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
+	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
+	attr->device_cap_flags = dev->attrs.cap_flags;
+	attr->max_sge = dev->attrs.max_sge;
+	attr->max_sge_rd = dev->attrs.max_sge_rd;
+	attr->max_cq = dev->attrs.max_cq;
+	attr->max_cqe = dev->attrs.max_cqe;
+	attr->max_mr = dev->attrs.max_mr;
+	attr->max_pd = dev->attrs.max_pd;
+	attr->max_mw = dev->attrs.max_mw;
+	attr->max_fmr = dev->attrs.max_fmr;
+	attr->max_srq = dev->attrs.max_srq;
+	attr->max_srq_wr = dev->attrs.max_srq_wr;
+	attr->max_srq_sge = dev->attrs.max_srq_sge;
+
+	memcpy(&attr->sys_image_guid, dev->l2dev->dev_addr, 6);
+
+	/*
+	 * TODO: understand which of the following should
+	 * be given useful information
+	 *
+	 * attr->fw_ver;
+	 * attr->max_ah
+	 * attr->max_map_per_fmr
+	 * attr->max_ee
+	 * attr->max_rdd
+	 * attr->max_ee_rd_atom;
+	 * attr->max_ee_init_rd_atom;
+	 * attr->max_raw_ipv6_qp
+	 * attr->max_raw_ethy_qp
+	 * attr->max_mcast_grp
+	 * attr->max_mcast_qp_attach
+	 * attr->max_total_mcast_qp_attach
+	 * attr->max_pkeys
+	 * attr->atomic_cap;
+	 * attr->page_size_cap;
+	 * attr->hw_ver;
+	 * attr->local_ca_ack_delay;
+	 */
+	return 0;
+}
+
+/*
+ * Approximate translation of a real network MTU to an IB MTU.
+ * For example, a standard 1500-byte Ethernet MTU maps to IB_MTU_1024.
+ *
+ * TODO: is that needed for RNICs? We may have a medium
+ *       which reports an MTU of 64KB and would have to degrade to 4KB.
+ */
+static inline enum ib_mtu siw_mtu_net2ofa(unsigned short mtu)
+{
+	if (mtu >= 4096)
+		return IB_MTU_4096;
+	if (mtu >= 2048)
+		return IB_MTU_2048;
+	if (mtu >= 1024)
+		return IB_MTU_1024;
+	if (mtu >= 512)
+		return IB_MTU_512;
+	if (mtu >= 256)
+		return IB_MTU_256;
+	return -1;
+}
+
+int siw_query_port(struct ib_device *ofa_dev, u8 port,
+		     struct ib_port_attr *attr)
+{
+	struct siw_dev *dev = siw_dev_ofa2siw(ofa_dev);
+
+	memset(attr, 0, sizeof *attr);
+	/*
+	 * TODO: fully understand what to do here
+	 */
+	attr->state = IB_PORT_ACTIVE;	/* ?? */
+	attr->max_mtu = siw_mtu_net2ofa(dev->l2dev->mtu);
+	attr->active_mtu = attr->max_mtu;
+	attr->gid_tbl_len = 1;
+	attr->port_cap_flags = IB_PORT_CM_SUP;	/* ?? */
+	attr->port_cap_flags |= IB_PORT_DEVICE_MGMT_SUP;
+	attr->max_msg_sz = -1;
+	attr->pkey_tbl_len = 1;
+	attr->active_width = 2;
+	attr->active_speed = 2;
+	/*
+	 * All zero
+	 *
+	 * attr->lid = 0;
+	 * attr->bad_pkey_cntr = 0;
+	 * attr->qkey_viol_cntr = 0;
+	 * attr->sm_lid = 0;
+	 * attr->lmc = 0;
+	 * attr->max_vl_num = 0;
+	 * attr->sm_sl = 0;
+	 * attr->subnet_timeout = 0;
+	 * attr->init_type_reply = 0;
+	 * attr->phys_state = 0;
+	 */
+	return 0;
+}
+
+int siw_query_pkey(struct ib_device *ofa_dev, u8 port, u16 idx, u16 *pkey)
+{
+	*pkey = 0;
+	return 0;
+}
+
+int siw_query_gid(struct ib_device *ofa_dev, u8 port, int idx,
+		   union ib_gid *gid)
+{
+	struct siw_dev *dev = siw_dev_ofa2siw(ofa_dev);
+
+	/* subnet_prefix == interface_id == 0; */
+	memset(gid, 0, sizeof *gid);
+	memcpy(&gid->raw[0], dev->l2dev->dev_addr, 6);
+
+	return 0;
+}
+
+struct ib_pd *siw_alloc_pd(struct ib_device *ofa_dev,
+			   struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct siw_pd	*pd = NULL;
+	struct siw_dev	*dev   = siw_dev_ofa2siw(ofa_dev);
+	int rv;
+
+	if (atomic_inc_return(&dev->num_pd) > SIW_MAX_PD) {
+		dprint(DBG_ON, ": Out of PD's\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	pd = kmalloc(sizeof *pd, GFP_KERNEL);
+	if (!pd) {
+		dprint(DBG_ON, ": malloc\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	rv = siw_pd_add(dev, pd);
+	if (rv) {
+		dprint(DBG_ON, ": siw_pd_add\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (context) {
+		if (ib_copy_to_udata(udata, &pd->hdr.id, sizeof pd->hdr.id)) {
+			rv = -EFAULT;
+			goto err_out_idr;
+		}
+	}
+	return &pd->ofa_pd;
+
+err_out_idr:
+	siw_remove_obj(&dev->idr_lock, &dev->pd_idr, &pd->hdr);
+err_out:
+	kfree(pd);
+	atomic_dec(&dev->num_pd);
+
+	return ERR_PTR(rv);
+}
+
+int siw_dealloc_pd(struct ib_pd *ofa_pd)
+{
+	struct siw_pd	*pd = siw_pd_ofa2siw(ofa_pd);
+	struct siw_dev	*dev = siw_dev_ofa2siw(ofa_pd->device);
+
+	siw_remove_obj(&dev->idr_lock, &dev->pd_idr, &pd->hdr);
+	siw_pd_put(pd);
+
+	atomic_dec(&dev->num_pd);
+	return 0;
+}
+
+struct ib_ah *siw_create_ah(struct ib_pd *pd, struct ib_ah_attr *attr)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+int siw_destroy_ah(struct ib_ah *ah)
+{
+	return -ENOSYS;
+}
+
+
+void siw_qp_get_ref(struct ib_qp *ofa_qp)
+{
+	struct siw_qp	*qp = siw_qp_ofa2siw(ofa_qp);
+
+	dprint(DBG_OBJ|DBG_CM, "(QP%d): Get Reference\n", QP_ID(qp));
+	siw_qp_get(qp);
+}
+
+
+void siw_qp_put_ref(struct ib_qp *ofa_qp)
+{
+	struct siw_qp	*qp = siw_qp_ofa2siw(ofa_qp);
+
+	dprint(DBG_OBJ|DBG_CM, "(QP%d): Put Reference\n", QP_ID(qp));
+	siw_qp_put(qp);
+}
+
+int siw_no_mad(struct ib_device *ofa_dev, int flags, u8 port,
+			    struct ib_wc *wc, struct ib_grh *grh,
+			    struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	return -ENOSYS;
+}
+
+
+/*
+ * siw_create_qp()
+ *
+ * Create QP of requested size on given device.
+ *
+ * @ofa_pd:	OFA PD contained in siw PD
+ * @attrs:	Initial QP attributes.
+ * @udata:	used to provide QP ID, SQ and RQ size back to user.
+ */
+
+struct ib_qp *siw_create_qp(struct ib_pd *ofa_pd, struct ib_qp_init_attr *attrs,
+			    struct ib_udata *udata)
+{
+	struct siw_qp	 		*qp = NULL;
+	struct siw_pd	 		*pd = siw_pd_ofa2siw(ofa_pd);
+	struct ib_device	 	*ofa_dev = ofa_pd->device;
+	struct siw_dev 			*dev = siw_dev_ofa2siw(ofa_dev);
+	struct siw_cq  			*scq = NULL, *rcq = NULL;
+	struct siw_iwarp_tx		*c_tx;
+	struct siw_iwarp_rx		*c_rx;
+	struct siw_uresp_create_qp	uresp;
+
+	int rv = 0;
+
+	dprint(DBG_OBJ|DBG_CM, ": new QP on device %s\n",
+		ofa_dev->name);
+
+	if (atomic_inc_return(&dev->num_qp) > SIW_MAX_QP) {
+		dprint(DBG_ON, ": Out of QP's\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (attrs->qp_type != IB_QPT_RC) {
+		dprint(DBG_ON, ": Only RC QP's supported\n");
+		rv = -EINVAL;
+		goto err_out;
+	}
+	if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) ||
+	    (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) ||
+	    (attrs->cap.max_send_sge > SIW_MAX_SGE)  ||
+	    (attrs->cap.max_recv_sge > SIW_MAX_SGE)) {
+		dprint(DBG_ON, ": QP Size!\n");
+		rv = -EINVAL;
+		goto err_out;
+	}
+	/*
+	 * NOTE: we allow for zero-element SGLs in SQ and RQ WQEs,
+	 * but not for a QP unable to hold any WQE (SQ + RQ)
+	 */
+	if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) {
+		rv = -EINVAL;
+		goto err_out;
+	}
+
+	scq = siw_cq_id2obj(dev, ((struct siw_cq *)attrs->send_cq)->hdr.id);
+	rcq = siw_cq_id2obj(dev, ((struct siw_cq *)attrs->recv_cq)->hdr.id);
+
+	if (!scq || !rcq) {
+		dprint(DBG_OBJ, ": Fail: SCQ: 0x%p, RCQ: 0x%p\n",
+			scq, rcq);
+		rv = -EINVAL;
+		goto err_out;
+	}
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		dprint(DBG_ON, ": kzalloc\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+
+	rv = siw_qp_add(dev, qp);
+	if (rv)
+		goto err_out;
+
+	INIT_LIST_HEAD(&qp->wqe_freelist);
+	INIT_LIST_HEAD(&qp->sq);
+	INIT_LIST_HEAD(&qp->rq);
+	INIT_LIST_HEAD(&qp->orq);
+	INIT_LIST_HEAD(&qp->irq);
+
+	init_rwsem(&qp->state_lock);
+	spin_lock_init(&qp->freelist_lock);
+	spin_lock_init(&qp->sq_lock);
+	spin_lock_init(&qp->rq_lock);
+	spin_lock_init(&qp->orq_lock);
+
+	init_waitqueue_head(&qp->tx_ctx.waitq);
+
+	qp->pd  = pd;
+	qp->scq = scq;
+	qp->rcq = rcq;
+
+	if (attrs->srq) {
+		/*
+		 * SRQ support.
+		 * Verbs 6.3.7: ignore RQ size, if SRQ present
+		 * Verbs 6.3.5: do not check PD of SRQ against PD of QP
+		 */
+		qp->srq = siw_srq_ofa2siw(attrs->srq);
+		qp->attrs.rq_size = 0;
+		atomic_set(&qp->rq_space, 0);
+		dprint(DBG_OBJ, " QP(%d): SRQ(%p) attached\n",
+			QP_ID(qp), qp->srq);
+	} else {
+		qp->srq = NULL;
+		qp->attrs.rq_size = attrs->cap.max_recv_wr;
+		atomic_set(&qp->rq_space, qp->attrs.rq_size);
+	}
+	qp->attrs.sq_size = attrs->cap.max_send_wr;
+	atomic_set(&qp->sq_space, qp->attrs.sq_size);
+	qp->attrs.sq_max_sges = attrs->cap.max_send_sge;
+	/*
+	 * ofed has no max_send_sge_rdmawrite
+	 */
+	qp->attrs.sq_max_sges_rdmaw = attrs->cap.max_send_sge;
+	qp->attrs.rq_max_sges = attrs->cap.max_recv_sge;
+	/*
+	 * While not part of attrs, we initialize ORD/IRD here.
+	 */
+	qp->attrs.ord = dev->attrs.max_ord;
+	qp->attrs.ird = dev->attrs.max_ird;
+
+	qp->attrs.state = SIW_QP_STATE_IDLE;
+
+	if (udata) {
+		uresp.sq_size = qp->attrs.sq_size;
+		uresp.rq_size = qp->attrs.rq_size;
+		uresp.qp_id = QP_ID(qp);
+
+		rv = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		if (rv)
+			goto err_out_idr;
+	}
+	c_tx = &qp->tx_ctx;
+	c_rx = &qp->rx_ctx;
+
+	c_tx->crc_enabled = c_rx->crc_enabled = CONFIG_RDMA_SIW_CRC_ENFORCED;
+
+	if (c_tx->crc_enabled) {
+		c_tx->mpa_crc_hd.tfm =
+			crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(c_tx->mpa_crc_hd.tfm)) {
+			rv = PTR_ERR(c_tx->mpa_crc_hd.tfm);
+			dprint(DBG_ON, "(QP%d): Failed loading crc32c"
+				" with error %d. ", QP_ID(qp), rv);
+			goto err_out_idr;
+		}
+	}
+	if (c_rx->crc_enabled) {
+		c_rx->mpa_crc_hd.tfm =
+			crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(c_rx->mpa_crc_hd.tfm)) {
+			rv = PTR_ERR(c_rx->mpa_crc_hd.tfm);
+			crypto_free_hash(c_tx->mpa_crc_hd.tfm);
+			goto err_out_idr;
+		}
+	}
+	atomic_set(&qp->tx_ctx.in_use, 0);
+
+	qp->ofa_qp.qp_num = QP_ID(qp);
+
+	siw_pd_get(pd);
+
+	return &qp->ofa_qp;
+
+err_out_idr:
+	siw_remove_obj(&dev->idr_lock, &dev->qp_idr, &qp->hdr);
+err_out:
+	if (scq)
+		siw_cq_put(scq);
+	if (rcq)
+		siw_cq_put(rcq);
+
+	kfree(qp);
+	atomic_dec(&dev->num_qp);
+
+	return ERR_PTR(rv);
+}
+
+/*
+ * Minimal siw_query_qp() verb implementation, just enough to allow
+ * the qperf application to run on siw.
+ *
+ * TODO: everything else.
+ */
+int siw_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
+	qp_init_attr->cap.max_inline_data = 0;
+
+	return 0;
+}
+
+int siw_ofed_modify_qp(struct ib_qp *ofa_qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata)
+{
+	struct siw_qp_attrs	new_attrs;
+	enum siw_qp_attr_mask	siw_attr_mask = 0;
+	struct siw_qp		*qp = siw_qp_ofa2siw(ofa_qp);
+	int			rv;
+
+	if (!attr_mask) {
+		dprint(DBG_CM, "(QP%d): attr_mask==0 ignored\n", QP_ID(qp));
+		return 0;
+	}
+	siw_dprint_qp_attr_mask(attr_mask);
+
+	memset(&new_attrs, 0, sizeof new_attrs);
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+
+		siw_attr_mask |= SIW_QP_ATTR_ACCESS_FLAGS;
+
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+			new_attrs.flags |= SIW_RDMA_READ_ENABLED;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+			new_attrs.flags |= SIW_RDMA_WRITE_ENABLED;
+		if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+			new_attrs.flags |= SIW_RDMA_BIND_ENABLED;
+	}
+	if (attr_mask & IB_QP_STATE) {
+		dprint(DBG_CM, "(QP%d): Desired IB QP state: %s\n",
+			   QP_ID(qp), ib_qp_state_to_string[attr->qp_state]);
+
+		new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state];
+
+		if (new_attrs.state > SIW_QP_STATE_RTS)
+			qp->tx_ctx.tx_suspend = 1;
+
+		/* TODO: SIW_QP_STATE_UNDEF is currently not possible ... */
+		if (new_attrs.state == SIW_QP_STATE_UNDEF)
+			return -EINVAL;
+
+		siw_attr_mask |= SIW_QP_ATTR_STATE;
+	}
+	if (!attr_mask)
+		return 0;
+
+	down_write(&qp->state_lock);
+
+	rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask);
+
+	up_write(&qp->state_lock);
+	return rv;
+}
+
+int siw_destroy_qp(struct ib_qp *ofa_qp)
+{
+	struct ib_device	*ofa_dev = ofa_qp->device;
+	struct siw_dev		*dev = siw_dev_ofa2siw(ofa_dev);
+	struct siw_qp		*qp = siw_qp_ofa2siw(ofa_qp);
+	struct siw_cep		*cep;
+	struct siw_qp_attrs	qp_attrs;
+
+	dprint(DBG_CM, "(QP%d): SIW QP state=%d, cep=0x%p\n",
+		QP_ID(qp), qp->attrs.state, qp->cep);
+
+	/*
+	 * Mark the QP as being destroyed to prevent eventual async
+	 * callbacks to the OFA core
+	 */
+	qp->attrs.flags |= SIW_QP_IN_DESTROY;
+	qp->rx_ctx.rx_suspend = 1;
+
+	down_write(&qp->state_lock);
+
+	qp_attrs.state = SIW_QP_STATE_ERROR;
+	(void)siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE);
+
+	up_write(&qp->state_lock);
+
+	cep = qp->cep;
+	if (cep) {
+		/*
+		 * Wait if CM work is scheduled. Calling siw_qp_modify()
+		 * has already dropped the network connection.
+		 */
+		dprint(DBG_CM, " (QP%d) (CEP 0x%p): %s (%d)\n",
+			QP_ID(qp), cep, atomic_read(&cep->ref.refcount) > 1 ?
+			"Wait for CM" : "CM done",
+			atomic_read(&cep->ref.refcount));
+
+		wait_event(cep->waitq, atomic_read(&cep->ref.refcount) == 1);
+		dprint(DBG_CM, "(QP%d): CM done 2\n", QP_ID(qp));
+		qp->cep = NULL;
+		siw_cep_put(cep);
+	}
+
+	if (qp->rx_ctx.crc_enabled)
+		crypto_free_hash(qp->rx_ctx.mpa_crc_hd.tfm);
+	if (qp->tx_ctx.crc_enabled)
+		crypto_free_hash(qp->tx_ctx.mpa_crc_hd.tfm);
+
+	siw_remove_obj(&dev->idr_lock, &dev->qp_idr, &qp->hdr);
+
+	/* Drop references */
+	siw_cq_put(qp->scq);
+	siw_cq_put(qp->rcq);
+	siw_pd_put(qp->pd);
+	qp->scq = qp->rcq = NULL;
+
+	siw_qp_freeq_flush(qp);
+
+	siw_qp_put(qp);
+
+	atomic_dec(&dev->num_qp);
+	return 0;
+}
+
+/*
+ * siw_copy_sgl()
+ *
+ * Copy an SGL from the user (OFA) representation to the local
+ * representation.
+ * Memory lookup and base+bounds checks must be deferred until
+ * the wqe gets executed.
+ */
+static int siw_copy_sgl(struct ib_sge *ofa_sge, struct siw_sge *si_sge,
+			int num_sge)
+{
+	int bytes = 0;
+
+	while (num_sge--) {
+		si_sge->addr = ofa_sge->addr;
+		si_sge->len  = ofa_sge->length;
+		si_sge->lkey = ofa_sge->lkey;
+		/*
+		 * defer memory lookup to WQE processing
+		 */
+		si_sge->mem.obj = NULL;
+
+		bytes += si_sge->len;
+		si_sge++; ofa_sge++;
+	}
+	return bytes;
+}
+
+/*
+ * siw_copy_inline_sgl()
+ *
+ * Prepare an SGL of inlined data for sending.
+ * The user provides an SGL with unregistered user buffers. The function
+ * checks if the given buffer addresses and lengths are within process
+ * context bounds and copies the data into one kernel buffer. This implies
+ * a dual copy operation in the TX path, since TCP will make another copy
+ * for retransmission. There is room for efficiency improvement.
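+ *
+ * Example: an inline send with two SGEs of 100 and 200 bytes is
+ * collapsed into a single 300-byte kmalloc'ed buffer referenced by
+ * si_sge->mem.buf; addr and lkey are zeroed since no MR is involved.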
+ */
+static int siw_copy_inline_sgl(struct ib_sge *ofa_sge, struct siw_sge *si_sge,
+			       int num_sge)
+{
+	char	*kbuf;
+	int 	i, bytes = 0;
+
+	if (unlikely(num_sge == 0))
+		return 0;
+
+	for (i = 0; i < num_sge; i++) {
+		struct ib_sge *sge = &ofa_sge[i];
+
+		if (unlikely(!access_ok(VERIFY_READ, sge->addr, sge->length)))
+			return -EFAULT;
+
+		bytes += sge->length;
+
+		if (bytes > SIW_MAX_INLINE)
+			return -EINVAL;
+	}
+	if (unlikely(!bytes))
+		return 0;
+
+	kbuf = kmalloc(bytes, GFP_KERNEL);
+	if (unlikely(!kbuf)) {
+		dprint(DBG_ON, " kmalloc\n");
+		return -ENOMEM;
+	}
+	si_sge->mem.buf = kbuf;
+
+	while (num_sge--) {
+		if (__copy_from_user(kbuf,
+				     (void *)(unsigned long)ofa_sge->addr,
+				     ofa_sge->length)) {
+			kfree(si_sge->mem.buf);
+			return -EFAULT;
+		}
+		kbuf += ofa_sge->length;
+		ofa_sge++;
+	}
+	si_sge->len = bytes;
+	si_sge->lkey = 0;
+	si_sge->addr = 0; /* don't need the user addr */
+	return bytes;
+}
+
+
+/*
+ * siw_post_send()
+ *
+ * Post a list of S-WR's to a SQ.
+ *
+ * @ofa_qp:	OFA QP contained in siw QP
+ * @wr:		Null terminated list of user WR's
+ * @bad_wr:	Points to failing WR in case of synchronous failure.
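+ *
+ * Usage sketch (hypothetical consumer code, standard OFA verbs;
+ * buf_dma, len, mr and qp are assumed to exist):
+ *
+ *	struct ib_sge sge = { .addr = buf_dma, .length = len,
+ *			      .lkey = mr->lkey };
+ *	struct ib_send_wr wr = { .opcode = IB_WR_SEND,
+ *				 .send_flags = IB_SEND_SIGNALED,
+ *				 .sg_list = &sge, .num_sge = 1 };
+ *	struct ib_send_wr *bad_wr;
+ *
+ *	if (ib_post_send(qp, &wr, &bad_wr))
+ *		;	/* bad_wr points at the failing WR */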
+ */
+int siw_post_send(struct ib_qp *ofa_qp, struct ib_send_wr *wr,
+		  struct ib_send_wr **bad_wr)
+{
+	struct siw_wqe	*wqe = NULL;
+	struct siw_qp	*qp = siw_qp_ofa2siw(ofa_qp);
+
+	unsigned long flags;
+	int rv = 0;
+
+	dprint(DBG_WR|DBG_TX, "(QP%d): state=%d\n",
+		QP_ID(qp), qp->attrs.state);
+
+	/*
+	 * Acquire QP state lock for reading. The idea is that a
+	 * user cannot move the QP out of RTS during TX/RX processing.
+	 */
+	down_read(&qp->state_lock);
+
+	if (qp->attrs.state != SIW_QP_STATE_RTS) {
+		dprint(DBG_WR|DBG_ON, "(QP%d): state=%d\n",
+			QP_ID(qp), qp->attrs.state);
+		up_read(&qp->state_lock);
+		*bad_wr = wr;
+		return -ENOTCONN;
+	}
+	dprint(DBG_WR|DBG_TX, "(QP%d): sq_space(#1)=%d\n",
+		QP_ID(qp), atomic_read(&qp->sq_space));
+
+	while (wr) {
+		if (!atomic_read(&qp->sq_space)) {
+			dprint(DBG_ON, " sq_space\n");
+			wqe = NULL;
+			rv = -ENOMEM;
+			break;
+		}
+		wqe = siw_wqe_get(qp, wr->opcode);
+		if (!wqe) {
+			dprint(DBG_ON, " siw_wqe_get\n");
+			rv = -ENOMEM;
+			break;
+		}
+		if (wr->num_sge > qp->attrs.sq_max_sges) {
+			/*
+			 * NOTE: we allow for zero length wr's here.
+			 */
+			dprint(DBG_WR, "(QP%d): Num SGE: %d\n",
+				QP_ID(qp), wr->num_sge);
+			rv = -EINVAL;
+			break;
+		}
+		wr_type(wqe) = wr->opcode;
+		wr_flags(wqe) = wr->send_flags;
+		wr_id(wqe) = wr->wr_id;
+
+		if (SIW_INLINED_DATA(wqe))
+			dprint(DBG_WR, "(QP%d): INLINE DATA\n", QP_ID(qp));
+
+		switch (wr->opcode) {
+
+		case IB_WR_SEND:
+			if (!SIW_INLINED_DATA(wqe)) {
+				rv = siw_copy_sgl(wr->sg_list, wqe->wr.send.sge,
+						  wr->num_sge);
+				wqe->wr.send.num_sge = wr->num_sge;
+			} else {
+				rv = siw_copy_inline_sgl(wr->sg_list,
+							 wqe->wr.send.sge,
+							 wr->num_sge);
+				wqe->wr.send.num_sge = 1;
+			}
+			if (rv <= 0) {
+				rv = -EINVAL;
+				break;
+			}
+			wqe->bytes = rv;
+			break;
+
+		case IB_WR_RDMA_READ:
+			/*
+			 * The OFED WR restricts the RREAD sink to an SGL
+			 * containing one SGE only. We could relax this to an
+			 * SGL with multiple elements referring to the SAME
+			 * ltag, or even send a private per-rreq tag referring
+			 * to a checked local SGL with MULTIPLE ltags. This
+			 * would be easy to do...
+			 */
+			if (wr->num_sge != 1) {
+				rv = -EINVAL;
+				break;
+			}
+			rv = siw_copy_sgl(wr->sg_list, wqe->wr.rread.sge, 1);
+			/*
+			 * NOTE: zero length RREAD is allowed!
+			 */
+			wqe->wr.rread.raddr = wr->wr.rdma.remote_addr;
+			wqe->wr.rread.rtag = wr->wr.rdma.rkey;
+			wqe->wr.rread.num_sge = 1;
+			wqe->bytes = rv;
+			break;
+
+		case IB_WR_RDMA_WRITE:
+			if (!SIW_INLINED_DATA(wqe)) {
+				rv = siw_copy_sgl(wr->sg_list, wqe->wr.send.sge,
+						  wr->num_sge);
+				wqe->wr.write.num_sge = wr->num_sge;
+			} else {
+				rv = siw_copy_inline_sgl(wr->sg_list,
+							 wqe->wr.send.sge,
+							 wr->num_sge);
+				wqe->wr.write.num_sge = min(1, wr->num_sge);
+			}
+			/*
+			 * NOTE: zero length WRITE is allowed!
+			 */
+			if (rv < 0) {
+				rv = -EINVAL;
+				break;
+			}
+			wqe->wr.write.raddr = wr->wr.rdma.remote_addr;
+			wqe->wr.write.rtag = wr->wr.rdma.rkey;
+			wqe->bytes = rv;
+			break;
+
+		default:
+			dprint(DBG_WR|DBG_TX,
+				"(QP%d): Opcode %d not yet implemented\n",
+				QP_ID(qp), wr->opcode);
+			rv = -EINVAL;
+			break;
+		}
+		dprint(DBG_WR|DBG_TX, "(QP%d): opcode %d, bytes %d, "
+				"flags 0x%x\n",
+				QP_ID(qp), wr_type(wqe), wqe->bytes,
+				wr_flags(wqe));
+		if (rv < 0)
+			break;
+
+		wqe->wr_status = SR_WR_QUEUED;
+
+		lock_sq_rxsave(qp, flags);
+		list_add_tail(&wqe->list, &qp->sq);
+		atomic_dec(&qp->sq_space);
+		unlock_sq_rxsave(qp, flags);
+
+		wr = wr->next;
+	}
+	/*
+	 * Send directly if SQ processing is not in progress.
+	 * Eventual immediate errors (rv < 0) do not affect the involved
+	 * RI resources (Verbs, 8.3.1) and thus do not prevent SQ
+	 * processing if new work is already pending. But rv must be
+	 * passed to the caller.
+	 */
+	lock_sq_rxsave(qp, flags);
+
+	if (tx_wqe(qp) == NULL) {
+		struct siw_wqe	*next = siw_next_tx_wqe(qp);
+		if (next != NULL) {
+			if (wr_type(next) != SIW_WR_RDMA_READ_REQ ||
+			    !ORD_SUSPEND_SQ(qp)) {
+				tx_wqe(qp) = next;
+				if (wr_type(next) != SIW_WR_RDMA_READ_REQ)
+					list_del_init(&next->list);
+				else
+					siw_rreq_queue(next, qp);
+
+				unlock_sq_rxsave(qp, flags);
+
+				dprint(DBG_WR|DBG_TX,
+					"(QP%d): Direct sending...\n",
+					QP_ID(qp));
+
+				if (siw_qp_sq_process(qp, 1) != 0 &&
+				    !(qp->tx_ctx.tx_suspend))
+					siw_qp_cm_drop(qp, 0);
+			} else
+				unlock_sq_rxsave(qp, flags);
+		} else
+			unlock_sq_rxsave(qp, flags);
+	} else
+		unlock_sq_rxsave(qp, flags);
+
+	up_read(&qp->state_lock);
+
+	dprint(DBG_WR|DBG_TX, "(QP%d): sq_space(#2)=%d\n", QP_ID(qp),
+		atomic_read(&qp->sq_space));
+	if (rv >= 0)
+		return 0;
+	/*
+	 * Immediate error
+	 */
+	dprint(DBG_WR|DBG_ON, "(QP%d): error=%d\n", QP_ID(qp), rv);
+
+	if (wqe != NULL)
+		siw_wqe_put(wqe);
+	*bad_wr = wr;
+	return rv;
+}
+
+/*
+ * siw_post_receive()
+ *
+ * Post a list of R-WR's to a RQ.
+ *
+ * @ofa_qp:	OFA QP contained in siw QP
+ * @wr:		Null terminated list of user WR's
+ * @bad_wr:	Points to failing WR in case of synchronous failure.
+ */
+int siw_post_receive(struct ib_qp *ofa_qp, struct ib_recv_wr *wr,
+		     struct ib_recv_wr **bad_wr)
+{
+	struct siw_wqe	*wqe = NULL;
+	struct siw_qp	*qp = siw_qp_ofa2siw(ofa_qp);
+	unsigned long	flags;
+	int rv = 0;
+
+	dprint(DBG_WR|DBG_TX, "(QP%d): state=%d\n", QP_ID(qp),
+		qp->attrs.state);
+
+	if (qp->srq)
+		return -EOPNOTSUPP; /* what else from errno.h? */
+	/*
+	 * Acquire a QP state lock for reading. The idea is that a
+	 * user cannot move the QP out of RTS during TX/RX processing.
+	 */
+	down_read(&qp->state_lock);
+
+	if (qp->attrs.state > SIW_QP_STATE_RTS) {
+		up_read(&qp->state_lock);
+		dprint(DBG_ON, " (QP%d): state=%d\n", QP_ID(qp),
+			qp->attrs.state);
+		return -EINVAL;
+	}
+	while (wr) {
+		/*
+		 * NOTE: siw_wqe_get() calls kzalloc(), which may sleep.
+		 */
+		if (!atomic_read(&qp->rq_space) ||
+			!(wqe = siw_wqe_get(qp, SIW_WR_RECEIVE))) {
+			dprint(DBG_ON, " siw_wqe_get? (%d)\n",
+			       atomic_read(&qp->rq_space));
+			rv = -ENOMEM;
+			break;
+		}
+		if (wr->num_sge > qp->attrs.rq_max_sges) {
+			dprint(DBG_WR|DBG_ON, "(QP%d): Num SGE: %d\n",
+				QP_ID(qp), wr->num_sge);
+			rv = -EINVAL;
+			break;
+		}
+		wr_type(wqe) = SIW_WR_RECEIVE;
+		wr_id(wqe) = wr->wr_id;
+
+		rv = siw_copy_sgl(wr->sg_list, wqe->wr.recv.sge, wr->num_sge);
+		if (rv < 0) {
+			/*
+			 * XXX tentatively allow zero length receive
+			 */
+			rv = -EINVAL;
+			break;
+		}
+		wqe->wr.recv.num_sge = wr->num_sge;
+		wqe->bytes = rv;
+
+		lock_rq_rxsave(qp, flags);
+
+		list_add_tail(&wqe->list, &qp->rq);
+		wqe->wr_status = SR_WR_QUEUED;
+		atomic_dec(&qp->rq_space);
+
+		unlock_rq_rxsave(qp, flags);
+
+		wr = wr->next;
+	}
+	if (rv <= 0) {
+		dprint(DBG_WR|DBG_ON, "(QP%d): error=%d\n", QP_ID(qp), rv);
+		if (wqe != NULL)
+			siw_wqe_put(wqe);
+		*bad_wr = wr;
+	}
+	dprint(DBG_WR|DBG_RX, "(QP%d): rq_space=%d\n", QP_ID(qp),
+		atomic_read(&qp->rq_space));
+
+	up_read(&qp->state_lock);
+
+	return rv > 0 ? 0 : rv;
+}
+
+int siw_destroy_cq(struct ib_cq *ofa_cq)
+{
+	struct siw_cq	 	*cq  = siw_cq_ofa2siw(ofa_cq);
+	struct ib_device	*ofa_dev = ofa_cq->device;
+	struct siw_dev		*dev = siw_dev_ofa2siw(ofa_dev);
+
+	siw_cq_flush(cq);
+
+	siw_remove_obj(&dev->idr_lock, &dev->cq_idr, &cq->hdr);
+	siw_cq_put(cq);
+	atomic_dec(&dev->num_cq);
+	return 0;
+}
+
+/*
+ * siw_create_cq()
+ *
+ * Create CQ of requested size on given device.
+ *
+ * @ofa_dev:	OFA device contained in siw device
+ * @size:	maximum number of CQE's allowed.
+ * @ib_context: user context.
+ * @udata:	used to provide CQ ID back to user.
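+ *
+ * Usage sketch (hypothetical consumer code; comp_handler and ctx are
+ * assumed to exist):
+ *
+ *	struct ib_cq *cq = ib_create_cq(ofa_dev, comp_handler, NULL,
+ *					ctx, 128, 0);
+ *	if (IS_ERR(cq))
+ *		;	/* e.g. -EINVAL if size is out of bounds */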
+ */
+
+struct ib_cq *siw_create_cq(struct ib_device *ofa_dev, int size,
+			    int vec /* unused */,
+			    struct ib_ucontext *ib_context,
+			    struct ib_udata *udata)
+{
+	struct siw_cq	 		*cq = NULL;
+	struct siw_dev 			*dev = siw_dev_ofa2siw(ofa_dev);
+	struct siw_uresp_create_cq	uresp;
+	int		 		rv;
+
+	if (atomic_inc_return(&dev->num_cq) > SIW_MAX_CQ) {
+		dprint(DBG_ON, ": Out of CQ's\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (size < 1 || size > SIW_MAX_CQE) {
+		dprint(DBG_ON, ": CQE: %d\n", size);
+		rv = -EINVAL;
+		goto err_out;
+	}
+	cq = kmalloc(sizeof *cq, GFP_KERNEL);
+	if (!cq) {
+		dprint(DBG_ON, ":  kmalloc\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	cq->ofa_cq.cqe = size - 1;
+
+	rv = siw_cq_add(dev, cq);
+	if (rv)
+		goto err_out_idr;
+
+	INIT_LIST_HEAD(&cq->queue);
+	spin_lock_init(&cq->lock);
+	atomic_set(&cq->qlen, 0);
+
+	if (ib_context) {
+		uresp.cq_id = OBJ_ID(cq);
+
+		rv = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		if (rv)
+			goto err_out_idr;
+	}
+	return &cq->ofa_cq;
+
+err_out_idr:
+	siw_remove_obj(&dev->idr_lock, &dev->cq_idr, &cq->hdr);
+err_out:
+	dprint(DBG_OBJ, ": CQ creation failed\n");
+
+	kfree(cq);
+	atomic_dec(&dev->num_cq);
+
+	return ERR_PTR(rv);
+}
+
+/*
+ * siw_poll_cq()
+ *
+ * Reap CQ entries if available and copy work completion status into
+ * array of WC's provided by caller. Returns number of reaped CQE's.
+ *
+ * @ofa_cq:	OFA CQ contained in siw CQ.
+ * @num_cqe:	Maximum number of CQE's to reap.
+ * @wc:		Array of work completions to be filled by siw.
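+ *
+ * Usage sketch (hypothetical consumer code):
+ *
+ *	struct ib_wc wc[8];
+ *	int n = ib_poll_cq(cq, 8, wc);
+ *
+ *	while (n-- > 0)
+ *		;	/* inspect wc[n].status and wc[n].wr_id */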
+ */
+int siw_poll_cq(struct ib_cq *ofa_cq, int num_cqe, struct ib_wc *wc)
+{
+	struct siw_cq		*cq  = siw_cq_ofa2siw(ofa_cq);
+	int			i;
+
+	for (i = 0; i < num_cqe; i++) {
+		if (!(siw_reap_cqe(cq, wc)))
+			break;
+		wc++;
+	}
+	dprint(DBG_WR, " CQ%d: reap %d completions (%d left)\n",
+		OBJ_ID(cq), i, atomic_read(&cq->qlen));
+
+	return i;
+}
+
+/*
+ * siw_req_notify_cq()
+ *
+ * Request notification for new CQE's added to that CQ.
+ * Defined flags:
+ * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification
+ *   event if a WQE with notification flag set enters the CQ
+ * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification
+ *   event if a WQE enters the CQ.
+ * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the
+ *   number of not reaped CQE's regardless of its notification
+ *   type and current or new CQ notification settings.
+ *
+ * @ofa_cq:	OFA CQ contained in siw CQ.
+ * @flags:	Requested notification flags.
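+ *
+ * Example: arming with IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS
+ * requests an event for any new CQE and, via a positive return
+ * value, tells the caller how many CQEs are still pending and must
+ * be reaped before the next notification can be relied upon.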
+ */
+int siw_req_notify_cq(struct ib_cq *ofa_cq, enum ib_cq_notify_flags flags)
+{
+	struct siw_cq	 *cq  = siw_cq_ofa2siw(ofa_cq);
+
+	dprint(DBG_EH, "(CQ%d:) flags: 0x%8x\n", OBJ_ID(cq), flags);
+
+	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
+		cq->notify = SIW_CQ_NOTIFY_SOLICITED;
+	else
+		cq->notify = SIW_CQ_NOTIFY_ALL;
+
+	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
+		return atomic_read(&cq->qlen);
+
+	return 0;
+}
+
+/*
+ * siw_dereg_mr()
+ *
+ * Release Memory Region.
+ *
+ * TODO: Update function if Memory Windows are supported by siw:
+ *       Is the OFED core checking for MW dependencies on the current
+ *       MR before calling MR deregistration?
+ *
+ * @ofa_mr:     OFA MR contained in siw MR.
+ */
+int siw_dereg_mr(struct ib_mr *ofa_mr)
+{
+	struct siw_mr	*mr;
+	struct siw_dev	*dev = siw_dev_ofa2siw(ofa_mr->device);
+
+	mr = siw_mr_ofa2siw(ofa_mr);
+
+	dprint(DBG_OBJ|DBG_MM, "(MEM%d): Release UMem %p, #ref's: %d\n",
+		mr->mem.hdr.id, mr->umem,
+		atomic_read(&mr->mem.hdr.ref.refcount));
+
+	mr->mem.stag_state = STAG_INVALID;
+
+	siw_pd_put(mr->pd);
+	siw_remove_obj(&dev->idr_lock, &dev->mem_idr, &mr->mem.hdr);
+	siw_mem_put(&mr->mem);
+
+	atomic_dec(&dev->num_mem);
+	return 0;
+}
+
+/*
+ * siw_reg_user_mr()
+ *
+ * Register Memory Region.
+ *
+ * @ofa_pd:	OFA PD contained in siw PD.
+ * @start:	starting address of MR (virtual address)
+ * @len:	len of MR
+ * @rnic_va:	not used by siw
+ * @rights:	MR access rights
+ * @udata:	user buffer to communicate STag and Key.
+ */
+struct ib_mr *siw_reg_user_mr(struct ib_pd *ofa_pd, u64 start, u64 len,
+			      u64 rnic_va, int rights, struct ib_udata *udata)
+{
+	struct siw_mr		*mr = NULL;
+	struct siw_pd		*pd = siw_pd_ofa2siw(ofa_pd);
+	struct ib_umem		*umem = NULL;
+	struct siw_ureq_reg_mr	ureq;
+	struct siw_uresp_reg_mr	uresp;
+	struct siw_dev		*dev = pd->hdr.dev;
+	int rv;
+
+	dprint(DBG_MM|DBG_OBJ, " start: 0x%016llx, "
+		"va: 0x%016llx, len: %llu, ctx: %p\n",
+		(unsigned long long)start,
+		(unsigned long long)rnic_va,
+		(unsigned long long)len,
+		ofa_pd->uobject->context);
+
+	if (atomic_inc_return(&dev->num_mem) > SIW_MAX_MR) {
+		dprint(DBG_ON, ": Out of MRs: %d\n",
+			atomic_read(&dev->num_mem));
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (!len) {
+		rv = -EINVAL;
+		goto err_out;
+	}
+
+#if defined(KERNEL_VERSION_PRE_2_6_26) && (OFA_VERSION < 140)
+	umem = ib_umem_get(ofa_pd->uobject->context, start, len, rights);
+#else
+	umem = ib_umem_get(ofa_pd->uobject->context, start, len, rights, 0);
+#endif
+
+	if (IS_ERR(umem)) {
+		dprint(DBG_MM, " ib_umem_get:%ld LOCKED:%lu, LIMIT:%lu\n",
+			PTR_ERR(umem), current->mm->locked_vm,
+			current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
+			PAGE_SHIFT);
+		rv = PTR_ERR(umem);
+		umem = NULL;
+		goto err_out;
+	}
+	mr = kmalloc(sizeof *mr, GFP_KERNEL);
+	if (!mr) {
+		dprint(DBG_ON, ": malloc\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	mr->mem.stag_state = STAG_INVALID;
+
+	if (siw_mem_add(dev, &mr->mem) < 0) {
+		dprint(DBG_ON, ": siw_mem_add\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	dprint(DBG_OBJ|DBG_MM, "(MEM%d): New Object, UMEM %p\n",
+		mr->mem.hdr.id, umem);
+
+	mr->ofa_mr.lkey = mr->ofa_mr.rkey = mr->mem.hdr.id << 8;
+
+	mr->mem.va  = start;
+	mr->mem.len = len;
+	mr->mem.fbo = 0;
+	mr->mem.mr  = NULL;
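+	/*
+	 * Local read is always granted, since OFA does not expose it
+	 * as a selectable right; the remaining OFA access rights map
+	 * 1:1 onto siw's SR_MEM_* permission bits.
+	 */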
+	mr->mem.perms = SR_MEM_LREAD | /* not selectable in OFA */
+			(rights & IB_ACCESS_REMOTE_READ  ? SR_MEM_RREAD  : 0) |
+			(rights & IB_ACCESS_LOCAL_WRITE  ? SR_MEM_LWRITE : 0) |
+			(rights & IB_ACCESS_REMOTE_WRITE ? SR_MEM_RWRITE : 0);
+
+	mr->umem = umem;
+
+	if (udata) {
+		rv = ib_copy_from_udata(&ureq, udata, sizeof ureq);
+		if (rv)
+			goto err_out_idr;
+
+		mr->ofa_mr.lkey |= ureq.stag_key;
+		mr->ofa_mr.rkey |= ureq.stag_key; /* XXX ??? */
+		uresp.stag = mr->ofa_mr.lkey;
+
+		rv = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		if (rv)
+			goto err_out_idr;
+	}
+	mr->pd = pd;
+	siw_pd_get(pd);
+
+	mr->mem.stag_state = STAG_VALID;
+
+	return &mr->ofa_mr;
+
+err_out_idr:
+	siw_remove_obj(&dev->idr_lock, &dev->mem_idr, &mr->mem.hdr);
+err_out:
+	if (umem)
+		ib_umem_release(umem);
+
+	kfree(mr);
+
+	atomic_dec(&dev->num_mem);
+
+	return ERR_PTR(rv);
+}
+
+/*
+ * siw_create_srq()
+ *
+ * Create Shared Receive Queue of attributes @init_attrs
+ * within protection domain given by @ofa_pd.
+ *
+ * @ofa_pd:	OFA PD contained in siw PD.
+ * @init_attrs:	SRQ init attributes.
+ * @udata:	not used by siw.
+ */
+struct ib_srq *siw_create_srq(struct ib_pd *ofa_pd,
+			      struct ib_srq_init_attr *init_attrs,
+			      struct ib_udata *udata)
+{
+	struct siw_srq		*srq = NULL;
+	struct ib_srq_attr	*attrs = &init_attrs->attr;
+	struct siw_pd		*pd = siw_pd_ofa2siw(ofa_pd);
+	struct siw_dev		*dev = pd->hdr.dev;
+	int rv;
+
+	if (atomic_inc_return(&dev->num_srq) > SIW_MAX_SRQ) {
+		dprint(DBG_ON, " Out of SRQ's\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	if (attrs->max_wr > SIW_MAX_SRQ_WR || attrs->max_sge > SIW_MAX_SGE ||
+	    attrs->srq_limit > attrs->max_wr) {
+		rv = -EINVAL;
+		goto err_out;
+	}
+
+	srq = kmalloc(sizeof *srq, GFP_KERNEL);
+	if (!srq) {
+		dprint(DBG_ON, " malloc\n");
+		rv = -ENOMEM;
+		goto err_out;
+	}
+	INIT_LIST_HEAD(&srq->rq);
+	srq->max_sge = attrs->max_sge;
+	srq->max_wr = attrs->max_wr;
+	atomic_set(&srq->space, attrs->max_wr);
+	srq->limit = attrs->srq_limit;
+	if (srq->limit)
+		srq->armed = 1;
+
+	srq->pd	= pd;
+	siw_pd_get(pd);
+
+	spin_lock_init(&srq->lock);
+
+	return &srq->ofa_srq;
+
+err_out:
+	kfree(srq);
+	atomic_dec(&dev->num_srq);
+
+	return ERR_PTR(rv);
+}
+
+/*
+ * siw_modify_srq()
+ *
+ * Modify SRQ. The caller may resize SRQ and/or set/reset notification
+ * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification.
+ *
+ * NOTE: it is unclear if OFA allows for changing the MAX_SGE
+ * parameter. siw_modify_srq() does not check the attrs->max_sge param.
+ */
+int siw_modify_srq(struct ib_srq *ofa_srq, struct ib_srq_attr *attrs,
+		   enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+	struct siw_srq 	*srq = siw_srq_ofa2siw(ofa_srq);
+	unsigned long	flags;
+	int rv = 0;
+
+	lock_srq_rxsave(srq, flags);
+
+	if (attr_mask & IB_SRQ_MAX_WR) {
+		/* resize request */
+		if (attrs->max_wr > SIW_MAX_SRQ_WR) {
+			rv =  -EINVAL;
+			goto out;
+		}
+		if (attrs->max_wr < srq->max_wr) { /* shrink */
+			if (attrs->max_wr <
+			    srq->max_wr - atomic_read(&srq->space)) {
+				rv = -EBUSY;
+				goto out;
+			}
+			atomic_sub(srq->max_wr - attrs->max_wr, &srq->space);
+		} else /* grow */
+			atomic_add(attrs->max_wr - srq->max_wr, &srq->space);
+		srq->max_wr = attrs->max_wr;
+	}
+	if (attr_mask & IB_SRQ_LIMIT) {
+		if (attrs->srq_limit) {
+			if (attrs->srq_limit > srq->max_wr) {
+				rv = -EINVAL;
+				/* FIXME: restore old space & max_wr?? */
+				goto out;
+			}
+			srq->armed = 1;
+		} else
+			srq->armed = 0;
+
+		srq->limit = attrs->srq_limit;
+	}
+out:
+	unlock_srq_rxsave(srq, flags);
+	return rv;
+}
+
+/*
+ * siw_query_srq()
+ *
+ * Query SRQ attributes.
+ */
+int siw_query_srq(struct ib_srq *ofa_srq, struct ib_srq_attr *attrs)
+{
+	struct siw_srq 	*srq = siw_srq_ofa2siw(ofa_srq);
+	unsigned long	flags;
+
+	lock_srq_rxsave(srq, flags);
+
+	attrs->max_wr = srq->max_wr;
+	attrs->max_sge = srq->max_sge;
+	attrs->srq_limit = srq->limit;
+
+	unlock_srq_rxsave(srq, flags);
+
+	return 0;
+}
+
+/*
+ * siw_destroy_srq()
+ *
+ * Destroy SRQ.
+ * SRQ WQEs are silently destroyed, since they do not belong to any QP.
+ * Furthermore, it is assumed that the SRQ is not referenced by any
+ * QP anymore - the code trusts the OFA environment to keep track
+ * of QP references.
+ */
+int siw_destroy_srq(struct ib_srq *ofa_srq)
+{
+	struct list_head	*listp, *tmp;
+	struct siw_srq		*srq = siw_srq_ofa2siw(ofa_srq);
+	struct siw_dev		*dev = srq->pd->hdr.dev;
+	unsigned long flags;
+
+	lock_srq_rxsave(srq, flags); /* probably not necessary */
+	list_for_each_safe(listp, tmp, &srq->rq) {
+		list_del(listp);
+		siw_wqe_put(list_entry(listp, struct siw_wqe, list));
+	}
+	unlock_srq_rxsave(srq, flags);
+
+	siw_pd_put(srq->pd);
+	kfree(srq);
+	atomic_dec(&dev->num_srq);
+
+	return 0;
+}
+
+/*
+ * siw_post_srq_recv()
+ *
+ * Post a list of receive queue elements to SRQ.
+ * NOTE: The function does not check or lock a certain SRQ state
+ *       during the post operation. The code simply trusts the
+ *       OFA environment.
+ *
+ * @ofa_srq:	OFA SRQ contained in siw SRQ
+ * @wr:		List of R-WR's
+ * @bad_wr:	Updated to failing WR if posting fails.
+ */
+int siw_post_srq_recv(struct ib_srq *ofa_srq, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	struct siw_srq	*srq = siw_srq_ofa2siw(ofa_srq);
+	struct siw_wqe	*wqe = NULL;
+	unsigned long flags;
+	int rv = 0;
+
+	while (wr) {
+		if (!atomic_read(&srq->space) ||
+		    !(wqe = siw_srq_wqe_get(srq))) {
+			dprint(DBG_ON, " siw_srq_wqe_get\n");
+			rv = -ENOMEM;
+			break;
+		}
+		if (!wr->num_sge || wr->num_sge > srq->max_sge) {
+			dprint(DBG_WR|DBG_ON,
+				"(SRQ%p): Num SGE: %d\n", srq, wr->num_sge);
+			rv = -EINVAL;
+			break;
+		}
+		wr_type(wqe) = SIW_WR_RECEIVE;
+		wr_id(wqe) = wr->wr_id;
+		wqe->wr_status = SR_WR_QUEUED;
+
+		rv = siw_copy_sgl(wr->sg_list, wqe->wr.recv.sge, wr->num_sge);
+		if (rv == 0) {
+			/*
+			 * do not allow zero length receive
+			 * XXX correct?
+			 */
+			rv = -EINVAL;
+			break;
+		}
+		wqe->wr.recv.num_sge = wr->num_sge;
+		wqe->bytes = rv;
+
+		lock_srq_rxsave(srq, flags);
+
+		list_add_tail(&wqe->list, &srq->rq);
+		atomic_dec(&srq->space);
+
+		unlock_srq_rxsave(srq, flags);
+
+		wr = wr->next;
+	}
+	if (rv <= 0) {
+		dprint(DBG_WR|DBG_ON, "(SRQ %p): error=%d\n",
+			srq, rv);
+
+		if (wqe != NULL)
+			siw_wqe_put(wqe);
+		*bad_wr = wr;
+	}
+	dprint(DBG_WR|DBG_RX, "(SRQ%p): space=%d\n",
+		srq, atomic_read(&srq->space));
+
+	return rv > 0 ? 0 : rv;
+}
+
+
+struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
+{
+	return -ENOSYS;
+}
diff --git a/drivers/infiniband/hw/softiwarp/siw_verbs.h b/drivers/infiniband/hw/softiwarp/siw_verbs.h
new file mode 100644
index 0000000..53eac4f
--- /dev/null
+++ b/drivers/infiniband/hw/softiwarp/siw_verbs.h
@@ -0,0 +1,96 @@
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2010, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_VERBS_H
+#define _SIW_VERBS_H
+
+#include <linux/errno.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_cm.h"
+
+extern int siw_query_device(struct ib_device *, struct ib_device_attr *);
+
+extern struct ib_ucontext *siw_alloc_ucontext(struct ib_device *,
+					      struct ib_udata *);
+extern int siw_dealloc_ucontext(struct ib_ucontext *);
+extern int siw_query_port(struct ib_device *, u8, struct ib_port_attr *);
+extern int siw_query_pkey(struct ib_device *, u8, u16, u16 *);
+extern int siw_query_gid(struct ib_device *, u8, int, union ib_gid *);
+
+extern struct ib_pd *siw_alloc_pd(struct ib_device *, struct ib_ucontext *,
+				  struct ib_udata *);
+extern int siw_dealloc_pd(struct ib_pd *);
+extern struct ib_ah *siw_create_ah(struct ib_pd *, struct ib_ah_attr *);
+extern int siw_destroy_ah(struct ib_ah *);
+extern struct ib_qp *siw_create_qp(struct ib_pd *, struct ib_qp_init_attr *,
+				   struct ib_udata *);
+extern int siw_query_qp(struct ib_qp *, struct ib_qp_attr *, int,
+			struct ib_qp_init_attr *);
+extern int siw_ofed_modify_qp(struct ib_qp *, struct ib_qp_attr *, int,
+			      struct ib_udata *);
+extern int siw_destroy_qp(struct ib_qp *);
+extern int siw_post_send(struct ib_qp *, struct ib_send_wr *,
+			 struct ib_send_wr **);
+extern int siw_post_receive(struct ib_qp *, struct ib_recv_wr *,
+			    struct ib_recv_wr **);
+extern struct ib_cq *siw_create_cq(struct ib_device *, int, int,
+				   struct ib_ucontext *, struct ib_udata *);
+extern int siw_destroy_cq(struct ib_cq *);
+extern int siw_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *);
+extern int siw_req_notify_cq(struct ib_cq *, enum ib_cq_notify_flags);
+extern struct ib_mr *siw_reg_user_mr(struct ib_pd *, u64, u64, u64, int,
+				     struct ib_udata *);
+extern struct ib_mr *siw_get_dma_mr(struct ib_pd *, int);
+extern int siw_dereg_mr(struct ib_mr *);
+extern struct ib_srq *siw_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
+				     struct ib_udata *);
+extern int siw_modify_srq(struct ib_srq *, struct ib_srq_attr *,
+			  enum ib_srq_attr_mask, struct ib_udata *);
+extern int siw_query_srq(struct ib_srq *, struct ib_srq_attr *);
+extern int siw_destroy_srq(struct ib_srq *);
+extern int siw_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
+			     struct ib_recv_wr **);
+extern int siw_mmap(struct ib_ucontext *, struct vm_area_struct *);
+
+#endif
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 62d732a..718cc7c 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2628,6 +2628,166 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called qlge.
 
+config BGP_COLLECTIVE
+	tristate "BlueGene Ethernet-on-Collective support"
+	default y if BGP=y
+	depends on BGP
+	help
+	  This driver supports the BlueGene Ethernet-over-collective
+	  controller, for running IP between IO and Compute nodes.
+
+config BGP_COLLECTIVE_IP_CHECKSUM
+	bool "Request IP-layer software checksum on the BGP collective"
+	default y
+	depends on BGP_COLLECTIVE
+	help
+	  The BlueGene collective network has hardware CRC-and-retry, which is
+	  stronger than an IP checksum, but you can select IP checksumming as well.
+
+config BGP_COLLECTIVE_NAPI
+	tristate "BlueGene Ethernet-on-Collective NAPI support"
+	default n
+	depends on BGP
+	help
+	  This configures the BGP collective driver to use NAPI interrupt
+	  mitigation.
+
+config BGP_DMA
+	tristate "BlueGene Torus DMA support"
+	default y if BGP=y
+	depends on BGP
+	help
+	  This driver supports the BlueGene torus DMA unit.
+	  You will need it if you want to use BGP_TORUS.
+
+config BGP_TORUS
+	tristate "BlueGene Ethernet-on-Torus support"
+	default y if BGP=y
+	depends on BGP
+	help
+	  This driver supports the BlueGene Ethernet-over-torus
+	  controller, for running IP amongst Compute nodes.
+
+config BGP_TORUS_DIAGNOSTICS
+	bool "Diagnostics for BlueGene Ethernet-on-Torus"
+	default y if BGP=y
+	depends on BGP
+	help
+	  This inserts diagnostics into the TCP layers, to support
+	  optimisation of the IP-on-BlueGene-Torus code.
+
+config BGP_FRANKENTORUS
+	tristate "BlueGene Ethernet-on-Torus support, vrnic-style"
+	default n
+	depends on BGP
+	help
+	  This driver supports the BlueGene Ethernet-over-torus vrnic
+	  controller, for running IP amongst Compute nodes.
+
+config BGP_TORUS_IP_CHECKSUM
+	bool "Request IP-layer software checksum on the BGP torus"
+	default y
+	depends on BGP_TORUS
+	help
+	  The BlueGene torus network has hardware CRC-and-retry, which is
+	  stronger than an IP checksum, but you can select IP checksumming as well.
+
+config BGP_RECEPTION_MEMORY_FIFO_SHIFT
+	int "log2(BlueGene torus software reception FIFO size)"
+	depends on BGP
+	default "24"
+	help
+	  The FIFO should be somewhere between 64kB and 32MB; the default of 24 gives a 16MB FIFO.
+
+config BGP_TORUS_ADAPTIVE_ROUTING
+	tristate "BlueGene Ethernet-on-Torus with adaptive routing"
+	default n
+	depends on BGP
+	help
+	  Support for IP with adaptive packet routing on the torus (experimental).
+
+config BGP_VRNIC
+	tristate "BlueGene virtual RNIC support"
+	default m if BGP=y
+	depends on BGP
+	help
+	  This driver supports the BlueGene virtual RNIC
+	  controller, for running test cases against the vRNIC.
+
+config BGP_VRNIC_START
+	hex "Real address start of the BGP VRNIC; Linux must not inadvertently use real store in this region"
+	depends on BGP_VRNIC
+	default "0xe0000000"
+
+config BGP_VRNIC_SIZE
+	hex "Number of bytes of memory given over to the BGP VRNIC; Linux must not inadvertently use real store in this region"
+	depends on BGP_VRNIC
+	default "0x10000000"
+
+config BGP_STATISTICS
+	tristate "BlueGene Statistics support"
+	default y if BGP=y
+	depends on BGP
+	help
+	  This driver supports gathering of statistics related to
+	  BlueGene/P hardware.
+
+config BGP_E10000
+        tristate "BlueGene on-chip Ethernet support"
+        default y if BGP=y
+        depends on BGP
+        help
+          This driver supports the BlueGene 10Gb on-chip Ethernet
+          controller.
+
+config BGP_E10000_RXB
+        int "Total size in bytes of receive buffers (1MB maximum)"
+        depends on BGP_E10000
+        default "1048576"
+
+config BGP_E10000_TXB
+        int "Number of transmit buffers"
+        depends on BGP_E10000
+        default "4096"
+
+config BGP_E10000_IP_CHECKSUM
+        bool "Enable HW checksum for TCP/UDP IPv4 traffic"
+        depends on BGP_E10000
+        default y
+
+config BGP_E10000_NAPI
+        bool "Enable 'new API' network interface"
+        depends on BGP_E10000
+        default n
+
+config BGP_E10000_EMAC_LOOPBACK
+        bool "Enable MAC loopback mode"
+        depends on BGP_E10000
+        default n
+        help
+          This ties the output path directly to the input path at the MAC level.
+
+config BGP_E10000_PHY_LOOPBACK
+        bool "Enable PHY loopback mode"
+        depends on BGP_E10000
+        default n
+        help
+          This ties the output path directly to the input path in the PHY.
+
+config BGP_E10000_DBG
+        bool "Debug enablement"
+        depends on BGP_E10000
+        default n
+        help
+          This enables debug output.
+
+config BGP_E10000_DBG_LEVEL
+        int "Debug level"
+        depends on BGP_E10000_DBG
+        default 57
+        help
+          This sets the amount of debug output.
+
 source "drivers/net/sfc/Kconfig"
 
 source "drivers/net/benet/Kconfig"
@@ -3091,6 +3251,23 @@
 
 config NET_POLL_CONTROLLER
 	def_bool NETPOLL
+
+config TCP_HIATUS_COUNTS
+	bool "TCP output hiatus counts"
+	default n
+	help
+	  This option counts the number of times that TCP output is held back,
+	  broken down by reason (e.g. 'congestion window filled'). It is useful
+	  if you are trying to exploit fast networks, to help pin down what is
+	  limiting the transfer rate.
+
+config TCP_CONGESTION_OVERRIDES
+	bool "TCP output congestion overrides"
+	default n
+	help
+	  This option places controls in sysfs so that TCP congestion parameters
+	  can be overridden system-wide, e.g. turning Nagle off.
+
 
 config VIRTIO_NET
 	tristate "Virtio network driver (EXPERIMENTAL)"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 471baaf..60c09f5 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -5,6 +5,12 @@
 obj-$(CONFIG_E1000) += e1000/
 obj-$(CONFIG_E1000E) += e1000e/
 obj-$(CONFIG_IBM_NEW_EMAC) += ibm_newemac/
+obj-$(CONFIG_BGP_E10000) += bgp_e10000/
+obj-$(CONFIG_BGP_COLLECTIVE) += bgp_collective/
+obj-$(CONFIG_WRAP_COPY_TOFROM_USER) += bgp_memcpy/
+obj-$(CONFIG_BGP_TORUS) += bgp_torus/
+obj-$(CONFIG_BGP_STATISTICS) += bgp_statistics/
+# obj-$(CONFIG_BLUEGENE_SOCKETS) += bgp_sockets/
 obj-$(CONFIG_IGB) += igb/
 obj-$(CONFIG_IXGBE) += ixgbe/
 obj-$(CONFIG_IXGB) += ixgb/
diff --git a/drivers/net/bgp_collective/Makefile b/drivers/net/bgp_collective/Makefile
new file mode 100644
index 0000000..29fbe0a
--- /dev/null
+++ b/drivers/net/bgp_collective/Makefile
@@ -0,0 +1,7 @@
+# Makefile for the BlueGene collective network driver
+
+EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__
+
+bgp_collective-y := bgcol.o bgnet.o
+
+obj-$(CONFIG_BGP_COLLECTIVE) += bgp_collective.o
diff --git a/drivers/net/bgp_collective/bgcol.c b/drivers/net/bgp_collective/bgcol.c
new file mode 100644
index 0000000..2c30f37
--- /dev/null
+++ b/drivers/net/bgp_collective/bgcol.c
@@ -0,0 +1,3710 @@
+/*********************************************************************
+ *
+ * Description: Blue Gene low-level driver for collective network
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors:
+ * Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ * Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * The protocol implemented here will send a 'jumbo' (9000 byte) frame
+ * in 38 packets, i.e. 240 bytes payload + 16 bytes link header per packet.
+ * The measured throughput was 4325 Mbit/sec on one IO link
+ *
+ * It is logically possible to send a 'jumbo' frame in 36 packets; to
+ * do this you need to pack 255 bytes of payload + 1 byte of link
+ * header per packet (you need to at least indicate which node has sent
+ * the packet); you probably want to do this by 'trampling' the first
+ * byte of each packet, sending a 'correction' byte sequence at the
+ * end of the frame, and having the receiver demultiplex and correct
+ * the frames.
+ * This should achieve 4565 Mbit/sec
+ *
+ * If you were to drive the link with an MTU of close to 65535, you
+ * could send a 65270-byte frame in 256 packets, which should achieve
+ * 4655 Mbit/sec.
+ *
+ ********************************************************************/
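+
+/*
+ * A worked sketch of the packet-count arithmetic above, assuming the
+ * 256-byte hardware packet implied by 240 + 16 bytes:
+ *
+ *   DIV_ROUND_UP(9000, 240)  = 38 packets  (protocol implemented here)
+ *   DIV_ROUND_UP(9000, 255)  = 36 packets  (1-byte link header scheme)
+ *   DIV_ROUND_UP(65270, 255) = 256 packets (near-64K MTU scheme)
+ */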
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/bgp_personality.h>
+#include <asm/bgcns.h>
+#include <asm/time.h>
+
+
+#include "bglink.h"
+#include "bgcol.h"
+#include "bgnet.h"
+#include "bgp_dcr.h"
+#include "ppc450.h"
+
+#include <asm/bluegene.h>
+
+#define DRV_NAME	"bgcol"
+#define DRV_VERSION	"1.0"
+#define DRV_DESC	"IBM Blue Gene Collective Driver"
+
+MODULE_DESCRIPTION(DRV_DESC);
+MODULE_AUTHOR("IBM");
+MODULE_LICENSE("GPL");
+
+extern BGCNS_Descriptor bgcnsd;  /* defined in asm/bgp_cns.c */
+
+/*  SA_ONSTACK is deprecated, but its replacement has not made it into MCP yet. Compatibility ... */
+#if !defined(IRQF_DISABLED)
+#define IRQF_DISABLED SA_ONSTACK
+#endif
+
+/*  configuration selector macros */
+#define COLLECTIVE_RECEIVE_WITH_SLIH
+/* #define COLLECTIVE_DELIVER_VIA_TASKLET */
+/* #define COLLECTIVE_BREAK_ON_FRAME */
+#define COLLECTIVE_TRANSMIT_WITH_SLIH
+#define COLLECTIVE_TRANSMIT_WITH_FLIH
+#define COLLECTIVE_XMITTER_FREES
+#define COLLECTIVE_DUPLEX_SLIH
+#define COLLECTIVE_ONEPASS_TXRX
+#define BGP_COL_STATUS_VISIBILITY
+
+
+extern void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;
+
+/* For determining whether to save/restore the floating point register around the use in the SLIH */
+/* 20110107 tjcw: Don't think save/restore helps; atomicity fix in giveup_fpu() is what solves the apparent problem */
+/*          (a more-complex fix involving 'manually' setting up the FPU would be an alternative to the giveup_fpu() fix ) */
+/* 20110112 tjcw: Put FP save back in, so we leave all FP regs the way we found them */
+enum {
+  k_allow_fp_save = 1 ,
+  k_force_fp_save = 1
+};
+
+/*  For diagnosis of certain link sequencing problems, it can be useful to keep a trail of */
+/*  recently-arrived link headers. Set this macro if you want a trail kept */
+/* #define KEEP_LNKHDR_TRAIL */
+enum {
+  k_lnkhdr_trail_display_length = 50,  /*  Link header amount of trail to display */
+  k_lnkhdr_trail_length = 64,  /*  Link header ring buffer length, next power of 2 above k_lnkhdr_trail_display_length. */
+  k_lnhhdr_ffdc_limit = 20  /*  First-failure-data-capture limit, we want to catch first failures and not saturate the logging system */
+};
+
+/*  For diagnostics, track the last thing that we knew happened to the bgcol in interrupt mode */
+enum {
+  k_bgcolaction_none ,
+  k_bgcolaction_xmit ,
+  k_bgcolaction_xmit_enable ,
+  k_bgcolaction_xmit_irq ,
+  k_bgcolaction_xmit_irq_disable
+
+};
+
+struct bglink_proto * proto_array[k_link_protocol_limit] ;
+
+/* static int bgcolaction ; */
+
+extern int e10000_diag_count ;
+
+
+/* #define CONFIG_BLUEGENE_COLLECTIVE_TRACE */
+
+/* #define REQUIRE_TRACE */
+
+#include <linux/KernelFxLog.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+/* #if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) */
+/* static int bgcol_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+int bgcol_debug_tracemask  = k_t_init | k_t_request | k_t_protocol | k_t_userspace ;
+/* int bgcol_debug_tracemask  = 0xffffffff ; */
+/* #endif */
+
+/*  Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */
+#define COMPILED_TRACEMASK (0xffffffff-k_t_detail-k_t_fifocontents)
+/* #define COMPILED_TRACEMASK (k_t_error) */
+
+#define XTRACEN(i,x...)
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...)    KernelFxLog(1,x)
+#define TRACE1(x...)   KernelFxLog(1,x)
+#define TRACE2(x...)   KernelFxLog(1,x)
+#define TRACEN(i,x...) KernelFxLog(1,x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#elif  defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+#define TRACE(x...)    KernelFxLog(bgcol_debug_tracemask & k_t_general,x)
+#define TRACE1(x...)   KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...)   KernelFxLog(bgcol_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (COMPILED_TRACEMASK & (i)),x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+#define _BGP_DCR_COL 0
+
+#define FRAGMENT_TIMEOUT	(HZ/10)
+
+#define COL_LNKHDRLEN		(sizeof(struct bglink_hdr_col))
+#define COL_FRAGPAYLOAD	(COL_PAYLOAD - COL_LNKHDRLEN)
+#define COL_SKB_ALIGN		16
+
+
+#define BGP_COL_MAJOR_NUM  120
+#define BGP_TORUS_MAJOR_NUM 121
+#define BGP_GI_MAJOR_NUM    122
+#define BGP_COL_MINOR_NUMS  2
+#define BGP_TORUS_MINOR_NUMS 2
+#define BGP_GI_MINOR_NUMS   4
+#define _BGP_UA_COL0  (0x6)
+#define _BGP_PA_COL0  (0x10000000)
+#define _BGP_UA_COL1  (0x6)
+#define _BGP_PA_COL1  (0x11000000)
+#define _BGP_UA_TORUS0 (0x6)
+#define _BGP_PA_TORUS0 (0x01140000)
+#define _BGP_UA_TORUS1 (0x6)
+#define _BGP_PA_TORUS1 (0x01150000)
+
+/*
+ * 'Oversized' skbuffs are an attempt to increase throughput on the collective interface by arranging for
+ * 2 cores to work together on pulling data and distributing it. See commentary in bgnet.c as to what needs
+ * to be done to get it to work.
+ * Having an skbuff at 64K rather than 9K (to match ethernet 'jumbo' frames) doesn't really cost much memory;
+ * we are only likely to have a few MB of skbuffs in each IO node, and less in each compute node.
+ */
+enum {
+	k_use_plentiful_skb = 1 , /* Whether to use an oversized sk_buff to receive into */
+	k_plentiful_skb_size = 256*COL_FRAGPAYLOAD
+};
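+
+/*
+ * Sizing sketch, assuming COL_PAYLOAD = 256 (the 240-byte payload plus
+ * 16-byte link header described in the header comment): COL_FRAGPAYLOAD
+ * is 240, so k_plentiful_skb_size works out to 256 * 240 = 61440 bytes
+ * (about 60KB), comfortably above a 9000-byte jumbo frame.
+ */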
+
+static void bgcol_prefill(struct sk_buff_head * skb_list, unsigned int count)
+{
+	unsigned int x ;
+	for(x=0;x<count;x+=1)
+		{
+			struct sk_buff *skb=alloc_skb(k_plentiful_skb_size,GFP_KERNEL) ;
+			if(skb)
+				{
+					skb_queue_tail(skb_list,skb) ;
+				}
+
+		}
+}
+
+static struct sk_buff * take_skb_from_list_for_filling(struct bg_col *col)
+{
+	return skb_dequeue (&col->skb_list_for_filling) ;
+}
+
+static void replenish_list_for_filling(struct bg_col *col)
+{
+	struct sk_buff *skb=alloc_skb(k_plentiful_skb_size,GFP_KERNEL) ;
+	if(skb)
+		{
+			skb_queue_tail(&col->skb_list_for_filling,skb) ;
+		}
+
+}
+/* int bgcol_diagnostic_use_napi ; */
+/*
+ * device management
+ */
+
+#define BGP_MAX_DEVICES 8
+static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES];
+static unsigned int bgpnet_num_devices = 0;
+
+
+static struct proc_dir_entry* bgpnetDir;
+static struct proc_dir_entry* barrierEntry;
+static struct proc_dir_entry* SerDesEntry;
+static struct proc_dir_entry* statisticsEntry;
+static struct proc_dir_entry* statusEntry;
+/* static struct proc_dir_entry* tracemaskEntry; */
+struct bg_col static_col;
+
+static struct bg_col *__bgcol = &static_col ;
+
+static int bgpnet_add_device(int major, int minor, const char* name,
+                             unsigned long long base, int irq,
+                             irqreturn_t (*irq_handler)(int, void*));
+static int bgpnet_device_open(struct inode *inode, struct file *filp);
+static int bgpnet_device_mmap(struct file *filp,  struct vm_area_struct *);
+static int bgpnet_device_release(struct inode *inode, struct file * filp);
+static long bgpnet_device_ioctl(struct file *filp,
+                                unsigned int cmd, unsigned long arg);
+static ssize_t bgpnet_device_read(struct file *filp, char __user *buf, size_t count,
+				  loff_t *f_pos);
+static unsigned int bgpnet_device_poll(struct file *file, poll_table * wait);
+static int bgpnet_barrier_read(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data);
+static int bgpnet_barrier_write(struct file * filp, const char __user *buf,
+                                  unsigned long len, void * data);
+static int bgpnet_SerDes_read(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data);
+static int bgpnet_SerDes_write(struct file * filp, const char __user *buf,
+                                  unsigned long len, void * data);
+
+
+static struct file_operations bgpnet_device_fops =
+{
+  .owner=   THIS_MODULE,
+  .open=    bgpnet_device_open,
+  .read=    bgpnet_device_read,
+  .write=   NULL,
+  .poll=    bgpnet_device_poll,
+  .unlocked_ioctl=   bgpnet_device_ioctl,
+  .release= bgpnet_device_release,
+  .mmap=    bgpnet_device_mmap,
+};
+
+struct bg_col *bgcol_get_dev(void)
+{
+    return __bgcol;
+}
+
+unsigned int bgcol_get_nodeid(struct bg_col* col)
+{
+    return col->nodeid;
+}
+
+/**********************************************************************
+ * IRQs
+ **********************************************************************/
+
+/* static irqreturn_t bgcol_unhandled_interrupt(int irq, void *dev, struct pt_regs* regs) */
+/* { */
+/*     panic("col: unhandled irq %d\n", irq); */
+/* } */
+
+static irqreturn_t bgcol_duplex_interrupt(int irq, void *dev);
+
+#define IRQ_IDX_INJECT	0
+#define IRQ_IDX_RECEIVE	1
+
+#define DEF_IRQ(_irq, _name, _handler) \
+{ .irq = _irq, .name = _name, .handler = _handler }
+
+#define BG_COL_IRQ_INJ 180
+#define BG_COL_IRQ_RCV 181
+
+#define BG_COL_IRQ_GROUP 5
+#define BG_COL_IRQ_INJ_GINT 20
+#define BG_COL_IRQ_RCV_GINT 21
+
+/*  Linux 'virtual interrupt' numbers corresponding to how the collective is wired to the BIC */
+enum {
+	k_inject_irq = (5*32 + 20) + 32 ,
+	k_receive_irq = (5*32 + 21) + 32
+} ;
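+
+/*
+ * For reference: with group 5 and group interrupts 20/21 these work
+ * out to k_inject_irq = (160 + 20) + 32 = 212 and
+ * k_receive_irq = (160 + 21) + 32 = 213; the +32 is assumed here to be
+ * the offset of the BIC's interrupt groups within the Linux IRQ space.
+ */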
+
+static struct {
+    unsigned irq;
+    char *name;
+    irqreturn_t (*handler)(int irq, void *dev);
+} bgcol_irqs [] = {
+    DEF_IRQ(k_inject_irq, "Tree inject", bgcol_duplex_interrupt),	/* IRQ_IDX_INJECT */
+    DEF_IRQ(k_receive_irq, "Tree receive", bgcol_duplex_interrupt),	/* IRQ_IDX_RECEIVE */
+#if 0
+    DEF_IRQ("Tree VC0", bgcol_receive_interrupt),
+    DEF_IRQ("Tree VC1", bgcol_receive_interrupt),
+    DEF_IRQ("Tree CRNI timeout", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree no-target", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ALU overflow", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree local client inject", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree local client receive", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree write send CH0", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ECC send CH0", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree link CRC send CH0", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree write send CH1", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ECC send CH1", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree link CRC send CH1", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree write send CH2", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ECC send CH2", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree link CRC send CH2", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ECC rcv CH0", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree link CRC rcv CH0", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ECC rcv CH1", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree link CRC rcv CH1", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree ECC rcv CH2", bgcol_unhandled_interrupt),
+    DEF_IRQ("Tree link CRC rcv CH2", bgcol_unhandled_interrupt),
+#endif
+    { -1,NULL, NULL }
+};
+
+
+/**********************************************************************
+ *                                 Debug
+ **********************************************************************/
+
+static inline void dump_skb(struct sk_buff *skb)
+{
+  TRACEN(k_t_general,"sk_buff at %p, data=%p, len=%d", skb,skb->data, skb->len) ;
+#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+  if( bgcol_debug_tracemask & k_t_detail )
+    {
+      int i;
+      for (i = 0; i < skb->len / 4 + 1; i++)
+            printk("%08x%c", ((u32*)skb->data)[i], (i + 1) % 8 ? ' ' : '\n');
+      printk("\n");
+    }
+#endif
+}
+
+static inline void dump_skb_partial(struct sk_buff *skb, int maxlength)
+{
+  TRACEN(k_t_general,"sk_buff at %p, data=%p, len=%d", skb,skb->data, skb->len) ;
+#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+  if( bgcol_debug_tracemask & k_t_detail )
+    {
+      int j = (maxlength > skb->len) ? skb->len : maxlength ;
+      int i;
+      for (i = 0; i < j / 4 + 1; i++)
+        printk("%08x%c", ((u32*)skb->data)[i], (i + 1) % 8 ? ' ' : '\n');
+      printk("\n");
+    }
+#endif
+}
+
+static inline void dump_bgcol_packet(struct bglink_hdr_col *lnkhdr, void * payload)
+  {
+    TRACEN(k_t_general,"bgcol_packet: hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x",
+        lnkhdr->conn_id, lnkhdr->this_pkt, lnkhdr->total_pkt, lnkhdr->dst_key, lnkhdr->src_key);
+#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+    if( bgcol_debug_tracemask & k_t_detail )
+      {
+        int i ;
+        int * pi = (int *) payload ;
+        for( i=0; i<COL_FRAGPAYLOAD/sizeof(int); i += 8)
+          {
+            TRACEN(k_t_bgcolpkt," %04x %08x %08x %08x %08x %08x %08x %08x %08x",
+                4*i, pi[i+0], pi[i+1], pi[i+2], pi[i+3], pi[i+4], pi[i+5], pi[i+6], pi[i+7]
+                 ) ;
+          }
+  }
+#endif
+  }
+
+/* Delivery of skbuffs to linux networking layer */
+/* Deliver an 'sk_buff' via a work queue, so that 'this' core can spend its time draining the collective hardware */
+struct bgcol_workqueue_item
+{
+	struct work_struct work ;
+	struct bglink_proto *proto ;
+	unsigned int src_key ;
+};
+static void bgcol_workqueue_actor(struct work_struct * work)
+{
+	char * cb = (char *) work ;
+	struct sk_buff *skb = (struct sk_buff *) (cb - offsetof(struct sk_buff, cb)) ;
+	struct bgcol_workqueue_item * bgcol_work =(struct bgcol_workqueue_item *) work ;
+	TRACEN(k_t_napi,"(>) work=%p skb=%p", work, skb) ;
+	bgcol_work->proto->col_rcv_trimmed(&static_col,skb,bgcol_work->proto,bgcol_work->src_key) ;
+	replenish_list_for_filling(&static_col) ;
+	TRACEN(k_t_napi,"(<)") ;
+}
+static void bgcol_deliver_via_workqueue(struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto *proto )
+{
+	struct bgcol_workqueue_item * bgcol_work = (struct bgcol_workqueue_item *)(skb->cb) ;
+	int rc ;
+	TRACEN(k_t_napi,"(>)skb=%p", skb) ;
+	__skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
+	__skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
+	INIT_WORK(&bgcol_work->work,bgcol_workqueue_actor) ;
+	bgcol_work->proto = proto ;
+	bgcol_work->src_key = lnkhdr->src_key ;
+	rc=schedule_work_on(k_WorkqueueDeliveryCPU,&bgcol_work->work) ;
+	TRACEN(k_t_napi,"(<) rc=%d",rc) ;
+}
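+
+/*
+ * The delivery path above hides the work item inside the sk_buff's own
+ * cb[] area: the work_struct is the first member of the item stored in
+ * skb->cb, so the actor can recover the skb by subtracting
+ * offsetof(struct sk_buff, cb). A minimal sketch of the same pattern
+ * (hypothetical helper name):
+ *
+ *   static struct sk_buff *skb_from_work(struct work_struct *w)
+ *   {
+ *       return (struct sk_buff *)((char *)w - offsetof(struct sk_buff, cb));
+ *   }
+ *
+ * This only holds while this driver owns the skb, i.e. until the skb is
+ * handed to a layer that reuses the control buffer.
+ */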
+/**********************************************************************
+ *                          Interrupt handling
+ **********************************************************************/
+
+/*  Routines related to interrupt management from bgp_bic.c */
+void bic_disable_irq(unsigned int irq) ;   /*  Intended to be called from a FLIH to indicate that this interrupt will not fire again */
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;  /*  Intended to indicate which core will take the next interrupt of this type. Doesn't explicitly enable, but other async things may enable */
+void bic_unmask_irq(unsigned int irq) ;  /*  Explicitly enable this interrupt */
+
+/*  Enable receive interrupts */
+void bgcol_enable_interrupts(struct bg_col *bgcol)
+{
+//    unsigned rec_enable;
+    unsigned long flags ;
+    TRACE( "(>) bgcol=%p", bgcol);
+    printk(KERN_NOTICE "enable ints\n");
+
+    spin_lock_irqsave(&bgcol->lock, flags);
+
+     /*  set watermarks */
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, _TR_GLOB_VCFG_RWM(0) );
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG1, _TR_GLOB_VCFG_RWM(0) );
+     /*  set watermarks */
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, _TR_GLOB_VCFG_IWM(4) );  /*  let transmit fifos get half empty before interrupting */
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG1, _TR_GLOB_VCFG_IWM(4) );  /*  don't really want an interrupt from ch1 inject        */
+
+    // Can only do anything useful with 'data arrived' interrupts (and then really only with the eth-on-tree channel)
+//    rec_enable = mfdcrx(bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN);
+//    rec_enable |= COL_IRQMASK_REC;
+//    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, rec_enable );
+
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN,_TR_REC_PRX_WM0 ) ;
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN,_TR_REC_PRX_WM1 ) ;
+
+     /*  clear exception flags */
+    mfdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXF );
+    mfdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXF );
+
+    spin_unlock_irqrestore(&bgcol->lock, flags);
+//    TRACE( "(<) rec_enable:0x%08x", rec_enable);
+    TRACE( "(<)");
+}
+
+static inline void bgcol_enable_interrupts_rcv(struct bg_col *bgcol)
+{
+//    unsigned rec_enable;
+    TRACE( "(>) bgcol=%p", bgcol);
+
+//    rec_enable = COL_IRQMASK_REC ;
+//    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, rec_enable );
+    // Can only do anything useful with 'data arrived' interrupts (and then really only with the eth-on-tree channel)
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN,_TR_REC_PRX_WM0 ) ; 
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN,_TR_REC_PRX_WM1 ) ; 
+    TRACE( "(<)");
+}
+
+static inline void bgcol_enable_interrupts_xmit(struct bg_col *bgcol)
+{
+    TRACE( "bgcol=%p", bgcol);
+
+    mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, (_TR_INJ_PIX_ENABLE | _TR_INJ_PIX_WM0 ) );
+
+}
+
+
+static inline void bgcol_disable_interrupts(struct bg_col *bgcol)
+{
+  TRACEN(k_t_irqflow,"bgcol=%p", bgcol);
+
+  mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, _TR_INJ_PIX_ENABLE );
+  mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, 0 );
+
+}
+
+static inline void bgcol_disable_interrupts_rcv(struct bg_col *bgcol)
+{
+  TRACEN(k_t_irqflow,"bgcol=%p", bgcol);
+
+  mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, 0 );
+
+}
+
+static inline void bgcol_disable_interrupts_xmit(struct bg_col *bgcol)
+{
+  TRACEN(k_t_irqflow, "bgcol=%p", bgcol);
+
+  mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, _TR_INJ_PIX_ENABLE );
+}
+
+void bgcol_enable_rcv_wm_interrupt(struct bgcol_channel* chn)
+{
+    unsigned long flags;
+    unsigned long prxen;
+
+    spin_lock_irqsave(&chn->col->lock, flags);
+    chn->irq_rcv_pending_mask = COL_IRQ_RCV_PENDING_MASK(chn->idx);
+    prxen = mfdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN);
+    if (chn->idx)
+	mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, prxen | _TR_REC_PRX_WM1);
+    else
+	mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, prxen | _TR_REC_PRX_WM0);
+    spin_unlock_irqrestore(&chn->col->lock, flags);
+
+    return;
+}
+
+static void inj_timeout(unsigned long colArg)
+{
+    printk(KERN_INFO "bgcol: inject fifo timed out!\n");
+}
+
+void bgcol_set_mtu(struct bg_col *bgcol, unsigned int mtu)
+  {
+    unsigned int max_packets_per_frame=(mtu+COL_FRAGPAYLOAD-1) / COL_FRAGPAYLOAD ;
+    bgcol->max_packets_per_frame = max_packets_per_frame ;
+    bgcol->mtu = max_packets_per_frame * COL_FRAGPAYLOAD + COL_SKB_ALIGN ;
+  }
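+
+/*
+ * Worked example, assuming COL_FRAGPAYLOAD = 240 and COL_SKB_ALIGN = 16:
+ * bgcol_set_mtu(col, 9000) gives max_packets_per_frame =
+ * (9000 + 239) / 240 = 38, and rounds col->mtu up to
+ * 38 * 240 + 16 = 9136 bytes.
+ */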
+
+/* The plan is to save and restore those FP regs which are used as a marshalling area, */
+/* not the untouched registers, and not the fpscr. */
+enum {
+  k_dh_regs_to_save = 16
+};
+typedef struct
+    {
+       double primary, secondary ;
+    } dh_reg_t ;
+typedef struct
+    {
+      dh_reg_t reg_array[k_dh_regs_to_save] ;
+    } dh_area_t;
+static inline void dh_reg_save(dh_area_t * dh_area)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+
+    TRACEN(k_t_irqflow, "dh_area=%p",dh_area) ;
+           asm  (
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[dh_area]       \n\t"  /* F1 store to Q1 at (%[dh_area]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "stfpdx  2,%[index1],%[dh_area]       \n\t"  /* F2 store to Q2 */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "stfpdx  3,%[index2],%[dh_area]       \n\t"  /* F3 store to Q3 */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "stfpdx  4,%[index1],%[dh_area]       \n\t"  /* F4 store to Q4 */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "stfpdx  5,%[index2],%[dh_area]       \n\t"  /* F5 store to Q5 */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "stfpdx  6,%[index1],%[dh_area]       \n\t"  /* F6 store to Q6 */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "stfpdx  7,%[index2],%[dh_area]       \n\t"  /* F7 store to Q7 */
+               "li      %[index2],128                    \n\t"  /* Indexing values */
+               "stfpdx  8,%[index1],%[dh_area]       \n\t"  /* F8 store to Q8 */
+               "li      %[index1],144                    \n\t"  /* Indexing values */
+               "stfpdx  9,%[index2],%[dh_area]       \n\t"  /* F9 store to Q9 */
+               "li      %[index2],160                    \n\t"  /* Indexing values */
+               "stfpdx  10,%[index1],%[dh_area]       \n\t"  /* F10 store to Q10 */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "stfpdx  11,%[index2],%[dh_area]       \n\t"  /* F11 store to Q11 */
+               "li      %[index2],192                    \n\t"  /* Indexing values */
+               "stfpdx  12,%[index1],%[dh_area]       \n\t"  /* F12 store to Q12 */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "stfpdx  13,%[index2],%[dh_area]       \n\t"  /* F13 store to Q13 */
+               "li      %[index2],224                    \n\t"  /* Indexing values */
+               "stfpdx  14,%[index1],%[dh_area]       \n\t"  /* F14 store to Q14 */
+               "stfpdx  15,%[index2],%[dh_area]       \n\t"  /* F15 store to Q15 */
+                     :          /* outputs */
+                       "=m" (*dh_area),
+                       [index1] "=b" (index1),
+                       [index2] "=b" (index2)
+                     :            /* Inputs */
+                       [dh_area] "b" (dh_area)         /* inputs */
+                       /* Clobbers nothing */
+                       );
+
+  }
+static inline void dh_reg_restore(dh_area_t * dh_area)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    TRACEN(k_t_irqflow, "dh_area=%p",dh_area) ;
+               asm volatile(
+                   "li      %[index1],16                    \n\t"  /* Indexing values */
+                   "lfpdx  1,0,%[dh_area]       \n\t"  /* F1=Q1 load from (%[dh_area]) */
+                   "li      %[index2],32                    \n\t"  /* Indexing values */
+                   "lfpdx  2,%[index1],%[dh_area]       \n\t"  /* F2=Q2 load */
+                   "li      %[index1],48                    \n\t"  /* Indexing values */
+                   "lfpdx  3,%[index2],%[dh_area]       \n\t"  /* F3=Q3 load */
+                   "li      %[index2],64                    \n\t"  /* Indexing values */
+                   "lfpdx  4,%[index1],%[dh_area]       \n\t"  /* F4=Q4 load */
+                   "li      %[index1],80                    \n\t"  /* Indexing values */
+                   "lfpdx  5,%[index2],%[dh_area]       \n\t"  /* F5=Q5 load */
+                   "li      %[index2],96                    \n\t"  /* Indexing values */
+                   "lfpdx  6,%[index1],%[dh_area]       \n\t"  /* F6=Q6 load */
+                   "li      %[index1],112                   \n\t"  /* Indexing values */
+                   "lfpdx  7,%[index2],%[dh_area]       \n\t"  /* F7=Q7 load */
+                   "li      %[index2],128                    \n\t"  /* Indexing values */
+                   "lfpdx  8,%[index1],%[dh_area]       \n\t"  /* F8=Q8 load */
+                   "li      %[index1],144                    \n\t"  /* Indexing values */
+                   "lfpdx  9,%[index2],%[dh_area]       \n\t"  /* F9=Q9 load */
+                   "li      %[index2],160                    \n\t"  /* Indexing values */
+                   "lfpdx  10,%[index1],%[dh_area]       \n\t"  /* F10=Q10 load */
+                   "li      %[index1],176                   \n\t"  /* Indexing values */
+                   "lfpdx  11,%[index2],%[dh_area]       \n\t"  /* F11=Q11 load */
+                   "li      %[index2],192                    \n\t"  /* Indexing values */
+                   "lfpdx  12,%[index1],%[dh_area]       \n\t"  /* F12=Q12 load */
+                   "li      %[index1],208                   \n\t"  /* Indexing values */
+                   "lfpdx  13,%[index2],%[dh_area]       \n\t"  /* F13=Q13 load */
+                   "li      %[index2],224                    \n\t"  /* Indexing values */
+                   "lfpdx  14,%[index1],%[dh_area]       \n\t"  /* F14=Q14 load */
+                   "lfpdx  15,%[index2],%[dh_area]       \n\t"  /* F15=Q15 load */
+                   :          /* outputs */
+                     "=m" (*dh_area),
+                     [index1] "=b" (index1),
+                     [index2] "=b" (index2)
+                   :            /* Inputs */
+                     [dh_area] "b" (dh_area)         /* inputs */
+                     /* Clobbers nothing */
+                     ) ;
+
+  }
+
+static dh_area_t dh_savearea __attribute__((aligned(16))) ;
+/*  Inject a 16-byte header and a COL_FRAGPAYLOAD-byte payload */
+static inline void bgcol_payload_inject(void *port, void* first_quad, void *remaining_quads)
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+                     "lfpdx   0,0,%[first_quad]        \n\t"  /* F0=Q0 load */
+                     "li      3,16                    \n\t"  /* Indexing values */
+                     "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "li      4,32                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "li      3,48                    \n\t"  /* Indexing values */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "li      4,64                    \n\t"  /* Indexing values */
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q0 store to TR0_DI */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "li      3,80                    \n\t"  /* Indexing values */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "li      4,96                    \n\t"  /* Indexing values */
+                     "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "li      3,112                   \n\t"  /* Indexing values */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q1 store */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q2 store */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q3 store */
+                     "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "li      4,128                    \n\t"  /* Indexing values */
+                     "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "li      3,144                    \n\t"  /* Indexing values */
+                     "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "li      4,160                    \n\t"  /* Indexing values */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q4 store */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q5 store */
+                     "stfpdx  6,0,%[port]        \n\t"  /* Q6 store */
+                     "lfpdx  0,3,%[remaining_quads]       \n\t"  /* F0=Q10 load */
+                     "li      3,176                   \n\t"  /* Indexing values */
+                     "lfpdx  1,4,%[remaining_quads]       \n\t"  /* F1=Q11 load */
+                     "li      4,192                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q12 load */
+                     "li      3,208                   \n\t"  /* Indexing values */
+                     "stfpdx  7,0,%[port]        \n\t"  /* Q7 store */
+                     "stfpdx  8,0,%[port]        \n\t"  /* Q8 store */
+                     "stfpdx  9,0,%[port]        \n\t"  /* Q9 store */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q13 load */
+                     "li      4,224                    \n\t"  /* Indexing values */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q14 load */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q15 load */
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q10 store */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q11 store */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q12 store */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q13 store */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q14 store */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q15 store */
+                     :
+                     : [first_quad]      "b" (first_quad) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads),
+                       [port]            "b" (port)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9",
+                       "r3" , "r4"  );
+  }
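+
+/*
+ * For reference, the data movement above is equivalent to this sketch
+ * (not usable as-is: the FIFO port must be fed with 16-byte quad
+ * stores through the FPU, which plain C cannot express):
+ *
+ *   write_quad(port, first_quad);                      // 16-byte link header
+ *   for (i = 0; i < 15; i++)                           // 15 * 16 = 240 bytes
+ *       write_quad(port, (char *)remaining_quads + 16 * i);
+ *
+ * where write_quad() is a hypothetical 16-byte store primitive; the
+ * asm interleaves the loads and stores to keep the FPU pipeline busy.
+ */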
+/*  Inject a 16-byte header and a COL_FRAGPAYLOAD-byte payload */
+static inline void bgcol_payload_inject2(void *port, double* first_quad_0, double* first_quad_1, void *remaining_quads)
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+                     "lfdx   0,0,%[first_quad_0]        \n\t"  /* F0=Q0 load */
+                     "lfsdx   0,0,%[first_quad_1]        \n\t"  /* F0=Q0 load */
+                     "li      3,16                    \n\t"  /* Indexing values */
+                     "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "li      4,32                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "li      3,48                    \n\t"  /* Indexing values */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "li      4,64                    \n\t"  /* Indexing values */
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q0 store to TR0_DI */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "li      3,80                    \n\t"  /* Indexing values */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "li      4,96                    \n\t"  /* Indexing values */
+                     "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "li      3,112                   \n\t"  /* Indexing values */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q1 store */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q2 store */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q3 store */
+                     "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "li      4,128                    \n\t"  /* Indexing values */
+                     "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "li      3,144                    \n\t"  /* Indexing values */
+                     "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "li      4,160                    \n\t"  /* Indexing values */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q4 store */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q5 store */
+                     "stfpdx  6,0,%[port]        \n\t"  /* Q6 store */
+                     "lfpdx  0,3,%[remaining_quads]       \n\t"  /* F0=Q10 load */
+                     "li      3,176                   \n\t"  /* Indexing values */
+                     "lfpdx  1,4,%[remaining_quads]       \n\t"  /* F1=Q11 load */
+                     "li      4,192                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q12 load */
+                     "li      3,208                   \n\t"  /* Indexing values */
+                     "stfpdx  7,0,%[port]        \n\t"  /* Q7 store */
+                     "stfpdx  8,0,%[port]        \n\t"  /* Q8 store */
+                     "stfpdx  9,0,%[port]        \n\t"  /* Q9 store */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q13 load */
+                     "li      4,224                    \n\t"  /* Indexing values */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q14 load */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q15 load */
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q10 store */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q11 store */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q12 store */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q13 store */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q14 store */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q15 store */
+                     :
+                     : [first_quad_0]      "b" (first_quad_0) ,           /* Inputs */
+                       [first_quad_1]      "b" (first_quad_1) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads),
+                       [port]            "b" (port)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9",
+                       "r3" , "r4"  );
+  }
+/*  load a bgcol payload's worth from memory into registers */
+static inline void bgcol_payload_inject_load(void* first_quad, void *remaining_quads)
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+           asm volatile(
+                     "lfpdx   0,0,%[first_quad]        \n\t"  /* F0=Q0 load */
+                     "li      3,16                    \n\t"  /* Indexing values */
+                     "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "li      4,32                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "li      3,48                    \n\t"  /* Indexing values */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "li      4,64                    \n\t"  /* Indexing values */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "li      3,80                    \n\t"  /* Indexing values */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "li      4,96                    \n\t"  /* Indexing values */
+                     "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "li      3,112                   \n\t"  /* Indexing values */
+                     "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "li      4,128                    \n\t"  /* Indexing values */
+                     "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "li      3,144                    \n\t"  /* Indexing values */
+                     "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "li      4,160                    \n\t"  /* Indexing values */
+                     "lfpdx  10,3,%[remaining_quads]       \n\t"  /* F10=Q10 load */
+                     "li      3,176                   \n\t"  /* Indexing values */
+                     "lfpdx  11,4,%[remaining_quads]       \n\t"  /* F11=Q11 load */
+                     "li      4,192                    \n\t"  /* Indexing values */
+                     "lfpdx  12,3,%[remaining_quads]       \n\t"  /* F12=Q12 load */
+                     "li      3,208                   \n\t"  /* Indexing values */
+                     "lfpdx  13,4,%[remaining_quads]       \n\t"  /* F13=Q13 load */
+                     "li      4,224                    \n\t"  /* Indexing values */
+                     "lfpdx  14,3,%[remaining_quads]       \n\t"  /* F14=Q14 load */
+                     "lfpdx  15,4,%[remaining_quads]       \n\t"  /* F15=Q15 load */
+                     :
+                     : [first_quad]      "b" (first_quad) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15","r3" , "r4"  );
+  }
+static inline void bgcol_payload_inject_load2(double* first_quad_0, double* first_quad_1, void *remaining_quads)
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+           asm volatile(
+                     "lfdx   0,0,%[first_quad_0]        \n\t"  /* F0=Q0 load */
+                     "lfsdx   0,0,%[first_quad_1]        \n\t"  /* F0=Q0 load */
+                     "li      3,16                    \n\t"  /* Indexing values */
+                     "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "li      4,32                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "li      3,48                    \n\t"  /* Indexing values */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "li      4,64                    \n\t"  /* Indexing values */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "li      3,80                    \n\t"  /* Indexing values */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "li      4,96                    \n\t"  /* Indexing values */
+                     "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "li      3,112                   \n\t"  /* Indexing values */
+                     "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "li      4,128                    \n\t"  /* Indexing values */
+                     "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "li      3,144                    \n\t"  /* Indexing values */
+                     "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "li      4,160                    \n\t"  /* Indexing values */
+                     "lfpdx  10,3,%[remaining_quads]       \n\t"  /* F10=Q10 load */
+                     "li      3,176                   \n\t"  /* Indexing values */
+                     "lfpdx  11,4,%[remaining_quads]       \n\t"  /* F11=Q11 load */
+                     "li      4,192                    \n\t"  /* Indexing values */
+                     "lfpdx  12,3,%[remaining_quads]       \n\t"  /* F12=Q12 load */
+                     "li      3,208                   \n\t"  /* Indexing values */
+                     "lfpdx  13,4,%[remaining_quads]       \n\t"  /* F13=Q13 load */
+                     "li      4,224                    \n\t"  /* Indexing values */
+                     "lfpdx  14,3,%[remaining_quads]       \n\t"  /* F14=Q14 load */
+                     "lfpdx  15,4,%[remaining_quads]       \n\t"  /* F15=Q15 load */
+                     :
+                     : [first_quad_0]      "b" (first_quad_0) ,           /* Inputs */
+                       [first_quad_1]      "b" (first_quad_1) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15","r3" , "r4"  );
+  }
+static inline void bgcol_payload_inject_load2partial(double* first_quad_0, double* first_quad_1, void *remaining_quads, int quadcount )
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+           asm volatile(
+                     "mtctr  %[quadcount]        \n\t"
+                     "lfdx   0,0,%[first_quad_0]        \n\t"  /* F0=Q0 load */
+                     "lfsdx   0,0,%[first_quad_1]        \n\t"  /* F0=Q0 load */
+                     "li      3,16                    \n\t"  /* Indexing values */
+                     "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,32                    \n\t"  /* Indexing values */
+                     "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      3,48                    \n\t"  /* Indexing values */
+                     "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,64                    \n\t"  /* Indexing values */
+                     "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      3,80                    \n\t"  /* Indexing values */
+                     "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,96                    \n\t"  /* Indexing values */
+                     "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      3,112                   \n\t"  /* Indexing values */
+                     "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,128                    \n\t"  /* Indexing values */
+                     "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      3,144                    \n\t"  /* Indexing values */
+                     "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,160                    \n\t"  /* Indexing values */
+                     "lfpdx  10,3,%[remaining_quads]       \n\t"  /* F10=Q10 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      3,176                   \n\t"  /* Indexing values */
+                     "lfpdx  11,4,%[remaining_quads]       \n\t"  /* F11=Q11 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,192                    \n\t"  /* Indexing values */
+                     "lfpdx  12,3,%[remaining_quads]       \n\t"  /* F12=Q12 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      3,208                   \n\t"  /* Indexing values */
+                     "lfpdx  13,4,%[remaining_quads]       \n\t"  /* F13=Q13 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "li      4,224                    \n\t"  /* Indexing values */
+                     "lfpdx  14,3,%[remaining_quads]       \n\t"  /* F14=Q14 load */
+                     "bdz    1f                          \n\t"  /* Skip out if done */
+                     "lfpdx  15,4,%[remaining_quads]       \n"  /* F15=Q15 load */
+                     "1:                                   \n\t"  /* Jump-out label */
+                     :
+                     : [first_quad_0]      "b" (first_quad_0) ,           /* Inputs */
+                       [first_quad_1]      "b" (first_quad_1) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads) ,
+                       [quadcount] "r" (quadcount)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15","r3" , "r4"  );
+  }
+static inline void bgcol_payload_inject_storeload(void *port, void* first_quad, void *remaining_quads)
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+           asm volatile(
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q0 store to TR0_DI */
+               "lfpdx   0,0,%[first_quad]        \n\t"  /* F0=Q0 load */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q1 store */
+               "li      3,16                    \n\t"  /* Indexing values */
+               "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q2 store */
+               "li      4,32                    \n\t"  /* Indexing values */
+               "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q3 store */
+               "li      3,48                    \n\t"  /* Indexing values */
+               "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q4 store */
+               "li      4,64                    \n\t"  /* Indexing values */
+               "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q5 store */
+               "li      3,80                    \n\t"  /* Indexing values */
+               "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "stfpdx  6,0,%[port]        \n\t"  /* Q6 store */
+               "li      4,96                    \n\t"  /* Indexing values */
+               "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "stfpdx  7,0,%[port]        \n\t"  /* Q7 store */
+               "li      3,112                   \n\t"  /* Indexing values */
+               "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "stfpdx  8,0,%[port]        \n\t"  /* Q8 store */
+               "li      4,128                    \n\t"  /* Indexing values */
+               "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "stfpdx  9,0,%[port]        \n\t"  /* Q9 store */
+               "li      3,144                    \n\t"  /* Indexing values */
+               "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "stfpdx  10,0,%[port]        \n\t"  /* Q10 store */
+               "li      4,160                    \n\t"  /* Indexing values */
+               "lfpdx  10,3,%[remaining_quads]       \n\t"  /* F10=Q10 load */
+                     "stfpdx  11,0,%[port]        \n\t"  /* Q11 store */
+               "li      3,176                   \n\t"  /* Indexing values */
+               "lfpdx  11,4,%[remaining_quads]       \n\t"  /* F11=Q11 load */
+                     "stfpdx  12,0,%[port]        \n\t"  /* Q12 store */
+               "li      4,192                    \n\t"  /* Indexing values */
+               "lfpdx  12,3,%[remaining_quads]       \n\t"  /* F12=Q12 load */
+                     "stfpdx  13,0,%[port]        \n\t"  /* Q13 store */
+               "li      3,208                   \n\t"  /* Indexing values */
+               "lfpdx  13,4,%[remaining_quads]       \n\t"  /* F13=Q13 load */
+                     "stfpdx  14,0,%[port]        \n\t"  /* Q14 store */
+               "li      4,224                    \n\t"  /* Indexing values */
+               "lfpdx  14,3,%[remaining_quads]       \n\t"  /* F14=Q14 load */
+                     "stfpdx  15,0,%[port]        \n\t"  /* Q15 store */
+               "lfpdx  15,4,%[remaining_quads]       \n\t"  /* F15=Q15 load */
+                    :
+                     : [first_quad]      "b" (first_quad) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads),
+                       [port]            "b" (port)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15","r3" , "r4"  );
+  }
+static inline void bgcol_payload_inject_storeload2(void *port, double* first_quad_0, double* first_quad_1, void *remaining_quads)
+  {
+/*     BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+           asm volatile(
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q0 store to TR0_DI */
+               "lfdx   0,0,%[first_quad_0]        \n\t"  /* F0 primary half = first 8 header bytes */
+               "lfsdx   0,0,%[first_quad_1]        \n\t"  /* F0 secondary half = second 8 header bytes */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q1 store */
+               "li      3,16                    \n\t"  /* Indexing values */
+               "lfpdx   1,0,%[remaining_quads]       \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q2 store */
+               "li      4,32                    \n\t"  /* Indexing values */
+               "lfpdx  2,3,%[remaining_quads]       \n\t"  /* F2=Q2 load */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q3 store */
+               "li      3,48                    \n\t"  /* Indexing values */
+               "lfpdx  3,4,%[remaining_quads]       \n\t"  /* F3=Q3 load */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q4 store */
+               "li      4,64                    \n\t"  /* Indexing values */
+               "lfpdx  4,3,%[remaining_quads]       \n\t"  /* F4=Q4 load */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q5 store */
+               "li      3,80                    \n\t"  /* Indexing values */
+               "lfpdx  5,4,%[remaining_quads]       \n\t"  /* F5=Q5 load */
+                     "stfpdx  6,0,%[port]        \n\t"  /* Q6 store */
+               "li      4,96                    \n\t"  /* Indexing values */
+               "lfpdx  6,3,%[remaining_quads]       \n\t"  /* F6=Q6 load */
+                     "stfpdx  7,0,%[port]        \n\t"  /* Q7 store */
+               "li      3,112                   \n\t"  /* Indexing values */
+               "lfpdx  7,4,%[remaining_quads]       \n\t"  /* F7=Q7 load */
+                     "stfpdx  8,0,%[port]        \n\t"  /* Q8 store */
+               "li      4,128                    \n\t"  /* Indexing values */
+               "lfpdx  8,3,%[remaining_quads]       \n\t"  /* F8=Q8 load */
+                     "stfpdx  9,0,%[port]        \n\t"  /* Q9 store */
+               "li      3,144                    \n\t"  /* Indexing values */
+               "lfpdx  9,4,%[remaining_quads]       \n\t"  /* F9=Q9 load */
+                     "stfpdx  10,0,%[port]        \n\t"  /* Q10 store */
+               "li      4,160                    \n\t"  /* Indexing values */
+               "lfpdx  10,3,%[remaining_quads]       \n\t"  /* F10=Q10 load */
+                     "stfpdx  11,0,%[port]        \n\t"  /* Q11 store */
+               "li      3,176                   \n\t"  /* Indexing values */
+               "lfpdx  11,4,%[remaining_quads]       \n\t"  /* F11=Q11 load */
+                     "stfpdx  12,0,%[port]        \n\t"  /* Q12 store */
+               "li      4,192                    \n\t"  /* Indexing values */
+               "lfpdx  12,3,%[remaining_quads]       \n\t"  /* F12=Q12 load */
+                     "stfpdx  13,0,%[port]        \n\t"  /* Q13 store */
+               "li      3,208                   \n\t"  /* Indexing values */
+               "lfpdx  13,4,%[remaining_quads]       \n\t"  /* F13=Q13 load */
+                     "stfpdx  14,0,%[port]        \n\t"  /* Q14 store */
+               "li      4,224                    \n\t"  /* Indexing values */
+               "lfpdx  14,3,%[remaining_quads]       \n\t"  /* F14=Q14 load */
+                     "stfpdx  15,0,%[port]        \n\t"  /* Q15 store */
+               "lfpdx  15,4,%[remaining_quads]       \n\t"  /* F15=Q15 load */
+                    :
+                     : [first_quad_0]      "b" (first_quad_0) ,           /* Inputs */
+                       [first_quad_1]      "b" (first_quad_1) ,           /* Inputs */
+                       [remaining_quads] "b" (remaining_quads),
+                       [port]            "b" (port)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15","r3" , "r4"  );
+  }
+static inline void bgcol_payload_inject_store(void *port)
+  {
+           asm volatile(
+                     "stfpdx  0,0,%[port]        \n\t"  /* Q0 store to TR0_DI */
+                     "stfpdx  1,0,%[port]        \n\t"  /* Q1 store */
+                     "stfpdx  2,0,%[port]        \n\t"  /* Q2 store */
+                     "stfpdx  3,0,%[port]        \n\t"  /* Q3 store */
+                     "stfpdx  4,0,%[port]        \n\t"  /* Q4 store */
+                     "stfpdx  5,0,%[port]        \n\t"  /* Q5 store */
+                     "stfpdx  6,0,%[port]        \n\t"  /* Q6 store */
+                     "stfpdx  7,0,%[port]        \n\t"  /* Q7 store */
+                     "stfpdx  8,0,%[port]        \n\t"  /* Q8 store */
+                     "stfpdx  9,0,%[port]        \n\t"  /* Q9 store */
+                     "stfpdx  10,0,%[port]        \n\t"  /* Q10 store */
+                     "stfpdx  11,0,%[port]        \n\t"  /* Q11 store */
+                     "stfpdx  12,0,%[port]        \n\t"  /* Q12 store */
+                     "stfpdx  13,0,%[port]        \n\t"  /* Q13 store */
+                     "stfpdx  14,0,%[port]        \n\t"  /* Q14 store */
+                     "stfpdx  15,0,%[port]        \n\t"  /* Q15 store */
+                     :
+                     : /* inputs */
+                       [port]            "b" (port)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15"  );
+  }
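+/* A minimal usage sketch of the inject trio (illustrative only: the loop,
+ * 'frag()', 'nfrags' and the argument order of the load call are
+ * hypothetical, not taken from elsewhere in this driver).  The point is the
+ * software pipeline: the FIFO stores of fragment N overlap the memory loads
+ * of fragment N+1.
+ *
+ *   bgcol_payload_inject_load2(hdr0, hdr1, quads, nquads) ;          // prime fr0..fr15
+ *   for( n = 1 ; n < nfrags ; n += 1 )
+ *     bgcol_payload_inject_storeload2(port, hdr0, hdr1, frag(n)) ;   // push N-1, pull N
+ *   bgcol_payload_inject_store(port) ;                               // drain the last fragment
+ */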
+
+/*  receive a COL_FRAGPAYLOAD-byte payload */
+static inline void bgcol_payload_receive240(void *port, void *remaining_quads)
+  {
+/*     BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+           asm volatile(
+               "lfpdx  1,0,%[port]        \n\t"  /* F1=Q1 load from FIFO */
+               "lfpdx  2,0,%[port]        \n\t"  /* F2=Q2 load */
+               "lfpdx  3,0,%[port]        \n\t"  /* F3=Q3 load */
+               "lfpdx  4,0,%[port]        \n\t"  /* F4=Q4 load */
+               "lfpdx  5,0,%[port]        \n\t"  /* F5=Q5 load */
+               "lfpdx  6,0,%[port]        \n\t"  /* F6=Q6 load */
+               "lfpdx  7,0,%[port]        \n\t"  /* F7=Q7 load */
+               "lfpdx  8,0,%[port]        \n\t"  /* F8=Q8 load */
+               "lfpdx  9,0,%[port]        \n\t"  /* F9=Q9 load */
+               "lfpdx  0,0,%[port]        \n\t"  /* F0=Q10 load (fr0 recycled) */
+               "li      3,16                    \n\t"  /* Indexing values */
+               "stfpdx   1,0,%[remaining_quads]       \n\t"  /* Q1 store to (%[remaining_quads]) */
+               "li      4,32                    \n\t"  /* Indexing values */
+               "stfpdx  2,3,%[remaining_quads]       \n\t"  /* Q2 store */
+               "lfpdx  1,0,%[port]        \n\t"  /* F1=Q11 load */
+               "li      3,48                    \n\t"  /* Indexing values */
+               "stfpdx  3,4,%[remaining_quads]       \n\t"  /* Q3 store */
+               "lfpdx  2,0,%[port]        \n\t"  /* F2=Q12 load */
+               "li      4,64                    \n\t"  /* Indexing values */
+               "stfpdx  4,3,%[remaining_quads]       \n\t"  /* Q4 store */
+               "lfpdx  3,0,%[port]        \n\t"  /* F3=Q13 load */
+               "li      3,80                    \n\t"  /* Indexing values */
+               "stfpdx  5,4,%[remaining_quads]       \n\t"  /* Q5 store */
+               "lfpdx  4,0,%[port]        \n\t"  /* F4=Q14 load */
+               "li      4,96                    \n\t"  /* Indexing values */
+               "stfpdx  6,3,%[remaining_quads]       \n\t"  /* Q6 store */
+               "lfpdx  5,0,%[port]        \n\t"  /* F5=Q15 load */
+               "li      3,112                   \n\t"  /* Indexing values */
+               "stfpdx  7,4,%[remaining_quads]       \n\t"  /* Q7 store */
+               "li      4,128                    \n\t"  /* Indexing values */
+               "stfpdx  8,3,%[remaining_quads]       \n\t"  /* Q8 store */
+               "li      3,144                    \n\t"  /* Indexing values */
+               "stfpdx  9,4,%[remaining_quads]       \n\t"  /* Q9 store */
+               "li      4,160                    \n\t"  /* Indexing values */
+               "stfpdx  0,3,%[remaining_quads]       \n\t"  /* Q10 store (from fr0) */
+               "li      3,176                   \n\t"  /* Indexing values */
+               "stfpdx  1,4,%[remaining_quads]       \n\t"  /* Q11 store */
+               "li      4,192                    \n\t"  /* Indexing values */
+               "stfpdx  2,3,%[remaining_quads]       \n\t"  /* Q12 store */
+               "li      3,208                   \n\t"  /* Indexing values */
+               "stfpdx  3,4,%[remaining_quads]       \n\t"  /* Q13 store */
+               "li      4,224                    \n\t"  /* Indexing values */
+               "stfpdx  4,3,%[remaining_quads]       \n\t"  /* Q14 store */
+               "stfpdx  5,4,%[remaining_quads]       \n\t"  /* Q15 store */
+                     :
+                     :            /* Inputs */
+                       [remaining_quads] "b" (remaining_quads),
+                       [port]            "b" (port)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "r3" , "r4"  );
+  }
+
+
+/*  Load a full bgcol payload into 16 parallel floating point registers */
+/*  Caution ... the compiler doesn't know that we want the regs later on */
+static inline unsigned int bgcol_payload_load(
+    void *port,  /*  The FIFO port */
+    void *lnkhdr,  /*  Where to put the first 16 bytes of the payload */
+    void *destport  /*  Which address to tap to ask for the next packet */
+    )
+  {
+    unsigned int src_key ;
+    unsigned int dummy ;
+    struct { unsigned char c [16]  ; } *lnkhdrc =  lnkhdr ;
+/*     BUG_ON((((int)lnkhdr) & 0xf) != 0) ; */
+
+           asm  (
+               "lfpdx  0,0,%[port]        \n\t"  /* lnkhdr */
+               "lfpdx  1,0,%[port]        \n\t"  /* F1=Q1 load from FIFO */
+               "lfpdx  2,0,%[port]        \n\t"  /* F2=Q2 load */
+               "lfpdx  3,0,%[port]        \n\t"  /* F3=Q3 load */
+               "lfpdx  4,0,%[port]        \n\t"  /* F4=Q4 load */
+               "lfpdx  5,0,%[port]        \n\t"  /* F5=Q5 load */
+               "lfpdx  6,0,%[port]        \n\t"  /* F6=Q6 load */
+               "stfpdx 0,0,%[lnkhdr]      \n\t"  /* store lnkhdr to memory */
+               "lfpdx  7,0,%[port]        \n\t"  /* F7=Q7 load */
+               "lfpdx  8,0,%[port]        \n\t"  /* F8=Q8 load */
+               "lfpdx  9,0,%[port]        \n\t"  /* F9=Q9 load */
+               "lfpdx  10,0,%[port]        \n\t"  /* F10=Q10 load */
+               "lfpdx  11,0,%[port]        \n\t"  /* F11=Q11 load */
+               "lfpdx  12,0,%[port]        \n\t"  /* F12=Q12 load */
+               "lwz      %[src_key],4(%[lnkhdr])        \n\t"  /* pick src_key out of the stored lnkhdr */
+               "lfpdx  13,0,%[port]        \n\t"  /* F13=Q13 load */
+               "lfpdx  14,0,%[port]        \n\t"  /* F14=Q14 load */
+               "lfpdx  15,0,%[port]        \n\t"  /* F15=Q15 load */
+               "lwz     %[dummy],0(%[destport])   \n\t"  /* trigger to pull the next packet in */
+                     :          /* outputs */
+                       [dummy] "=r" (dummy),
+                       [src_key] "=b" (src_key),
+                        "=m" (*lnkhdrc)
+                     :            /* Inputs */
+                       [port]            "b" (port) ,
+                       [lnkhdr] "b" (lnkhdrc) ,
+                       [destport] "b" (destport)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15"
+                       );
+           TRACEN(k_t_fifocontents, "bgcol_payload_load src_key=%08x",src_key) ;
+    return src_key ;
+  }
+
+static inline unsigned int bgcol_payload_load2(
+    void *port,  /*  The FIFO port */
+    double *lnkhdr0,  /*  Where to put the first 8 bytes of the payload */
+    double *lnkhdr1,  /*  Where to put the second 8 bytes of the payload */
+    void *destport  /*  Which address to tap to ask for the next packet */
+    )
+  {
+    unsigned int src_key ;
+    unsigned int dummy ;
+/*     BUG_ON((((int)lnkhdr0) & 0x07) != 0) ; */
+/*     BUG_ON((((int)lnkhdr1) & 0x07) != 0) ; */
+
+           asm  (
+               "lfpdx  0,0,%[port]        \n\t"  /* lnkhdr */
+               "lfpdx  1,0,%[port]        \n\t"  /* F1=Q1 load from FIFO */
+               "lfpdx  2,0,%[port]        \n\t"  /* F2=Q2 load */
+               "lfpdx  3,0,%[port]        \n\t"  /* F3=Q3 load */
+               "lfpdx  4,0,%[port]        \n\t"  /* F4=Q4 load */
+               "lfpdx  5,0,%[port]        \n\t"  /* F5=Q5 load */
+               "lfpdx  6,0,%[port]        \n\t"  /* F6=Q6 load */
+               "stfdx 0,0,%[lnkhdr0]      \n\t"  /* store primary half of lnkhdr */
+               "lfpdx  7,0,%[port]        \n\t"  /* F7=Q7 load */
+               "stfsdx 0,0,%[lnkhdr1]      \n\t"  /* store secondary half of lnkhdr */
+               "lfpdx  8,0,%[port]        \n\t"  /* F8=Q8 load */
+               "lfpdx  9,0,%[port]        \n\t"  /* F9=Q9 load */
+               "lfpdx  10,0,%[port]        \n\t"  /* F10=Q10 load */
+               "lfpdx  11,0,%[port]        \n\t"  /* F11=Q11 load */
+               "lfpdx  12,0,%[port]        \n\t"  /* F12=Q12 load */
+               "lwz      %[src_key],4(%[lnkhdr0])        \n\t"  /* pick src_key out of the stored lnkhdr */
+               "lfpdx  13,0,%[port]        \n\t"  /* F13=Q13 load */
+               "lfpdx  14,0,%[port]        \n\t"  /* F14=Q14 load */
+               "lfpdx  15,0,%[port]        \n\t"  /* F15=Q15 load */
+               "lwz     %[dummy],0(%[destport])   \n\t"  /* trigger to pull the next packet in */
+                     :          /* outputs */
+                       [dummy] "=r" (dummy),
+                       [src_key] "=b" (src_key),
+                        "=m" (*lnkhdr0),
+                        "=m" (*lnkhdr1)
+                     :            /* Inputs */
+                       [port]            "b" (port) ,
+                       [lnkhdr0] "b" (lnkhdr0) ,
+                       [lnkhdr1] "b" (lnkhdr1) ,
+                       [destport] "b" (destport)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15"
+                       );
+           TRACEN(k_t_fifocontents, "bgcol_payload_load2 src_key=%08x",src_key) ;
+    return src_key ;
+  }
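+/* The "...2" variants split the 16-byte link header across two 8-byte
+ * halves: stfdx puts down the primary half of fr0 and stfsdx the secondary
+ * half, so lnkhdr0/lnkhdr1 need only 8-byte alignment (the commented-out
+ * BUG_ONs check & 0x07) where the single-buffer variants require a 16-byte
+ * aligned header buffer (& 0xf). */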
+
+/*  Save the previous payload to store, and load the next payload from FIFO */
+static inline unsigned int bgcol_payload_storeload(
+    void *port,
+    void *lnkhdr,
+    void * payloadptr,
+    void *destport )
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    unsigned int src_key ;
+    struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ;
+    struct { unsigned char c [16]  ; } *lnkhdrc  ;
+/*     BUG_ON((((int)lnkhdr) & 0xf) != 0) ; */
+/*     BUG_ON((((int)payloadptr) & 0xf) != 0) ; */
+
+    lnkhdrc =  lnkhdr ;
+
+    payload = payloadptr;
+    TRACEN(k_t_fifocontents, "bgcol_payload_storeload payload=%p",payloadptr) ;
+
+           asm  (
+               "lfpdx   0,0,%[port]        \n\t"  /* lnkhdr */
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[payload]       \n\t"  /* Q1 store to (%[payload]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "lfpdx   1,0,%[port]        \n\t"  /* F1=Q1 load from FIFO */
+               "stfpdx  2,%[index1],%[payload]       \n\t"  /* Q2 store */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "lfpdx   2,0,%[port]        \n\t"  /* F2=Q2 load */
+               "stfpdx  3,%[index2],%[payload]       \n\t"  /* Q3 store */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "lfpdx   3,0,%[port]        \n\t"  /* F3=Q3 load */
+               "stfpdx  4,%[index1],%[payload]       \n\t"  /* Q4 store */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "lfpdx   4,0,%[port]        \n\t"  /* F4=Q4 load */
+               "stfpdx  5,%[index2],%[payload]       \n\t"  /* Q5 store */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "lfpdx   5,0,%[port]        \n\t"  /* F5=Q5 load */
+               "stfpdx  6,%[index1],%[payload]       \n\t"  /* Q6 store */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "lfpdx   6,0,%[port]        \n\t"  /* F6=Q6 load */
+               "stfpdx  7,%[index2],%[payload]       \n\t"  /* Q7 store */
+               "li      %[index2],128                    \n\t"  /* Indexing values */
+               "lfpdx   7,0,%[port]        \n\t"  /* F7=Q7 load */
+               "stfpdx  8,%[index1],%[payload]       \n\t"  /* Q8 store */
+               "li      %[index1],144                    \n\t"  /* Indexing values */
+               "lfpdx   8,0,%[port]        \n\t"  /* F8=Q8 load */
+               "stfpdx  0,0,%[lnkhdr]      \n\t"  /* store lnkhdr */
+               "stfpdx  9,%[index2],%[payload]       \n\t"  /* Q9 store */
+               "li      %[index2],160                    \n\t"  /* Indexing values */
+               "lfpdx   9,0,%[port]        \n\t"  /* F9=Q9 load */
+               "stfpdx  10,%[index1],%[payload]       \n\t"  /* Q10 store */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "lfpdx   10,0,%[port]        \n\t"  /* F10=Q10 load */
+               "stfpdx  11,%[index2],%[payload]       \n\t"  /* Q11 store */
+               "li      %[index2],192                    \n\t"  /* Indexing values */
+               "lfpdx   11,0,%[port]        \n\t"  /* F11=Q11 load */
+               "stfpdx  12,%[index1],%[payload]       \n\t"  /* Q12 store */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "lfpdx   12,0,%[port]        \n\t"  /* F12=Q12 load */
+               "stfpdx  13,%[index2],%[payload]       \n\t"  /* Q13 store */
+               "li      %[index2],224                    \n\t"  /* Indexing values */
+               "lfpdx   13,0,%[port]        \n\t"  /* F13=Q13 load */
+               "lwz     %[src_key],4(%[lnkhdr])        \n\t"  /* pick src_key out of the stored lnkhdr */
+               "stfpdx  14,%[index1],%[payload]       \n\t"  /* Q14 store */
+               "lfpdx   14,0,%[port]        \n\t"  /* F14=Q14 load */
+               "stfpdx  15,%[index2],%[payload]       \n\t"  /* Q15 store */
+               "lfpdx   15,0,%[port]        \n\t"  /* F15=Q15 load */
+               "lwz     %[index1],0(%[destport])   \n\t"  /* trigger to pull the next packet in */
+                     :          /* outputs */
+                       [src_key] "=b" (src_key),
+                       "=m" (*payload),
+                       "=m" (*lnkhdrc),
+                       [index1] "=b" (index1),
+                       [index2] "=b" (index2)
+                     :            /* Inputs */
+                       [port]            "b" (port) ,
+                       [payload] "b" (payload),
+                       [lnkhdr] "b" (lnkhdrc) ,
+                       [destport] "b" (destport)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15"
+                       );
+
+           TRACEN(k_t_fifocontents, "bgcol_payload_storeload src_key=%08x",src_key) ;
+    return src_key ;
+  }
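+/* Note the header handling in the storeload variants: the link header quad
+ * is stored to memory with stfpdx (or stfdx/stfsdx) early in the sequence,
+ * and src_key is then read back with an integer lwz from offset 4 of the
+ * same buffer; the reload is scheduled several FIFO accesses later,
+ * presumably to give the store time to complete before the integer side
+ * consumes it. */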
+
+static inline unsigned int bgcol_payload_storeload2(
+    void *port,
+    double *lnkhdr0,
+    double *lnkhdr1,
+    void * payloadptr,
+    void *destport )
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    unsigned int src_key ;
+    struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ;
+     /*     BUG_ON((((int)lnkhdr0) & 0x07) != 0) ; */
+     /*     BUG_ON((((int)lnkhdr1) & 0x07) != 0) ; */
+/*     BUG_ON((((int)payloadptr) & 0xf) != 0) ; */
+
+
+    payload = payloadptr;
+    TRACEN(k_t_fifocontents, "bgcol_payload_storeload2 payload=%p",payloadptr) ;
+
+           asm  (
+               "lfpdx   0,0,%[port]        \n\t"  /* lnkhdr */
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[payload]       \n\t"  /* Q1 store to (%[payload]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "lfpdx   1,0,%[port]        \n\t"  /* F1=Q1 load from FIFO */
+               "stfpdx  2,%[index1],%[payload]       \n\t"  /* Q2 store */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "lfpdx   2,0,%[port]        \n\t"  /* F2=Q2 load */
+               "stfpdx  3,%[index2],%[payload]       \n\t"  /* Q3 store */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "lfpdx   3,0,%[port]        \n\t"  /* F3=Q3 load */
+               "stfpdx  4,%[index1],%[payload]       \n\t"  /* Q4 store */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "lfpdx   4,0,%[port]        \n\t"  /* F4=Q4 load */
+               "stfpdx  5,%[index2],%[payload]       \n\t"  /* Q5 store */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "lfpdx   5,0,%[port]        \n\t"  /* F5=Q5 load */
+               "stfpdx  6,%[index1],%[payload]       \n\t"  /* Q6 store */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "lfpdx   6,0,%[port]        \n\t"  /* F6=Q6 load */
+               "stfpdx  7,%[index2],%[payload]       \n\t"  /* Q7 store */
+               "li      %[index2],128                    \n\t"  /* Indexing values */
+               "lfpdx   7,0,%[port]        \n\t"  /* F7=Q7 load */
+               "stfpdx  8,%[index1],%[payload]       \n\t"  /* Q8 store */
+               "li      %[index1],144                    \n\t"  /* Indexing values */
+               "lfpdx   8,0,%[port]        \n\t"  /* F8=Q8 load */
+               "stfdx  0,0,%[lnkhdr0]      \n\t"  /* store primary half of lnkhdr */
+               "stfpdx  9,%[index2],%[payload]       \n\t"  /* Q9 store */
+               "li      %[index2],160                    \n\t"  /* Indexing values */
+               "lfpdx   9,0,%[port]        \n\t"  /* F9=Q9 load */
+               "stfsdx  0,0,%[lnkhdr1]      \n\t"  /* store secondary half of lnkhdr */
+               "stfpdx  10,%[index1],%[payload]       \n\t"  /* Q10 store */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "lfpdx   10,0,%[port]        \n\t"  /* F10=Q10 load */
+               "stfpdx  11,%[index2],%[payload]       \n\t"  /* Q11 store */
+               "li      %[index2],192                    \n\t"  /* Indexing values */
+               "lfpdx   11,0,%[port]        \n\t"  /* F11=Q11 load */
+               "stfpdx  12,%[index1],%[payload]       \n\t"  /* Q12 store */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "lfpdx   12,0,%[port]        \n\t"  /* F12=Q12 load */
+               "stfpdx  13,%[index2],%[payload]       \n\t"  /* Q13 store */
+               "li      %[index2],224                    \n\t"  /* Indexing values */
+               "lfpdx   13,0,%[port]        \n\t"  /* F13=Q13 load */
+               "lwz     %[src_key],4(%[lnkhdr0])        \n\t"  /* pick src_key out of the stored lnkhdr */
+               "stfpdx  14,%[index1],%[payload]       \n\t"  /* Q14 store */
+               "lfpdx   14,0,%[port]        \n\t"  /* F14=Q14 load */
+               "stfpdx  15,%[index2],%[payload]       \n\t"  /* Q15 store */
+               "lfpdx   15,0,%[port]        \n\t"  /* F15=Q15 load */
+               "lwz     %[index1],0(%[destport])   \n\t"  /* trigger to pull the next packet in */
+                     :          /* outputs */
+                       [src_key] "=b" (src_key),
+                       "=m" (*payload),
+                       "=m" (*lnkhdr0),
+                       "=m" (*lnkhdr1),
+                       [index1] "=b" (index1),
+                       [index2] "=b" (index2)
+                     :            /* Inputs */
+                       [port]            "b" (port) ,
+                       [payload] "b" (payload),
+                       [lnkhdr0] "b" (lnkhdr0) ,
+                       [lnkhdr1] "b" (lnkhdr1) ,
+                       [destport] "b" (destport)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15"
+                       );
+
+           TRACEN(k_t_fifocontents, "bgcol_payload_storeload2 src_key=%08x",src_key) ;
+    return src_key ;
+  }
+
+/*  Save the previous payload to store */
+static inline void bgcol_payload_store(
+    void * payloadptr)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload=payloadptr ;
+/*     BUG_ON((((int)payloadptr) & 0xf) != 0) ; */
+
+    TRACEN(k_t_fifocontents, "bgcol_payload_store payload=%p",payload) ;
+           asm  (
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[payload]       \n\t"  /* Q1 store to (%[payload]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "stfpdx  2,%[index1],%[payload]       \n\t"  /* Q2 store */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "stfpdx  3,%[index2],%[payload]       \n\t"  /* Q3 store */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "stfpdx  4,%[index1],%[payload]       \n\t"  /* Q4 store */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "stfpdx  5,%[index2],%[payload]       \n\t"  /* Q5 store */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "stfpdx  6,%[index1],%[payload]       \n\t"  /* Q6 store */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "stfpdx  7,%[index2],%[payload]       \n\t"  /* Q7 store */
+               "li      %[index2],128                    \n\t"  /* Indexing values */
+               "stfpdx  8,%[index1],%[payload]       \n\t"  /* Q8 store */
+               "li      %[index1],144                    \n\t"  /* Indexing values */
+               "stfpdx  9,%[index2],%[payload]       \n\t"  /* Q9 store */
+               "li      %[index2],160                    \n\t"  /* Indexing values */
+               "stfpdx  10,%[index1],%[payload]       \n\t"  /* Q10 store */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "stfpdx  11,%[index2],%[payload]       \n\t"  /* Q11 store */
+               "li      %[index2],192                    \n\t"  /* Indexing values */
+               "stfpdx  12,%[index1],%[payload]       \n\t"  /* Q12 store */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "stfpdx  13,%[index2],%[payload]       \n\t"  /* Q13 store */
+               "li      %[index2],224                    \n\t"  /* Indexing values */
+               "stfpdx  14,%[index1],%[payload]       \n\t"  /* Q14 store */
+               "stfpdx  15,%[index2],%[payload]       \n\t"  /* Q15 store */
+                     :          /* outputs */
+                       "=m" (*payload),
+                       [index1] "=b" (index1),
+                       [index2] "=b" (index2)
+                     :            /* Inputs */
+                       [payload] "b" (payload)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15"
+                       );
+  }
+
+/*  receive 256 bytes, a 16-byte header and a 240-byte payload */
+/*  returns the 'source key', the key of the node which sent the data */
+
+static inline int bgcol_payload_receive256(void *port,
+    void *lnkhdr,
+    unsigned char * payload_table[],
+    unsigned int table_index_mask,
+    void *destport )
+  {
+    int table_offset ;
+    int src_key ;
+    struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ;
+    struct { unsigned char c [16]  ; } *lnkhdrc =  lnkhdr ;
+
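+           /* The assembly below interleaves draining the FIFO with a table
+            * lookup that in C would read, roughly:
+            *   payload = payload_table[src_key & table_index_mask] ;
+            * ('and' masks the key, 'slwi ...,2' scales the index by
+            * sizeof(unsigned char *), 'lwzx' fetches the pointer.) */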
+           asm  (
+               "lfpdx  0,0,%[port]        \n\t"  /* lnkhdr */
+               "lfpdx  1,0,%[port]        \n\t"  /* F1=Q1 load from FIFO */
+               "lfpdx  2,0,%[port]        \n\t"  /* F2=Q2 load */
+               "lfpdx  3,0,%[port]        \n\t"  /* F3=Q3 load */
+               "lfpdx  4,0,%[port]        \n\t"  /* F4=Q4 load */
+               "lfpdx  5,0,%[port]        \n\t"  /* F5=Q5 load */
+               "lfpdx  6,0,%[port]        \n\t"  /* F6=Q6 load */
+               "stfpdx 0,0,%[lnkhdr]      \n\t"  /* store lnkhdr to memory */
+               "lfpdx  7,0,%[port]        \n\t"  /* F7=Q7 load */
+               "lfpdx  8,0,%[port]        \n\t"  /* F8=Q8 load */
+               "lfpdx  9,0,%[port]        \n\t"  /* F9=Q9 load */
+               "lwz      %[src_key],4(%[lnkhdr])        \n\t"  /* pick src_key out of the stored lnkhdr */
+               "lfpdx  10,0,%[port]        \n\t"  /* F10=Q10 load */
+               "lfpdx  11,0,%[port]        \n\t"  /* F11=Q11 load */
+               "lfpdx  12,0,%[port]        \n\t"  /* F12=Q12 load */
+               "and    3,%[src_key],%[table_index_mask] \n\t"  /* index = src_key & mask */
+               "lfpdx  13,0,%[port]        \n\t"  /* F13=Q13 load */
+               "slwi   %[table_offset],3,2              \n\t"  /* scale index by sizeof(unsigned char *) */
+               "lfpdx  14,0,%[port]        \n\t"  /* F14=Q14 load */
+               "lwzx     %[payload],%[table_offset],%[payload_table] \n\t"  /* payload = payload_table[index] */
+               "lfpdx  15,0,%[port]        \n\t"  /* F15=Q15 load */
+               "lwz       5,0(%[destport])   \n\t"  /* trigger to pull the next packet in */
+               "li      3,16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[payload]       \n\t"  /* Q1 store to (%[payload]) */
+               "li      4,32                    \n\t"  /* Indexing values */
+               "stfpdx  2,3,%[payload]       \n\t"  /* Q2 store */
+               "li      3,48                    \n\t"  /* Indexing values */
+               "stfpdx  3,4,%[payload]       \n\t"  /* Q3 store */
+               "li      4,64                    \n\t"  /* Indexing values */
+               "stfpdx  4,3,%[payload]       \n\t"  /* Q4 store */
+               "li      3,80                    \n\t"  /* Indexing values */
+               "stfpdx  5,4,%[payload]       \n\t"  /* Q5 store */
+               "li      4,96                    \n\t"  /* Indexing values */
+               "stfpdx  6,3,%[payload]       \n\t"  /* Q6 store */
+               "li      3,112                   \n\t"  /* Indexing values */
+               "stfpdx  7,4,%[payload]       \n\t"  /* Q7 store */
+               "li      4,128                    \n\t"  /* Indexing values */
+               "stfpdx  8,3,%[payload]       \n\t"  /* Q8 store */
+               "li      3,144                    \n\t"  /* Indexing values */
+               "stfpdx  9,4,%[payload]       \n\t"  /* Q9 store */
+               "li      4,160                    \n\t"  /* Indexing values */
+               "stfpdx  10,3,%[payload]       \n\t"  /* Q10 store */
+               "li      3,176                   \n\t"  /* Indexing values */
+               "stfpdx  11,4,%[payload]       \n\t"  /* Q11 store */
+               "li      4,192                    \n\t"  /* Indexing values */
+               "stfpdx  12,3,%[payload]       \n\t"  /* Q12 store */
+               "li      3,208                   \n\t"  /* Indexing values */
+               "stfpdx  13,4,%[payload]       \n\t"  /* Q13 store */
+               "li      4,224                    \n\t"  /* Indexing values */
+               "stfpdx  14,3,%[payload]       \n\t"  /* Q14 store */
+               "stfpdx  15,4,%[payload]       \n\t"  /* Q15 store */
+                     : [payload] "=b" (payload),         /* outputs */
+                       [src_key] "=b" (src_key),
+                       [table_offset] "=b" (table_offset),
+                       "=m" (*payload),
+                       "=m" (*lnkhdrc)
+                     :            /* Inputs */
+                       [port]            "b" (port) ,
+                       [lnkhdr] "b" (lnkhdrc) ,
+                       [payload_table] "b" (payload_table) ,
+                       [table_index_mask] "b" (table_index_mask) ,
+                       [destport] "b" (destport)
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14",
+                       "fr15",
+                       "r3" , "r4", "r5"
+                       );
+           TRACEN(k_t_fifocontents, "bgcol_payload_receive256 table_offset=%08x payload=%p\n src_key=%08x",table_offset,payload,src_key) ;
+    return src_key ;
+  }
+/**********************************************************************
+ * Receive and transmit
+ **********************************************************************/
+
+/* #if defined(COLLECTIVE_DELIVER_VIA_TASKLET) */
+/* static void bgcol_receive_proto_tasklet_handler(unsigned long dummy) */
+/*   { */
+/*     struct bg_col *bgcol = __bgcol; */
+/*     struct sk_buff *skb = skb_dequeue(&bgcol->fragskb_list_rcv); */
+/*  */
+/*     TRACE("bgnet: (>)bgcol_receive_proto_tasklet_handler"); */
+/*  */
+/*     while( skb ) */
+/*       { */
+//         /*  deliver to upper protocol layers */
+/*         struct bglink_hdr_col *lnkhdrp = (struct bglink_hdr_col *)&(skb->cb) ; */
+/*         struct bglink_proto *proto; */
+/*         proto = bgcol_find_linkproto(lnkhdrp->lnk_proto); */
+/*         if (proto) */
+/*           { */
+/*  */
+/*             TRACE("Handed to proto rcv=%p", proto->rcv) ; */
+/*             TRACE("hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x proto=%x", lnkhdrp->conn_id, lnkhdrp->this_pkt, lnkhdrp->total_pkt, lnkhdrp->dst_key, lnkhdrp->src_key, lnkhdrp->lnk_proto); */
+/*             dump_skb_partial(skb,64) ; */
+/*             TRACE("proto->rcv=%p skb=%p lnkhdrp=%p proto=%p", */
+/*                 proto->rcv,skb, lnkhdrp, proto */
+/*                 ) ; */
+/*             (void) proto->rcv(skb, lnkhdrp, proto); */
+/*           } */
+/*         else */
+/*           { */
+/*               dump_skb_partial(skb,64); */
+/*               TRACE("bgcol: unsupported link protocol (%p) %x", proto, lnkhdrp->lnk_proto); */
+/*               dev_kfree_skb(skb); */
+/*           } */
+/*         skb = skb_dequeue(&bgcol->fragskb_list_rcv) ; */
+/*       } */
+/*  */
+/*     TRACE("bgnet: (<)bgcol_receive_proto_tasklet_handler"); */
+/*  */
+/*   } */
+/*  */
+/* static DECLARE_TASKLET(bgcol_receive_proto_tasklet,bgcol_receive_proto_tasklet_handler,0); */
+/* #endif */
+
+static inline void bgcol_vacate_slot(struct bg_col *bgcol, unsigned int slot)
+  {
+    bgcol->per_eth_table[slot].payload = (void *)0xffffffff ;  /*  so we get a trap if we try to store through it */
+    bgcol->per_eth_table[slot].expect = 0xffffffff ;
+    bgcol->skb_rcv_table[slot] = NULL ;
+    TRACEN(k_t_general,"Slot %d vacated",slot );
+  }
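+/* A vacant slot carries 0xffffffff in both fields: the bogus payload pointer
+ * traps any stray store through it, and 'expect' can never match a real
+ * (conn_id, seq, total_pkt) word, so packets for a vacant slot always drop
+ * out of the fast receive loop into the diagnosis path. */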
+
+
+static void init_ethkey_table(struct bg_col *bgcol)
+  {
+  int x ;
+  for( x = 0 ; x < k_ethkey_table_size ; x += 1)
+    {
+      bgcol_vacate_slot(bgcol,x) ;
+    }
+  }
+
+#if defined(KEEP_LNKHDR_TRAIL)
+static struct bglink_hdr_col lnkhdr_trail[k_lnkhdr_trail_length] ;
+static unsigned int lnkhdr_trail_index ;
+static unsigned int lnkhdr_trail_shown_index ;
+static int trail_shown_count ;
+
+static void record_lnkhdr_trail(struct bglink_hdr_col *lnkhdr)
+  {
+    lnkhdr_trail[lnkhdr_trail_index & (k_lnkhdr_trail_length-1)] = *lnkhdr ;
+    lnkhdr_trail_index += 1 ;
+  }
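+/* The trail is a ring buffer indexed with '& (k_lnkhdr_trail_length-1)';
+ * this only addresses correctly if k_lnkhdr_trail_length is a power of two. */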
+
+static void show_lnkhdr_trail(const char * reason)
+  {
+    if( trail_shown_count < k_lnhhdr_ffdc_limit )
+      {
+        unsigned int trail_count = (k_lnkhdr_trail_display_length > lnkhdr_trail_index) ? lnkhdr_trail_index : k_lnkhdr_trail_display_length ;
+        unsigned int current_index = lnkhdr_trail_index - trail_count ;
+        printk(KERN_INFO "lnkhdr trail to packet %d, reason <%s>:\n", lnkhdr_trail_index, reason) ;
+        while( current_index != lnkhdr_trail_index)
+          {
+            unsigned int x = ( current_index & (k_lnkhdr_trail_length-1)) ;
+            if( current_index >= lnkhdr_trail_shown_index )
+        	    {
+		    printk(KERN_INFO "lnkhdr_trail[%02x] dst_key=%08x src_key=%08x conn_id=%04x this_pkt=%02x total_pkt=%02x lnk_proto=%04x opt=[%02x:%02x:%02x]\n",
+			(current_index-lnkhdr_trail_index) & 0xff,
+			lnkhdr_trail[x].dst_key,
+			lnkhdr_trail[x].src_key,
+			lnkhdr_trail[x].conn_id,
+			lnkhdr_trail[x].this_pkt,
+			lnkhdr_trail[x].total_pkt,
+			lnkhdr_trail[x].lnk_proto,
+			lnkhdr_trail[x].opt.opt_net.option,
+			lnkhdr_trail[x].opt.opt_net.pad_head,
+			lnkhdr_trail[x].opt.opt_net.pad_tail
+		    ) ;
+        	    }
+            current_index += 1 ;
+
+          }
+        trail_shown_count += 1 ;
+        lnkhdr_trail_shown_index = lnkhdr_trail_index ;
+      }
+  }
+
+static void show_payload(void * payload, unsigned int mioaddr)
+{
+	    if( trail_shown_count < k_lnhhdr_ffdc_limit )
+		    {
+          unsigned int *pi=(unsigned int *) payload ;
+          unsigned int x ;
+          for(x=0; x<240/sizeof(unsigned int)-9; x+=8)
+            {
+              printk(KERN_INFO "payload [%08x %08x %08x %08x %08x %08x %08x %08x]\n",
+                  pi[x],pi[x+1],pi[x+2],pi[x+3],pi[x+4],pi[x+5],pi[x+6],pi[x+7]
+                                                                           ) ;
+            } ;
+          printk(KERN_INFO "payload [%08x %08x %08x %08x]\n",
+              pi[x],pi[x+1],pi[x+2],pi[x+3]
+                                       ) ;
+		    }
+}
+#else
+static inline void record_lnkhdr_trail(struct bglink_hdr_col *lnkhdr)
+  {
+
+  }
+static inline void show_lnkhdr_trail(const char * reason)
+  {
+    TRACE("%s", reason);
+  }
+static void show_payload(void * payload, unsigned int mioaddr)
+{
+	    TRACE("payload=%p mioaddr=0x%08x", payload, mioaddr);
+}
+
+#endif
+
+#if !defined(COLLECTIVE_DELIVER_VIA_TASKLET)
+static inline void bgcol_deliver_directly(struct bg_col *bgcol,struct bglink_hdr_col *lnkhdr, struct sk_buff *skb)
+  {
+	    struct bglink_proto *proto;
+
+	     /*  deliver to upper protocol layers */
+	    proto = bglink_find_proto(lnkhdr->lnk_proto);
+	  if(!bgcol->deliver_without_workqueue)
+		  {
+			  TRACEN(k_t_general,"Delivering skb=%p via work queue",skb) ;
+			  if( proto)
+				  {
+					  bgcol_deliver_via_workqueue(skb, lnkhdr,proto) ;
+				  }
+			  else
+				  {
+					  dump_skb_partial(skb,64);
+					  TRACEN(k_t_request,"(!!!) bgcol: unsupported link protocol (%p) %x", proto, lnkhdr->lnk_proto);
+					  dev_kfree_skb(skb);
+					  replenish_list_for_filling(bgcol) ;
+				  }
+
+		  }
+	  else
+		  {
+		    if (proto)
+		      {
+			TRACE("Handed to proto=%p", proto) ;
+			TRACE("hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x proto=%x", lnkhdr->conn_id, lnkhdr->this_pkt, lnkhdr->total_pkt, lnkhdr->dst_key, lnkhdr->src_key, lnkhdr->lnk_proto);
+			dump_skb_partial(skb,64) ;
+			TRACE("proto->col_rcv=%p skb=%p lnkhdr=%p proto=%p",
+			    proto->col_rcv,skb, lnkhdr, proto
+			    ) ;
+			(void) proto->col_rcv(bgcol,skb, lnkhdr, proto);
+		/*         enable_kernel_fp() ; */
+		      }
+		    else
+		      {
+			  dump_skb_partial(skb,64);
+			  TRACE("bgcol: unsupported link protocol (%p) %x", proto, lnkhdr->lnk_proto);
+			  dev_kfree_skb(skb);
+		      }
+		    replenish_list_for_filling(bgcol) ;
+		  }
+  }
+#endif
+
+static char scratch_payload[COL_FRAGPAYLOAD] __attribute__((aligned(16)));
+static inline int bgcol_receive_mark3(struct bg_col *bgcol, unsigned channel,unsigned int status_in, unsigned int mioaddr)
+{
+    void *payloadptr;
+/*     union bgcol_status status; */
+    unsigned int unload_count ;
+    unsigned int unload_index ;
+    struct bglink_hdr_col lnkhdr __attribute__((aligned(8)));
+    double *lnkhdrd = (double *)&lnkhdr ;
+    unsigned int total_unload_count = 0 ;
+    unsigned int end_frame_hint = 0 ;
+#if defined(KEEP_RECV_TOTAL)
+    unsigned int recv_total = bgcol->recv_total ;
+#endif
+/*     bgcol->recv_total += total_unload_count ; */
+
+/*     status.raw = status_in ; */
+/*     unload_count = status.x.rcv_hdr ; */
+    unload_count = bgcol_status_rcv_hdr(status_in) ;
+/*     bgcol->recv_fifo_histogram2[unload_count & 0x0f ] += 1; */
+    TRACE("status=%08x", status_in);
+
+#if defined(KEEP_RECV_TOTAL)
+    bgcol->recv_total = recv_total + unload_count ;  /*  Not exact, for the case where we exit the loop early, but good enough for statistics */
+#endif
+#if defined(COLLECTIVE_ONEPASS_TXRX)
+    if(unload_count > 0)
+#else
+    while(unload_count > 0)
+#endif
+      {
+        unsigned int received_src_key ;
+        unsigned int slot ;
+        unsigned int received_seq ;
+        unsigned int expected_seq ;
+
+        unsigned int seq_next_packet ;
+        unsigned int seq_tot_packet ;
+        unsigned char * deposited_payload  ;
+        unsigned char * next_payload  ;
+        unsigned int received ;
+        unsigned int expected ;
+
+        /* Load up the FP regs with the first packet from the FIFO, and get ready to analyze it */
+        received_src_key=bgcol_payload_load2((void*)(mioaddr + _BGP_TRx_DR),lnkhdrd,lnkhdrd+1,(void*)(mioaddr + _BGP_TRx_HR)) ;
+#if defined(KEEP_LNKHDR_TRAIL)
+        record_lnkhdr_trail(&lnkhdr) ;
+#endif
+        slot = received_src_key & (k_ethkey_table_size-1) ;
+        received = ((unsigned int *)&lnkhdr)[2] ;
+        expected = bgcol->per_eth_table[slot].expect ;
+        /* Check whether this is the 'expected' next packet, given previous packets from this source */
+        received_seq = ( received >> 8 ) & 0xff ;
+        expected_seq = ( expected >> 8 ) & 0xff ;
+        seq_tot_packet = expected & 0xff ;
+        seq_next_packet = expected_seq + 1 ;
+
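+        /* 'expect' packs the per-source frame state into one word so a single
+         * compare validates a packet: bits 31..16 hold the conn_id, bits 15..8
+         * the next expected sequence number, bits 7..0 the total packet count
+         * (a new frame sets it to (conn_id << 16) | (1 << 8) | total_pkt
+         * below).  Adding 0x0100 advances the expected sequence number. */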
+        bgcol->per_eth_table[slot].expect = expected + 0x0100 ;
+
+        deposited_payload = bgcol->per_eth_table[slot].payload ;
+        next_payload = deposited_payload + COL_FRAGPAYLOAD ;
+
+        TRACEN(k_t_detail,"slot=%08x seq(%x,%x) re(%08x,%08x)",
+             slot,
+             received_seq, expected_seq,
+             received, expected
+             ) ;
+
+          if( ( received == expected ) && (seq_next_packet < seq_tot_packet) )
+          {
+            bgcol->per_eth_table[slot].payload = next_payload ;
+            for(unload_index=1;unload_index<unload_count;unload_index+=1)
+            {
+               /*  This is the busiest loop. Keep it simple .... */
+               /*  save the payload to store, and load up the next one */
+              received_src_key=bgcol_payload_storeload2(
+                  (void*)(mioaddr + _BGP_TRx_DR),
+                  lnkhdrd,
+                  lnkhdrd+1,
+                  deposited_payload,
+                  (void*)(mioaddr + _BGP_TRx_HR)) ;
+#if defined(KEEP_LNKHDR_TRAIL)
+              record_lnkhdr_trail(&lnkhdr) ;
+#endif
+              slot = received_src_key & (k_ethkey_table_size-1) ;
+              received = ((unsigned int *)&lnkhdr)[2] ;
+              expected = bgcol->per_eth_table[slot].expect ;
+              /* Check whether this is the 'expected' next packet, given previous packets from this source */
+              expected_seq = ( expected >> 8 ) & 0xff ;
+              seq_tot_packet = expected & 0xff ;
+              deposited_payload = bgcol->per_eth_table[slot].payload ;
+
+              seq_next_packet = expected_seq + 1 ;
+
+
+              next_payload = deposited_payload + COL_FRAGPAYLOAD ;
+
+              TRACEN(k_t_detail,"slot=%08x seq(%x,%x) re(%08x,%08x)",
+                   slot,
+                   received_seq, expected_seq,
+                   received, expected
+                   ) ;
+              if( received != expected ) break ;
+              bgcol->per_eth_table[slot].payload = next_payload ;
+              bgcol->per_eth_table[slot].expect = expected + 0x0100 ;
+              if( seq_next_packet >= seq_tot_packet ) break ;
+            }
+            total_unload_count += unload_index ;
+          }
+        else
+          {
+            total_unload_count += 1 ;
+          }
+
+        TRACE("slot=%08x seq(%x,%x) re(%08x,%08x)",
+             slot,
+             received_seq, expected_seq,
+             received, expected
+             ) ;
+
+/* We have registers loaded, and we have exited the busy loop for one of a number of reasons
+ * 1) This is the last packet of a frame
+ * 2) We have unloaded everything that the status word said was in the FIFO
+ * 3) This packet doesn't continue the previous frame from this source properly
+ *   a) This is the first packet of a frame, and there was no frame in progress
+ *   b) The sender has sent packets in a sequence that we do not understand
+ *
+ * Diagnose which, and handle appropriately
+ */
+        end_frame_hint = 0 ;
+        if( received == expected && ((unsigned int)deposited_payload) != 0xffffffff )
+          {
+             /*  Things are going well, put the payload down into memory, and work out what to do with it */
+            TRACE("Putting payload down at %p", deposited_payload);
+
+            bgcol_payload_store(deposited_payload) ;
+            if( seq_next_packet >= seq_tot_packet)
+              {
+                 /*  Frame is complete. Deliver it up a layer */
+                struct sk_buff *skb = bgcol->skb_rcv_table[slot] ;
+                if( seq_next_packet > seq_tot_packet)
+                	{
+                		TRACEN(k_t_request,"(!!!) seq_next_packet=%d seq_tot_packet=%d",
+                				seq_next_packet,	seq_tot_packet) ;
+                	}
+/*                 BUG_ON(seq_next_packet > seq_tot_packet) ; // we think we checked this as we went along; firewall report here */
+                TRACEN(k_t_general,"Frame is complete");
+    #if defined(COLLECTIVE_DELIVER_VIA_TASKLET)
+                skb_queue_tail(&bgcol->fragskb_list_rcv, skb) ;
+                TRACEN(k_t_general,"scheduling proto tasklet");
+                tasklet_schedule(&bgcol_receive_proto_tasklet);
+    #else
+                bgcol_deliver_directly(bgcol,&lnkhdr, skb) ;
+    #endif
+                 /*  and tag the slot as vacant */
+                bgcol_vacate_slot(bgcol,slot) ;
+                 /*  'break' here should cause the interrupt handler to return */
+                 /*  this CPU can then deliver the frame and the next CPU can take up */
+                 /*  draining the bgcol */
+#if defined(COLLECTIVE_BREAK_ON_FRAME)
+                break ;
+#endif
+                end_frame_hint = 1 ;
+              }
+            }
+        else
+          {
+            if( received == expected )
+        	    {
+        		     /*  Packet looked good, but destination address was 0xffffffff. Diagnose it ... */
+        		TRACEN(k_t_protocol,"Unexpected dest address 0xffffffff, received=0x%08x", received) ;
+                        TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key);
+        	    }
+             /*  The packet wasn't in sequence with previous packets from the source. Look to see if we can handle it */
+            if( 0 == lnkhdr.this_pkt )
+              {
+                if ( lnkhdr.total_pkt * COL_FRAGPAYLOAD + COL_SKB_ALIGN <= bgcol->mtu)
+                  {
+                    if( 1 == lnkhdr.total_pkt )
+                      {
+                        struct sk_buff *skb = bgcol->skb_mini ;
+                         /*  We have a single-packet frame. Use 'skb_mini' and send it on */
+                        if( skb )
+                          {
+                            skb_reserve(skb, COL_SKB_ALIGN - ((unsigned int)(skb->data)) % COL_SKB_ALIGN);
+                            payloadptr = skb_put(skb, COL_FRAGPAYLOAD);
+                            TRACE("Putting payload in mini slot at %p", payloadptr);
+                            bgcol_payload_store(payloadptr) ;
+                #if defined(COLLECTIVE_DELIVER_VIA_TASKLET)
+                            struct bglink_hdr_col *kept_lnkhdrp = (struct bglink_hdr_col *)(&(skb->cb)) ;
+                            *kept_lnkhdrp = lnkhdr ;
+                            skb_queue_tail(&bgcol->fragskb_list_rcv, skb) ;
+                            TRACE("scheduling proto tasklet");
+                            tasklet_schedule(&bgcol_receive_proto_tasklet);
+                #else
+                            bgcol_deliver_directly(bgcol,&lnkhdr, skb) ;
+                #endif
+                          }
+/*                         bgcol->skb_mini = alloc_skb(COL_FRAGPAYLOAD + COL_SKB_ALIGN , GFP_KERNEL | GFP_ATOMIC ) ; */
+                        bgcol->skb_mini = take_skb_from_list_for_filling(bgcol) ;
+                        end_frame_hint = 1 ;
+                         /*  If there was a partial frame in the underneath skbuff, it can be left for */
+                         /*  completion later. This doesn't seem likely; but the receive logic will work for it. */
+                      }
+                    else
+                      {
+                             /*  Put the payload down at the beginning of the skb we had up our sleeve */
+                            struct sk_buff *skb = bgcol->skb_in_waiting ;
+                            if( skb && (skb_tailroom(skb) >= lnkhdr.total_pkt * COL_FRAGPAYLOAD + COL_SKB_ALIGN ) )
+                              {
+                                struct bglink_hdr_col *kept_lnkhdrp ;
+                                int size = lnkhdr.total_pkt * COL_FRAGPAYLOAD ;
+                                skb_reserve(skb, COL_SKB_ALIGN - ((unsigned int)(skb->data)) % COL_SKB_ALIGN);
+                                payloadptr = skb_put(skb, size);
+                                kept_lnkhdrp = (struct bglink_hdr_col *)(&(skb->cb)) ;
+                                *kept_lnkhdrp = lnkhdr ;
+                                TRACE("Putting payload in waiting slot at %p", payloadptr);
+                                bgcol_payload_store(payloadptr) ;
+                              }
+                            else
+                              {
+                                if( skb ) dev_kfree_skb(skb) ;  /*  Maybe someone upped the MTU on us */
+                                skb = NULL ;
+                              }
+/*                             bgcol->skb_in_waiting = alloc_skb( */
+/*                         		    k_use_plentiful_skb ? k_plentiful_skb_size :  bgcol->mtu */
+//                        		    , GFP_KERNEL | GFP_ATOMIC);  /*  And grab a new one */
+                            bgcol->skb_in_waiting = take_skb_from_list_for_filling(bgcol) ;
+                            if( skb )
+                              {
+                             /*  If there's a part-arrived frame, trample it */
+                            if( bgcol->skb_rcv_table[slot] )
+                              {
+                                TRACEN(k_t_protocol,"Dropping previous partial frame");
+                                TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key);
+                                TRACEN(k_t_protocol,"expected slot=%d re=(%08x,%08x)", slot, received, expected);
+                                show_lnkhdr_trail("partial frame") ;
+                                {
+                                  struct bgnet_dev *bgnet = bgcol->bgnet ;
+                                  bgnet->stats.rx_errors += 1;
+                                  bgnet->stats.rx_missed_errors += 1;
+                                }
+
+                                dev_kfree_skb(bgcol->skb_rcv_table[slot]) ;
+                              }
+
+
+
+                              /*  Set things up for the fast loop */
+                             bgcol->skb_rcv_table[slot]=skb ;
+                             bgcol->per_eth_table[slot].payload = payloadptr+COL_FRAGPAYLOAD ;
+                             bgcol->per_eth_table[slot].expect = (lnkhdr.conn_id << 16) | (1 << 8) | (lnkhdr.total_pkt) ;
+                             TRACE("Saved first packet of new frame, next bgcol->per_eth_table[%d]={%p,%08x}", slot, bgcol->per_eth_table[slot].payload,bgcol->per_eth_table[slot].expect);
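+                             /*  For reference, 'expect' packs the fast-loop receive state into one
+                              *  word as (conn_id << 16) | (next_pkt << 8) | total_pkt. Illustratively,
+                              *  conn_id 0x2a awaiting packet 1 of a 5-packet frame gives
+                              *  (0x2a << 16) | (1 << 8) | 5 == 0x002a0105, so a single compare
+                              *  checks both the connection id and the packet ordering. */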
+
+                           }
+
+                    else
+                      {
+                        TRACEN(k_t_protocol,"No skbuff memory available, dropping packet");
+                        TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key);
+                        bgcol->recv_no_skbuff += 1 ;
+                        bgcol->bgnet->stats.rx_dropped += 1;
+                        bgcol->bgnet->stats.rx_errors += 1;
+                      }
+                  }
+                  }
+                else
+                  {
+                    bgcol_payload_store(scratch_payload) ;
+                    TRACEN(k_t_protocol,"Frame larger than MTU, dropping");
+                    show_lnkhdr_trail("Frame larger than MTU") ;
+                    show_payload(scratch_payload,mioaddr) ;
+                    bgcol->bgnet->stats.rx_errors += 1;
+                    bgcol->bgnet->stats.rx_over_errors += 1;
+                  }
+              }
+
+            else
+              {
+                 /*  Unexpected mid-frame packet */
+                bgcol_payload_store(scratch_payload) ;
+                TRACEN(k_t_protocol,"Unexpected packet from middle of frame, dropping");
+                show_lnkhdr_trail("Unexpected packet from middle of frame") ;
+                show_payload(scratch_payload,mioaddr) ;
+                bgcol->bgnet->stats.rx_errors += 1;
+                bgcol->bgnet->stats.rx_fifo_errors += 1;
+              }
+              }
+
+
+         /*  We have handled the reason why the 'fast loop' dropped out. Refresh the status */
+#if !defined(COLLECTIVE_ONEPASS_TXRX)
+           /*  and redrive the 'fast loop' if there is anything in the fifo. */
+/*           status.raw = in_be32_nosync((unsigned*)(mioaddr + _BGP_TRx_Sx)); */
+/*           unload_count = status.x.rcv_hdr ; */
+          unload_count = bgcol_status_rcv_hdr(*(unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+/*           bgcol->recv_fifo_histogram3[unload_count & 0x0f ] += 1; */
+#endif
+      }
+/*     bgcol->recv_total += total_unload_count ; */
+/*  Return the number of packets we unloaded, and set the high bit if we have */
+/*  reason to think there's nothing coming in any time soon */
+    return total_unload_count
+         | ( ( end_frame_hint && (unload_count == total_unload_count ) )
+              ? 0x80000000 : 0
+           ) ;
+
+}
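+/*  A caller might decode that return value like this (sketch; variable names
+ *  are illustrative):
+ *      unsigned rc = bgcol_receive_mark3(bgcol, chan, status, mioaddr) ;
+ *      unsigned unloaded    = rc & 0x7fffffff ;   -- packets taken from the fifo
+ *      unsigned likely_idle = rc & 0x80000000 ;   -- end-of-frame seen and fifo drained
+ *  bgcol_duplex_slih_eth below folds the idle bit into its 'rcrset' bookkeeping. */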
+
+
+
+/*  Attempting to free skbuffs in an interrupt handler doesn't work well, some 'destructor' callbacks */
+/*  protest if they are driven at interrupt level. So we queue them to be freed later. */
+#ifndef COLLECTIVE_TRANSMIT_WITH_SLIH
+static void bgcol_completed_buffer_handler(unsigned long dummy)
+  {
+    struct bg_col* bgcol=__bgcol ;
+    TRACE("(>)[%s:%d]",__func__, __LINE__) ;
+     /*  Free any skbufs the transmit interrupt handler has finished with */
+      {
+        struct sk_buff *freeskb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+        while (freeskb)
+          {
+            TRACEN(k_t_irqflow,"Freeing skb=%p", freeskb) ;
+            dump_skb_partial(freeskb,64) ;
+            dev_kfree_skb(freeskb) ;
+            freeskb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+          }
+      }
+    TRACE("(<)[%s:%d]",__func__, __LINE__) ;
+  }
+static DECLARE_TASKLET(bgcol_completed_buffer_tasklet,bgcol_completed_buffer_handler,0) ;
+#endif
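+/*  Note: mainline drivers would typically call dev_kfree_skb_irq() or
+ *  dev_kfree_skb_any() here; the explicit free-list plus tasklet above achieves
+ *  the same deferral, at the cost of holding the buffers until the tasklet runs. */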
+
+/* static char local_payload[COL_FRAGPAYLOAD] __attribute__((aligned(16))) ; */
+static void bgcol_xmit_next_skb(struct bg_col* bgcol)
+  {
+	  if(! skb_queue_empty(&(bgcol->skb_list_xmit)))
+		  {
+			    struct sk_buff *skb = skb_dequeue(&(bgcol->skb_list_xmit) ) ;
+			    struct bgnet_dev *bgnet = bgcol->bgnet ;
+			    unsigned int i_am_compute_node = (bgnet->bgcol_vector ^ bgnet->eth_bridge_vector) & 0x00ffffff ;
+			    TRACE("bgcol_xmit_next_skb bgcol_vector=0x%08x eth_bridge_vector=0x%08x i_am_compute_node=%08x",
+				bgnet->bgcol_vector,bgnet->eth_bridge_vector,i_am_compute_node
+				) ;
+			    bgcol->skb_current_xmit=skb ;
+			    if( skb )
+			      {
+				unsigned long offset;
+				union bgcol_header dest ;
+				struct ethhdr *eth = (struct ethhdr *)skb->data;
+				 /*  Work out what bgcol header to use for the new skb */
+
+				TRACEN(k_t_irqflow,"%s: skb=%p, eth=%p, bgnet=%p, len=%d", __FUNCTION__, skb, eth, bgnet, skb->len);
+				dump_skb_partial(skb, 64) ;
+				dest.raw = 0 ;
+				dest.p2p.pclass = bgnet->bgcol_route;
+
+				if (is_broadcast_ether_addr(eth->h_dest)) {
+					     /*  May have to go to the IO node for broadcasting */
+					    if(0 == i_am_compute_node)
+					      {
+						TRACE("broadcasting from IO node") ;
+						dest.bcast.tag = 0;
+						bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+					      }
+					    else
+					      {
+						TRACE("sending to IO node for broadcast") ;
+						dest.p2p.vector = bgnet->eth_bridge_vector;
+						dest.p2p.p2p = 1;
+						bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_reflector_protocol;
+					      }
+				} else {
+				      TRACE("bgcol_xmit_next_skb bgnet->bgcol_vector=%08x bgnet->eth_bridge_vector=%08x",bgnet->bgcol_vector,bgnet->eth_bridge_vector) ;
+				      if (bgnet->eth_mask == 0 ||
+					  ((bgnet->eth_mask & *(unsigned int *)(&eth->h_dest[0])) ==
+					   (bgnet->eth_local))) {
+					     if(0 == i_am_compute_node)
+					       {
+						 TRACE("sending to compute node") ;
+						 dest.p2p.vector = *(unsigned int *)(&eth->h_dest[2]);
+						 bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+					       }
+					     else
+					       {
+						 dest.p2p.vector = bgnet->eth_bridge_vector;
+						 if(( bgnet->eth_bridge_vector ^ (*(unsigned int *)(&eth->h_dest[2]))) & 0x00ffffff)
+						   {
+						     TRACE("sending to IO node for reflection") ;
+						     bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_reflector_protocol;
+						   }
+						 else
+						   {
+						     TRACE("sending to IO node as final destination") ;
+						     bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+						   }
+					       }
+				      } else {
+					  TRACE("sending to IO node for onward transmission") ;
+					  dest.p2p.vector = bgnet->eth_bridge_vector;
+					  bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+				      }
+				    dest.p2p.p2p = 1;
+				}
+
+				/* initialize link layer */
+				bgcol->lnkhdr_xmit.dst_key = eth_to_key(eth->h_dest);
+				bgcol->lnkhdr_xmit.src_key = bgnet->bgcol_vector;
+
+				/* pad out head of packet so it starts at a 16 Byte boundary */
+				offset = ((unsigned long)skb->data) & 0xf;
+				bgcol->lnkhdr_xmit.opt.opt_net.pad_head = offset;
+				bgcol->lnkhdr_xmit.opt.opt_net.pad_tail = (COL_FRAGPAYLOAD - ((skb->len + offset) % COL_FRAGPAYLOAD)) % COL_FRAGPAYLOAD;
+				bgcol->current_xmit_data=skb->data - offset ;
+				bgcol->current_xmit_len=skb->len + offset ;
+				 /*  prepare link header */
+				bgcol->lnkhdr_xmit.conn_id = bgcol->curr_conn++;
+				bgcol->lnkhdr_xmit.total_pkt = ((skb->len + offset - 1) / COL_FRAGPAYLOAD) + 1;
+				bgcol->lnkhdr_xmit.this_pkt = 0;
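+				/*  Worked example, assuming COL_FRAGPAYLOAD == 256: an skb with
+				 *  skb->data ending in ...4 and skb->len == 1500 gives
+				 *      offset (pad_head) = 4
+				 *      pad_tail  = (256 - (1504 % 256)) % 256 = 32
+				 *      total_pkt = ((1504 - 1) / 256) + 1     = 6
+				 *  i.e. six fixed-size fragments carrying 4 + 1500 + 32 = 1536 bytes;
+				 *  the receiver strips pad_head/pad_tail to recover the 1500. */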
+				TRACE("%s: dst_key=%08x src_key=%08x lnk_proto=%d conn_id=%d total_pkt=%d pad_head=%d pad_tail=%d", __FUNCTION__,
+				    bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, bgcol->lnkhdr_xmit.lnk_proto, bgcol->lnkhdr_xmit.conn_id, bgcol->lnkhdr_xmit.total_pkt, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail );
+				bgcol->fragidx_xmit = 0 ;
+				bgcol->dest_xmit = dest ;
+				    TRACEN(k_t_lowvol,"bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d",
+					  bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail);
+			      }
+		  }
+  }
+
+/*  Push packets in until we finish the skb or the fifo fills */
+/*  Returns 2 if we would like to push something into the fifo but cannot because it is full */
+/*  Returns 1 otherwise: we made progress (or flushed a misaligned skb) and a redrive may be productive */
+static inline int bgcol_xmit_push_packets(struct bg_col* bgcol,
+/*     struct bgcol_channel *chn, */
+    unsigned int status_in, unsigned int mioaddr)
+  {
+    unsigned int fragidx ;
+    struct bgnet_dev *bgnet = bgcol->bgnet ;
+    union bgcol_status status;
+    union bgcol_header dest ;
+    struct sk_buff *skb = bgcol->skb_current_xmit ;
+    void *payloadptr = bgcol->current_xmit_data ;
+    int len = bgcol->current_xmit_len ;
+    int fullness ;
+    int initial_fragidx ;
+    double *lnkhdrxd = (double *) &(bgcol->lnkhdr_xmit) ;
+
+    dest = bgcol->dest_xmit ;
+    fragidx = bgcol->fragidx_xmit ;
+    TRACE("bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d, fragidx=%d",
+          bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail, fragidx);
+    dump_skb_partial(skb,64) ;
+    if( 0 != ( ((unsigned)(payloadptr) ) & 0x0f ) )
+	    {
+		    TRACEN(k_t_request, "Misaligned payloadptr=%p", payloadptr) ;
+	    }
+/*     BUG_ON(0 != ( ((unsigned)(payloadptr) ) & 0x0f ) ) ; */
+    if( 0 == ( ((unsigned)payloadptr) & 0x0f ) )
+      {
+       /*  Have we got space in the FIFO ? */
+      status.raw = status_in ;
+      fullness = status.x.inj_hdr ;
+/*       bgcol->send_fifo_histogram[fullness] += 1 ; // fullness statistics */
+      TRACE("bgnet xmit: status=%08x",status.raw);
+      if (fullness >= COL_FIFO_SIZE )
+      {
+         /*  No room. Upper routines will retry when appropriate */
+        TRACEN(k_t_irqflow,"Send FIFO full");
+        TRACEN(k_t_irqflow,"bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d, fragidx=%d",
+              bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail, fragidx);
+        return 2 ;
+      }
+       /*  update fragment index */
+      bgcol->lnkhdr_xmit.this_pkt = fragidx;
+      initial_fragidx = fragidx ;
+#if defined(COLLECTIVE_ONEPASS_TXRX)
+      if( len >= COL_FRAGPAYLOAD )
+#else
+      while( len >= COL_FRAGPAYLOAD && fullness < COL_FIFO_SIZE)
+#endif
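+        /*  In COLLECTIVE_ONEPASS_TXRX builds the outer loop body runs once and the
+         *  fifo status is never re-polled (the inner loop still drains the skb
+         *  against the locally tracked 'fullness'); otherwise the status register
+         *  is re-read at the bottom and the outer loop repeats. */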
+        {
+          bgcol_payload_inject_load2(lnkhdrxd,lnkhdrxd+1, payloadptr) ;
+          dump_bgcol_packet(&bgcol->lnkhdr_xmit, payloadptr) ;
+          fragidx += 1 ;
+          bgcol->lnkhdr_xmit.this_pkt = fragidx;
+          *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) =  dest.raw;
+          len -= COL_FRAGPAYLOAD;
+          payloadptr += COL_FRAGPAYLOAD;
+          fullness += 1;
+          while( len >= COL_FRAGPAYLOAD && fullness < COL_FIFO_SIZE)
+              {
+                 /*  We have full packets, and space in the fifo for them */
+                TRACE("bgcol: ptr=%p, len=%d", payloadptr, len);
+                bgcol_payload_inject_storeload2((void*)(mioaddr + _BGP_TRx_DI),lnkhdrxd,lnkhdrxd+1, payloadptr) ;
+                dump_bgcol_packet(&bgcol->lnkhdr_xmit, payloadptr) ;
+                fragidx += 1 ;
+                bgcol->lnkhdr_xmit.this_pkt = fragidx;
+                 /*  write destination header */
+                *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) =  dest.raw ;
+                len -= COL_FRAGPAYLOAD;
+                payloadptr += COL_FRAGPAYLOAD;
+                fullness += 1;
+             }
+          bgcol_payload_inject_store((void*)(mioaddr + _BGP_TRx_DI)) ;
+#if !defined(COLLECTIVE_ONEPASS_TXRX)
+          status.raw = in_be32_nosync((unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+          fullness = status.x.inj_hdr ;
+#endif
+        }
+      bgnet->stats.tx_bytes += COL_FRAGPAYLOAD*(fragidx-initial_fragidx) ;
+
+       /*  Either the FIFO is full, or we are near (or at) the end of the skb-worth of data */
+       /*  Stuff one packet in. */
+
+
+      if( len > 0 && fullness < COL_FIFO_SIZE )
+          {
+               /*  If the last packet doesn't cross a page boundary, we can send it with */
+               /*  whatever is in memory after it, and we won't get a SEGV. */
+              TRACE("bgcol: ptr=%p, len=%d", payloadptr, len);
+              bgnet->stats.tx_bytes += len;
+
+                   /*  write destination header */
+/*               enable_kernel_fp() ; */
+                  *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) =  dest.raw;
+/*                   bgcol_payload_inject_load2partial(lnkhdrxd,lnkhdrxd+1, payloadptr,(len+15)/16) ; */
+                      bgcol_payload_inject_load2(lnkhdrxd,lnkhdrxd+1, payloadptr) ;
+                      bgcol_payload_inject_store((void*)(mioaddr + _BGP_TRx_DI)) ;
+
+              len=0 ;
+          }
+
+      }
+    else
+      {
+         /*  The packet was misaligned. This will cause the skb to be flushed and we will get a */
+         /*  fresh one next time. */
+        len=0 ;
+      }
+    TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit);
+
+     /*  Did we complete the skb ? */
+    if( 0 == len )
+      {
+          /*  Yes, we can free this one and upper layers will queue the next one */
+        TRACEN(k_t_irqflow,"bgcol: finished skb=%p", skb);
+        bgnet->stats.tx_packets++;
+        dump_skb_partial(skb,64);
+/*  Linux seems unhappy freeing skb's in an interrupt handler */
+#if defined(COLLECTIVE_TRANSMIT_WITH_SLIH)
+#if defined(COLLECTIVE_XMITTER_FREES)
+        skb_queue_tail(&bgcol->skb_list_free,skb) ;
+#else
+        dev_kfree_skb(skb) ;
+#endif
+#else
+        skb_queue_tail(&bgcol->skb_list_free,skb) ;
+        tasklet_schedule(&bgcol_completed_buffer_tasklet) ;
+#endif
+        bgcol->skb_current_xmit=NULL ;
+      }
+    else
+      {
+         /*  No; remember the link header for next time */
+        TRACE("bgcol: more to go for skb=%p , fragidx=%d, len=%d", skb, fragidx, skb->len);
+        bgcol->fragidx_xmit = fragidx ;
+        bgcol->current_xmit_len=len ;
+        bgcol->current_xmit_data=payloadptr ;
+      }
+    TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit);
+    return 1 ;  /* Indicate that a redrive might be productive */
+  }
+
+
+/*  One pass at filling the transmit FIFO. */
+/*  Returns 2 if we would like to push something into the fifo but cannot because it is full */
+/*  Returns 1 if we pushed something into the fifo (and we would like a redrive because we finished a frame) */
+/*  Returns 0 if all the data has been put in the FIFO (and a redrive would be unproductive unless someone queues a frame for sending) */
+/*   An upper layer must redrive or enable interrupts if it gets a non-zero. */
+static inline int bgcol_xmit_onepass(struct bg_col *bgcol, unsigned int status_in, unsigned int mioaddr)
+  {
+/*     unsigned chnidx = bgcol->bgnet_channel ; */
+    struct sk_buff *skb = bgcol->skb_current_xmit ;
+    if( NULL == skb)
+      {
+	struct bgnet_dev *bgnet = bgcol->bgnet ;
+        if( bgnet)
+          {
+	    bgcol_xmit_next_skb(bgcol) ;
+	    skb = bgcol->skb_current_xmit ;
+	    if( NULL == skb )
+	      {
+		TRACEN(k_t_irqflow,"bgcol: no more to send");
+		return 0 ;
+	      }
+          }
+        else
+          {
+            TRACEN(k_t_irqflow,"bgcol: bgnet is not ready");
+            return 0 ;
+          }
+      }
+     /*  By this stage we should have a viable skb and a viable link header */
+    return bgcol_xmit_push_packets(bgcol,
+        status_in,
+        mioaddr) ;
+  }
+
+/*  'full duplex' SLIH, receiving and sending */
+/*  Number of times to spin before concluding there isn't anything on the bgcol */
+enum {
+  k_unproductive_receive_threshold = 10 ,
+  k_unproductive_transmit_threshold = 10
+};
+
+void bgcol_duplex_slih_eth(unsigned long dummy)
+  {
+    struct bg_col *bgcol = __bgcol ;
+    struct bgcol_channel *chn = &bgcol->chn[bgcol->bgnet_channel];
+    unsigned int mioaddr=chn->mioaddr ;
+    unsigned int status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+    unsigned int rcr ;
+    unsigned int rcx ;
+    unsigned int productive=0 ;
+    unsigned int unproductive_receive_count=0 ;
+    unsigned int unproductive_transmit_count=0 ;
+    unsigned int rcrset = 0 ;
+    unsigned int skip_fp_save = bgcol->skip_fp_save ;
+
+    enable_kernel_fp() ;
+    if (k_force_fp_save || (k_allow_fp_save && ! skip_fp_save))
+      {
+        dh_reg_save(&dh_savearea) ;
+      }
+
+#if defined(KEEP_BG_COL_STATISTICS)
+    bgcol->send_fifo_histogram0[(status >> 16) & 0x0f] += 1 ;
+    bgcol->recv_fifo_histogram0[(status      ) & 0x0f] += 1 ;
+#endif
+    for(;;)
+      {
+        TRACEN(k_t_irqflow,"status=%08x", status);
+        rcr = bgcol_receive_mark3(bgcol, bgcol->bgnet_channel, status, mioaddr) ;
+#if defined(KEEP_BG_COL_STATISTICS) && defined(EXTRA_TUNING)
+        {
+        	unsigned int extra_status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+        	    bgcol->send_fifo_histogram2[(extra_status >> 16) & 0x0f] += 1 ;
+        	    bgcol->recv_fifo_histogram2[(extra_status      ) & 0x0f] += 1 ;
+
+        }
+#endif
+        rcx = bgcol_xmit_onepass(bgcol, status, mioaddr) ;
+        TRACEN(k_t_irqflow,"rcr=0x%08x rcx=0x%08x", rcr, rcx);
+        status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+#if defined(KEEP_BG_COL_STATISTICS)
+    bgcol->send_fifo_histogram1[(status >> 16) & 0x0f] += 1 ;
+    bgcol->recv_fifo_histogram1[(status      ) & 0x0f] += 1 ;
+#endif
+         /*  What we do now depends on whether the slihs were 'productive' ... */
+        unproductive_receive_count = rcr ? 0 : (unproductive_receive_count+1) ;
+        unproductive_transmit_count = (rcx==1) ? 0 : (unproductive_transmit_count+1) ;
+        productive += ( 0 != rcr || 1 == rcx ) ;
+        rcrset = ( rcr > 0 ) ? 0 : rcrset ;
+        rcrset |= rcr ;
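+         /*  'rcrset' remembers the most recent non-zero receive return, so the
+          *  bit-31 "nothing coming soon" hint survives later empty passes and can
+          *  still satisfy the exit test below. */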
+        if( 0 == productive )
+          {
+#if defined(KEEP_BG_COL_STATISTICS)
+            bgcol->spurious_interrupts += 1 ;
+#endif
+            break ;  /*  a spurious interrupt */
+          }
+        if( ( unproductive_receive_count > k_unproductive_receive_threshold
+              || (rcrset & 0x80000000)
+            )
+            &&
+            ( unproductive_transmit_count > k_unproductive_transmit_threshold
+               || (rcx == 0 )
+            )
+          ) break ;  /*  Neither transmit nor receive is likely to progress */
+      }
+
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+    if( bgcol_diagnostic_use_napi )
+	    {
+		TRACEN(k_t_napi,"napi_complete(%p)",&(bgcol->bgnet->napi)) ;
+		napi_complete(&(bgcol->bgnet->napi)) ;
+	    }
+#endif
+    bgcol->handler_running = 0 ;
+    if( 0 != rcx )
+      {
+         /*  Filled the TX FIFO, need an interrupt when it has room */
+        TRACEN(k_t_irqflow,"Enabling TX interrupts");
+        bgcol_enable_interrupts_xmit(bgcol) ;  /* Ask for an interrupt when there is space */
+      }
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+    mod_timer(&bgcol->missed_interrupt_timer, jiffies+200) ;  /*  Cause timer interrupt after 2000ms if things don't stay alive ... temp while diagnosing problem ... */
+#endif
+    bgcol_enable_interrupts_rcv(bgcol) ;
+    if (k_force_fp_save || (k_allow_fp_save && ! skip_fp_save))
+      {
+        dh_reg_restore(&dh_savearea) ;
+      }
+  }
+
+// eth isn't up, so assume we are running CIOD wakeup protocol
+static void bgcol_wakeup_slih(unsigned long dummy)
+  {
+    struct bg_col *bgcol = __bgcol ;
+    unsigned int chn;
+
+    TRACEN(k_t_userspace,"(>)") ;
+    for (chn = 0; chn < BGP_MAX_CHANNEL; chn++)
+      {
+        if(bgcol->chn[chn].chrdev )
+          {
+            TRACEN(k_t_userspace,"Channel %d wake_up_interruptible(%p)",chn,&bgcol->chn[chn].chrdev->read_wq) ;
+            bgcol->chn[chn].chrdev->read_complete = 1;
+            wake_up_interruptible(&bgcol->chn[chn].chrdev->read_wq);
+          }
+      }
+    TRACEN(k_t_userspace,"(<)") ;
+  }
+
+void bgcol_duplex_slih(unsigned long dummy)
+  {
+    struct bg_col *bgcol = __bgcol ;
+    if( bgcol->eth_is_up)
+      {
+        bgcol_duplex_slih_eth(dummy) ;
+      }
+    else
+      {
+        bgcol_wakeup_slih(dummy) ;
+      }
+  }
+
+
+static DECLARE_TASKLET(bgcol_duplex_slih_tasklet,bgcol_duplex_slih,0);
+
+static irqreturn_t bgcol_duplex_interrupt(int irq, void *dev)
+  {
+    struct bg_col *bgcol = (struct bg_col*)dev;
+
+    TRACE("bgnet: (>)interrupt %d", irq);
+    bgcol->handler_running = 1 ;
+    bgcol_disable_interrupts_xmit(bgcol) ;
+    bgcol_disable_interrupts_rcv(bgcol) ;
+    (void) mfdcrx(bgcol->dcrbase +_BGP_DCR_TR_REC_PRXF);
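+    /*  The discarded read of the receive exception flag DCR is assumed here to be
+     *  read-to-clear, acknowledging pending exceptions before the work is deferred. */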
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+    if( bgcol_diagnostic_use_napi)
+	    {
+		    TRACEN(k_t_napi,"napi_schedule(%p)",&bgcol->bgnet->napi) ;
+		    napi_schedule(&bgcol->bgnet->napi) ;
+	    }
+    else
+	    {
+		    tasklet_schedule(&bgcol_duplex_slih_tasklet);
+
+	    }
+#else
+    tasklet_schedule(&bgcol_duplex_slih_tasklet);
+#endif
+    TRACE("bgnet: (<)interrupt %d", irq);
+    return IRQ_HANDLED ;
+  }
+
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+static void bgcol_missed_interrupt(unsigned long dummy)
+{
+	    struct bg_col *bgcol = (struct bg_col*)&static_col;
+	    TRACEN(k_t_irqflow,"(>)") ;
+
+	    bgcol->handler_running = 1 ;
+	    bgcol_disable_interrupts_xmit(bgcol) ;
+	    bgcol_disable_interrupts_rcv(bgcol) ;
+	    (void) mfdcrx(bgcol->dcrbase +_BGP_DCR_TR_REC_PRXF);
+	#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+	    if( bgcol_diagnostic_use_napi)
+		    {
+			    TRACEN(k_t_napi,"napi_schedule(%p)",&bgcol->bgnet->napi) ;
+			    napi_schedule(&bgcol->bgnet->napi) ;
+		    }
+	    else
+		    {
+			    tasklet_schedule(&bgcol_duplex_slih_tasklet);
+
+		    }
+	#else
+	    tasklet_schedule(&bgcol_duplex_slih_tasklet);
+	#endif
+	mod_timer(&bgcol->missed_interrupt_timer, jiffies+10) ;  /*  Cause timer interrupt after 100ms if things don't stay alive ... temp while diagnosing problem ... */
+	TRACEN(k_t_irqflow,"(<)") ;
+}
+#endif
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+  struct bg_col *bgcol=__bgcol ;
+  TRACEN(k_t_irqflow|k_t_startxmit,"%s: Enq skb=%p, dev=%p, len=%d", __FUNCTION__, skb, dev, skb->len);
+#if defined(COLLECTIVE_TRANSMIT_WITH_SLIH)
+  skb_queue_tail(&(bgcol->skb_list_xmit),skb) ;
+#if defined(COLLECTIVE_TRANSMIT_WITH_FLIH)
+  if( ! bgcol->handler_running)
+	  {
+		  TRACEN(k_t_irqflow,"Enabling TX interrupts");
+		  bgcol_enable_interrupts_xmit(bgcol) ;  /* Ask for an interrupt when there is space */
+	  }
+#else
+      tasklet_schedule(&bgcol_duplex_slih_tasklet);
+#endif
+#else
+    {
+      unsigned long flags ;
+
+      dump_skb_partial(skb,64) ;
+      spin_lock_irqsave(&bgcol->irq_lock_xmit, flags) ;
+        {
+          struct sk_buff *xskb = bgcol->skb_current_xmit ;
+          if( NULL == xskb && skb_queue_empty(&(bgcol->skb_list_xmit)))
+            {
+              int rc ;
+              TRACEN(k_t_irqflow,"%s: Enq+en skb=%p, len=%d", __FUNCTION__, skb, skb->len);
+              skb_queue_tail(&(bgcol->skb_list_xmit),skb) ;
+              enable_kernel_fp();
+              rc = bgcol_xmit_handle(bgcol) ;
+             if( 0 == rc )
+                {
+                   /*  No room in fifo */
+                  TRACEN(k_t_irqflow,"Enabling TX interrupts");
+                  bgcol_enable_interrupts_xmit(bgcol) ;  /* Ask for an interrupt when there is space */
+                }
+            }
+          else
+            {
+              TRACEN(k_t_irqflow,"%s: Enq skb=%p, dev=%p, len=%d", __FUNCTION__, skb, dev, skb->len);
+              skb_queue_tail(&(bgcol->skb_list_xmit),skb) ;
+            }
+        }
+      spin_unlock_irqrestore(&bgcol->irq_lock_xmit, flags);
+    }
+#endif
+/*     } */
+#if defined(COLLECTIVE_XMITTER_FREES)
+    {
+	    struct sk_buff *skb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+	    while(skb)
+		    {
+			    TRACEN(k_t_irqflow,"Freeing sent skb=%p",skb);
+			    dev_kfree_skb(skb) ;
+			    skb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+		    }
+
+    }
+#endif
+  return 0 ;
+}
+
+
+static int bgpnet_add_device(int major,
+                              int minor,
+                             const char* devname,
+                             unsigned long long physaddr,
+                             int irq,
+                             irqreturn_t (*irq_handler)(int, void *))
+{
+  int ret;
+  dev_t devno;
+  struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices];
+
+  TRACEN(k_t_init,"devname=%s major=%d minor=%d",devname,major,minor) ;
+ /* initialize struct */
+  init_MUTEX (&dev->sem);
+  dev->major  = major;
+  dev->minor  = minor;
+  dev->physaddr = physaddr;
+  init_waitqueue_head(&dev->read_wq);
+  dev->read_complete = 0;
+  if (physaddr) {
+          dev->regs = ioremap(physaddr, 4096);
+  }
+  devno=MKDEV(major,minor);
+
+ /* register i.e., /proc/devices */
+  ret=register_chrdev_region(devno,1,(char *)devname);
+
+  if (ret)
+    {
+      printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) register_chrdev_region err=%d\n",
+              major,minor,ret);
+      return ret;
+    }
+
+ /* add cdev */
+  cdev_init(&dev->cdev,&bgpnet_device_fops);
+  dev->cdev.owner=THIS_MODULE;
+  dev->cdev.ops=&bgpnet_device_fops;
+  ret=cdev_add(&dev->cdev,devno,1);
+  if (ret)
+    {
+      printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) cdev_add err=%d\n",
+             major,minor,ret);
+      return ret;
+    }
+
+ /* signal to pass to owning process, should be altered using ioctl */
+  dev->signum=-1;
+
+  bgpnet_num_devices++;
+
+  return 0;
+}
+
+static int bgpnet_device_open (struct inode *inode, struct file *filp)
+{
+  struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev);
+
+  if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+  up(&dev->sem);
+
+  dev->current=current;
+  filp->private_data = (void*) dev;
+
+  TRACEN(k_t_userspace,"bgpnet: device (%d,%d) opened by process \"%s\" pid %i",
+        MAJOR(inode->i_rdev), MINOR(inode->i_rdev), current->comm, current->pid);
+
+  return 0;
+}
+
+
+/*
+ * Read doesn't actually read anything.   It simply blocks if the fifo is empty.
+ */
+static ssize_t bgpnet_device_read(struct file *filp, char __user *buf, size_t count,
+				 loff_t *f_pos)
+{
+    struct bgpnet_dev* dev = (struct bgpnet_dev *)filp->private_data;
+    union bgcol_status status;
+    int chn = dev->minor;
+
+    if (dev->major == BGP_COL_MAJOR_NUM && (chn == 0 || chn == 1)) {
+        status.raw = in_be32((unsigned *)((char*)dev->regs + _BGP_TRx_Sx));
+        if (!status.x.rcv_hdr) {
+            TRACEN(k_t_userspace,"bgpnet: read found status not ready status=0x%08x", status.raw);
+               /* enable interrupt when packets come in. */
+/*                 bgcol_enable_rcv_wm_interrupt(&__bgcol->chn[chn]); */
+                bgcol_enable_interrupts_rcv(__bgcol) ;
+                TRACEN(k_t_userspace,"wait_event_interruptible(wait_queue_head=%p)", &dev->read_wq);
+                wait_event_interruptible(dev->read_wq, dev->read_complete);
+                dev->read_complete = 0;
+                TRACEN(k_t_userspace,"bgpnet: read wakes up");
+        }
+       /* Ok if we give a false positive -- we tried.
+         * Note that we never actually copy out any data.  The status might be a useful
+         * thing to write in the buffer, but the caller only cares to block until
+         * something is there. */
+
+    }
+
+    return 0;
+}
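+/*  A hypothetical caller (illustrative only, not part of this patch) would use
+ *  the device as a pure synchronization point:
+ *      char dummy ;
+ *      read(fd, &dummy, 1) ;      -- returns 0 once data is probably available
+ *  and then consume the mmap()ed fifo registers directly. */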
+
+
+/* Don't think this will work on the 'bgnet' channel. What is the intent? CIOD? */
+/* If for CIOD, it may have suffered in the 'revised interrupt handler' integration */
+/*
+ * Note that poll only waits for data to be available in the read fifo.
+ * We do this by enabling an interrupt while we wait.  The interrupt is disabled
+ * when it fires.  The poll may complete before it fires (timeout), but that is ok.
+ */
+static unsigned int bgpnet_device_poll(struct file *filp, poll_table * wait)
+{
+    struct bgpnet_dev* dev = (struct bgpnet_dev*) filp->private_data;
+    unsigned int rc;
+    union bgcol_status status;
+    unsigned int chn = dev->minor;
+
+    if (dev->major == BGP_COL_MAJOR_NUM && (chn == 0 || chn == 1)) {
+        TRACEN(k_t_userspace,"poll_wait(&wait_queue_head = %p)", &dev->read_wq);
+        bgcol_enable_interrupts_rcv(__bgcol) ;
+        poll_wait(filp, &dev->read_wq, wait);
+
+       /* Return current col status. */
+       rc = POLLOUT|POLLWRNORM; /* For now implement read poll only */
+        status.raw = in_be32((unsigned *)((char*)dev->regs + _BGP_TRx_Sx));
+        if (status.x.rcv_hdr) {
+            TRACEN(k_t_userspace,"bgpnet: poll found status ready status=0x%08x", status.raw);
+               /* got something already */
+                rc |= POLLIN|POLLRDNORM;
+        } else {
+            TRACEN(k_t_userspace,"bgpnet: poll found status not ready status=0x%08x", status.raw);
+               /* enable interrupt when packets come in. */
+//                mtdcrx(_BGP_DCR_TR_REC_PRXEN, (chn ? _TR_REC_PRX_WM1 : _TR_REC_PRX_WM0));
+                bgcol_enable_interrupts_rcv(__bgcol) ;
+        }
+    } else
+	rc = POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM;
+
+    return rc;
+}
+
+
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+  unsigned long vsize = vma->vm_end - vma->vm_start;
+  struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data;
+  int ret = -1;
+
+ /* ------------------------------------------------------- */
+ /* set up page protection.                                 */
+ /* ------------------------------------------------------- */
+
+  vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+  vma->vm_flags     |= VM_IO;
+  vma->vm_flags     |= VM_RESERVED;
+
+ /* ------------------------------------------------------- */
+ /*                  do the mapping                         */
+ /* ------------------------------------------------------- */
+
+  if (device->physaddr != 0)
+    ret = remap_pfn_range(vma,
+                          vma->vm_start,
+                          device->physaddr >> PAGE_SHIFT,
+                          vsize,
+                          vma->vm_page_prot);
+
+  if (ret) {
+      printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n",
+                   device->major, device->minor);
+  } else {
+      TRACEN(k_t_userspace,"bgpnet: mapped (%d,%d) to vm=%lx",
+             device->major, device->minor, vma->vm_start);
+  }
+  return ret? -EAGAIN :0;
+}
+
+/* ************************************************************************* */
+/*                  BG/P network: release device                             */
+/* ************************************************************************* */
+
+ static int bgpnet_device_release (struct inode *inode, struct file * filp)
+{
+  struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data;
+
+ /* Ensure exclusive access*/
+  if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+
+  dev->current = NULL;
+  up(&dev->sem);
+
+  TRACEN(k_t_userspace,"bgpnet: device (%d,%d) successfully released",
+         MAJOR(inode->i_rdev), MINOR(inode->i_rdev));
+  return 0;
+}
+
+
+static long bgpnet_device_ioctl (struct inode *inode,
+//                                struct file * filp,
+                                unsigned int cmd,
+                                unsigned long arg)
+{
+    TRACEN(k_t_userspace,"cmd=0x%08x arg=0x%08x",cmd,(unsigned int)arg) ;
+    return 0;
+}
+
+static int bgpnet_SerDes_read (char *page, char **start, off_t offset,
+                             int count, int *eof, void *data)
+{
+        int rc;
+
+        TRACEN(k_t_userspace,"offset=0x%08x count=0x%08x",(unsigned int)offset,count) ;
+        if (offset > 0)
+                rc = 0;
+        else {
+                local_irq_disable();
+                rc = snprintf(page, count, "%d", bgcnsd.services->trainSerDes(0));
+                local_irq_enable();
+        }
+
+        *eof = 1;
+
+        return (rc >= 0 ? rc : 0);
+}
+
+
+
+static int bgpnet_SerDes_write(struct file * filp, const char __user *buf,
+                                  unsigned long len, void * data)
+{
+        int rc;
+
+        TRACEN(k_t_userspace,"len=0x%08x data=%p",(unsigned int)len,data) ;
+        local_irq_disable();
+        rc = bgcnsd.services->trainSerDes(1);
+        local_irq_enable();
+
+        return (rc < 0 ? -EIO : len);
+}
+
+
+
+/*  Base 10 is assumed.  Hexadecimal numbers must begin with 0x or 0X (ie. 0xabadcafe). */
+/*  Binary numbers must begin with the letter b in lowercase (ie. b01101001). */
+#define LOWER(c) ((c) < 'a' ? (c) + ('a' - 'A') : (c))
+static inline unsigned long atol(char *str)
+{
+        unsigned long value = 0;
+        unsigned char base = 10;
+
+        if ((*str == '0') && (LOWER(*(str+1)) == 'x')) {
+                base = 16;  /*  hexadecimal */
+                str += 2;
+        } else if (*str == 'b') {
+                base = 2;  /*  binary */
+                str++;
+        }
+
+        for (; *str; str++) {
+                unsigned char digit = (*str > '9') ? (10 + LOWER(*str) - 'a') : (*str - '0');
+                if (digit >= base) {
+                        value = 0;
+                        break;
+                }
+                value = value * base + digit;
+        }
+
+        return value;
+}
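+/*  Worked examples for the parser above:
+ *      atol("123")   == 123
+ *      atol("0x1f")  == 31       -- leading 0x selects base 16
+ *      atol("b0110") == 6        -- leading b selects base 2
+ *  An out-of-base digit resets the result to 0 rather than reporting an error. */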
+
+static int bgpnet_barrier_write(struct file* filp,
+                                const char __user* buffer,
+                                unsigned long len,
+                                void* data)
+{
+        int rc = len;
+        unsigned int timeout;
+        char valStr[128];
+        int strLen = sizeof(valStr)-1;
+
+        TRACEN(k_t_userspace,"(>) len=0x%08x data=%p",(unsigned int)len,data) ;
+        if (strLen > len)
+                strLen = len;
+        if (copy_from_user(valStr, buffer, strLen))
+                rc = -EFAULT;
+        else if (len) {
+                // NULL terminate the string of digits and convert to its numeric value.
+                if (valStr[strLen-1] == '\n')
+                        strLen--;
+                valStr[strLen] = '\0';
+                timeout = atol(valStr);
+
+                // Zero means disable barrier.  Non-zero specifies timeout for barrier and initializes barrier.
+                local_irq_disable();
+                if (timeout) {
+                        int ret;
+
+                        barrierEntry->data = (void*) timeout;
+
+                        ret = bgcnsd.services->globalBarrier_nonBlocking(1, 1, timeout);
+                        if (ret != BGCNS_RC_CONTINUE && ret != BGCNS_RC_COMPLETE)
+                                rc = -EIO;
+                } else {
+                        if (bgcnsd.services->disableBarrier(1))
+                                rc = -EIO;
+                }
+                local_irq_enable();
+        }
+
+        TRACEN(k_t_userspace,"(<) rc=%d",rc) ;
+        return rc;
+}
+
+
+static int bgpnet_barrier_read (char *page, char **start, off_t offset,
+                             int count, int *eof, void *data)
+{
+        int rc;
+
+        TRACEN(k_t_userspace,"(>) offset=0x%08x count=0x%08x",(unsigned int)offset,count) ;
+        if (offset > 0)
+                rc = 0;
+        else {
+                local_irq_disable();
+                rc = snprintf(page, count, "%d", bgcnsd.services->globalBarrier_nonBlocking(1, 0, (unsigned int) barrierEntry->data));
+                local_irq_enable();
+        }
+
+        *eof = 1;
+
+        TRACEN(k_t_userspace,"(<) rc=%d",rc) ;
+        return (rc >= 0 ? rc : 0);
+}
+
+
+
+
+
+/**********************************************************************
+ * Initialization and shut-down
+ **********************************************************************/
+
+static inline void bgcol_reset_channel(struct bgcol_channel *chn)
+{
+    mtdcrx(chn->dcrbase + _BGP_DCR_TR_RCTRL, _TR_RCTRL_RST);
+    mtdcrx(chn->dcrbase + _BGP_DCR_TR_SCTRL, _TR_RCTRL_RST);
+}
+
+
+static int bgcol_init_channel(unsigned long idx, struct bg_col *col)
+{
+    struct bgcol_channel* chn = &col->chn[idx];
+    int i;
+
+    chn->paddr = COL_CHANNEL_PADDR(idx);
+    chn->dcrbase = col->dcrbase + COL_CHANNEL_DCROFF(idx);
+    chn->irq_rcv_pending_mask = COL_IRQ_RCV_PENDING_MASK(idx);
+    chn->irq_inj_pending_mask = COL_IRQ_INJ_PENDING_MASK(idx);
+    init_timer(&chn->inj_timer);
+    chn->inj_timer.function = inj_timeout;
+    chn->inj_timer.data = (unsigned long) col;
+    chn->inj_timer.expires = 0;
+    for (i = 0; i < BGP_MAX_DEVICES; i++)
+	if (bgpnet_devices[i].major == BGP_COL_MAJOR_NUM &&
+	    bgpnet_devices[i].minor == idx) {
+		chn->chrdev = &bgpnet_devices[i];
+		break;
+	}
+    if (i >= BGP_MAX_DEVICES)
+	chn->chrdev = NULL;
+    chn->col = col;
+    chn->idx = idx;
+
+    if (!request_mem_region(chn->paddr, _BGP_COL_SIZE, COL_DEV_NAME))
+	return -1;
+
+    chn->mioaddr = (unsigned long)ioremap(chn->paddr, _BGP_COL_SIZE);
+    if (!chn->mioaddr)
+	goto err_remap;
+
+    /*  'chn' is never NULL here; select the watermark register by channel index */
+    if (idx)
+	mtdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_VCFG1,
+                 _TR_GLOB_VCFG_RWM(0) | _TR_GLOB_VCFG_IWM(4));
+    else
+	mtdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_VCFG0,
+                _TR_GLOB_VCFG_RWM(0) | _TR_GLOB_VCFG_IWM(4));
+
+/*     mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, COL_IRQMASK_REC); */
+/*     mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_INJ_PIXEN, COL_IRQMASK_INJ); */
+
+// Don't want to enable any interrupts at this stage
+    mtdcrx( chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, 0 );
+    mtdcrx( chn->col->dcrbase + _BGP_DCR_TR_INJ_PIXEN, _TR_INJ_PIX_ENABLE );
+
+
+    return 0;
+
+ err_remap:
+    printk(KERN_ERR "error mapping col\n");
+    release_mem_region(chn->paddr, _BGP_COL_SIZE);
+
+    return -1;
+}
+
+static int bgcol_uninit_channel(struct bgcol_channel *chn,
+				 struct bg_col *col)
+{
+    if (chn->mioaddr)
+    {
+	iounmap((void*)chn->mioaddr);
+	chn->mioaddr = 0;
+
+	 /*  unconditionally... */
+	release_mem_region(chn->paddr, _BGP_COL_SIZE);
+    }
+    return 0;
+}
+
+static int bgcol_init (struct bg_col *col)
+{
+    int cidx, rc, idx;
+
+/*     skb_queue_head_init(&skb_delivery_queue) ; */
+    if( 0 == col->mtu)
+      {
+        bgcol_set_mtu(col,60960+sizeof(struct ethhdr) ) ;  /*  It's possible that the 'bgnet' might have won a race to set MTU ... */
+      }
+    col->skb_in_waiting = alloc_skb(
+        		    k_use_plentiful_skb ? k_plentiful_skb_size :  col->mtu
+        		    , GFP_KERNEL );
+    col->skb_mini = alloc_skb(BGNET_FRAG_MTU + COL_SKB_ALIGN , GFP_KERNEL ) ;
+
+    spin_lock_init(&col->lock);
+    spin_lock_init(&col->irq_lock);
+
+    skb_queue_head_init(&col->skb_list_for_filling) ;
+    skb_queue_head_init(&col->skb_list_for_delivering) ;
+    skb_queue_head_init(&col->skb_list_for_freeing) ;
+
+    bgcol_prefill(&col->skb_list_for_filling, 100)  ;
+
+
+    col->dcrbase = COL_DCR_BASE;
+
+    skb_queue_head_init(&col->skb_list_xmit) ;
+    skb_queue_head_init(&col->skb_list_free) ;
+    col->skb_current_xmit = NULL ;
+
+    skb_queue_head_init(&col->fragskb_list_rcv) ;
+    init_ethkey_table(col) ;
+
+     /*  abuse IO port structure for DCRs */
+    if (!request_region(col->dcrbase, COL_DCR_SIZE, COL_DEV_NAME))
+	return -1;
+
+     /*  disable device IRQs before we attach them */
+    bgcol_disable_interrupts(col);
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+    setup_timer(&col->missed_interrupt_timer,bgcol_missed_interrupt,0) ;
+#endif
+    col->nodeid = mfdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_NADDR);
+
+    for (cidx = 0; cidx < BGP_MAX_CHANNEL; cidx++) {
+	if (bgcol_init_channel(cidx, col) != 0)
+	    goto err_channel;
+    }
+
+     /*  clear exception flags */
+    mfdcrx(col->dcrbase + _BGP_DCR_TR_INJ_PIXF);
+    mfdcrx(col->dcrbase + _BGP_DCR_TR_REC_PRXF);
+
+     /*  allocate IRQs last; otherwise, if an IRQ is still pending, we */
+     /*  get kernel segfaults */
+    for (idx = 0; bgcol_irqs[idx].irq != -1; idx++)
+    {
+#if defined(COLLECTIVE_TREE_AFFINITY)
+	  bic_set_cpu_for_irq(bgcol_irqs[idx].irq,k_TreeAffinityCPU) ;
+	  TRACEN(k_t_general,"setting affinity irq=%d affinity=%d",bgcol_irqs[idx].irq, k_TreeAffinityCPU );
+#endif
+	rc = request_irq(bgcol_irqs[idx].irq, bgcol_irqs[idx].handler,
+			 IRQF_DISABLED, bgcol_irqs[idx].name, col);
+	if (rc)
+	    goto err_irq_alloc;
+    }
+
+
+    return 0;
+
+ err_irq_alloc:
+    for (idx = 0; bgcol_irqs[idx].irq != -1; idx++)
+	free_irq(bgcol_irqs[idx].irq, col);
+
+ err_channel:
+    for (cidx = 0; cidx < BGP_MAX_CHANNEL; cidx++)
+	bgcol_uninit_channel(&col->chn[cidx], col);
+
+    release_region(col->dcrbase, COL_DCR_SIZE);
+
+    return -1;
+}
+
+void bgcol_eth_up(struct bg_col* col)
+  {
+    TRACEN(k_t_init,"eth coming up") ;
+    col->eth_is_up=1 ;
+    tasklet_schedule(&bgcol_duplex_slih_tasklet);
+  }
+void bgcol_eth_down(struct bg_col* col)
+  {
+    TRACEN(k_t_init,"eth going down") ;
+    col->eth_is_up=0 ;
+  }
+
+int bluegene_globalBarrier_nonBlocking(unsigned int channel, int reset, unsigned int timeoutInMillis) ;
+
+extern unsigned long long printk_clock_aligner ;
+/* Determine the offset of the 'local' timebase from a 'common' time signal as per global barrier */
+void bgcol_align_timebase(void)
+  {
+    int rc0 ;
+    int rc1 = -1 ;
+    unsigned long flags ;
+    unsigned long long tb0, tb1, tb ;
+    local_irq_save(flags) ;
+    tb0 = get_tb() ;
+    rc0 = bluegene_globalBarrier_nonBlocking(3,1,1000 ) ;
+    tb1 = get_tb() ;
+    if( rc0 == BGCNS_RC_CONTINUE ) rc1 = bluegene_globalBarrier_nonBlocking(3,0,1000 ) ;
+    tb = get_tb() ;
+    printk_clock_aligner = tb ;
+    TRACEN(k_t_init,"rc0=%d rc1=%d tb0=0x%016llx tb1=0x%016llx tb=0x%016llx",rc0,rc1,tb0,tb1,tb) ;
+    if( rc0 == BGCNS_RC_CONTINUE && rc1 == BGCNS_RC_CONTINUE)
+      {
+        printk(KERN_INFO "(!!!) Barrier timed out, some compute node might not have started\n") ;
+      }
+    local_irq_restore(flags) ;
+  }
+
+/**********************************************************************
+ *                      /proc filesystem
+ **********************************************************************/
+
+#define TGREAD(r, d) \
+        rc = snprintf(page, remaining, "%.30s (%03x): %08x\n", d, \
+                      bgcol->dcrbase + r, mfdcrx(bgcol->dcrbase + r)); \
+        if (rc < 0) goto out; \
+        if (rc > remaining) { remaining = 0; goto out; } \
+        page += rc;  \
+        remaining -= rc;
+
+#define TGSHOW(r) \
+  rc = snprintf(page, remaining, "%.60s : %08x\n", #r, (unsigned int)(r) );\
+  if (rc < 0) goto out; \
+  if (rc > remaining) { remaining = 0; goto out; } \
+  page += rc;  \
+  remaining -= rc;
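+/*  Both macros append one formatted line to the proc page and jump to 'out:' on
+ *  snprintf failure or buffer overflow; for example
+ *      TGREAD(_BGP_DCR_TR_GLOB_NADDR, "Node Address");
+ *  emits "Node Address (<dcr#>): <value>" and advances page/remaining. */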
+
+
+static int bgpnet_statistics_read (char *page, char **start, off_t off,
+                            int count, int *eof, void *data)
+{
+    struct bg_col *bgcol = data;
+    int rc, remaining = count;
+    *eof = 1;
+    TGREAD(_BGP_DCR_TR_REC_PRXEN, "Receive Exception Enable");
+    TGREAD(_BGP_DCR_TR_REC_PRXF,  "Receive Exception Flag  ");
+    TGREAD(_BGP_DCR_TR_INJ_PIXEN, "Injection Exception Enable");
+    TGREAD(_BGP_DCR_TR_INJ_PIXF,  "Injection Exception Flag  ");
+
+    TGSHOW(*((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_Sx))) ;
+    TGSHOW(*((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_SO))) ;
+    TGSHOW(bgcol->curr_conn) ;
+#if !defined(COLLECTIVE_TRANSMIT_WITH_SLIH)
+    TGSHOW(spin_is_locked(&bgcol->irq_lock_xmit)) ;
+#endif
+    TGSHOW(skb_queue_len(&bgcol->skb_list_xmit)) ;
+    TGSHOW(skb_queue_len(&bgcol->skb_list_free)) ;
+    TGSHOW(skb_queue_len(&bgcol->fragskb_list_rcv)) ;
+    TGSHOW(bgcol->skb_current_xmit) ;
+    TGSHOW(bgcol->current_xmit_len) ;
+    TGSHOW(bgcol->fragidx_xmit) ;
+    TGSHOW(bgcol->recv_total) ;
+    TGSHOW(bgcol->recv_guess_miss) ;
+    TGSHOW(bgcol->recv_no_skbuff) ;
+    TGSHOW(bgcol->recv_no_first_packet) ;
+    TGSHOW(bgcol->spurious_interrupts) ;
+    TGSHOW(irq_desc[BG_COL_IRQ_INJ].status) ;
+    TGSHOW(irq_desc[BG_COL_IRQ_INJ].irq_count) ;
+    TGSHOW(irq_desc[BG_COL_IRQ_INJ].irqs_unhandled) ;
+    TGSHOW(irq_desc[BG_COL_IRQ_RCV].status) ;
+    TGSHOW(irq_desc[BG_COL_IRQ_RCV].irq_count) ;
+    TGSHOW(irq_desc[BG_COL_IRQ_RCV].irqs_unhandled) ;
+
+     out:
+
+        return count - remaining;
+}
+
+static int bgpnet_status_read (char *page, char **start, off_t off,
+                            int count, int *eof, void *data)
+{
+    struct bg_col *bgcol = data;
+    int rc, remaining = count;
+    *eof = 1;
+
+
+    TGREAD(_BGP_DCR_TR_GLOB_FPTR, "Fifo Pointer");
+    TGREAD(_BGP_DCR_TR_GLOB_NADDR, "Node Address");
+    TGREAD(_BGP_DCR_TR_GLOB_VCFG0, "VC0 Configuration");
+    TGREAD(_BGP_DCR_TR_GLOB_VCFG1, "VC1 Configuration");
+    TGREAD(_BGP_DCR_TR_REC_PRXEN, "Receive Exception Enable");
+    TGREAD(_BGP_DCR_TR_REC_PRXF,  "Receive Exception Flag  ");
+    TGREAD(_BGP_DCR_TR_REC_PRDA, "Receive Diagnostic Address");
+    TGREAD(_BGP_DCR_TR_REC_PRDD, "Receive Diagnostic Data");
+    TGREAD(_BGP_DCR_TR_INJ_PIXEN, "Injection Exception Enable");
+    TGREAD(_BGP_DCR_TR_INJ_PIXF,  "Injection Exception Flag  ");
+    TGREAD(_BGP_DCR_TR_INJ_PIDA, "Injection Diagnostic Address");
+    TGREAD(_BGP_DCR_TR_INJ_PIDD, "Injection Diagnostic Data");
+    TGREAD(_BGP_DCR_TR_INJ_CSPY0, "VC0 payload checksum");
+    TGREAD(_BGP_DCR_TR_INJ_CSHD0, "VC0 header checksum");
+    TGREAD(_BGP_DCR_TR_INJ_CSPY1, "VC1 payload checksum");
+    TGREAD(_BGP_DCR_TR_INJ_CSHD1, "VC1 header checksum");
+
+    TGREAD(_BGP_DCR_TR_CLASS_RDR0, "Route Desc 0, 1");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR1, "Route Desc 2, 3");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR2, "Route Desc 4, 5");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR3, "Route Desc 6, 7");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR4, "Route Desc 8, 9");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR5, "Route Desc 10, 11");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR6, "Route Desc 12, 13");
+    TGREAD(_BGP_DCR_TR_CLASS_RDR7, "Route Desc 14, 15");
+    TGREAD(_BGP_DCR_TR_CLASS_ISRA, "Idle pattern low");
+    TGREAD(_BGP_DCR_TR_CLASS_ISRB, "Idle pattern high");
+
+    TGREAD(_BGP_DCR_TR_DMA_DMAA, "SRAM diagnostic addr");
+    TGREAD(_BGP_DCR_TR_DMA_DMAD, "SRAM diagnostic data");
+    TGREAD(_BGP_DCR_TR_DMA_DMADI, "SRAM diagnostic data inc");
+    TGREAD(_BGP_DCR_TR_DMA_DMAH, "SRAM diagnostic header");
+
+    TGREAD(_BGP_DCR_TR_ERR_R0_CRC, "CH0: Receiver link CRC errors");
+    TGREAD(_BGP_DCR_TR_ERR_R0_CE, "CH0: Receiver SRAM errors corrected");
+    TGREAD(_BGP_DCR_TR_ERR_S0_RETRY, "CH0: Sender link retransmissions");
+    TGREAD(_BGP_DCR_TR_ERR_S0_CE, "CH0: Sender SRAM errors corrected");
+
+    TGREAD(_BGP_DCR_TR_ERR_R1_CRC, "CH1: Receiver link CRC errors");
+    TGREAD(_BGP_DCR_TR_ERR_R1_CE, "CH1: Receiver SRAM errors corrected");
+    TGREAD(_BGP_DCR_TR_ERR_S1_RETRY, "CH1: Sender link retransmissions");
+    TGREAD(_BGP_DCR_TR_ERR_S1_CE, "CH1: Sender SRAM errors corrected");
+
+    TGREAD(_BGP_DCR_TR_ERR_R2_CRC, "CH2: Receiver link CRC errors");
+    TGREAD(_BGP_DCR_TR_ERR_R2_CE, "CH2: Receiver SRAM errors corrected");
+    TGREAD(_BGP_DCR_TR_ERR_S2_RETRY, "CH2: Sender link retransmissions");
+    TGREAD(_BGP_DCR_TR_ERR_S2_CE, "CH2: Sender SRAM errors corrected");
+
+    TGREAD(_BGP_DCR_TR_ARB_RCFG, "ARB: General router config");
+    TGREAD(_BGP_DCR_TR_ARB_RSTAT, "ARB: General router status");
+    TGREAD(_BGP_DCR_TR_ARB_HD00, "ARB: Next hdr, CH0, VC0");
+    TGREAD(_BGP_DCR_TR_ARB_HD01, "ARB: Next hdr, CH0, VC1");
+    TGREAD(_BGP_DCR_TR_ARB_HD10, "ARB: Next hdr, CH1, VC0");
+    TGREAD(_BGP_DCR_TR_ARB_HD11, "ARB: Next hdr, CH1, VC1");
+    TGREAD(_BGP_DCR_TR_ARB_HD20, "ARB: Next hdr, CH2, VC0");
+    TGREAD(_BGP_DCR_TR_ARB_HD21, "ARB: Next hdr, CH2, VC1");
+
+   rc = snprintf(page, remaining, "CH0: status=%08x\n",
+                  in_be32((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_Sx)));
+   if (rc < 0) goto out;
+   if (rc > remaining) { remaining = 0; goto out; }
+    page += rc; remaining -= rc;
+
+    rc = snprintf(page, remaining, "CH1: status=%08x\n",
+                  in_be32((unsigned*)(bgcol->chn[1].mioaddr + _BGP_TRx_Sx)));
+    if (rc < 0) goto out;
+    if (rc > remaining) { remaining = 0; goto out; }
+    page += rc; remaining -= rc;
+
+    rc = snprintf(page, remaining, "Data placement total=%d guess wrong=%d\n",
+                  bgcol->recv_total, bgcol->recv_guess_miss) ;
+    if (rc < 0) goto out;
+    if (rc > remaining) { remaining = 0; goto out; }
+    page += rc; remaining -= rc;
+    rc = snprintf(page,remaining, "Receive no_skbuff=%d no_first_packet=%d\n",
+                  bgcol->recv_no_skbuff, bgcol->recv_no_first_packet) ;
+    if (rc < 0) goto out;
+    if (rc > remaining) { remaining = 0; goto out; }
+    page += rc; remaining -= rc;
+
+#if defined(KEEP_BG_COL_STATISTICS)
+      {
+/*         int x ; */
+/*         for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/*           { */
+/*             rc = snprintf(page, remaining, "sf_h0[%d]=%d\n", x, bgcol->send_fifo_histogram0[x]) ; */
+/*             if (rc < 0) goto out; */
+/*             if (rc > remaining) { remaining = 0; goto out; } */
+/*              page += rc; remaining -= rc; */
+/*           } */
+/*         for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/*           { */
+/*             rc = snprintf(page, remaining, "sf_h1[%d]=%d\n", x, bgcol->send_fifo_histogram1[x]) ; */
+/*             if (rc < 0) goto out; */
+/*             if (rc > remaining) { remaining = 0; goto out; } */
+/*              page += rc; remaining -= rc; */
+/*           } */
+/*         for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/*           { */
+/*             rc = snprintf(page, remaining, "rf_h0[%d]=%d\n", x, bgcol->recv_fifo_histogram0[x]) ; */
+/*             if (rc < 0) goto out; */
+/*             if (rc > remaining) { remaining = 0; goto out; } */
+/*              page += rc; remaining -= rc; */
+/*           } */
+/*         for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/*           { */
+/*             rc = snprintf(page, remaining, "rf_h1[%d]=%d\n", x, bgcol->recv_fifo_histogram1[x]) ; */
+/*             if (rc < 0) goto out; */
+/*             if (rc > remaining) { remaining = 0; goto out; } */
+/*              page += rc; remaining -= rc; */
+/*           } */
+        rc=snprintf(page, remaining, "spurious interrupts=%d\n", bgcol->spurious_interrupts) ;
+        if (rc < 0) goto out;
+        if (rc > remaining) { remaining = 0; goto out; }
+         page += rc; remaining -= rc;
+      }
+#endif
+
+ out:
+
+    return count - remaining;
+}
+
+
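+/*  This handler copies the user's bytes into a local buffer, NUL-terminates
+ *  them, and then discards them: apparently a stub that just accepts writes so
+ *  the proc entry stays writable. */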
+static int bgcol_proc_write(struct file *filp, const char __user *buff, unsigned long len, void *data)
+  {
+    char proc_write_buffer[256] ;
+    unsigned long actual_len=(len<255) ? len : 255 ;
+    int rc = copy_from_user( proc_write_buffer, buff, actual_len ) ;
+    if( rc != 0 ) return -EFAULT ;
+    proc_write_buffer[actual_len] = 0 ;
+    return actual_len ;
+  }
+
+/* static unsigned char xtable[256] = */
+/*     { */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/*     }; */
+/*  */
+/* static int bgcol_atoix(const unsigned char *cp) */
+/*   { */
+/*     int result = 0 ; */
+/*     unsigned char ecp = xtable[*cp] ; */
+/*     while (ecp < 0x10) */
+/*       { */
+/*         result = (result << 4 ) | ecp ; */
+/*         cp += 1 ; */
+/*         ecp = xtable[*cp] ; */
+/*       } */
+/*     return result ; */
+/*   } */
+
+static int dcrcopy ;
+static int proc_docoldcr(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+    int rc ;
+    unsigned int dcrindex=static_col.dcrbase+(unsigned int)(ctl->extra1) ;
+    unsigned int dcrvalue=mfdcrx(dcrindex) ;
+    TRACE("(>)ctl=%p write=%d len=%d mfdcrx(0x%08x)=0x%08x", ctl,write,*lenp,dcrindex,dcrvalue) ;
+
+    dcrcopy=dcrvalue ;
+    rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+
+    TRACE("(<)") ;
+    return rc ;
+  }
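+/*  Note: the DCR value is snapshotted into 'dcrcopy' before proc_dointvec runs,
+ *  so reads show live hardware state; a write only updates the shadow copy,
+ *  since nothing here pushes the new value back with mtdcrx(). */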
+
+static int proc_docolmio_0(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+    int rc ;
+    TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+    ctl->data=(unsigned*)(static_col.chn[0].mioaddr + (unsigned int)(ctl->extra1)) ;
+    rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+    TRACE("(<)") ;
+    return rc ;
+  }
+
+static int proc_docolmio_1(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+    int rc ;
+    TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+    ctl->data=(unsigned*)(static_col.chn[1].mioaddr + (unsigned int)(ctl->extra1)) ;
+    rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+    TRACE("(<)") ;
+    return rc ;
+  }
+
+static struct ctl_path bgp_col_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "collective", .ctl_name = 0, },
+	{ },
+};
+
+#define CTL_PARAM_ADDR(Name,Addr)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = (int *)Addr,              \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dointvec       \
+  }
+
+#define CTL_PARAM_MIO_0(Name,Offset)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_docolmio_0,       \
+          .extra1	  = (void *)Offset             \
+  }
+
+#define CTL_PARAM_MIO_1(Name,Offset)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_docolmio_1,       \
+          .extra1	  = (void *)Offset             \
+  }
+
+#define CTL_PARAM_COLDCR(Name,DCRNumber)        \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &dcrcopy , \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_docoldcr  ,     \
+          .extra1          = (void *) DCRNumber   \
+  }
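+/*  With the path table above, these entries appear under /proc/sys/bgp/collective/;
+ *  for example (assuming the 'tracemask' entry below)
+ *      echo 255 > /proc/sys/bgp/collective/tracemask
+ *  adjusts bgcol_debug_tracemask at run time. */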
+
+static unsigned int static_pagesize = 1 << PAGE_SHIFT ;
+
+static struct ctl_table bgp_col_ctl_table[] = {
+/* 		CTL_PARAM_ADDR("napi",&bgcol_diagnostic_use_napi) , */
+		CTL_PARAM_ADDR("pagesize",&static_pagesize) ,
+		CTL_PARAM_ADDR("tracemask",&bgcol_debug_tracemask) ,
+/* 		CTL_PARAM_ADDR("e10000_diag_count",&e10000_diag_count) , */
+		CTL_PARAM_COLDCR("Receive-Exception-Enable",_BGP_DCR_TR_REC_PRXEN),
+		CTL_PARAM_COLDCR("Receive-Exception-Flag",_BGP_DCR_TR_REC_PRXF),
+		CTL_PARAM_COLDCR("Injection-Exception-Enable",_BGP_DCR_TR_INJ_PIXEN),
+		CTL_PARAM_COLDCR("Injection-Exception-Flag",_BGP_DCR_TR_INJ_PIXF),
+		CTL_PARAM_MIO_0("BGP_TR0_S0",_BGP_TRx_Sx) ,
+		CTL_PARAM_MIO_1("BGP_TR1_S1",_BGP_TRx_Sx) ,
+		CTL_PARAM_ADDR("skip_fp_save",&static_col.skip_fp_save) ,
+    CTL_PARAM_ADDR("curr_conn",&static_col.curr_conn) ,
+		CTL_PARAM_ADDR("current_xmit_len",&static_col.current_xmit_len) ,
+		CTL_PARAM_ADDR("fragidx_xmit",&static_col.fragidx_xmit) ,
+		CTL_PARAM_ADDR("recv_total",&static_col.recv_total) ,
+		CTL_PARAM_ADDR("recv_guess_miss",&static_col.recv_guess_miss) ,
+		CTL_PARAM_ADDR("recv_no_skbuff",&static_col.recv_no_skbuff) ,
+		CTL_PARAM_ADDR("recv_no_first_packet",&static_col.recv_no_first_packet) ,
+		CTL_PARAM_ADDR("deliver_without_workqueue",&static_col.deliver_without_workqueue) ,
+#if defined(KEEP_BG_COL_STATISTICS)
+		  {
+		          .ctl_name       = CTL_UNNUMBERED,
+		          .procname       = "sf_h0" ,
+		          .data           = static_col.send_fifo_histogram0,
+		          .maxlen         = COL_FIFO_SIZE*sizeof(int),
+		          .mode           = 0644,
+		          .proc_handler   = &proc_dointvec
+		  } ,
+		  {
+		          .ctl_name       = CTL_UNNUMBERED,
+		          .procname       = "sf_h1" ,
+		          .data           = static_col.send_fifo_histogram1,
+		          .maxlen         = COL_FIFO_SIZE*sizeof(int),
+		          .mode           = 0644,
+		          .proc_handler   = &proc_dointvec
+		  } ,
+		  {
+		          .ctl_name       = CTL_UNNUMBERED,
+		          .procname       = "rf_h0" ,
+		          .data           = static_col.recv_fifo_histogram0,
+		          .maxlen         = COL_FIFO_SIZE*sizeof(int),
+		          .mode           = 0644,
+		          .proc_handler   = &proc_dointvec
+		  } ,
+		  {
+		          .ctl_name       = CTL_UNNUMBERED,
+		          .procname       = "rf_h1" ,
+		          .data           = static_col.recv_fifo_histogram1,
+		          .maxlen         = COL_FIFO_SIZE*sizeof(int),
+		          .mode           = 0644,
+		          .proc_handler   = &proc_dointvec
+		  } ,
+#if defined(EXTRA_TUNING)
+		  {
+		          .ctl_name       = CTL_UNNUMBERED,
+		          .procname       = "sf_h2" ,
+		          .data           = static_col.send_fifo_histogram2,
+		          .maxlen         = COL_FIFO_SIZE*sizeof(int),
+		          .mode           = 0644,
+		          .proc_handler   = &proc_dointvec
+		  } ,
+		  {
+		          .ctl_name       = CTL_UNNUMBERED,
+		          .procname       = "rf_h2" ,
+		          .data           = static_col.recv_fifo_histogram2,
+		          .maxlen         = COL_FIFO_SIZE*sizeof(int),
+		          .mode           = 0644,
+		          .proc_handler   = &proc_dointvec
+		  } ,
+
+#endif
+#endif
+		  { 0 }
+} ;
+
+static void register_collective_sysctl(struct bg_col *col)
+{
+	col->sysctl_table_header=register_sysctl_paths(bgp_col_ctl_path,bgp_col_ctl_table) ;
+	TRACEN(k_t_init, "sysctl_table_header=%p",col->sysctl_table_header) ;
+
+}
+
+int __init
+bgcol_module_init(void)
+{
+    struct bg_col *col = &static_col ;
+    int rc;
+    unsigned long long tr0, tr1, ts0, ts1;
+
+    TRACEN(k_t_init,"(>)") ;
+
+    register_collective_sysctl(&static_col) ;
+
+    tr0=((unsigned long long)_BGP_UA_COL0<<32)  + _BGP_PA_COL0;
+    tr1=((unsigned long long)_BGP_UA_COL1<<32)  + _BGP_PA_COL1;
+    ts0=((unsigned long long)_BGP_UA_TORUS0<<32) + _BGP_PA_TORUS0;
+    ts1=((unsigned long long)_BGP_UA_TORUS1<<32) + _BGP_PA_TORUS1;
+
+    /* bgptree_vc0, bgptree_vc1, bgptorus_g0, and bgptorus_g1 are added in 'bluegene_networks.c'; duplicating them here gets warning messages */
+    bgpnet_add_device(BGP_COL_MAJOR_NUM,  0, "bgptree_vc0", tr0, -1, NULL);
+    bgpnet_add_device(BGP_COL_MAJOR_NUM,  1, "bgptree_vc1", tr1, -1, NULL);
+/*     bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0, -1, NULL); */
+/*     bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, "bgptorus_g1", ts1, -1, NULL); */
+    bgpnet_add_device(BGP_GI_MAJOR_NUM,    0, "bgpgi",       0,   -1, NULL);
+
+    bgpnetDir = proc_mkdir("bgpnet", NULL);
+    if (bgpnetDir) {
+        barrierEntry = create_proc_entry("barrier", S_IRUGO, bgpnetDir);
+        if (barrierEntry) {
+                barrierEntry->nlink = 1;
+                barrierEntry->read_proc = (void*) bgpnet_barrier_read;
+                barrierEntry->write_proc = (void*) bgpnet_barrier_write;
+                barrierEntry->data = (void*) 1;
+        }
+
+        SerDesEntry = create_proc_entry("SerDes", S_IRUGO, bgpnetDir);
+        if (SerDesEntry) {
+                SerDesEntry->nlink = 1;
+                SerDesEntry->read_proc = (void*) bgpnet_SerDes_read;
+                SerDesEntry->write_proc = (void*) bgpnet_SerDes_write;
+        }
+    }
+#if defined(KEEP_BG_COL_STATISTICS) || defined(BGP_COL_STATUS_VISIBILITY)
+    bgpnetDir = proc_mkdir("bgpcol", NULL);
+    if (bgpnetDir) {
+#if defined(KEEP_BG_COL_STATISTICS)
+        statisticsEntry = create_proc_entry("statistics", S_IRUGO, bgpnetDir);
+        if (statisticsEntry) {
+          statisticsEntry->nlink = 1;
+          statisticsEntry->read_proc = (void*) bgpnet_statistics_read;
+          statisticsEntry->write_proc = (void*) bgcol_proc_write;
+          statisticsEntry->data = col ;
+        }
+#endif
+#if defined(BGP_COL_STATUS_VISIBILITY)
+        statusEntry = create_proc_entry("status", S_IRUGO, bgpnetDir);
+        if (statusEntry) {
+          statusEntry->nlink = 1;
+          statusEntry->read_proc = (void*) bgpnet_status_read;
+          statusEntry->write_proc = (void*) bgcol_proc_write;
+          statusEntry->data = col ;
+        }
+#endif
+/* #if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) */
+/*         tracemaskEntry = create_proc_entry("tracemask", S_IRUGO, bgpnetDir); */
+/*         if (tracemaskEntry) { */
+/*           tracemaskEntry->nlink = 1; */
+/*           tracemaskEntry->read_proc = (void*) bgpnet_tracemask_read; */
+/*           tracemaskEntry->write_proc = (void*) bgpnet_tracemask_write; */
+/*         } */
+/* #endif */
+   }
+#endif
+
+    rc = bgcol_init(col);
+    if (rc)
+	goto err_col_init;
+
+    mb();
+
+
+    TRACEN(k_t_init,"(<)") ;
+    return 0;
+
+ err_col_init:
+    /* XXX: unmap IRQs */
+    TRACEN(k_t_init,"(<) error, rc=%d",rc) ;
+    return rc;
+}
diff --git a/drivers/net/bgp_collective/bgcol.h b/drivers/net/bgp_collective/bgcol.h
new file mode 100644
index 0000000..6044540
--- /dev/null
+++ b/drivers/net/bgp_collective/bgcol.h
@@ -0,0 +1,293 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ *          Volkmar Uhlig <vuhlig@us.ibm.com>
+ *          Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * Description:   Header file for col device
+ *
+ *
+ ********************************************************************/
+#ifndef __DRIVERS__NET__BLUEGENE__COL_H__
+#define __DRIVERS__NET__BLUEGENE__COL_H__
+
+#define KEEP_BG_COL_STATISTICS
+#define EXTRA_TUNING
+/* #define KEEP_RECV_TOTAL */
+#define HAS_MISSED_INTERRUPT_TIMER
+
+#define _BGP_COL_BASE		(0x610000000ULL)
+#define _BGP_COL_OFFSET	(0x001000000ULL)
+#define _BGP_COL_SIZE		(0x400)
+
+#define _BGP_TORUS_BASE		(0x601140000ULL)
+#define _BGP_TORUS_OFFSET	(0x000010000ULL)
+
+#define BGP_MAX_CHANNEL		2
+#define BGP_COL_CHANNEL	0
+#define BGP_COL_ADDR_BITS	24
+
+#define COL_CHANNEL_PADDR(c)	(_BGP_COL_BASE + ((c)*_BGP_COL_OFFSET))
+#define COL_CHANNEL_DCROFF(c)	(0x20 + ((c) * 8))
+#define COL_DCR_BASE		(0xc00)
+#define COL_DCR_SIZE		(0x80)
+
+#define COL_IRQMASK_INJ	(_TR_INJ_PIX_APAR0  | _TR_INJ_PIX_APAR1  |\
+                                 _TR_INJ_PIX_ALIGN0 | _TR_INJ_PIX_ALIGN1 |\
+                                 _TR_INJ_PIX_ADDR0  | _TR_INJ_PIX_ADDR1  |\
+                                 _TR_INJ_PIX_DPAR0  | _TR_INJ_PIX_DPAR1  |\
+                                 _TR_INJ_PIX_COLL   | _TR_INJ_PIX_UE     |\
+                                 _TR_INJ_PIX_PFO0   | _TR_INJ_PIX_PFO1   |\
+                                 _TR_INJ_PIX_HFO0   | _TR_INJ_PIX_HFO1)
+
+#define COL_IRQMASK_REC	(_TR_REC_PRX_APAR0  | _TR_REC_PRX_APAR1  |\
+                                 _TR_REC_PRX_ALIGN0 | _TR_REC_PRX_ALIGN1 |\
+                                 _TR_REC_PRX_ADDR0  | _TR_REC_PRX_ADDR1  |\
+                                 _TR_REC_PRX_COLL   | _TR_REC_PRX_UE     |\
+                                 _TR_REC_PRX_PFU0   | _TR_REC_PRX_PFU1   |\
+                                 _TR_REC_PRX_HFU0   | _TR_REC_PRX_HFU1   |\
+				 _TR_REC_PRX_WM0    | _TR_REC_PRX_WM1 )
+
+#define COL_IRQ_RCV_PENDING_MASK(idx) (1U << (1 - (idx)))
+#define COL_IRQ_INJ_PENDING_MASK(idx) (1U << (2 - (idx)))
+
+
+#define COL_IRQ_GROUP		5
+#define COL_IRQ_BASE		20
+#define COL_IRQ_NONCRIT_NUM	20
+#define COL_NONCRIT_BASE	0
+#define COL_FIFO_SIZE		8
+
+
+union bgcol_header {
+	unsigned int raw;
+	struct {
+		unsigned int pclass	: 4;
+		unsigned int p2p	: 1;
+		unsigned int irq	: 1;
+		unsigned int vector	: 24;
+		unsigned int csum_mode	: 2;
+	} p2p;
+	struct {
+		unsigned int pclass	: 4;
+		unsigned int p2p	: 1;
+		unsigned int irq	: 1;
+		unsigned int op		: 3;
+		unsigned int opsize	: 7;
+		unsigned int tag	: 14;
+		unsigned int csum_mode	: 2;
+	} bcast;
+} __attribute__((packed));
+
+union bgcol_status {
+	unsigned int raw;
+	struct {
+		unsigned int inj_pkt	: 4;
+		unsigned int inj_qwords	: 4;
+	        unsigned int __res0	: 4;
+		unsigned int inj_hdr	: 4;
+		unsigned int rcv_pkt	: 4;
+		unsigned int rcv_qwords : 4;
+		unsigned int __res1	: 3;
+		unsigned int irq	: 1;
+		unsigned int rcv_hdr	: 4;
+	} x;
+} __attribute__((packed));
+
+static inline unsigned int bgcol_status_inj_pkt   (unsigned int status) { return status >> 28 ; }
+static inline unsigned int bgcol_status_inj_qwords(unsigned int status) { return (status >> 24) & 0x0f ; }
+static inline unsigned int bgcol_status_inj_hdr   (unsigned int status) { return (status >> 16) & 0x0f ; }
+static inline unsigned int bgcol_status_rcv_pkt   (unsigned int status) { return (status >> 12) & 0x0f ; }
+static inline unsigned int bgcol_status_rcv_qwords(unsigned int status) { return (status >> 8 ) & 0x0f ; }
+static inline unsigned int bgcol_status_irq       (unsigned int status) { return (status >> 4 ) & 1 ; }
+static inline unsigned int bgcol_status_rcv_hdr   (unsigned int status) { return status & 0x0f ; }
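
As a worked example (the value is arbitrary; the field meanings follow the bgcol_status bitfield names above), a status word of 0x34012511 decodes through these helpers as:

    unsigned int status = 0x34012511;
    bgcol_status_inj_pkt(status);    /* 0x3 (inj_pkt)    */
    bgcol_status_inj_qwords(status); /* 0x4 (inj_qwords) */
    bgcol_status_inj_hdr(status);    /* 0x1 (inj_hdr)    */
    bgcol_status_rcv_pkt(status);    /* 0x2 (rcv_pkt)    */
    bgcol_status_rcv_qwords(status); /* 0x5 (rcv_qwords) */
    bgcol_status_irq(status);        /* 1   (irq)        */
    bgcol_status_rcv_hdr(status);    /* 0x1 (rcv_hdr)    */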
+
+
+/* some device defined */
+#define _BGP_DCR_TR_RCTRL	(_BGP_DCR_TR_CH0_RCTRL - _BGP_DCR_TR_CH0)
+#define _BGP_DCR_TR_SCTRL	(_BGP_DCR_TR_CH0_SCTRL - _BGP_DCR_TR_CH0)
+#define _BGP_DCR_TR_RSTAT	(_BGP_DCR_TR_CH0_RSTAT - _BGP_DCR_TR_CH0)
+
+/*  hardware specification: 4 bytes address, 256 bytes payload */
+#define COL_ALEN	4
+#define COL_PAYLOAD	256
+
+#define FRAGMENT_LISTS          256
+
+
+struct bgpnet_dev
+{
+  int                  major,minor;        /* device major, minor */
+  unsigned long long   physaddr;           /* physical address */
+  struct task_struct* current;            /* process holding device */
+  int                  signum;             /* signal to send holding process */
+  wait_queue_head_t    read_wq;
+  int                  read_complete;
+  void                 *regs;              /* mapped regs (only used with col) */
+  struct semaphore     sem;                /* interruptible semaphore */
+  struct cdev          cdev;               /* container device? */
+};
+
+
+struct bgcol_channel {
+    phys_addr_t paddr;
+    unsigned long mioaddr;
+    unsigned int dcrbase;
+    unsigned long irq_rcv_pending_mask;
+    unsigned long irq_inj_pending_mask;
+    struct timer_list inj_timer;
+    unsigned int injected;
+    unsigned int partial_injections;
+    unsigned int unaligned_hdr_injections;
+    unsigned int unaligned_data_injections;
+    unsigned int received;
+    unsigned int inject_fail;
+    unsigned int dropped;
+    unsigned int delivered;
+    unsigned int idx;
+    struct bg_col* col;
+    struct bgpnet_dev* chrdev;
+};
+
+enum {
+  k_ethkey_table_size=256
+};
+
+struct bg_col_per_eth {
+  unsigned char * payload ;
+  unsigned int expect ;
+};
+
+struct bg_col {
+    spinlock_t lock;
+    spinlock_t irq_lock;
+    struct bgcol_channel chn[BGP_MAX_CHANNEL];
+    unsigned int dcrbase;
+    unsigned int curr_conn;
+    unsigned int nodeid;
+    unsigned int inj_wm_mask;
+    unsigned int bgnet_channel ;
+
+    unsigned int max_packets_per_frame ;
+    unsigned int mtu ;
+
+    unsigned int eth_is_up ;
+
+    /* statistics */
+    unsigned fragment_timeout;
+
+     /*  Interrupt management */
+    unsigned int handler_running ;
+    unsigned int skip_fp_save ;
+     /*  Transmission items */
+      struct bglink_hdr_col lnkhdr_xmit __attribute__((aligned(8))); /* Link header being used for partially-sent skb */
+      spinlock_t irq_lock_xmit ;
+      struct sk_buff_head skb_list_xmit ;   /* List of skb's to be sent */
+      struct sk_buff_head skb_list_free ;   /* Keep a list of skb's to free at user level */
+      struct sk_buff * skb_current_xmit ;   /* Partially-sent skb, if any */
+      void * current_xmit_data ;            /* Data from current skb adjusted for alignment */
+      int current_xmit_len ;                /* Length of current skb data */
+      union bgcol_header dest_xmit ;
+      unsigned int fragidx_xmit ;
+
+     /*  Reception items */
+      struct bglink_hdr_col lnkhdr_rcv __attribute__((aligned(8))); /* Link header pulled out of reception FIFO */
+      struct sk_buff_head fragskb_list_rcv  ; /* List of fully-received frames */
+      struct sk_buff_head fragskb_list_discard  ; /* List of frames to discard */
+      struct sk_buff * skb_in_waiting ;  /*  An skb ready to catch the start of a 'new' frame */
+      struct sk_buff * skb_mini ;  /*  A 'miniature' skbuff just right for catching single-packet frames */
+
+      /* Core-to-core items */
+      struct sk_buff_head skb_list_for_filling ;
+      struct sk_buff_head skb_list_for_delivering ;
+      struct sk_buff_head skb_list_for_freeing ;
+
+      unsigned int deliver_without_workqueue ; /* Whether to activate the 'deliver on other core' code for an skbuff */
+
+
+        struct bgnet_dev *bgnet ;
+
+     /*  Statistics */
+
+         int recv_total ;
+         int recv_guess_miss ;
+         int recv_no_skbuff ;
+         int recv_no_first_packet ;
+
+     /*  'big' tables */
+        struct bg_col_per_eth per_eth_table[k_ethkey_table_size] ;
+        struct sk_buff * skb_rcv_table[k_ethkey_table_size] ;
+
+     /*  Tuning statistics */
+#if defined(KEEP_BG_COL_STATISTICS)
+        unsigned int send_fifo_histogram0[16] ;
+        unsigned int send_fifo_histogram1[16] ;
+        unsigned int recv_fifo_histogram0[16] ;
+        unsigned int recv_fifo_histogram1[16] ;
+#if defined(EXTRA_TUNING)
+        unsigned int send_fifo_histogram2[16] ;
+        unsigned int recv_fifo_histogram2[16] ;
+#endif
+#endif
+        unsigned int spurious_interrupts ;
+     /*  Diagnostic controls */
+        struct ctl_table_header * sysctl_table_header ;
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+    struct timer_list missed_interrupt_timer ;
+#endif
+};
+
+/**********************************************************************
+ * driver
+ **********************************************************************/
+
+#define COL_DEV_NAME "bgcol"
+
+extern int bgcol_debug_tracemask ;
+struct bg_col;
+
+struct bg_col *bgcol_get_dev(void);
+void bgcol_enable_interrupts(struct bg_col* col);
+unsigned int bgcol_get_nodeid(struct bg_col* col);
+void bgcol_link_hdr_init(struct bglink_hdr_col *lnkhdr);
+int bgcol_xmit(struct bg_col *col, int chnidx, union bgcol_header dest,
+		struct bglink_hdr_col *lnkhdr, void *data, int len);
+int __bgcol_xmit(struct bg_col *col, int chnidx, union bgcol_header dest,
+		  struct bglink_hdr_col *lnkhdr, void *data, int len);
+
+void bgcol_set_mtu(struct bg_col* col, unsigned int mtu) ;
+void bgcol_enable_inj_wm_interrupt(struct bgcol_channel* chn);
+void bgcol_disable_inj_wm_interrupt(struct bgcol_channel* chn);
+void bgcol_enable_rcv_wm_interrupt(struct bgcol_channel* chn);
+void bgcol_disable_rcv_wm_interrupt(struct bgcol_channel* chn);
+
+void bgcol_duplex_slih(unsigned long dummy) ;
+
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev);
+int __init bgcol_module_init(void) ;
+enum {
+	bgcol_diagnostic_use_napi = 1
+};
+
+void bgcol_eth_up(struct bg_col* col) ;
+void bgcol_eth_down(struct bg_col* col) ;
+
+void bgcol_align_timebase(void) ;
+/* extern int bgcol_diagnostic_use_napi ; */
+
+#endif
diff --git a/drivers/net/bgp_collective/bglink.h b/drivers/net/bgp_collective/bglink.h
new file mode 100644
index 0000000..37feca2
--- /dev/null
+++ b/drivers/net/bgp_collective/bglink.h
@@ -0,0 +1,158 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ *          Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description:   Link layer definitions
+ *
+ *
+ ********************************************************************/
+#ifndef __DRIVERS__BLUEGENE__LINK_H__
+#define __DRIVERS__BLUEGENE__LINK_H__
+
+#include <linux/skbuff.h>
+
+#include <asm/atomic.h>
+
+/* link layer protocol IDs */
+#define BGLINK_P_NET	0x01
+#define BGLINK_P_CON	0x10
+
+union link_proto_opt {
+    u16 raw;
+    struct {
+	u16 option	: 4;
+	u16 pad_head	: 4;
+	u16 pad_tail	: 8;
+    } opt_net;
+    struct {
+	u16 len;
+    } opt_con;
+} __attribute__((packed));
+
+struct bglink_hdr_col {
+    u32 dst_key;
+    u32 src_key;
+    u16 conn_id;
+    u8 this_pkt;
+    u8 total_pkt;
+    u16 lnk_proto;   /*  net, con, ... */
+    union link_proto_opt opt;
+} ;  /*  __attribute__((packed)); */
+
+struct bglink_hdr_col_map {
+    u32 dst_key;
+    u32 src_key;
+    u32 conn_this_total;
+    u32 proto_option_head_tail ;
+} ;
+
+struct bglink_hdr_torus {
+    u32 dst_key;
+    u32 len;
+    u16 lnk_proto;   /*  net, con, ... */
+    union link_proto_opt opt;
+} ;  /*  __attribute__((packed)); */
+
+/* link protocol callbacks
+ * rcv is called when new packet arrives
+ * flush is called when the device was busy and becomes idle
+ *     again (flow control)
+ */
+struct bgnet_dev ;
+struct bg_col ;
+struct bglink_proto {
+    u16 lnk_proto;
+    int receive_from_self;
+    int (*col_rcv)(struct bg_col*, struct sk_buff*, struct bglink_hdr_col *, struct bglink_proto *proto);
+    int (*col_rcv_trimmed)(struct bg_col*, struct sk_buff*, struct bglink_proto *proto, unsigned int src_key);
+    int (*col_flush)(int chn);
+    int (*torus_rcv)(struct sk_buff*, struct bglink_hdr_torus *);
+    void *private;
+    struct list_head list;
+};
+
+extern struct list_head linkproto_list;
+
+static void bglink_register_proto(struct bglink_proto *proto) __attribute__ ((unused)) ;
+static void bglink_unregister_proto(struct bglink_proto *proto) __attribute__ ((unused)) ;
+static struct bglink_proto* bglink_find_proto(u16 proto) __attribute__ ((unused)) ;
+
+enum {
+  k_link_protocol_limit = 8   /*  we only actually have 'eth' and 'eth_reflector' at the moment, but we might get 'con' and more */
+};
+extern struct bglink_proto * proto_array[k_link_protocol_limit] ;
+static void bglink_register_proto(struct bglink_proto *proto)
+{
+  if( proto->lnk_proto < k_link_protocol_limit)
+    {
+      proto_array[proto->lnk_proto] = proto ;
+    }
+}
+
+static void bglink_unregister_proto(struct bglink_proto *proto)
+{
+  if( proto->lnk_proto < k_link_protocol_limit)
+    {
+      proto_array[proto->lnk_proto] = NULL ;
+    }
+}
+
+static struct bglink_proto* bglink_find_proto(u16 proto)
+{
+    return proto_array[proto & (k_link_protocol_limit-1)] ;
+}
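
A minimal registration sketch, using a hypothetical handler (my_rcv, my_proto and my_proto_init are illustrative names; the callback signature is the col_rcv member of struct bglink_proto above):

    static int my_rcv(struct bg_col *col, struct sk_buff *skb,
                      struct bglink_hdr_col *lnkhdr, struct bglink_proto *proto)
    {
            /* A real handler would deliver the payload; this one just drops it. */
            dev_kfree_skb(skb);
            return 0;
    }

    static struct bglink_proto my_proto = {
            .lnk_proto = BGLINK_P_NET,   /* must be below k_link_protocol_limit */
            .col_rcv   = my_rcv,
    };

    static int __init my_proto_init(void)
    {
            bglink_register_proto(&my_proto);
            return 0;
    }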
+
+
+#if 0
+/*
+ * Here are some thoughts on how we might better consolidate link headers
+ * for the col and torus.  The idea is that there's an 8-byte packet header
+ * that must be sent (at least) once per packet, and an 8-byte fragment header
+ * that has to be included with every fragment.  For the col we can include
+ * both headers in every fragment.  For the torus, there's not room to send
+ * the packet header in every fragment, so we'd have to send it once as part
+ * of the payload in the first fragment (as we're doing now anyway).
+ * The various structures might look something like:
+ */
+
+struct pkt_hdr {
+    u32 lnk_proto    : 8;
+    u32 dst_key      : 24;
+    u16 len;
+    u16 private;
+} __attribute__((packed));
+
+struct frag_hdr {
+    u32 offset;
+    u32 conn_id      : 8;
+    u32 src_key      : 24;
+} __attribute__((packed));
+
+struct frag_hdr_col {
+    struct pkt_hdr pkt;
+    struct frag_hdr frag;
+} __attribute__((packed));
+
+struct frag_hdr_torus {
+    union torus_fifo_hw_header fifo;
+    struct frag_hdr frag;
+} __attribute__((packed));
+#endif
+
+#endif /* !__DRIVERS__BLUEGENE__LINK_H__ */
diff --git a/drivers/net/bgp_collective/bgnet.c b/drivers/net/bgp_collective/bgnet.c
new file mode 100644
index 0000000..21602ed
--- /dev/null
+++ b/drivers/net/bgp_collective/bgnet.c
@@ -0,0 +1,835 @@
+/*********************************************************************
+ *
+ * Description: Blue Gene driver exposing col and torus as a NIC
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors:
+ * Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ * Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <linux/workqueue.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/bgp_personality.h>
+#include <asm/delay.h>
+
+#include <asm/bluegene.h>
+
+#include "bglink.h"
+#include "bgnet.h"
+#include "bgcol.h"
+/* #include "bgtor.h" */
+
+
+/**********************************************************************
+ *                           defines
+ **********************************************************************/
+
+#define DRV_NAME	"bgnet"
+#define DRV_VERSION	"0.5"
+#define DRV_DESC	"Blue Gene NIC (IBM)"
+
+MODULE_DESCRIPTION(DRV_DESC);
+MODULE_AUTHOR("IBM");
+
+/* #define TRUST_TREE_CRC */
+
+#include <linux/KernelFxLog.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+
+#define XTRACEN(i,x...)
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...) { printk(KERN_EMERG x) ; }
+#define TRACE1(x...) { printk(KERN_EMERG x) ; }
+#define TRACE2(x...) { printk(KERN_EMERG x) ; }
+#define TRACEN(i,x...) { printk(KERN_EMERG x) ; }
+#define TRACED(x...) { printk(KERN_EMERG x) ; }
+#define TRACES(x...) { printk(KERN_EMERG x) ; }
+#elif  defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+#define TRACE(x...)    KernelFxLog(bgcol_debug_tracemask & k_t_general,x)
+#define TRACE1(x...)   KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...)   KernelFxLog(bgcol_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (i),x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+/*  An IPv4 address for slotting into a trace message */
+#define NIPQ(X) ((X)>>24)&0xff,((X)>>16)&0xff,((X)>>8)&0xff,(X)&0xff
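
NIPQ expands to four comma-separated octet values for a "%d.%d.%d.%d" format string, e.g.:

    TRACE("peer=%d.%d.%d.%d", NIPQ(ip));   /* 'ip' is a host-order IPv4 address */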
+
+#define BGNET_FRAG_MTU		240
+#define BGNET_MAX_MTU		(BGNET_FRAG_MTU * 254)
+#define BGNET_DEFAULT_MTU	ETH_DATA_LEN
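
Each Ethernet frame is carried as a run of up-to-240-byte collective fragments; this_pkt/total_pkt in struct bglink_hdr_col are single bytes, which is presumably why BGNET_MAX_MTU stops at 254 fragments. A hypothetical helper (bgnet_frags_for_len is an illustrative name, not part of the driver) shows the arithmetic:

    /* Collective packets needed to carry 'len' bytes: a default 1500-byte
     * MTU plus the 14-byte Ethernet header takes (1514 + 239) / 240 = 7. */
    static inline unsigned int bgnet_frags_for_len(unsigned int len)
    {
            return (len + BGNET_FRAG_MTU - 1) / BGNET_FRAG_MTU;
    }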
+
+
+static BGP_Personality_t bgnet_personality;
+/* static struct net_device *static_dev ; */
+
+/* static struct bglink_proto bgnet_lnk; */
+
+/* static DEFINE_SPINLOCK(bgnet_lock); */
+static LIST_HEAD(bgnet_list);
+
+struct skb_cb_lnk {
+    struct bglink_hdr_col lnkhdr;
+    union bgcol_header dest;
+};
+
+int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int x, unsigned int y, unsigned int z) ;
+
+/**********************************************************************
+ *                         Linux module
+ **********************************************************************/
+
+MODULE_DESCRIPTION("BlueGene Ethernet driver");
+MODULE_LICENSE("GPL");
+
+int bgnic_driverparm = 0 ;
+
+static void dumpmem(const void *address, unsigned int length, const char * label)
+  {
+    int x ;
+    TRACEN(k_t_fifocontents,"Memory dump, length=%d: %s",length,label) ;
+    if( length > 256 ) {
+      length = 256 ;
+    }
+    for (x=0;x<length;x+=32)
+      {
+        int *v = (int *)(address+x) ;
+        TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x",
+            v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
+            ) ;
+      }
+  }
+
+
+/**********************************************************************
+ *                   Linux' packet and skb management
+ **********************************************************************/
+
+
+static int bgnet_open(struct net_device* dev)
+{
+     struct bgnet_dev* bgnet = (struct bgnet_dev*) netdev_priv(dev);
+     bgcol_eth_up(bgnet->bgcol) ; /* Indicate that we want to operate as ethernet */
+
+/*     bgcol_enable_rcv_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */
+
+    TRACEN(k_t_napi,"netif_start_queue(dev=%p)",dev) ;
+    netif_start_queue(dev);
+
+    return 0;
+}
+
+static int bgnet_stop(struct net_device* dev)
+{
+    struct bgnet_dev* bgnet = (struct bgnet_dev*) netdev_priv(dev);
+    bgcol_eth_down(bgnet->bgcol) ; /* Indicate that we want to stop operating as ethernet */
+
+    TRACEN(k_t_napi,"netif_stop_queue(dev=%p)",dev) ;
+    netif_stop_queue(dev);
+/*     bgcol_disable_rcv_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */
+/*     bgcol_disable_inj_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */
+
+    return 0;
+}
+
+
+static int bgnet_change_mtu(struct net_device *dev, int new_mtu)
+{
+  struct bgnet_dev *bgnet = netdev_priv(dev);
+    if (new_mtu < 60 || new_mtu > BGNET_MAX_MTU )
+	return -EINVAL;
+    dev->mtu = new_mtu;
+    bgcol_set_mtu(bgnet->bgcol, new_mtu+sizeof(struct ethhdr)) ;
+    return 0;
+}
+
+
+static inline void stamp_checksum_place_in_skb(struct sk_buff *skb)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+        unsigned int eth_proto = eth->h_proto ;
+        struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+        /* unsigned int iphlen = 4*iph->ihl ; */
+        /* struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) ); */
+        /* struct udphdr *udph = (struct udphdr *) ( ((char *)(iph)) + (iphlen) ); */
+        unsigned int ip_proto = iph->protocol ;
+        skb->csum_start = skb_transport_header(skb) - skb->head;
+
+        if( eth_proto == ETH_P_IP) {
+        	if( ip_proto == IPPROTO_TCP) skb->csum_offset = offsetof(struct tcphdr, check);
+        	else if( ip_proto == IPPROTO_UDP) skb->csum_offset = offsetof(struct udphdr, check);
+        }
+
+}
+
+/*
+ * The hardware data rate on 'collective' is 6 bits/cycle, i.e. 5100 Mb/s.
+ * We carry 240 bytes of payload in each 256-byte packet, and there are some bytes of 'overhead' as well
+ * (CRC, opcode, and a few others), giving a 'peak performance' TCP/IP data rate of a little under 4781 Mb/s.
+ * The 'collective' hardware should be able to do this in both directions simultaneously.
+ *
+ * Driving data into the compute fabric from the 10gE link can achieve more or less this, by using one core as
+ * interrupt handler for the 10gE and another core as interrupt handler for the collective, if you run (say)
+ * 16 TCP/IP sessions through the 10gE and the IO node, one to each compute node in the PSET.
+ *
+ * Driving data out of the compute fabric and into the 10gE in the normal way for linux device drivers causes
+ * the core handling the collective interrupt to go 100% busy; there are not enough cycles to drain the collective
+ * FIFO and also go through the linux networking stack. I have seen about 4Gb/s this way.
+ * To get the last 15% or so, it seems necessary to have more than one core helping with this work.
+ *
+ * I'm trying to do this by having one core handle the 'collective' interrupt and drain the FIFO, and then
+ * hand the sk_buff off to another core via a 'work queue', so that this second core can drive the linux
+ * network stack.
+ *
+ * I haven't measured the simultaneous-bidirectional data rate capability.
+ *
+ */
+static int bgnet_receive(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto* proto)
+{
+  TRACE("(>) skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ;
+  if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto )
+  {
+    struct net_device *dev = (struct net_device*)proto->private;
+    struct bgnet_dev *bgnet = netdev_priv(dev);
+/*     struct net_device *dev = (struct net_device*)((void *)bgnet - */
+/*                                                   netdev_priv(NULL)); */
+
+    TRACE("bgnet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]",
+          skb->data, skb->len, lnkhdr->opt.opt_net.pad_head,
+           lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__);
+
+/*     if (skb->len % BGNET_FRAG_MTU != 0) */
+/*       printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */
+
+    /* skb_pull and trim check for over/underruns. For 0 size the
+     * add/subtract is the same as a test */
+    __skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
+    __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
+
+    if (lnkhdr->src_key == bgnet->bgcol_vector) {
+        /* drop ether packets that are from ourselves */
+        /* bg tree device sends packets to itself when broadcasting */
+        kfree_skb(skb);
+        return 0;
+    }
+
+     /* dump_skb(skb); */
+
+    dumpmem(skb->data,skb->len,"Frame delivered via collective") ;
+
+    skb->dev = dev;
+    skb->protocol = eth_type_trans(skb, dev);
+
+    if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
+    stamp_checksum_place_in_skb(skb) ;
+
+/* #if defined(TRUST_TREE_CRC) */
+/*     skb->ip_summed = CHECKSUM_PARTIAL ; // Frame was checked by CRC, but we would need a checksum if it is being forwarded off the BGP fabric */
+/* //    // Packets from tree-local addresses have been verified by tree hardware */
+/* //      { */
+/* //        struct ethhdr *eth = (struct ethhdr *)skb->data; */
+/* //        if (bgnet->eth_mask == 0 || */
+/* //            ((bgnet->eth_mask & *(unsigned int *)(&eth->h_source[0])) == */
+/* //             (bgnet->eth_local))) */
+/* //          { */
+/* //               skb->ip_summed = CHECKSUM_UNNECESSARY ; */
+/* //        } */
+/* //        else */
+/* //          { */
+/* //            skb->ip_summed = CHECKSUM_NONE ; */
+/* //          } */
+/* //      } */
+/* #endif */
+
+    TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ",
+        skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ;
+    dumpmem(skb->data,skb->len,"Frame after stripping header") ;
+    dev->last_rx = jiffies;
+    bgnet->stats.rx_packets++;
+    bgnet->stats.rx_bytes += skb->len;
+
+    TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+        bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ;
+/*     TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */
+/*     if( k_deliver_via_workqueue &&  bgnet->bgcol->deliver_via_workqueue ) */
+/* 	    { */
+/* 		  bgnet_deliver_via_workqueue(skb) ; */
+/* 	    } */
+/*     else */
+/* 	    { */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+    if( bgcol_diagnostic_use_napi)
+	    {
+			    {
+				    TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ;
+				    netif_receive_skb(skb) ;
+			    }
+	    }
+    else
+	    {
+		    netif_rx(skb);
+	    }
+#else
+    netif_rx(skb);
+#endif
+/* 	    } */
+    TRACE("bgnet_receive after-netif-rx  bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+        bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+
+  }
+  TRACE("(<)") ;
+
+    return 0;
+}
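
A minimal sketch of the core-to-core hand-off described in the comment above bgnet_receive, assuming one work item drains bg_col's skb_list_for_delivering on k_WorkqueueDeliveryCPU (deliver_work, deliver_work_func and hand_off are illustrative names; the driver's real queueing code lives elsewhere in this patch):

    static struct work_struct deliver_work; /* INIT_WORK(..., deliver_work_func) at init */

    /* Runs on k_WorkqueueDeliveryCPU: drive the network stack from process context. */
    static void deliver_work_func(struct work_struct *work)
    {
            struct bg_col *col = bgcol_get_dev();
            struct sk_buff *skb;

            while ((skb = skb_dequeue(&col->skb_list_for_delivering)) != NULL)
                    netif_rx_ni(skb);
    }

    /* Called on the FLIH core once a complete frame has been pulled from the FIFO. */
    static void hand_off(struct bg_col *col, struct sk_buff *skb)
    {
            skb_queue_tail(&col->skb_list_for_delivering, skb);
            schedule_work_on(k_WorkqueueDeliveryCPU, &deliver_work);
    }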
+
+static int bgnet_receive_trimmed(struct bg_col *bgcol, struct sk_buff *skb,  struct bglink_proto* proto, unsigned int src_key )
+{
+  TRACE("(>) skb=%p proto=%p", skb,proto) ;
+  if( skb != NULL && proto != NULL && -1)
+  {
+    struct net_device *dev = (struct net_device*)proto->private;
+    struct bgnet_dev *bgnet = netdev_priv(dev);
+/*     struct net_device *dev = (struct net_device*)((void *)bgnet - */
+/*                                                   netdev_priv(NULL)); */
+
+    TRACE("bgnet rcvd pkt: data=%p, len=%d",
+          skb->data, skb->len);
+    if( src_key != bgnet->bgcol_vector)
+	    {
+		    dumpmem(skb->data,skb->len,"Frame delivered via collective") ;
+
+		    skb->dev = dev;
+		    skb->protocol = eth_type_trans(skb, dev);
+
+		    if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
+		    stamp_checksum_place_in_skb(skb) ;
+
+
+		    TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ",
+			skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ;
+		    dumpmem(skb->data,skb->len,"Frame after stripping header") ;
+		    dev->last_rx = jiffies;
+		    bgnet->stats.rx_packets++;
+		    bgnet->stats.rx_bytes += skb->len;
+
+		    TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+			bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ;
+		/*     TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */
+		#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+		    if( bgcol_diagnostic_use_napi)
+			    {
+					    {
+						    TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ;
+						    netif_receive_skb(skb) ;
+					    }
+			    }
+		    else
+			    {
+				    netif_rx(skb);
+			    }
+		#else
+		    netif_rx_ni(skb); // In a workqueue handler ...
+		#endif
+		    TRACE("bgnet_receive after-netif-rx  bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+			bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+	  }
+    else
+  	  {
+  		   /*   a discardable self-send */
+  		  dev_kfree_skb(skb) ;
+  	  }
+
+  }
+  TRACE("(<)") ;
+
+    return 0;
+}
+
+
+/*  A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */
+static int col_reflect(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr,
+       struct bglink_proto* proto)
+{
+  TRACE("(>) col_reflect skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ;
+  if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto )
+  {
+    struct net_device *dev = (struct net_device*)proto->private;
+    struct bgnet_dev *bgnet = netdev_priv(dev);
+
+
+    TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]",
+    skb->data, skb->len, lnkhdr->opt.opt_net.pad_head,
+     lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__);
+
+/*     if (skb->len % BGNET_FRAG_MTU != 0) */
+/*   printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */
+
+    /* skb_pull and trim check for over/underruns. For 0 size the
+     * add/subtract is the same as a test */
+    __skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
+    __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
+     /*  A 'broadcast' packet needs delivering locally as well as reflecting */
+      {
+        struct ethhdr *eth = (struct ethhdr *)skb->data;
+        if (is_broadcast_ether_addr(eth->h_dest)) {
+          struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL);
+          if( localskb )
+            {
+              dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ;
+              localskb->dev = dev;
+              localskb->protocol = eth_type_trans(localskb, dev);
+
+              localskb->ip_summed = CHECKSUM_UNNECESSARY ;  /*  Packet was from tree, h/w verified it */
+
+              TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ",
+                  localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ;
+              dumpmem(localskb->data,localskb->len,"Frame after stripping header") ;
+              dev->last_rx = jiffies;
+              bgnet->stats.rx_packets++;
+              bgnet->stats.rx_bytes += localskb->len;
+              TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+                  bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+/*               TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+		    if( bgcol_diagnostic_use_napi)
+			    {
+				    TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ;
+				    netif_receive_skb(localskb) ;
+			    }
+		    else
+			    {
+				    netif_rx(localskb);
+			    }
+#else
+              netif_rx(localskb) ;
+#endif
+             TRACE("col_reflect after-netif-rx  bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+                  bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+          }
+        }
+      }
+
+
+     /* dump_skb(skb); */
+    col_start_xmit(skb, dev) ;
+  }
+
+  TRACE("(<) col_reflect") ;
+
+    return 0;
+}
+
+/*  A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */
+static int col_reflect_trimmed(struct bg_col *bgcol, struct sk_buff *skb,
+       struct bglink_proto* proto, unsigned int src_key )
+{
+  TRACE("(>) col_reflect skb=%p proto=%p", skb,proto) ;
+  if( skb != NULL && proto != NULL && -1 != (int) proto )
+  {
+    struct net_device *dev = (struct net_device*)proto->private;
+    struct bgnet_dev *bgnet = netdev_priv(dev);
+
+
+    TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d",
+    skb->data, skb->len);
+
+
+     /*  A 'broadcast' packet needs delivering locally as well as reflecting */
+      {
+        struct ethhdr *eth = (struct ethhdr *)skb->data;
+        if (is_broadcast_ether_addr(eth->h_dest)) {
+          struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL);
+          if( localskb )
+            {
+              dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ;
+              localskb->dev = dev;
+              localskb->protocol = eth_type_trans(localskb, dev);
+
+              localskb->ip_summed = CHECKSUM_UNNECESSARY ;  /*  Packet was from tree, h/w verified it */
+
+              TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ",
+                  localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ;
+              dumpmem(localskb->data,localskb->len,"Frame after stripping header") ;
+              dev->last_rx = jiffies;
+              bgnet->stats.rx_packets++;
+              bgnet->stats.rx_bytes += localskb->len;
+              TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+                  bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+/*               TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+		    if( bgcol_diagnostic_use_napi)
+			    {
+				    TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ;
+				    netif_receive_skb(localskb) ;
+			    }
+		    else
+			    {
+				    netif_rx(localskb);
+			    }
+#else
+              netif_rx(localskb) ;
+#endif
+             TRACE("col_reflect after-netif-rx  bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+                  bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+          }
+        }
+      }
+
+
+     /* dump_skb(skb); */
+    col_start_xmit(skb, dev) ;
+  }
+
+  TRACE("(<) col_reflect") ;
+
+    return 0;
+}
+
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void bgnet_poll(struct net_device *dev)
+{
+    /* no-op; packets are fed by the col device */
+}
+#endif
+
+static inline int is_torus_ether_addr(const u8 *addr)
+{
+    return ((addr[0] & 0x7) == 0x6);
+}
+
+
+unsigned int find_xyz_address(unsigned int ip) ;
+
+
+static int bgnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+  col_start_xmit(skb, dev) ;
+  return 0 ;
+}
+
+static void bgnet_uninit(struct net_device *dev)
+{
+    struct bgnet_dev *bgnet = netdev_priv(dev);
+
+	bglink_unregister_proto(&bgnet->lnk);
+	bglink_unregister_proto(&bgnet->lnkreflect);
+
+}
+
+static struct net_device_stats *bgnet_get_stats(struct net_device *dev)
+{
+    struct bgnet_dev* bgnet = netdev_priv(dev);
+
+    return &bgnet->stats;
+}
+
+
+static int bgnet_set_mac_addr(struct net_device* netDev,
+			      void* p)
+{
+       struct sockaddr* addr = p;
+
+        if (!is_valid_ether_addr(addr->sa_data))
+                return -EADDRNOTAVAIL;
+
+        memcpy(netDev->dev_addr, addr->sa_data, netDev->addr_len);
+
+	return 0;
+}
+
+
+static int bgnet_set_config(struct net_device* netDev,
+			    struct ifmap* map)
+{
+	int rc = 0;
+	struct bgnet_dev* bgnet = netdev_priv(netDev);
+
+	 /*  Set this with ifconfig <interface> port <collective virtual channel> */
+	if (map->port)
+		bgnet->bgcol_channel = map->port;
+
+	 /*  Set this with ifconfig <interface> io_addr <collective route> */
+	if (map->base_addr)
+		bgnet->bgcol_route = map->base_addr;
+
+	return rc;
+}
+
+
+static int  bgnet_init(struct net_device *dev)
+{
+    struct bgnet_dev *bgnet = netdev_priv(dev);
+    TRACE("(>) bgnet_init") ;
+    bgnet->bgcol_route = 0 /*15*/;
+#define ETH_COL_CHANNEL 0
+    bgnet->bgcol_channel = 0 ;
+/*     bgnet->bgcol_channel = (bgnet_personality.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 1 : 0; */
+/*     bgnet->eth_bridge_vector = -1; */
+/*     bgnet->link_protocol = BGLINK_P_NET; */
+/*     bgnet->net_device = dev; */
+
+    bgnet->bgcol = bgcol_get_dev();
+    TRACE("(=) bgnet->bgcol=%p",bgnet->bgcol) ;
+
+    if (!bgnet->bgcol)
+	return -1;
+
+    bgnet->bgcol->bgnet_channel = bgnet->bgcol_channel ;
+/*     bgnet->phandle_tree = 3; */
+/*     bgnet->phandle_torus = 0; */
+/* //    bgnet->tree_route = 15;  // 15 is 'partition flood' */
+/*     bgnet->tree_route = 0 ;    // 0 is 'compute to IO' or 'IO to compute' */
+/*     bgnet->tree_channel = BGNET_TREE_CHANNEL ; */
+/*     bgnet->eth_mask = 0; */
+/* //    bgnet->eth_bridge_vector = 0; // route through the I/O node? (personality.Network_Config.IONodeRank) */
+/*     bgnet->eth_bridge_vector = personality.Network_Config.IOnodeRank; // route through the I/O node? (personality.Network_Config.IONodeRank) */
+    bgnet->eth_bridge_vector = bgnet_personality.Network_Config.IOnodeRank;  /*  route through the I/O node? (personality.Network_Config.IONodeRank) */
+    bgnet->bgcol_protocol = 1;
+    bgnet->bgcol_reflector_protocol = 2 ;  /*  CN requests reflection from ION */
+
+    if( bgnet_personality.Network_Config.Rank != bgnet_personality.Network_Config.IOnodeRank)
+      {
+        /* On compute nodes, run a global interrupt barrier here with a view to aligning the printk timestamps */
+        bgcol_align_timebase() ;
+      }
+
+/*     bgnet->i_am_ionode = ( personality.Network_Config.IOnodeRank == personality.Network_Config.Rank) ; */
+#if 0
+    p = get_property(np, "local-mac-address", NULL);
+    if (p == NULL) {
+        printk(KERN_ERR "%s: Can't find local-mac-address property\n",
+               np->full_name);
+        goto err;
+    }
+    memcpy(dev->dev_addr, p, 6);
+#endif
+    dev->dev_addr[0] = 0x00;
+    dev->dev_addr[1] = 0x80;
+    *((unsigned*)(&dev->dev_addr[2])) = 0x46000000u | bgnet_personality.Network_Config.Rank;  /*  why 0x46yyyyyy ??? */
+
+    bgnet->bgcol_vector = *(unsigned int *)(&dev->dev_addr[2]);
+    bgnet->eth_local = bgnet->eth_mask & *(unsigned int *)&dev->dev_addr[0];
+
+/*     spin_lock(&bgnet_lock); */
+    if (list_empty(&bgnet_list)) {
+	 /*  register with col */
+/* 	bgnet_lnk.lnk_proto = bgnet->link_protocol; */
+/* 	bgnet_lnk.receive_from_self = 0; */
+/* 	bgnet_lnk.col_rcv = col_receive; */
+/* 	bgnet_lnk.col_flush = col_flush; */
+/* 	bgnet_lnk.torus_rcv = torus_receive; */
+/* 	bglink_register_proto(&bgnet_lnk); */
+	    bgnet->lnk.lnk_proto = bgnet->bgcol_protocol;
+	    bgnet->lnk.col_rcv = bgnet_receive;
+	    bgnet->lnk.col_rcv_trimmed = bgnet_receive_trimmed;
+	    bgnet->lnk.private = dev;
+	    bglink_register_proto(&bgnet->lnk);
+
+	    bgnet->lnkreflect.lnk_proto = bgnet->bgcol_reflector_protocol;
+	    bgnet->lnkreflect.col_rcv = col_reflect;
+	    bgnet->lnkreflect.col_rcv_trimmed = col_reflect_trimmed;
+	    bgnet->lnkreflect.private = dev;
+	    bglink_register_proto(&bgnet->lnkreflect);
+
+	 /*  Hook for the tree interrupt handler to find the 'bgnet' */
+	    bgnet->bgcol->bgnet = bgnet ;
+    }
+/*     list_add_rcu(&bgnet->list, &bgnet_list); */
+/*  */
+/*     spin_unlock(&bgnet_lock); */
+/*  */
+/*     skb_queue_head_init(&bgnet->pending_skb_list); */
+    bgcol_enable_interrupts(bgnet->bgcol) ;   /*  Should be able to run tree interrupts now */
+
+
+    TRACE("(<) bgnet_init") ;
+    return 0;
+}
+
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+static int bgnet_poll_napi(struct napi_struct * napi, int budget)
+{
+	TRACEN(k_t_napi,"(>) napi=%p budget=%d",napi,budget) ;
+	bgcol_duplex_slih(0) ;
+	TRACEN(k_t_napi,"(<)") ;
+	return 0 ;
+}
+#endif
+
+#if defined(HAVE_NET_DEVICE_OPS)
+static const struct net_device_ops netdev_ops = {
+    .ndo_change_mtu = bgnet_change_mtu ,
+    .ndo_get_stats = bgnet_get_stats ,
+    .ndo_start_xmit = bgnet_start_xmit ,
+    .ndo_init = bgnet_init ,
+    .ndo_uninit = bgnet_uninit ,
+    .ndo_open = bgnet_open ,
+    .ndo_stop = bgnet_stop ,
+    .ndo_set_config = bgnet_set_config ,
+    .ndo_set_mac_address = bgnet_set_mac_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+    .ndo_poll_controller  = bgnet_poll,
+#endif
+};
+#endif
+static int __init
+bgnet_module_init(void)
+{
+    struct bgnet_dev *bgnet;
+    struct net_device *dev;
+
+    TRACEN(k_t_general, "(>) bgnet_module_init") ;
+    dev = alloc_etherdev(sizeof(struct bgnet_dev));
+    TRACEN(k_t_general, "(=) bgnet_module_init dev=%p", dev) ;
+    if (!dev)
+	return -ENOMEM;
+
+/*     SET_MODULE_OWNER(dev); // Anachronism */
+
+     /*  Read personality. */
+    bluegene_getPersonality((void*) &bgnet_personality, sizeof(bgnet_personality));
+    bgnet = (struct bgnet_dev*) netdev_priv(dev);
+    memset(bgnet, 0, sizeof(*bgnet));
+    bgcol_module_init() ;
+/*     bgnet_init(dev); */
+
+/*     // Set the MAC address for this interface. */
+/*     if (bluegene_isIONode()) { */
+/* 	unsigned char ipOctet2 = (bgnet_personality.Ethernet_Config.IPAddress.octet[13] + 1) & 0xfc; */
+/*  */
+/* 	dev->dev_addr[0] = ipOctet2 | 2; */
+/* 	dev->dev_addr[1] = bgnet_personality.Ethernet_Config.IPAddress.octet[14]; */
+/* 	dev->dev_addr[2] = bgnet_personality.Ethernet_Config.IPAddress.octet[15]; */
+/* 	dev->dev_addr[3] = ((bgnet_personality.Network_Config.Rank >> 16) & 0x3f) | (ipOctet2 << 6); */
+/* 	dev->dev_addr[4] = (unsigned char) ((bgnet_personality.Network_Config.Rank >> 8)); */
+/* 	dev->dev_addr[5] = (unsigned char) bgnet_personality.Network_Config.Rank; */
+/*     } else */
+/* 	memcpy(dev->dev_addr, bgnet_personality.Ethernet_Config.EmacID, sizeof(dev->dev_addr)); */
+
+#if defined(HAVE_NET_DEVICE_OPS)
+    dev->netdev_ops = &netdev_ops ;
+#else
+    dev->init			= bgnet_init;
+    dev->uninit			= bgnet_uninit;
+    dev->get_stats	        = bgnet_get_stats;
+    dev->hard_start_xmit	= bgnet_start_xmit;
+    dev->change_mtu		= bgnet_change_mtu;
+    dev->open			= bgnet_open;
+    dev->stop			= bgnet_stop;
+    dev->set_config		= bgnet_set_config;
+    dev->set_mac_address	= bgnet_set_mac_addr;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+    dev->poll_controller	= bgnet_poll;
+#endif
+#endif
+    dev->mtu      = BGNET_DEFAULT_MTU;
+
+/*  Tried turning checksum generation off, but this resulted in packets routed off the BGP not having checksums */
+/*  and lack of interoperability with front-end nodes */
+/*  (try CHECKSUM_PARTIAL above to see if the TOMAL will generate an IP checksum in this circumstance) */
+    dev->features  = k_trust_collective_crc
+                   ? (NETIF_F_HIGHDMA | NETIF_F_NO_CSUM)
+                   :  NETIF_F_HIGHDMA ;
+/*     if( k_trust_collective_crc) */
+/* 	    { */
+/* 		    dev->features  = NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM ; */
+/* 	    } */
+/*     else */
+/* 	    { */
+/* 		    dev->features = NETIF_F_HIGHDMA ; */
+/* 	    } */
+
+/* #if defined(TRUST_TREE_CRC) */
+/*     dev->features               = NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA ; */
+/* #endif */
+/*     dev->features |= NETIF_F_NO_CSUM; */
+
+    TRACEN(k_t_general,"(=) dev->name=%s",
+        dev->name
+        ) ;
+    {
+      int rc = register_netdev(dev) ;
+      TRACEN(k_t_general, "(=) bgnet_module_init register_netdev rc=%d", rc) ;
+      if( rc != 0 )
+	goto err;
+    }
+
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+    netif_napi_add(dev,&bgnet->napi, bgnet_poll_napi, k_collective_budget) ;
+    napi_enable(&bgnet->napi) ;
+#endif
+     /* increase header size to fit torus hardware header */
+/*     if (bgnet->torus) */
+/* 	dev->hard_header_len	+= 16; */
+
+    if (bgnet->eth_bridge_vector != -1)
+        printk(KERN_INFO "      bridge 0x%06x\n", bgnet->eth_bridge_vector);
+
+    TRACEN(k_t_general, "(<) bgnet_module_init rc=0") ;
+    return 0;
+
+ err:
+    free_netdev(dev);
+    TRACEN(k_t_general, "(<) bgnet_module_init err rc=-1") ;
+    return -1;
+}
+
+
+/* static void __exit */
+/* bgnet_module_exit (void) */
+/* { */
+/* 	return; */
+/* } */
+
+module_init(bgnet_module_init);
+/* module_exit(bgnet_module_exit); */
diff --git a/drivers/net/bgp_collective/bgnet.h b/drivers/net/bgp_collective/bgnet.h
new file mode 100644
index 0000000..1ba805f
--- /dev/null
+++ b/drivers/net/bgp_collective/bgnet.h
@@ -0,0 +1,151 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Volkmar Uhlig <vuhlig@us.ibm.com>
+ *          Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description:   definitions for BG networks
+ *
+ *
+ ********************************************************************/
+
+#ifndef __DRIVERS__NET__BLUEGENE__BGNET_H__
+#define __DRIVERS__NET__BLUEGENE__BGNET_H__
+
+/* static inline unsigned int BG_IRQ(unsigned int group, unsigned int irq) */
+/* { */
+/* 	return ((group) << 5 | (irq)) ; */
+/* } */
+/* #define BG_IRQ(group, irq)	((group) << 5 | (irq)) */
+
+
+/**********************************************************************
+ * link layer
+ **********************************************************************/
+
+/* enum { */
+/* 	BGNET_P_ETH0 = 1 , */
+/* 	BGNET_P_ETH1 = 2 , */
+/* 	BGNET_P_ETH2 = 3 , */
+/* 	BGNET_P_ETH3 = 4 , */
+/* 	BGNET_P_ETH4 = 5 , */
+/* 	BGNET_P_ETH5 = 6 , */
+/* 	BGNET_P_ETH6 = 7 , */
+/* 	BGNET_P_ETH7 = 8 , */
+/* 	BGNET_P_ETH8 = 9 , */
+/* 	BGNET_P_LAST_ETH = BGNET_P_ETH8 , */
+/* 	BGNET_P_CONSOLE = 20 */
+/* }; */
+/* //#define BGNET_P_ETH0		1 */
+/* //#define BGNET_P_ETH1            2 */
+/* //#define BGNET_P_ETH2            3 */
+/* //#define BGNET_P_ETH3            4 */
+/* //#define BGNET_P_ETH4            5 */
+/* //#define BGNET_P_ETH5            6 */
+/* //#define BGNET_P_ETH6            7 */
+/* //#define BGNET_P_ETH7            8 */
+/* //#define BGNET_P_ETH8            9 */
+/* //#define BGNET_P_LAST_ETH        BGNET_P_ETH8 */
+/* // */
+/* //#define BGNET_P_CONSOLE		20 */
+
+/* Facility for using multiple cores in support of 'collective'; only enable it if multiple cores are available ... */
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC)
+#define COLLECTIVE_TREE_AFFINITY
+#endif
+
+#if defined(COLLECTIVE_TREE_AFFINITY)
+/* On IO nodes, 10gE will be using core 0. On Compute nodes, torus will be using core 2. So exploit cores 1 and 3 for collective ... */
+enum {
+	k_TreeAffinityCPU = 1 ,
+	k_WorkqueueDeliveryCPU = 3
+};
+#else
+enum {
+	k_TreeAffinityCPU = 0 ,
+	k_WorkqueueDeliveryCPU = 0
+};
+#endif
+
+enum {
+	BGNET_FRAG_MTU = 240 ,
+/* 	BGNET_MAX_MTU = BGNET_FRAG_MTU * 128 , */
+	BGNET_DEFAULT_MTU = ETH_DATA_LEN
+};
+/* #define BGNET_FRAG_MTU		240 */
+/* #define BGNET_MAX_MTU		(BGNET_FRAG_MTU * 128) */
+/* //#define BGNET_DEFAULT_MTU	(BGNET_FRAG_MTU * 30 - 12) */
+/* #define BGNET_DEFAULT_MTU	ETH_DATA_LEN */
+
+/* // Which bgcol channel to use for the driver */
+/* #define BGNET_TREE_CHANNEL 0 */
+
+enum {
+	k_trust_collective_crc =
+#if defined(BGP_COLLECTIVE_IP_CHECKSUM)
+		0
+#else
+		1
+#endif
+		/*  Whether the IP layer should trust the BGP hardware CRC on the collective network */
+};
+
+enum {
+	k_collective_budget = 1000  /*  Number of frames we are willing to collect from the tree before we 'yield' */
+};
+
+enum {
+	k_deliver_via_workqueue = 1 /* Whether to deliver via a work queue (on another core) */
+};
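As a sketch of how the delivery constants above might be wired up, assuming the driver owns a workqueue and a work item (bgnet_wq and bgnet_rx_work are hypothetical names, not part of this patch):

	#include <linux/workqueue.h>

	static struct workqueue_struct *bgnet_wq;	/* hypothetical */
	static struct work_struct bgnet_rx_work;	/* hypothetical */

	static void bgnet_schedule_delivery(void)
	{
		/* Push frame delivery onto the core reserved for it above. */
		if (k_deliver_via_workqueue)
			queue_work_on(k_WorkqueueDeliveryCPU, bgnet_wq,
				      &bgnet_rx_work);
	}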
+struct bgnet_dev {
+    struct bg_col *bgcol;
+    unsigned int bgcol_route;
+    unsigned int bgcol_channel;
+    unsigned short bgcol_protocol;
+    unsigned short bgcol_reflector_protocol;
+    unsigned int bgcol_vector;
+    unsigned int eth_mask;
+    unsigned int eth_local;
+    unsigned int eth_bridge_vector;
+    struct bglink_proto lnk;
+    struct bglink_proto lnkreflect;
+    struct net_device_stats stats;
+    u32 phandle_bgcol;
+    u32 phandle_torus;
+    struct sk_buff_head xmit_list;    /* List of skb's to be sent */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+    struct napi_struct napi;
+#endif
+};
+
+/* Map an Ethernet MAC address onto a collective-network key: broadcast
+ * becomes all-ones; anything else uses the low three octets of the MAC. */
+static inline unsigned int eth_to_key(const char *addr)
+{
+    const u8 *a = (const u8 *)addr;
+    unsigned int key;
+
+    if (is_broadcast_ether_addr(a))
+        key = ~0U;
+    else
+        key = (a[3] << 16) | (a[4] << 8) | (a[5] << 0);
+    return key;
+}
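(For example, the unicast MAC 00:11:22:33:44:55 maps to key 0x334455, while the broadcast address ff:ff:ff:ff:ff:ff maps to 0xffffffff.)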
+
+#endif /* !__DRIVERS__NET__BLUEGENE__BGNET_H__ */
diff --git a/drivers/net/bgp_collective/bgp_dcr.h b/drivers/net/bgp_collective/bgp_dcr.h
new file mode 100644
index 0000000..121553d
--- /dev/null
+++ b/drivers/net/bgp_collective/bgp_dcr.h
@@ -0,0 +1,1041 @@
+/*********************************************************************
+ *                
+ * Description:   BGP DCR map (copied from bpcore)
+ *                
+ * Copyright (c) 2007, 2008 International Business Machines
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *                
+ ********************************************************************/
+
+#ifndef _BGP_DCR_H_
+#define _BGP_DCR_H_
+
+#define _BN(b)    ((1<<(31-(b))))
+#define _B1(b,x)  (((x)&0x1)<<(31-(b)))
+#define _B2(b,x)  (((x)&0x3)<<(31-(b)))
+#define _B3(b,x)  (((x)&0x7)<<(31-(b)))
+#define _B4(b,x)  (((x)&0xF)<<(31-(b)))
+#define _B5(b,x)  (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x)  (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x)  (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x)  (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x)  (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
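These macros follow PowerPC big-endian bit numbering: bit 0 is the most significant bit of the 32-bit word, _BN(b) sets single bit b, and _Bn(b,x) places an n-bit value whose least significant bit lands at bit b. A small illustrative self-check (plain C, not part of the patch):

	#include <assert.h>

	int main(void)
	{
		assert(_BN(0)  == 0x80000000u);		/* MSb in this numbering */
		assert(_BN(31) == 0x00000001u);		/* LSb */
		assert(_B4(3, 0xF)   == 0xF0000000u);	/* 4-bit field in bits 0..3 */
		assert(_B8(31, 0xAB) == 0x000000ABu);	/* 8-bit field in bits 24..31 */
		return 0;
	}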
+
+#if 0
+#define _BGP_DCR_BIC        (0x000)                      /*  0x000-0x1ff: BIC (includes MCCU functionality) */
+#define _BGP_DCR_BIC_END    (_BGP_DCR_BIC + 0x1FF)       /*  0x1ff: BIC (includes MCCU functionality) */
+
+#define _BGP_DCR_SERDES     (0x200)                      /*  0x200-0x3ff: Serdes Config */
+#define _BGP_DCR_SERDES_END (_BGP_DCR_SERDES + 0x1FF)    /*  0x3ff: Serdes Config End */
+
+#define _BGP_DCR_TEST       (0x400)                      /*  0x400-0x47f: Test Interface */
+#define _BGP_DCR_TEST_END   (_BGP_DCR_TEST + 0x07F)      /*  0x400-0x47f: Test Interface End */
+
+#define _BGP_DCR_L30        (0x500)                      /*  0x500-0x53f: L3-Cache 0 */
+#define _BGP_DCR_L30_END    (_BGP_DCR_L30 + 0x03F)       /*  0x53f: L3-Cache 0 End */
+
+#define _BGP_DCR_L31        (0x540)                      /*  0x540-0x57f: L3-Cache 1 */
+#define _BGP_DCR_L31_END    (_BGP_DCR_L31 + 0x03F)       /*  0x57f: L3-Cache 1 End */
+
+#define _BGP_DCR_XAUI       (0x580)                      /*  0x580-0x5bf: XAUI config */
+#define _BGP_DCR_XAUI_END   (_BGP_DCR_XAUI + 0x03F)      /*  0x5bf: XAUI config End */
+
+#define _BGP_DCR_SRAM       (0x610)                      /*  0x610-0x61f: SRAM unit (Includes Lockbox functionality) */
+#define _BGP_DCR_SRAM_END   (_BGP_DCR_SRAM + 0x00F)      /*  0x61f: SRAM unit (Includes Lockbox functionality) */
+
+#define _BGP_DCR_DEVBUS     (0x620)                      /*  0x620-0x62f: DevBus Arbiter */
+#define _BGP_DCR_DEVBUS_END (_BGP_DCR_DEVBUS + 0x00F)    /*  0x62f: DevBus Arbiter End */
+
+#define _BGP_DCR_NETBUS     (0x630)                      /*  0x630-0x63f: NetBus Arbiter */
+#define _BGP_DCR_NETBUS_END (_BGP_DCR_NETBUS + 0x00F)    /*  0x63f: NetBus Arbiter End */
+
+#define _BGP_DCR_DMAARB     (0x640)                      /*  0x640-0x64f: DMA arbiter (former PLB slave) */
+#define _BGP_DCR_DMAARB_END (_BGP_DCR_DMAARB + 0x00F)    /*  0x64f: DMA arbiter (former PLB slave) End */
+
+#define _BGP_DCR_DCRARB     (0x650)                      /*  0x650-0x65f: DCR arbiter */
+#define _BGP_DCR_DCRARB_END (_BGP_DCR_DCRARB + 0x00F)    /*  0x65f: DCR arbiter End */
+
+#define _BGP_DCR_GLOBINT     (0x660)                     /*  0x660-0x66F: Global Interrupts */
+#define _BGP_DCR_GLOBINT_END (_BGP_DCR_GLOBINT + 0x00F)  /*  0x66F: Global Interrupts End */
+
+#define _BGP_DCR_CLOCKSTOP     (0x670)                       /*  0x670-0x67F: Clock Stop */
+#define _BGP_DCR_CLOCKSTOP_END (_BGP_DCR_CLOCKSTOP + 0x00F)  /*  0x67F: Clock Stop End */
+
+#define _BGP_DCR_ENVMON      (0x680)                     /*  0x680-0x68F: Environmental Monitor */
+#define _BGP_DCR_ENVMON_END  (_BGP_DCR_ENVMON + 0x00F)   /*  0x68F: Env Mon End */
+
+#define _BGP_DCR_FPU        (0x700)                      /*  0x700-0x77f: Hummer3 00/01/10/11 */
+#define _BGP_DCR_FPU_END    (_BGP_DCR_FPU + 0x07F)       /*  0x77f: Hummer3 00/01/10/11 End */
+
+#define _BGP_DCR_L2         (0x780)                      /*  0x780-0x7ff: L2-Cache 00/01/10/11 */
+#define _BGP_DCR_L2_END     (_BGP_DCR_L2 + 0x07F)        /*  0x7ff: L2-Cache 00/01/10/11 End */
+
+#define _BGP_DCR_SNOOP      (0x800)                      /*  0x800-0xbff: Snoop 00/01/10/11 */
+#define _BGP_DCR_SNOOP0     (0x800)                      /*  0x800-0x8ff: Snoop 00 */
+#define _BGP_DCR_SNOOP1     (0x900)                      /*  0x900-0x9ff: Snoop 01 */
+#define _BGP_DCR_SNOOP2     (0xA00)                      /*  0xa00-0xaff: Snoop 10 */
+#define _BGP_DCR_SNOOP3     (0xB00)                      /*  0xb00-0xbff: Snoop 11 */
+#define _BGP_DCR_SNOOP_END  (_BGP_DCR_SNOOP + 0x3FF)     /*  0xbff: Snoop 00/01/10/11 End */
+
+#define _BGP_DCR_COL       (0xc00)                      /*  0xc00-0xc7f: Tree */
+#define _BGP_DCR_COL_END   (_BGP_DCR_COL + 0x07F)      /*  0xc7f: Tree End */
+
+#define _BGP_DCR_TORUS      (0xc80)                      /*  0xc80-0xcff: Torus */
+#define _BGP_DCR_TORUS_END  (_BGP_DCR_TORUS + 0x07F)     /*  0xcff: Torus End */
+
+#define _BGP_DCR_DMA        (0xd00)                      /*  0xd00-0xdff: DMA */
+#define _BGP_DCR_DMA_END    (_BGP_DCR_DMA + 0x0FF)       /*  0xdff: DMA End */
+
+#define _BGP_DCR_DDR0       (0xe00)                      /*  0xe00-0xeff: DDR controller 0 */
+#define _BGP_DCR_DDR0_END   (_BGP_DCR_DDR0 + 0x0FF)      /*  0xeff: DDR controller 0 End */
+
+#define _BGP_DCR_DDR1       (0xf00)                      /*  0xf00-0xfff: DDR controller 1 */
+#define _BGP_DCR_DDR1_END   (_BGP_DCR_DDR1 + 0x0FF)      /*  0xfff: DDR controller 1 End */
+
+#endif
+
+/*
+ * Tree
+ */
+
+#define _BGP_TRx_DI      (0x00)     /*  Offset from Tree VCx for Data   Injection   (WO,Quad) */
+#define _BGP_TRx_HI      (0x10)     /*  Offset from Tree VCx for Header Injection   (WO,Word) */
+#define _BGP_TRx_DR      (0x20)     /*  Offset from Tree VCx for Data   Reception   (RO,Quad) */
+#define _BGP_TRx_HR      (0x30)     /*  Offset from Tree VCx for Header Reception   (RO,Word) */
+#define _BGP_TRx_Sx      (0x40)     /*  Offset from Tree VCx for Status             (RO,Word) */
+#define _BGP_TRx_SO      (0x50)     /*  Offset from Tree VCx for Status of Other VC (RO,Word) */
+
+/*  Virtual Addresses for Tree VC0 */
+#define _BGP_TR0_DI    (_BGP_VA_COL0 | _BGP_TRx_DI)
+#define _BGP_TR0_HI    (_BGP_VA_COL0 | _BGP_TRx_HI)
+#define _BGP_TR0_DR    (_BGP_VA_COL0 | _BGP_TRx_DR)
+#define _BGP_TR0_HR    (_BGP_VA_COL0 | _BGP_TRx_HR)
+#define _BGP_TR0_S0    (_BGP_VA_COL0 | _BGP_TRx_Sx)
+#define _BGP_TR0_S1    (_BGP_VA_COL0 | _BGP_TRx_SO)
+
+/*  Virtual Addresses for Tree VC1 */
+#define _BGP_TR1_DI    (_BGP_VA_COL1 | _BGP_TRx_DI)
+#define _BGP_TR1_HI    (_BGP_VA_COL1 | _BGP_TRx_HI)
+#define _BGP_TR1_DR    (_BGP_VA_COL1 | _BGP_TRx_DR)
+#define _BGP_TR1_HR    (_BGP_VA_COL1 | _BGP_TRx_HR)
+#define _BGP_TR1_S1    (_BGP_VA_COL1 | _BGP_TRx_Sx)
+#define _BGP_TR1_S0    (_BGP_VA_COL1 | _BGP_TRx_SO)
+
+/*  Packet Payload: fixed size for all Tree packets */
+#define _BGP_COL_PKT_MAX_BYTES    (256)        /*  bytes in a tree packet */
+#define _BGP_COL_PKT_MAX_SHORT    (128)
+#define _BGP_COL_PKT_MAX_LONG      (64)
+#define _BGP_COL_PKT_MAX_LONGLONG  (32)
+#define _BGP_COL_PKT_MAX_QUADS     (16)        /*  quads in a tree packet */
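Injecting a packet is therefore a header-word write followed by a fixed 256-byte payload pushed into the DI port. A rough sketch, assuming the VC0 region has been ioremap'd by platform code; note the real hardware path uses 128-bit quad stores, so the word-wise writes here are purely illustrative:

	#include <asm/io.h>

	static void tree_inject_vc0(void __iomem *vc0, u32 hdr, const u32 *payload)
	{
		int i;

		out_be32(vc0 + _BGP_TRx_HI, hdr);			/* header word */
		for (i = 0; i < _BGP_COL_PKT_MAX_BYTES / 4; i++)
			out_be32(vc0 + _BGP_TRx_DI, payload[i]);	/* payload fifo */
	}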
+
+
+/*  Packet header */
+#define  _BGP_TR_HDR_CLASS(x)           _B4( 3,x)       /*   Packet class (virtual tree) */
+#define  _BGP_TR_HDR_P2P                _BN( 4)         /*   Point-to-point enable */
+#define  _BGP_TR_HDR_IRQ                _BN( 5)         /*   Interrupt request (at receiver) enable */
+#define  _BGP_TR_HDR_OPCODE(x)          _B3( 8,x)       /*   ALU opcode */
+#define    _BGP_TR_OP_NONE                0x0           /*     No operation.  Use for ordinary routed packets. */
+#define    _BGP_TR_OP_OR                  0x1           /*     Bitwise logical OR. */
+#define    _BGP_TR_OP_AND                 0x2           /*     Bitwise logical AND. */
+#define    _BGP_TR_OP_XOR                 0x3           /*     Bitwise logical XOR. */
+#define    _BGP_TR_OP_MAX                 0x5           /*     Unsigned integer maximum. */
+#define    _BGP_TR_OP_ADD                 0x6           /*     Unsigned integer addition. */
+#define  _BGP_TR_HDR_OPSIZE(x)          _B7(15,x)       /*   Operand size (# of 16-bit words minus 1) */
+#define  _BGP_TR_HDR_TAG(x)             _B14(29,x)      /*   User-specified tag (for ordinary routed packets only) */
+#define  _BGP_TR_HDR_NADDR(x)           _B24(29,x)      /*   Target address (for P2P packets only) */
+#define  _BGP_TR_HDR_CSUM(x)            _B2(31,x)       /*   Injection checksum mode */
+#define    _BGP_TR_CSUM_NONE              0x0           /*     Do not include packet in checksums. */
+#define    _BGP_TR_CSUM_SOME              0x1           /*     Include header in header checksum.  Include all but */
+                                                        /*      first quadword in payload checksum. */
+#define    _BGP_TR_CSUM_CFG               0x2           /*     Include header in header checksum.  Include all but */
+                                                        /*      specified number of 16-bit words in payload checksum. */
+#define    _BGP_TR_CSUM_ALL               0x3           /*     Include entire packet in checksums. */
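Header words are composed by OR-ing the field macros together; for instance, a fully checksummed point-to-point packet on class 0 (a sketch using only the definitions above):

	static u32 tree_make_p2p_header(u32 target_node)
	{
		return _BGP_TR_HDR_CLASS(0)
		     | _BGP_TR_HDR_P2P
		     | _BGP_TR_HDR_NADDR(target_node)
		     | _BGP_TR_HDR_CSUM(_BGP_TR_CSUM_ALL);
	}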
+
+/*  Packet status */
+#define  _BGP_TR_STAT_IPY_CNT(x)        _B8( 7,x)       /*   Injection payload qword count */
+#define  _BGP_TR_STAT_IHD_CNT(x)        _B4(15,x)       /*   Injection header word count */
+#define  _BGP_TR_STAT_RPY_CNT(x)        _B8(23,x)       /*   Reception payload qword count */
+#define  _BGP_TR_STAT_IRQ               _BN(27)         /*   One or more reception headers with IRQ bit set */
+#define  _BGP_TR_STAT_RHD_CNT(x)        _B4(31,x)       /*   Reception header word count */
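The status macros are field constructors, so decoding a status word means shifting the field back down. A sketch, assuming _BGP_VA_COL0 is the mapped VC0 base provided by the platform headers:

	static void tree_vc0_counts(unsigned int *ipy, unsigned int *rhd)
	{
		u32 s = in_be32((void __iomem *)_BGP_TR0_S0);

		*ipy = (s >> 24) & 0xFF;	/* injection payload qwords, bits 0..7   */
		*rhd = s & 0xF;			/* reception header words,  bits 28..31 */
	}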
+
+/*  Tree Map of DCR Groupings */
+#define _BGP_DCR_TR_CLASS  (_BGP_DCR_COL + 0x00)       /*  Class Definition Registers (R/W) */
+#define _BGP_DCR_TR_DMA    (_BGP_DCR_COL + 0x0C)       /*  Network Port Diagnostic Memory Access Registers (R/W) */
+#define _BGP_DCR_TR_ARB    (_BGP_DCR_COL + 0x10)       /*  Arbiter Control Registers (R/W) */
+#define _BGP_DCR_TR_CH0    (_BGP_DCR_COL + 0x20)       /*  Channel 0 Control Registers (R/W) */
+#define _BGP_DCR_TR_CH1    (_BGP_DCR_COL + 0x28)       /*  Channel 1 Control Registers (R/W) */
+#define _BGP_DCR_TR_CH2    (_BGP_DCR_COL + 0x30)       /*  Channel 2 Control Registers (R/W) */
+#define _BGP_DCR_TR_GLOB   (_BGP_DCR_COL + 0x40)       /*  Global Registers (R/W) */
+#define _BGP_DCR_TR_REC    (_BGP_DCR_COL + 0x44)       /*  Processor Reception Registers (R/W) */
+#define _BGP_DCR_TR_INJ    (_BGP_DCR_COL + 0x48)       /*  Processor Injection Registers (R/W) */
+#define _BGP_DCR_TR_LCRC   (_BGP_DCR_COL + 0x50)       /*  Link CRCs */
+#define _BGP_DCR_TR_ERR    (_BGP_DCR_COL + 0x60)       /*  Internal error counters */
+
+
+/*  Tree Class Registers */
+/*  Note: each route descriptor register contains two class descriptors.  "LO" will refer to the lower-numbered */
+/*        of the two and "HI" will refer to the higher-numbered. */
+#define _BGP_DCR_TR_CLASS_RDR0     (_BGP_DCR_TR_CLASS + 0x00)   /*  CLASS: Route Descriptor Register for classes 0,  1 */
+#define _BGP_DCR_TR_CLASS_RDR1     (_BGP_DCR_TR_CLASS + 0x01)   /*  CLASS: Route Descriptor Register for classes 2,  3 */
+#define _BGP_DCR_TR_CLASS_RDR2     (_BGP_DCR_TR_CLASS + 0x02)   /*  CLASS: Route Descriptor Register for classes 4,  5 */
+#define _BGP_DCR_TR_CLASS_RDR3     (_BGP_DCR_TR_CLASS + 0x03)   /*  CLASS: Route Descriptor Register for classes 6,  7 */
+#define _BGP_DCR_TR_CLASS_RDR4     (_BGP_DCR_TR_CLASS + 0x04)   /*  CLASS: Route Descriptor Register for classes 8,  9 */
+#define _BGP_DCR_TR_CLASS_RDR5     (_BGP_DCR_TR_CLASS + 0x05)   /*  CLASS: Route Descriptor Register for classes 10, 11 */
+#define _BGP_DCR_TR_CLASS_RDR6     (_BGP_DCR_TR_CLASS + 0x06)   /*  CLASS: Route Descriptor Register for classes 12, 13 */
+#define _BGP_DCR_TR_CLASS_RDR7     (_BGP_DCR_TR_CLASS + 0x07)   /*  CLASS: Route Descriptor Register for classes 14, 15 */
+#define  _TR_CLASS_RDR_LO_SRC2      _BN( 1)                     /*   Class low,  source channel 2 */
+#define  _TR_CLASS_RDR_LO_SRC1      _BN( 2)                     /*   Class low,  source channel 1 */
+#define  _TR_CLASS_RDR_LO_SRC0      _BN( 3)                     /*   Class low,  source channel 0 */
+#define  _TR_CLASS_RDR_LO_TGT2      _BN( 5)                     /*   Class low,  target channel 2 */
+#define  _TR_CLASS_RDR_LO_TGT1      _BN( 6)                     /*   Class low,  target channel 1 */
+#define  _TR_CLASS_RDR_LO_TGT0      _BN( 7)                     /*   Class low,  target channel 0 */
+#define  _TR_CLASS_RDR_LO_SRCL      _BN(14)                     /*   Class low,  source local client (injection) */
+#define  _TR_CLASS_RDR_LO_TGTL      _BN(15)                     /*   Class low,  target local client (reception) */
+#define  _TR_CLASS_RDR_HI_SRC2      _BN(17)                     /*   Class high, source channel 2 */
+#define  _TR_CLASS_RDR_HI_SRC1      _BN(18)                     /*   Class high, source channel 1 */
+#define  _TR_CLASS_RDR_HI_SRC0      _BN(19)                     /*   Class high, source channel 0 */
+#define  _TR_CLASS_RDR_HI_TGT2      _BN(21)                     /*   Class high, target channel 2 */
+#define  _TR_CLASS_RDR_HI_TGT1      _BN(22)                     /*   Class high, target channel 1 */
+#define  _TR_CLASS_RDR_HI_TGT0      _BN(23)                     /*   Class high, target channel 0 */
+#define  _TR_CLASS_RDR_HI_SRCL      _BN(30)                     /*   Class high, source local client (injection) */
+#define  _TR_CLASS_RDR_HI_TGTL      _BN(31)                     /*   Class high, target local client (reception) */
+#define _BGP_DCR_TR_CLASS_ISRA     (_BGP_DCR_TR_CLASS + 0x08)   /*  CLASS: Bits 0-31 of 64-bit idle pattern */
+#define _BGP_DCR_TR_CLASS_ISRB     (_BGP_DCR_TR_CLASS + 0x09)   /*  CLASS: Bits 32-63 of 64-bit idle pattern */
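Each RDR packs two class descriptors, so configuring class 0 means writing the LO half of RDR0. A sketch of a route that bridges channel 0 and the local client (an assumed configuration, using the native DCR accessor and assuming _BGP_DCR_COL comes from the platform headers):

	#include <asm/dcr.h>

	static void tree_set_class0_route(void)
	{
		u32 rdr = _TR_CLASS_RDR_LO_SRC0 | _TR_CLASS_RDR_LO_TGT0
			| _TR_CLASS_RDR_LO_SRCL | _TR_CLASS_RDR_LO_TGTL;

		mtdcr(_BGP_DCR_TR_CLASS_RDR0, rdr);
	}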
+
+/*  Tree Network Port Diagnostic Memory Access Registers */
+/*  Note: Diagnostic access to processor injection and reception fifos is through TR_REC and TR_INJ registers. */
+#define _BGP_DCR_TR_DMA_DMAA       (_BGP_DCR_TR_DMA + 0x00)    /*  DMA: Diagnostic SRAM address */
+#define  _TR_DMA_DMAA_TGT(x)        _B3(21,x)                  /*   Target */
+#define   _TR_DMAA_TGT_RCV0           0x0                      /*    Channel 0 receiver */
+#define   _TR_DMAA_TGT_RCV1           0x1                      /*    Channel 1 receiver */
+#define   _TR_DMAA_TGT_RCV2           0x2                      /*    Channel 2 receiver */
+#define   _TR_DMAA_TGT_SND0           0x4                      /*    Channel 0 sender */
+#define   _TR_DMAA_TGT_SND1           0x5                      /*    Channel 1 sender */
+#define   _TR_DMAA_TGT_SND2           0x6                      /*    Channel 2 sender */
+#define  _TR_DMA_DMAA_VC(x)         _B1(22,x)                  /*   Virtual channel */
+#define  _TR_DMA_DMAA_PCKT(x)       _B2(24,x)                  /*   Packet number */
+#define  _TR_DMA_DMAA_WORD(x)       _B7(31,x)                  /*   Word offset within packet */
+#define _BGP_DCR_TR_DMA_DMAD       (_BGP_DCR_TR_DMA + 0x01)    /*  DMA: Diagnostic SRAM data */
+#define _BGP_DCR_TR_DMA_DMADI      (_BGP_DCR_TR_DMA + 0x02)    /*  DMA: Diagnostic SRAM data with address increment */
+#define  _TR_DMA_DMAD_ECC(x)        _B6(15,x)                  /*   ECC */
+#define  _TR_DMA_DMAD_DATA(x)       _B16(31,x)                 /*   Data */
+#define _BGP_DCR_TR_DMA_DMAH       (_BGP_DCR_TR_DMA + 0x03)    /*  DMA: Diagnostic header access */
+
+/*  Tree Arbiter Control Registers */
+#define _BGP_DCR_TR_ARB_RCFG       (_BGP_DCR_TR_ARB + 0x00)    /*  ARB: General router configuration */
+#define  _TR_ARB_RCFG_SRC00         _BN( 0)                    /*   Disable source channel 0, VC0 */
+#define  _TR_ARB_RCFG_SRC01         _BN( 1)                    /*   Disable source channel 0, VC1 */
+#define  _TR_ARB_RCFG_TGT00         _BN( 2)                    /*   Disable target channel 0, VC0 */
+#define  _TR_ARB_RCFG_TGT01         _BN( 3)                    /*   Disable target channel 0, VC1 */
+#define  _TR_ARB_RCFG_SRC10         _BN( 4)                    /*   Disable source channel 1, VC0 */
+#define  _TR_ARB_RCFG_SRC11         _BN( 5)                    /*   Disable source channel 1, VC1 */
+#define  _TR_ARB_RCFG_TGT10         _BN( 6)                    /*   Disable target channel 1, VC0 */
+#define  _TR_ARB_RCFG_TGT11         _BN( 7)                    /*   Disable target channel 1, VC1 */
+#define  _TR_ARB_RCFG_SRC20         _BN( 8)                    /*   Disable source channel 2, VC0 */
+#define  _TR_ARB_RCFG_SRC21         _BN( 9)                    /*   Disable source channel 2, VC1 */
+#define  _TR_ARB_RCFG_TGT20         _BN(10)                    /*   Disable target channel 2, VC0 */
+#define  _TR_ARB_RCFG_TGT21         _BN(11)                    /*   Disable target channel 2, VC1 */
+#define  _TR_ARB_RCFG_LB2           _BN(25)                    /*   Channel 2 loopback enable */
+#define  _TR_ARB_RCFG_LB1           _BN(26)                    /*   Channel 1 loopback enable */
+#define  _TR_ARB_RCFG_LB0           _BN(27)                    /*   Channel 0 loopback enable */
+#define  _TR_ARB_RCFG_TOM(x)        _B2(29,x)                  /*   Timeout mode */
+#define   _TR_RCFG_TOM_NONE           0x0                      /*    Disable. */
+#define   _TR_RCFG_TOM_NRML           0x1                      /*    Normal mode, irq enabled. */
+#define   _TR_RCFG_TOM_WD             0x2                      /*    Watchdog mode, irq enabled. */
+#define  _TR_ARB_RCFG_MAN           _BN(30)                    /*   Manual mode (router is disabled). */
+#define  _TR_ARB_RCFG_RST           _BN(31)                    /*   Full arbiter reset. */
+#define _BGP_DCR_TR_ARB_RTO        (_BGP_DCR_TR_ARB + 0x01)    /*  ARB: 32 MSBs of router timeout value */
+#define _BGP_DCR_TR_ARB_RTIME      (_BGP_DCR_TR_ARB + 0x02)    /*  ARB: Value of router timeout counter */
+#define _BGP_DCR_TR_ARB_RSTAT      (_BGP_DCR_TR_ARB + 0x03)    /*  ARB: General router status */
+#define  _TR_ARB_RSTAT_REQ20        _BN( 0)                    /*   Packet available in channel 2, VC0 */
+#define  _TR_ARB_RSTAT_REQ10        _BN( 1)                    /*   Packet available in channel 1, VC0 */
+#define  _TR_ARB_RSTAT_REQ00        _BN( 2)                    /*   Packet available in channel 0, VC0 */
+#define  _TR_ARB_RSTAT_REQP0        _BN( 3)                    /*   Packet available in local client, VC0 */
+#define  _TR_ARB_RSTAT_REQ21        _BN( 4)                    /*   Packet available in channel 2, VC1 */
+#define  _TR_ARB_RSTAT_REQ11        _BN( 5)                    /*   Packet available in channel 1, VC1 */
+#define  _TR_ARB_RSTAT_REQ01        _BN( 6)                    /*   Packet available in channel 0, VC1 */
+#define  _TR_ARB_RSTAT_REQP1        _BN( 7)                    /*   Packet available in local client, VC1 */
+#define  _TR_ARB_RSTAT_FUL20        _BN( 8)                    /*   Channel 2, VC0 is full */
+#define  _TR_ARB_RSTAT_FUL10        _BN( 9)                    /*   Channel 1, VC0 is full */
+#define  _TR_ARB_RSTAT_FUL00        _BN(10)                    /*   Channel 0, VC0 is full */
+#define  _TR_ARB_RSTAT_FULP0        _BN(11)                    /*   Local client, VC0 is full */
+#define  _TR_ARB_RSTAT_FUL21        _BN(12)                    /*   Channel 2, VC1 is full */
+#define  _TR_ARB_RSTAT_FUL11        _BN(13)                    /*   Channel 1, VC1 is full */
+#define  _TR_ARB_RSTAT_FUL01        _BN(14)                    /*   Channel 0, VC1 is full */
+#define  _TR_ARB_RSTAT_FULP1        _BN(15)                    /*   Local client, VC1 is full */
+#define  _TR_ARB_RSTAT_MAT20        _BN(16)                    /*   Channel 2, VC0 is mature */
+#define  _TR_ARB_RSTAT_MAT10        _BN(17)                    /*   Channel 1, VC0 is mature */
+#define  _TR_ARB_RSTAT_MAT00        _BN(18)                    /*   Channel 0, VC0 is mature */
+#define  _TR_ARB_RSTAT_MATP0        _BN(19)                    /*   Local client, VC0 is mature */
+#define  _TR_ARB_RSTAT_MAT21        _BN(20)                    /*   Channel 2, VC1 is mature */
+#define  _TR_ARB_RSTAT_MAT11        _BN(21)                    /*   Channel 1, VC1 is mature */
+#define  _TR_ARB_RSTAT_MAT01        _BN(22)                    /*   Channel 0, VC1 is mature */
+#define  _TR_ARB_RSTAT_MATP1        _BN(23)                    /*   Local client, VC1 is mature */
+#define  _TR_ARB_RSTAT_BSY20        _BN(24)                    /*   Channel 2, VC0 is busy */
+#define  _TR_ARB_RSTAT_BSY10        _BN(25)                    /*   Channel 1, VC0 is busy */
+#define  _TR_ARB_RSTAT_BSY00        _BN(26)                    /*   Channel 0, VC0 is busy */
+#define  _TR_ARB_RSTAT_BSYP0        _BN(27)                    /*   Local client, VC0 is busy */
+#define  _TR_ARB_RSTAT_BSY21        _BN(28)                    /*   Channel 2, VC1 is busy */
+#define  _TR_ARB_RSTAT_BSY11        _BN(29)                    /*   Channel 1, VC1 is busy */
+#define  _TR_ARB_RSTAT_BSY01        _BN(30)                    /*   Channel 0, VC1 is busy */
+#define  _TR_ARB_RSTAT_BSYP1        _BN(31)                    /*   Local client, VC1 is busy */
+#define _BGP_DCR_TR_ARB_HD00       (_BGP_DCR_TR_ARB + 0x04)    /*  ARB: Next header, channel 0, VC0 */
+#define _BGP_DCR_TR_ARB_HD01       (_BGP_DCR_TR_ARB + 0x05)    /*  ARB: Next header, channel 0, VC1 */
+#define _BGP_DCR_TR_ARB_HD10       (_BGP_DCR_TR_ARB + 0x06)    /*  ARB: Next header, channel 1, VC0 */
+#define _BGP_DCR_TR_ARB_HD11       (_BGP_DCR_TR_ARB + 0x07)    /*  ARB: Next header, channel 1, VC1 */
+#define _BGP_DCR_TR_ARB_HD20       (_BGP_DCR_TR_ARB + 0x08)    /*  ARB: Next header, channel 2, VC0 */
+#define _BGP_DCR_TR_ARB_HD21       (_BGP_DCR_TR_ARB + 0x09)    /*  ARB: Next header, channel 2, VC1 */
+#define _BGP_DCR_TR_ARB_HDI0       (_BGP_DCR_TR_ARB + 0x0A)    /*  ARB: Next header, injection, VC0 */
+#define _BGP_DCR_TR_ARB_HDI1       (_BGP_DCR_TR_ARB + 0x0B)    /*  ARB: Next header, injection, VC1 */
+#define _BGP_DCR_TR_ARB_FORCEC     (_BGP_DCR_TR_ARB + 0x0C)    /*  ARB: Force control for manual mode */
+#define  _TR_ARB_FORCEC_CH0         _BN( 0)                    /*   Channel 0 is a target */
+#define  _TR_ARB_FORCEC_CH1         _BN( 1)                    /*   Channel 1 is a target */
+#define  _TR_ARB_FORCEC_CH2         _BN( 2)                    /*   Channel 2 is a target */
+#define  _TR_ARB_FORCEC_P           _BN( 3)                    /*   Local client is a target */
+#define  _TR_ARB_FORCEC_ALU         _BN( 4)                    /*   ALU is a target */
+#define  _TR_ARB_FORCEC_RT          _BN( 5)                    /*   Force route immediately */
+#define  _TR_ARB_FORCEC_STK         _BN( 6)                    /*   Sticky route: always force route */
+#define _BGP_DCR_TR_ARB_FORCER     (_BGP_DCR_TR_ARB + 0x0D)    /*  ARB: Forced route for manual mode */
+#define  _TR_ARB_FORCER_CH20        _BN( 0)                    /*   Channel 2 is a source for channel 0 */
+#define  _TR_ARB_FORCER_CH10        _BN( 1)                    /*   Channel 1 is a source for channel 0 */
+#define  _TR_ARB_FORCER_CH00        _BN( 2)                    /*   Channel 0 is a source for channel 0 */
+#define  _TR_ARB_FORCER_CHP0        _BN( 3)                    /*   Local client is a source for channel 0 */
+#define  _TR_ARB_FORCER_CHA0        _BN( 4)                    /*   ALU is a source for channel 0 */
+#define  _TR_ARB_FORCER_VC0         _BN( 5)                    /*   VC that is source for channel 0 */
+#define  _TR_ARB_FORCER_CH21        _BN( 6)                    /*   Channel 2 is a source for channel 1 */
+#define  _TR_ARB_FORCER_CH11        _BN( 7)                    /*   Channel 1 is a source for channel 1 */
+#define  _TR_ARB_FORCER_CH01        _BN( 8)                    /*   Channel 0 is a source for channel 1 */
+#define  _TR_ARB_FORCER_CHP1        _BN( 9)                    /*   Local client is a source for channel 1 */
+#define  _TR_ARB_FORCER_CHA1        _BN(10)                    /*   ALU is a source for channel 1 */
+#define  _TR_ARB_FORCER_VC1         _BN(11)                    /*   VC that is source for channel 1 */
+#define  _TR_ARB_FORCER_CH22        _BN(12)                    /*   Channel 2 is a source for channel 2 */
+#define  _TR_ARB_FORCER_CH12        _BN(13)                    /*   Channel 1 is a source for channel 2 */
+#define  _TR_ARB_FORCER_CH02        _BN(14)                    /*   Channel 0 is a source for channel 2 */
+#define  _TR_ARB_FORCER_CHP2        _BN(15)                    /*   Local client is a source for channel 2 */
+#define  _TR_ARB_FORCER_CHA2        _BN(16)                    /*   ALU is a source for channel 2 */
+#define  _TR_ARB_FORCER_VC2         _BN(17)                    /*   VC that is source for channel 2 */
+#define  _TR_ARB_FORCER_CH2P        _BN(18)                    /*   Channel 2 is a source for local client */
+#define  _TR_ARB_FORCER_CH1P        _BN(19)                    /*   Channel 1 is a source for local client */
+#define  _TR_ARB_FORCER_CH0P        _BN(20)                    /*   Channel 0 is a source for local client */
+#define  _TR_ARB_FORCER_CHPP        _BN(21)                    /*   Local client is a source for local client */
+#define  _TR_ARB_FORCER_CHAP        _BN(22)                    /*   ALU is a source for local client */
+#define  _TR_ARB_FORCER_VCP         _BN(23)                    /*   VC that is source for local client */
+#define  _TR_ARB_FORCER_CH2A        _BN(24)                    /*   Channel 2 is a source for ALU */
+#define  _TR_ARB_FORCER_CH1A        _BN(25)                    /*   Channel 1 is a source for ALU */
+#define  _TR_ARB_FORCER_CH0A        _BN(26)                    /*   Channel 0 is a source for ALU */
+#define  _TR_ARB_FORCER_CHPA        _BN(27)                    /*   Local client is a source for ALU */
+#define  _TR_ARB_FORCER_CHAA        _BN(28)                    /*   ALU is a source for ALU */
+#define  _TR_ARB_FORCER_VCA         _BN(29)                    /*   VC that is source for ALU */
+#define _BGP_DCR_TR_ARB_FORCEH     (_BGP_DCR_TR_ARB + 0x0E)    /*  ARB: Forced header for manual mode */
+#define _BGP_DCR_TR_ARB_XSTAT      (_BGP_DCR_TR_ARB + 0x0F)    /*  ARB: Extended router status */
+#define  _TR_ARB_XSTAT_BLK20        _BN( 0)                    /*   Request from channel 2, VC0 is blocked */
+#define  _TR_ARB_XSTAT_BLK10        _BN( 1)                    /*   Request from channel 1, VC0 is blocked */
+#define  _TR_ARB_XSTAT_BLK00        _BN( 2)                    /*   Request from channel 0, VC0 is blocked */
+#define  _TR_ARB_XSTAT_BLKP0        _BN( 3)                    /*   Request from local client, VC0 is blocked */
+#define  _TR_ARB_XSTAT_BLK21        _BN( 4)                    /*   Request from channel 2, VC1 is blocked */
+#define  _TR_ARB_XSTAT_BLK11        _BN( 5)                    /*   Request from channel 1, VC1 is blocked */
+#define  _TR_ARB_XSTAT_BLK01        _BN( 6)                    /*   Request from channel 0, VC1 is blocked */
+#define  _TR_ARB_XSTAT_BLKP1        _BN( 7)                    /*   Request from local client, VC1 is blocked */
+#define  _TR_ARB_XSTAT_BSYR2        _BN( 8)                    /*   Channel 2 receiver is busy */
+#define  _TR_ARB_XSTAT_BSYR1        _BN( 9)                    /*   Channel 1 receiver is busy */
+#define  _TR_ARB_XSTAT_BSYR0        _BN(10)                    /*   Channel 0 receiver is busy */
+#define  _TR_ARB_XSTAT_BSYPI        _BN(11)                    /*   Local client injection is busy */
+#define  _TR_ARB_XSTAT_BSYA         _BN(12)                    /*   ALU is busy */
+#define  _TR_ARB_XSTAT_BSYS2        _BN(13)                    /*   Channel 2 sender is busy */
+#define  _TR_ARB_XSTAT_BSYS1        _BN(14)                    /*   Channel 1 sender is busy */
+#define  _TR_ARB_XSTAT_BSYS0        _BN(15)                    /*   Channel 0 sender is busy */
+#define  _TR_ARB_XSTAT_BSYPR        _BN(16)                    /*   Local client reception is busy */
+#define  _TR_ARB_XSTAT_ARB_TO(x)    _B15(31,x)                 /*   Greedy-Arbitration timeout */
+
+/*  Tree Channel 0 Control Registers */
+#define _BGP_DCR_TR_CH0_RSTAT      (_BGP_DCR_TR_CH0 + 0x00)    /*  CH0: Receiver status */
+#define  _TR_RSTAT_RCVERR           _BN( 0)                    /*   Receiver error */
+#define  _TR_RSTAT_LHEXP            _BN( 1)                    /*   Expect link header */
+#define  _TR_RSTAT_PH0EXP           _BN( 2)                    /*   Expect packet header 0 */
+#define  _TR_RSTAT_PH1EXP           _BN( 3)                    /*   Expect packet header 1 */
+#define  _TR_RSTAT_PDRCV            _BN( 4)                    /*   Receive packet data */
+#define  _TR_RSTAT_CWEXP            _BN( 5)                    /*   Expect packet control word */
+#define  _TR_RSTAT_CSEXP            _BN( 6)                    /*   Expect packet checksum */
+#define  _TR_RSTAT_SCRBRD0          _B8(14,0xff)               /*   VC0 fifo scoreboard */
+#define  _TR_RSTAT_SCRBRD1          _B8(22,0xff)               /*   VC1 fifo scoreboard */
+#define  _TR_RSTAT_RMTSTAT          _B9(31,0x1ff)              /*   Remote status */
+#define _BGP_DCR_TR_CH0_RCTRL      (_BGP_DCR_TR_CH0 + 0x01)    /*  CH0: Receiver control */
+#define  _TR_RCTRL_FERR             _BN( 0)                    /*   Force receiver into error state */
+#define  _TR_RCTRL_RST              _BN( 1)                    /*   Reset all internal pointers */
+#define  _TR_RCTRL_FRZ0             _BN( 2)                    /*   Freeze VC0 */
+#define  _TR_RCTRL_FRZ1             _BN( 3)                    /*   Freeze VC1 */
+#define  _TR_RCTRL_RCVALL           _BN( 4)                    /*   Disable receiver CRC check and accept all packets */
+#define _BGP_DCR_TR_CH0_SSTAT      (_BGP_DCR_TR_CH0 + 0x02)    /*  CH0: Sender status */
+#define  _TR_SSTAT_SYNC             _BN( 0)                    /*   Phase of sender */
+#define  _TR_SSTAT_ARB              _BN( 1)                    /*   Arbitrating */
+#define  _TR_SSTAT_PH0SND           _BN( 2)                    /*   Sending packet header 0 */
+#define  _TR_SSTAT_PH1SND           _BN( 3)                    /*   Sending packet header 1 */
+#define  _TR_SSTAT_PDSND            _BN( 4)                    /*   Sending packet payload */
+#define  _TR_SSTAT_CWSND            _BN( 5)                    /*   Sending packet control word */
+#define  _TR_SSTAT_CSSND            _BN( 6)                    /*   Sending packet checksum */
+#define  _TR_SSTAT_IDLSND           _BN( 7)                    /*   Sending idle packet */
+#define  _TR_SSTAT_RPTR0            _B3(10,0x7)                /*   VC0 read pointer */
+#define  _TR_SSTAT_WPTR0            _B3(13,0x7)                /*   VC0 write pointer */
+#define  _TR_SSTAT_RPTR1            _B3(16,0x7)                /*   VC1 read pointer */
+#define  _TR_SSTAT_WPTR1            _B3(19,0x7)                /*   VC1 write pointer */
+#define _BGP_DCR_TR_CH0_SCTRL      (_BGP_DCR_TR_CH0 + 0x03)    /*  CH0: Sender control */
+#define  _TR_SCTRL_SYNC             _BN( 0)                    /*   Force sender to send SYNC */
+#define  _TR_SCTRL_IDLE             _BN( 1)                    /*   Force sender to send IDLE */
+#define  _TR_SCTRL_RST              _BN( 2)                    /*   Reset all internal pointers */
+#define  _TR_SCTRL_INVMSB           _BN( 3)                    /*   Invert MSB of class for loopback packets */
+#define  _TR_SCTRL_OFF              _BN( 4)                    /*   Disable (black hole) the sender */
+#define _BGP_DCR_TR_CH0_TNACK      (_BGP_DCR_TR_CH0 + 0x04)    /*  CH0: Tolerated delay from NACK to ACK status */
+#define _BGP_DCR_TR_CH0_CNACK      (_BGP_DCR_TR_CH0 + 0x05)    /*  CH0: Time since last NACK received */
+#define _BGP_DCR_TR_CH0_TIDLE      (_BGP_DCR_TR_CH0 + 0x06)    /*  CH0: Frequency to send IDLE packets */
+#define _BGP_DCR_TR_CH0_CIDLE      (_BGP_DCR_TR_CH0 + 0x07)    /*  CH0: Time since last IDLE sent */
+
+/*  Tree Channel 1 Control Registers */
+/*  Note: Register definitions are the same as those of channel 0. */
+#define _BGP_DCR_TR_CH1_RSTAT      (_BGP_DCR_TR_CH1 + 0x00)    /*  CH1: Receiver status */
+#define _BGP_DCR_TR_CH1_RCTRL      (_BGP_DCR_TR_CH1 + 0x01)    /*  CH1: Receiver control */
+#define _BGP_DCR_TR_CH1_SSTAT      (_BGP_DCR_TR_CH1 + 0x02)    /*  CH1: Sender status */
+#define _BGP_DCR_TR_CH1_SCTRL      (_BGP_DCR_TR_CH1 + 0x03)    /*  CH1: Sender control */
+#define _BGP_DCR_TR_CH1_TNACK      (_BGP_DCR_TR_CH1 + 0x04)    /*  CH1: Tolerated delay from NACK to ACK status */
+#define _BGP_DCR_TR_CH1_CNACK      (_BGP_DCR_TR_CH1 + 0x05)    /*  CH1: Time since last NACK received */
+#define _BGP_DCR_TR_CH1_TIDLE      (_BGP_DCR_TR_CH1 + 0x06)    /*  CH1: Frequency to send IDLE packets */
+#define _BGP_DCR_TR_CH1_CIDLE      (_BGP_DCR_TR_CH1 + 0x07)    /*  CH1: Time since last IDLE sent */
+
+/*  Tree Channel 2 Control Registers */
+/*  Note: Register definitions are the same as those of channel 0. */
+#define _BGP_DCR_TR_CH2_RSTAT      (_BGP_DCR_TR_CH2 + 0x00)    /*  CH2: Receiver status */
+#define _BGP_DCR_TR_CH2_RCTRL      (_BGP_DCR_TR_CH2 + 0x01)    /*  CH2: Receiver control */
+#define _BGP_DCR_TR_CH2_SSTAT      (_BGP_DCR_TR_CH2 + 0x02)    /*  CH2: Sender status */
+#define _BGP_DCR_TR_CH2_SCTRL      (_BGP_DCR_TR_CH2 + 0x03)    /*  CH2: Sender control */
+#define _BGP_DCR_TR_CH2_TNACK      (_BGP_DCR_TR_CH2 + 0x04)    /*  CH2: Tolerated delay from NACK to ACK status */
+#define _BGP_DCR_TR_CH2_CNACK      (_BGP_DCR_TR_CH2 + 0x05)    /*  CH2: Time since last NACK received */
+#define _BGP_DCR_TR_CH2_TIDLE      (_BGP_DCR_TR_CH2 + 0x06)    /*  CH2: Frequency to send IDLE packets */
+#define _BGP_DCR_TR_CH2_CIDLE      (_BGP_DCR_TR_CH2 + 0x07)    /*  CH2: Time since last IDLE sent */
+
+/*  Tree Global Registers */
+#define _BGP_DCR_TR_GLOB_FPTR      (_BGP_DCR_TR_GLOB + 0x00)   /*  GLOB: Fifo Pointer Register */
+#define  _TR_GLOB_FPTR_IPY0(x)      _B3( 3,x)                  /*   VC0 injection payload FIFO packet write pointer */
+#define  _TR_GLOB_FPTR_IHD0(x)      _B3( 7,x)                  /*   VC0 injection header  FIFO packet write pointer */
+#define  _TR_GLOB_FPTR_IPY1(x)      _B3(11,x)                  /*   VC1 injection payload FIFO packet write pointer */
+#define  _TR_GLOB_FPTR_IHD1(x)      _B3(15,x)                  /*   VC1 injection header  FIFO packet write pointer */
+#define  _TR_GLOB_FPTR_RPY0(x)      _B3(19,x)                  /*   VC0 reception payload FIFO packet read  pointer */
+#define  _TR_GLOB_FPTR_RHD0(x)      _B3(23,x)                  /*   VC0 reception header  FIFO packet read  pointer */
+#define  _TR_GLOB_FPTR_RPY1(x)      _B3(27,x)                  /*   VC1 reception payload FIFO packet read  pointer */
+#define  _TR_GLOB_FPTR_RHD1(x)      _B3(31,x)                  /*   VC1 reception header  FIFO packet read  pointer */
+#define _BGP_DCR_TR_GLOB_NADDR     (_BGP_DCR_TR_GLOB + 0x01)   /*  GLOB: Node Address Register */
+#define  _TR_GLOB_NADDR(x)          _B24(31,x)                 /*   Node address */
+#define _BGP_DCR_TR_GLOB_VCFG0     (_BGP_DCR_TR_GLOB + 0x02)   /*  GLOB: VC0 Configuration Register (use macros below) */
+#define _BGP_DCR_TR_GLOB_VCFG1     (_BGP_DCR_TR_GLOB + 0x03)   /*  GLOB: VC1 Configuration Register */
+#define  _TR_GLOB_VCFG_RCVALL       _BN( 0)                    /*   Disable P2P reception filtering */
+#define  _TR_GLOB_VCFG_CSUMX(x)     _B8(15,x)                  /*   Injection checksum mode 2 exclusion */
+#define  _TR_GLOB_VCFG_RWM(x)       _B3(23,x)                  /*   Payload reception FIFO watermark */
+#define  _TR_GLOB_VCFG_IWM(x)       _B3(31,x)                  /*   Payload injection FIFO watermark */
+
+/*  Tree Processor Reception Registers */
+#define _BGP_DCR_TR_REC_PRXF       (_BGP_DCR_TR_REC + 0x00)    /*  REC: Receive Exception Flag Register */
+#define _BGP_DCR_TR_REC_PRXEN      (_BGP_DCR_TR_REC + 0x01)    /*  REC: Receive Exception Enable Register */
+#define  _TR_REC_PRX_APAR0          _BN( 8)                    /*   P0 address parity error */
+#define  _TR_REC_PRX_APAR1          _BN( 9)                    /*   P1 address parity error */
+#define  _TR_REC_PRX_ALIGN0         _BN(10)                    /*   P0 address alignment error */
+#define  _TR_REC_PRX_ALIGN1         _BN(11)                    /*   P1 address alignment error */
+#define  _TR_REC_PRX_ADDR0          _BN(12)                    /*   P0 bad (unrecognized) address error */
+#define  _TR_REC_PRX_ADDR1          _BN(13)                    /*   P1 bad (unrecognized) address error */
+#define  _TR_REC_PRX_COLL           _BN(14)                    /*   FIFO read collision error */
+#define  _TR_REC_PRX_UE             _BN(15)                    /*   Uncorrectable SRAM ECC error */
+#define  _TR_REC_PRX_PFU0           _BN(26)                    /*   VC0 payload FIFO under-run error */
+#define  _TR_REC_PRX_PFU1           _BN(27)                    /*   VC1 payload FIFO under-run error */
+#define  _TR_REC_PRX_HFU0           _BN(28)                    /*   VC0 header FIFO under-run error */
+#define  _TR_REC_PRX_HFU1           _BN(29)                    /*   VC1 header FIFO under-run error */
+#define  _TR_REC_PRX_WM0            _BN(30)                    /*   VC0 payload FIFO above watermark */
+#define  _TR_REC_PRX_WM1            _BN(31)                    /*   VC1 payload FIFO above watermark */
+#define _BGP_DCR_TR_REC_PRDA       (_BGP_DCR_TR_REC + 0x02)    /*  REC: Receive Diagnostic Address Register */
+#define  _TR_PRDA_VC(x)             _B1(21,x)                  /*   Select VC to access */
+#define  _TR_PRDA_MAC(x)            _B1(22,x)                  /*   Select SRAM macro to access */
+#define  _TR_PRDA_LINE(x)           _B7(29,x)                  /*   Select line in SRAM or RA */
+#define  _TR_PRDA_TGT(x)            _B2(31,x)                  /*   Select target sub-line or RA */
+#define   _TR_PRDA_TGT_LO             0x0                      /*    Least significant word of SRAM */
+#define   _TR_PRDA_TGT_HI             0x1                      /*    Most significant word of SRAM */
+#define   _TR_PRDA_TGT_ECC            0x2                      /*    ECC syndrome of SRAM */
+#define   _TR_PRDA_TGT_HDR            0x3                      /*    Header fifo */
+#define _BGP_DCR_TR_REC_PRDD       (_BGP_DCR_TR_REC + 0x03)    /*  REC: Receive Diagnostic Data Register */
+#define  _TR_PRDD_ECC(x)            _B8(31,x)                  /*   ECC */
+#define  _TR_PRDD_DATA(x)           (x)                        /*   Data */
+
+/*  Tree Processor Injection Registers */
+#define _BGP_DCR_TR_INJ_PIXF       (_BGP_DCR_TR_INJ + 0x00)    /*  INJ: Injection Exception Flag Register */
+#define _BGP_DCR_TR_INJ_PIXEN      (_BGP_DCR_TR_INJ + 0x01)    /*  INJ: Injection Exception Enable Register */
+#define  _TR_INJ_PIX_APAR0          _BN( 6)                    /*   P0 address parity error */
+#define  _TR_INJ_PIX_APAR1          _BN( 7)                    /*   P1 address parity error */
+#define  _TR_INJ_PIX_ALIGN0         _BN( 8)                    /*   P0 address alignment error */
+#define  _TR_INJ_PIX_ALIGN1         _BN( 9)                    /*   P1 address alignment error */
+#define  _TR_INJ_PIX_ADDR0          _BN(10)                    /*   P0 bad (unrecognized) address error */
+#define  _TR_INJ_PIX_ADDR1          _BN(11)                    /*   P1 bad (unrecognized) address error */
+#define  _TR_INJ_PIX_DPAR0          _BN(12)                    /*   P0 data parity error */
+#define  _TR_INJ_PIX_DPAR1          _BN(13)                    /*   P1 data parity error */
+#define  _TR_INJ_PIX_COLL           _BN(14)                    /*   FIFO write collision error */
+#define  _TR_INJ_PIX_UE             _BN(15)                    /*   Uncorrectable SRAM ECC error */
+#define  _TR_INJ_PIX_PFO0           _BN(25)                    /*   VC0 payload FIFO overflow error */
+#define  _TR_INJ_PIX_PFO1           _BN(26)                    /*   VC1 payload FIFO overflow error */
+#define  _TR_INJ_PIX_HFO0           _BN(27)                    /*   VC0 header FIFO overflow error */
+#define  _TR_INJ_PIX_HFO1           _BN(28)                    /*   VC1 header FIFO overflow error */
+#define  _TR_INJ_PIX_WM0            _BN(29)                    /*   VC0 payload FIFO at or below watermark */
+#define  _TR_INJ_PIX_WM1            _BN(30)                    /*   VC1 payload FIFO at or below watermark */
+#define  _TR_INJ_PIX_ENABLE         _BN(31)                    /*   Injection interface enable (if enabled in PIXEN) */
+#define _BGP_DCR_TR_INJ_PIDA       (_BGP_DCR_TR_INJ + 0x02)    /*  INJ: Injection Diagnostic Address Register */
+/*         Use _TR_PRDA_* defined above. */
+#define _BGP_DCR_TR_INJ_PIDD       (_BGP_DCR_TR_INJ + 0x03)    /*  INJ: Injection Diagnostic Data Register */
+/*         Use _TR_PRDD_* defined above. */
+#define _BGP_DCR_TR_INJ_CSPY0      (_BGP_DCR_TR_INJ + 0x04)    /*  INJ: VC0 payload checksum */
+#define _BGP_DCR_TR_INJ_CSHD0      (_BGP_DCR_TR_INJ + 0x05)    /*  INJ: VC0 header checksum */
+#define _BGP_DCR_TR_INJ_CSPY1      (_BGP_DCR_TR_INJ + 0x06)    /*  INJ: VC1 payload checksum */
+#define _BGP_DCR_TR_INJ_CSHD1      (_BGP_DCR_TR_INJ + 0x07)    /*  INJ: VC1 header checksum */
+
+
+/*  Link CRCs for the receivers 0..2 (vc0,1) */
+#define _BGP_DCR_TR_LCRC_R00  (_BGP_DCR_TR_LCRC + 0)
+#define _BGP_DCR_TR_LCRC_R01  (_BGP_DCR_TR_LCRC + 1)
+#define _BGP_DCR_TR_LCRC_R10  (_BGP_DCR_TR_LCRC + 2)
+#define _BGP_DCR_TR_LCRC_R11  (_BGP_DCR_TR_LCRC + 3)
+#define _BGP_DCR_TR_LCRC_R20  (_BGP_DCR_TR_LCRC + 4)
+#define _BGP_DCR_TR_LCRC_R21  (_BGP_DCR_TR_LCRC + 5)
+
+/*  Link CRCs for the senders 0..2 (vc0,1) */
+#define _BGP_DCR_TR_LCRC_S00  (_BGP_DCR_TR_LCRC + 8)
+#define _BGP_DCR_TR_LCRC_S01  (_BGP_DCR_TR_LCRC + 9)
+#define _BGP_DCR_TR_LCRC_S10  (_BGP_DCR_TR_LCRC + 10)
+#define _BGP_DCR_TR_LCRC_S11  (_BGP_DCR_TR_LCRC + 11)
+#define _BGP_DCR_TR_LCRC_S20  (_BGP_DCR_TR_LCRC + 12)
+#define _BGP_DCR_TR_LCRC_S21  (_BGP_DCR_TR_LCRC + 13)
+
+/*  Internal error counters and thresholds */
+#define _BGP_DCR_TR_ERR_R0_CRC   (_BGP_DCR_TR_ERR + 0x00)     /*  CH0: Receiver link CRC errors detected */
+#define _BGP_DCR_TR_ERR_R0_CE    (_BGP_DCR_TR_ERR + 0x01)     /*  CH0: Receiver SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_S0_RETRY (_BGP_DCR_TR_ERR + 0x02)     /*  CH0: Sender link retransmissions */
+#define _BGP_DCR_TR_ERR_S0_CE    (_BGP_DCR_TR_ERR + 0x03)     /*  CH0: Sender SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_R1_CRC   (_BGP_DCR_TR_ERR + 0x04)     /*  CH1: Receiver link CRC errors detected */
+#define _BGP_DCR_TR_ERR_R1_CE    (_BGP_DCR_TR_ERR + 0x05)     /*  CH1: Receiver SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_S1_RETRY (_BGP_DCR_TR_ERR + 0x06)     /*  CH1: Sender link retransmissions */
+#define _BGP_DCR_TR_ERR_S1_CE    (_BGP_DCR_TR_ERR + 0x07)     /*  CH1: Sender SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_R2_CRC   (_BGP_DCR_TR_ERR + 0x08)     /*  CH2: Receiver link CRC errors detected */
+#define _BGP_DCR_TR_ERR_R2_CE    (_BGP_DCR_TR_ERR + 0x09)     /*  CH2: Receiver SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_S2_RETRY (_BGP_DCR_TR_ERR + 0x0A)     /*  CH2: Sender link retransmissions */
+#define _BGP_DCR_TR_ERR_S2_CE    (_BGP_DCR_TR_ERR + 0x0B)     /*  CH2: Sender SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_INJ_SE   (_BGP_DCR_TR_ERR + 0x0C)     /*  INJ: SRAM errors (correctable and uncorrectable) */
+#define _BGP_DCR_TR_ERR_REC_SE   (_BGP_DCR_TR_ERR + 0x0D)     /*  REC: SRAM errors (correctable and uncorrectable) */
+
+#define _BGP_DCR_TR_ERR_R0_CRC_T   (_BGP_DCR_TR_ERR + 0x10)   /*  Interrupt thresholds for corresponding error */
+#define _BGP_DCR_TR_ERR_R0_CE_T    (_BGP_DCR_TR_ERR + 0x11)   /*  counters. */
+#define _BGP_DCR_TR_ERR_S0_RETRY_T (_BGP_DCR_TR_ERR + 0x12)
+#define _BGP_DCR_TR_ERR_S0_CE_T    (_BGP_DCR_TR_ERR + 0x13)
+#define _BGP_DCR_TR_ERR_R1_CRC_T   (_BGP_DCR_TR_ERR + 0x14)
+#define _BGP_DCR_TR_ERR_R1_CE_T    (_BGP_DCR_TR_ERR + 0x15)
+#define _BGP_DCR_TR_ERR_S1_RETRY_T (_BGP_DCR_TR_ERR + 0x16)
+#define _BGP_DCR_TR_ERR_S1_CE_T    (_BGP_DCR_TR_ERR + 0x17)
+#define _BGP_DCR_TR_ERR_R2_CRC_T   (_BGP_DCR_TR_ERR + 0x18)
+#define _BGP_DCR_TR_ERR_R2_CE_T    (_BGP_DCR_TR_ERR + 0x19)
+#define _BGP_DCR_TR_ERR_S2_RETRY_T (_BGP_DCR_TR_ERR + 0x1A)
+#define _BGP_DCR_TR_ERR_S2_CE_T    (_BGP_DCR_TR_ERR + 0x1B)
+#define _BGP_DCR_TR_ERR_INJ_SE_T   (_BGP_DCR_TR_ERR + 0x1C)
+#define _BGP_DCR_TR_ERR_REC_SE_T   (_BGP_DCR_TR_ERR + 0x1D)
+
+/*  For _bgp_tree_configure_class */
+#define _BGP_COL_RDR_NUM      (16)   /*  classes are 0..15 */
+
+/*  The following interface allows fine-grained control of the RDR register */
+/*  contents.  OR these together bit-wise to create a route specification. */
+#define _BGP_COL_RDR_SRC0    (0x1000)   /*  Bit Number  3 (MSb is bit number 0) */
+#define _BGP_COL_RDR_SRC1    (0x2000)   /*  Bit Number  2 */
+#define _BGP_COL_RDR_SRC2    (0x4000)   /*  Bit Number  1 */
+#define _BGP_COL_RDR_SRCL    (0x0002)   /*  Bit Number 14 */
+#define _BGP_COL_RDR_TGT0    (0x0100)   /*  Bit Number  7 */
+#define _BGP_COL_RDR_TGT1    (0x0200)   /*  Bit Number  6 */
+#define _BGP_COL_RDR_TGT2    (0x0400)   /*  Bit Number  5 */
+#define _BGP_COL_RDR_TGTL    (0x0001)   /*  Bit Number 15 */
+
+/*  OR of all valid Source and Target bits for SrcTgtEnable validation. */
+#define _BGP_COL_RDR_ACCEPT (0x7703)
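Since _BGP_COL_RDR_ACCEPT is the OR of every legal source and target bit, validating a route specification reduces to one mask test:

	/* Reject any route specification with bits outside the accept mask. */
	static int bgcol_route_valid(unsigned int spec)
	{
		return (spec & ~_BGP_COL_RDR_ACCEPT) == 0;
	}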
+
+/**********************************************************************
+ *
+ * Torus
+ *
+ **********************************************************************/
+
+#define _BGP_DCR_DMA_NUM_VALID_ADDR       8              /*  g range */
+#define _BGP_DCR_iDMA_NUM_TS_FIFO_WM      2              /*  j range */
+#define _BGP_DCR_rDMA_NUM_TS_FIFO_WM      4              /*  p range */
+#define _BGP_DCR_iDMA_NUM_FIFO_REGS       4              /*  i range */
+#define _BGP_DCR_iDMA_NUM_FIFO_MAP_REGS   32             /*  k range */
+
+
+/* g repeats 8x, i repeats 4x, j repeats 2x, k repeats 32x, p repeats 4x */
+
+/*  ------------------- */
+/*  ---- Controls ----- */
+/*  ------------------- */
+
+#define _BGP_DCR_DMA_RESET             (_BGP_DCR_DMA+0x00)  /*  All bits reset to 1. */
+#define  _DMA_RESET_DCR                 _BN( 0)             /*  Reset the DMA's DCR unit */
+#define  _DMA_RESET_PQUE                _BN( 1)             /*  Reset the DMA's Processor Queue unit */
+#define  _DMA_RESET_IMFU                _BN( 2)             /*  Reset the DMA's Injection Memory Fifo/Counter Unit */
+#define  _DMA_RESET_RMFU                _BN( 3)             /*  Reset the DMA's Reception Memory Fifo/Counter Unit */
+#define  _DMA_RESET_LF                  _BN( 4)             /*  Reset the DMA's Local Fifo */
+#define  _DMA_RESET_ITIU                _BN( 5)             /*  Reset the DMA's Injection Torus Interface Unit */
+#define  _DMA_RESET_ICONU               _BN( 6)             /*  Reset the DMA's Injection Transfer Control Unit */
+#define  _DMA_RESET_IDAU                _BN( 7)             /*  Reset the DMA's Injection Data Alignment Unit */
+#define  _DMA_RESET_IMIU                _BN( 8)             /*  Reset the DMA's Injection L3 Memory Interface Unit */
+#define  _DMA_RESET_RTIU                _BN( 9)             /*  Reset the DMA's Reception Torus Interface Unit */
+#define  _DMA_RESET_RCONU               _BN(10)             /*  Reset the DMA's Reception Transfer Control Unit */
+#define  _DMA_RESET_RDAU                _BN(11)             /*  Reset the DMA's Reception Data Alignment Unit */
+#define  _DMA_RESET_RMIU                _BN(12)             /*  Reset the DMA's Reception L3 Memory Interface Unit */
+#define  _DMA_RESET_PF                  _BN(13)             /*  Reset the DMA's Torus Prefetch Unit */
+                                                            /*   14-30 reserved. */
+#define  _DMA_RESET_LNKCHK              _BN(31)             /*  Reset the DMA's Torus Link Packet Capture Unit */
+
+#define _BGP_DCR_DMA_BASE_CONTROL      (_BGP_DCR_DMA+0x01)
+#define   _DMA_BASE_CONTROL_USE_DMA     _BN( 0)             /*  Use DMA and *not* the Torus if 1, reset state is 0. */
+#define   _DMA_BASE_CONTROL_STORE_HDR   _BN( 1)             /*  Store DMA Headers in Reception Header Fifo (debugging) */
+#define   _DMA_BASE_CONTROL_PF_DIS      _BN( 2)             /*  Disable Torus Prefetch Unit (should be 0) */
+#define   _DMA_BASE_CONTROL_L3BURST_EN  _BN( 3)             /*  Enable L3 Burst when 1 (should be enabled, except for debugging) */
+#define   _DMA_BASE_CONTROL_ITIU_EN     _BN( 4)             /*  Enable Torus Injection Data Transfer Unit (never make this zero) */
+#define   _DMA_BASE_CONTROL_RTIU_EN     _BN( 5)             /*  Enable Torus Reception Data Transfer Unit */
+#define   _DMA_BASE_CONTROL_IMFU_EN     _BN( 6)             /*  Enable DMA Injection Fifo Unit Arbiter */
+#define   _DMA_BASE_CONTROL_RMFU_EN     _BN( 7)             /*  Enable DMA Reception fifo Unit Arbiter */
+#define   _DMA_BASE_CONTROL_L3PF_DIS    _BN( 8)             /*  Disable L3 Read Prefetch (should be 0) */
+                                                            /*   9..27 reserved. */
+#define   _DMA_BASE_CONTROL_REC_FIFO_FULL_STOP_RDMA   _BN( 28)  /*  DD2 Only, ECO 777, RDMA stops when fifo is full */
+#define   _DMA_BASE_CONTROL_REC_FIFO_CROSSTHRESH_NOTSTICKY  _BN( 29)  /*  DD2 Only, ECO 777, Rec. Fifo Threshold crossed is not sticky */
+#define   _DMA_BASE_CONTROL_INJ_FIFO_CROSSTHRESH_NOTSTICKY  _BN( 30)  /*  DD2 Only, ECO 777, Inj. Fifo Threshold crossed is not sticky */
+                                                            /*  31 - ECO 653, leave at 0 */
+#define _BGP_DCR_DMA_BASE_CONTROL_INIT  ( _DMA_BASE_CONTROL_USE_DMA    | \
+                                          _DMA_BASE_CONTROL_L3BURST_EN | \
+                                          _DMA_BASE_CONTROL_ITIU_EN    | \
+                                          _DMA_BASE_CONTROL_RTIU_EN    | \
+                                          _DMA_BASE_CONTROL_IMFU_EN    | \
+                                          _DMA_BASE_CONTROL_RMFU_EN)
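A plausible bring-up sequence, sketched under two assumptions: that a full reset is wanted first (all-ones is the documented reset state of _BGP_DCR_DMA_RESET), and that writing zero releases the resets:

	static void bgp_dma_enable(void)
	{
		mtdcr(_BGP_DCR_DMA_RESET, ~0u);	/* reset every DMA sub-unit */
		mtdcr(_BGP_DCR_DMA_RESET, 0);	/* release resets (assumed)  */
		mtdcr(_BGP_DCR_DMA_BASE_CONTROL, _BGP_DCR_DMA_BASE_CONTROL_INIT);
	}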
+
+/*  g in the interval [0:7]: */
+/*   32bit 16Byte aligned Physical Addresses containing (0..3 of UA | 0..27 of PA). */
+#define _BGP_DCR_iDMA_MIN_VALID_ADDR(g)                 (_BGP_DCR_DMA+((2*(g))+0x02))
+#define _BGP_DCR_iDMA_MAX_VALID_ADDR(g)                 (_BGP_DCR_DMA+((2*(g))+0x03))
+
+#define _BGP_DCR_iDMA_INJ_RANGE_TLB                     (_BGP_DCR_DMA+0x12)
+#define   _iDMA_INT_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4))    /*  misspelled legacy alias of _iDMA_INJ_RANGE_TLB_L3CIN below */
+#define   _iDMA_INT_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4))    /*  misspelled legacy alias of _iDMA_INJ_RANGE_TLB_L3SCR below */
+#define   _iDMA_INJ_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4))    /*  'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */
+#define   _iDMA_INJ_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4))    /*  'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */
+                                                          /*  Bits 2,3 of each range are reserved. */
+
+#define _BGP_DCR_rDMA_REC_RANGE_TLB                     (_BGP_DCR_DMA+0x13)
+#define   _rDMA_REC_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4))    /*  'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */
+#define   _rDMA_REC_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4))    /*  'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */
+
+/*  g in the interval [0:7] */
+/*   32bit 16Byte aligned Physical Addresses containing (0..3 of UA | 0..27 of PA). */
+#define _BGP_DCR_rDMA_MIN_VALID_ADDR(g)                 (_BGP_DCR_DMA+((2*(g))+0x14))
+#define _BGP_DCR_rDMA_MAX_VALID_ADDR(g)                 (_BGP_DCR_DMA+((2*(g))+0x15))
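Programming one of the eight g-indexed windows is two DCR writes per direction, with min/max in the packed (UA | PA) form described above (the helper name is mine, not the patch's):

	static void bgp_dma_set_valid_range(int g, u32 min, u32 max)
	{
		mtdcr(_BGP_DCR_iDMA_MIN_VALID_ADDR(g), min);	/* injection window */
		mtdcr(_BGP_DCR_iDMA_MAX_VALID_ADDR(g), max);
		mtdcr(_BGP_DCR_rDMA_MIN_VALID_ADDR(g), min);	/* reception window */
		mtdcr(_BGP_DCR_rDMA_MAX_VALID_ADDR(g), max);
	}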
+
+/*  j in the interval [0:1] */
+#define _BGP_DCR_iDMA_TS_FIFO_WM(j)                     (_BGP_DCR_DMA+(0x24+(j)))
+#define  _iDMA_TS_FIFO_WM_N0(x) 	_B6(7,(x))	 /*  bit {2..7}   of _BGP_DCR_iDMA_TS_FIFO_WM(0), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_N1(x)         _B6(15,(x))	 /*  bit {10..15} of _BGP_DCR_iDMA_TS_FIFO_WM(0), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_N2(x)         _B6(23,(x))	 /*  bit {18..23} of _BGP_DCR_iDMA_TS_FIFO_WM(0), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_P0(x)         _B6(31,(x))	 /*  bit {26..31} of _BGP_DCR_iDMA_TS_FIFO_WM(0), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_N3(x)		_B6(7,(x))	 /*  bit {2..7}   of _BGP_DCR_iDMA_TS_FIFO_WM(1), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_N4(x)		_B6(15,(x))	 /*  bit {10..15} of _BGP_DCR_iDMA_TS_FIFO_WM(1), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_N5(x)		_B6(23,(x))	 /*  bit {18..23} of _BGP_DCR_iDMA_TS_FIFO_WM(1), should be set to decimal 20 */
+#define  _iDMA_TS_FIFO_WM_P1(x)         _B6(31,(x))	 /*  bit {26..31} of _BGP_DCR_iDMA_TS_FIFO_WM(1), should be set to decimal 20 */
+
+#define  _iDMA_TS_FIFO_WM0_INIT   	(_iDMA_TS_FIFO_WM_N0(20) | \
+					 _iDMA_TS_FIFO_WM_N1(20) | \
+					 _iDMA_TS_FIFO_WM_N2(20) | \
+					 _iDMA_TS_FIFO_WM_P0(20))
+#define  _iDMA_TS_FIFO_WM1_INIT   	(_iDMA_TS_FIFO_WM_N3(20) | \
+					 _iDMA_TS_FIFO_WM_N4(20) | \
+					 _iDMA_TS_FIFO_WM_N5(20) | \
+					 _iDMA_TS_FIFO_WM_P1(20))
+
+#define _BGP_DCR_iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY             (_BGP_DCR_DMA+0x26)
+#define  _iDMA_LOCAL_FIFO_WM(x)		_B7(7,(x))	 /*  bit {1..7}   of _BGP_DCR_iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY, set to decimal 55 (0x37) */
+#define  _iDMA_HP_INJ_FIFO_RPT_CNT(x)   _B4(11,(x))	 /*  bit {8..11}  dma repeat count for using torus high priority injection fifo */
+#define  _iDMA_NP_INJ_FIFO_RPT_CNT(x)   _B4(15,(x))	 /*  bit {12..15} dma repeat count for using torus normal priority injection fifo */
+#define  _iDMA_INJ_DELAY(x)		_B4(23,(x))	 /*  bit {20..23} dma delay this amount of clock_x2 cycles before injecting next packet */
+
+#define  _iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY_INIT	(_iDMA_LOCAL_FIFO_WM(55) | \
+						 _iDMA_HP_INJ_FIFO_RPT_CNT(0) | \
+						 _iDMA_NP_INJ_FIFO_RPT_CNT(0) | \
+						 _iDMA_INJ_DELAY(0))
+
+/*  p in the interval [0:3] */
+#define _BGP_DCR_rDMA_TS_FIFO_WM(p)                     (_BGP_DCR_DMA+(0x27+(p)))
+#define  _rDMA_TS_FIFO_WM_G0N0(x)	_B6(7,(x))	 /*  bit {2..7}   of _BGP_DCR_rDMA_TS_FIFO_WM(0), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G0N1(x)	_B6(15,(x))	 /*  bit {10..15} of _BGP_DCR_rDMA_TS_FIFO_WM(0), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G0N2(x)	_B6(23,(x))	 /*  bit {18..23} of _BGP_DCR_rDMA_TS_FIFO_WM(0), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G0N3(x)	_B6(31,(x))	 /*  bit {26..31} of _BGP_DCR_rDMA_TS_FIFO_WM(0), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G0N4(x)	_B6(7,(x))	 /*  bit {2..7}   of _BGP_DCR_rDMA_TS_FIFO_WM(1), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G0N5(x)	_B6(15,(x))	 /*  bit {10..15} of _BGP_DCR_rDMA_TS_FIFO_WM(1), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G0P(x)	_B6(23,(x))      /*  bit {18..23} of _BGP_DCR_rDMA_TS_FIFO_WM(1), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1N0(x)	_B6(7,(x))	 /*  bit {2..7}   of _BGP_DCR_rDMA_TS_FIFO_WM(2), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1N1(x)	_B6(15,(x))	 /*  bit {10..15} of _BGP_DCR_rDMA_TS_FIFO_WM(2), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1N2(x)	_B6(23,(x))	 /*  bit {18..23} of _BGP_DCR_rDMA_TS_FIFO_WM(2), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1N3(x)	_B6(31,(x))	 /*  bit {26..31} of _BGP_DCR_rDMA_TS_FIFO_WM(2), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1N4(x)	_B6(7,(x))	 /*  bit {2..7}   of _BGP_DCR_rDMA_TS_FIFO_WM(3), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1N5(x)	_B6(15,(x))	 /*  bit {10..15} of _BGP_DCR_rDMA_TS_FIFO_WM(3), must be 0 */
+#define  _rDMA_TS_FIFO_WM_G1P(x)	_B6(23,(x))      /*  bit {18..23} of _BGP_DCR_rDMA_TS_FIFO_WM(3), must be 0 */
+
+#define  _rDMA_TS_FIFO_WM0_INIT		(_rDMA_TS_FIFO_WM_G0N0(0) | \
+					 _rDMA_TS_FIFO_WM_G0N1(0) | \
+					 _rDMA_TS_FIFO_WM_G0N2(0) | \
+					 _rDMA_TS_FIFO_WM_G0N3(0))
+#define  _rDMA_TS_FIFO_WM1_INIT		(_rDMA_TS_FIFO_WM_G0N4(0) | \
+					 _rDMA_TS_FIFO_WM_G0N5(0) | \
+					 _rDMA_TS_FIFO_WM_G0P(0))
+#define  _rDMA_TS_FIFO_WM2_INIT		(_rDMA_TS_FIFO_WM_G1N0(0) | \
+					 _rDMA_TS_FIFO_WM_G1N1(0) | \
+					 _rDMA_TS_FIFO_WM_G1N2(0) | \
+					 _rDMA_TS_FIFO_WM_G1N3(0))
+#define  _rDMA_TS_FIFO_WM3_INIT		(_rDMA_TS_FIFO_WM_G1N4(0) | \
+					 _rDMA_TS_FIFO_WM_G1N5(0) | \
+					 _rDMA_TS_FIFO_WM_G1P(0))
+
+#define _BGP_DCR_rDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY       (_BGP_DCR_DMA+0x2b)
+#define  _rDMA_LOCAL_FIFO_WM(x)		_B7(7,(x))	 /*  bit {1..7}, local fifo watermark, must be 0 */
+#define  _rDMA_HP_REC_FIFO_RPT_CNT(x)	_B4(11,(x))	 /*  bit {8..11}, dma repeat count for torus high priority reception fifos */
+#define  _rDMA_NP_REC_FIFO_RPT_CNT(x)	_B4(15,(x))	 /*  bit {12..15}, dma repeat count for torus normal priority reception fifos */
+#define  _rDMA_DELAY(x)			_B4(23,(x))	 /*  bit {20..23}, dma delay this amount of clock_x2 cycles between packets */
+
+#define  _rDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY_INIT	(_rDMA_LOCAL_FIFO_WM(0) | \
+						 _rDMA_HP_REC_FIFO_RPT_CNT(0) | \
+						 _rDMA_NP_REC_FIFO_RPT_CNT(0) | \
+						 _rDMA_DELAY(0))
+
+/*  i in the interval [0:3] */
+#define _BGP_DCR_iDMA_FIFO_ENABLE(i)                    (_BGP_DCR_DMA+(0x2c+(i)))  /*  each bit, if '1', enables an injection fifo */
+#define _BGP_DCR_rDMA_FIFO_ENABLE                       (_BGP_DCR_DMA+0x30)	 /*  each bit, if '1', enables a reception fifo */
+#define _BGP_DCR_rDMA_FIFO_ENABLE_HEADER                (_BGP_DCR_DMA+0x31)
+#define  _rDMA_FIFO_ENABLE_HEADER0	_BN(28)
+#define  _rDMA_FIFO_ENABLE_HEADER1	_BN(29)
+#define  _rDMA_FIFO_ENABLE_HEADER2	_BN(30)
+#define  _rDMA_FIFO_ENABLE_HEADER3	_BN(31)
+
+/*  i in the interval [0:3] */
+#define _BGP_DCR_iDMA_FIFO_PRIORITY(i)                  (_BGP_DCR_DMA+(0x32+(i)))
+#define _BGP_DCR_iDMA_FIFO_RGET_THRESHOLD               (_BGP_DCR_DMA+0x36)
+#define _BGP_DCR_iDMA_SERVICE_QUANTA                    (_BGP_DCR_DMA+0x37)
+#define  _iDMA_SERVICE_QUANTA_HP(x)	_B16(15,(x))
+#define  _iDMA_SERVICE_QUANTA_NP(x)	_B16(31,(x))
+#define  _iDMA_SERVICE_QUANTA_INIT	(_iDMA_SERVICE_QUANTA_HP(0) | _iDMA_SERVICE_QUANTA_NP(0))
+
+#define _BGP_DCR_rDMA_FIFO_TYPE                         (_BGP_DCR_DMA+0x38)
+#define _BGP_DCR_rDMA_FIFO_TYPE_HEADER                  (_BGP_DCR_DMA+0x39)
+#define  _rDMA_FIFO_TYPE_HEADER0	_BN(28)
+#define  _rDMA_FIFO_TYPE_HEADER1	_BN(29)
+#define  _rDMA_FIFO_TYPE_HEADER2	_BN(30)
+#define  _rDMA_FIFO_TYPE_HEADER3	_BN(31)
+#define _BGP_DCR_rDMA_FIFO_THRESH0                      (_BGP_DCR_DMA+0x3a)
+#define _BGP_DCR_rDMA_FIFO_THRESH1                      (_BGP_DCR_DMA+0x3b)
+
+/*  k in the interval [0:31] */
+#define _BGP_DCR_iDMA_TS_INJ_FIFO_MAP(k)                (_BGP_DCR_DMA+(0x3c+(k)))   /*  8 bits for every dma injection fifo */
+/*  j in the interval [0:3]; each register packs four 8-bit map fields */
+#define  _iDMA_TS_INJ_FIFO_MAP_FIELD(j, x)	_B8((7+(j)*8), (x))
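+/*  Illustrative example: within map register k, field j occupies bits
+ *  8*j..8*j+7, so _iDMA_TS_INJ_FIFO_MAP_FIELD(1, 0x80) places 0x80 into
+ *  bits 8..15. */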
+/*  i in the interval [0:3] */
+#define _BGP_DCR_iDMA_LOCAL_COPY(i)                     (_BGP_DCR_DMA+(0x5c+(i)))   /*  one bit for every dma injection fifo */
+
+/*  XY  = X, Y */
+/*  ZHL = Z, High Priority, Local Copy */
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_XY	(_BGP_DCR_DMA+0x60)  /*  torus recv group 0, (pid0, pid1) = "00" */
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_ZHL	(_BGP_DCR_DMA+0x61)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_XY	(_BGP_DCR_DMA+0x62)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_ZHL	(_BGP_DCR_DMA+0x63)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_XY       (_BGP_DCR_DMA+0x64)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_ZHL      (_BGP_DCR_DMA+0x65)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_XY       (_BGP_DCR_DMA+0x66)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_ZHL      (_BGP_DCR_DMA+0x67)
+#define  _rDMA_TS_REC_FIFO_MAP_XP(x)		_B8(7,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_XM(x)		_B8(15,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_YP(x)		_B8(23,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_YM(x)		_B8(31,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_ZP(x)		_B8(7,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_ZM(x)		_B8(15,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_HIGH(x)		_B8(23,(x))
+#define  _rDMA_TS_REC_FIFO_MAP_LOCAL(x)		_B8(31,(x))
+
+/*  ii in the interval [0:3]  group 0, group 1, ..., group 3 */
+#define _BGP_DCR_rDMA_FIFO_CLEAR_MASK(ii)               (_BGP_DCR_DMA+(0x68+(ii)))
+#define  _rDMA_FIFO_CLEAR_MASK0_INIT		0xFF000000
+#define  _rDMA_FIFO_CLEAR_MASK1_INIT		0x00FF0000
+#define  _rDMA_FIFO_CLEAR_MASK2_INIT		0x0000FF00
+#define  _rDMA_FIFO_CLEAR_MASK3_INIT		0x000000FF
+#define _BGP_DCR_rDMA_FIFO_HEADER_CLEAR_MASK            (_BGP_DCR_DMA+0x6c)
+#define  _rDMA_FIFO_HEADER_CLEAR_MASK_INIT	0x08040201
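+/*  Illustrative use (assumed init sequence): the init values give each group
+ *  its own byte of the clear register, e.g.
+ *
+ *      mtdcrx(_BGP_DCR_rDMA_FIFO_CLEAR_MASK(0), _rDMA_FIFO_CLEAR_MASK0_INIT);
+ */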
+
+/*  g in the interval [0:3]  group 0, group 1, group2, and group 3 */
+#define _BGP_DCR_iDMA_FIFO_INT_ENABLE_GROUP(g)		(_BGP_DCR_DMA+(0x6d+(g)))
+/*  t in the interval [0:3]  type 0, type 1, ..., type 3 */
+#define _BGP_DCR_rDMA_FIFO_INT_ENABLE_TYPE(t)		(_BGP_DCR_DMA+(0x71+(t)))
+#define _BGP_DCR_rDMA_HEADER_FIFO_INT_ENABLE		(_BGP_DCR_DMA+0x75)
+#define  _rDMA_HEADER_HEADER_FIFO_INT_ENABLE_TYPE(t,x)	_B4((7+(t)*8), (x))
+
+/*  g in the interval [0:3]  group 0, group 1, ..., group 3 */
+#define _BGP_DCR_iDMA_COUNTER_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x76+(g)))
+
+/*  g in the interval [0:3]  group 0, group 1, ..., group 3 */
+#define _BGP_DCR_rDMA_COUNTER_INT_ENABLE_GROUP(g) 	(_BGP_DCR_DMA+(0x7a+(g)))
+
+/*  ------------------------------ */
+/*  ---- Fatal Error Enables ----- */
+/*  ------------------------------ */
+/*  e in the interval [0:3], bit definition in the fatal errors at 0x93 - 0x96 */
+#define _BGP_DCR_DMA_FATAL_ERROR_ENABLE(e)		(_BGP_DCR_DMA +(0x7e +(e)))
+
+/*  ------------------------------- */
+/*  ---- Backdoor Access Regs ----- */
+/*  ------------------------------- */
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_CTRL		(_BGP_DCR_DMA+0x82)
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_ENABLE	_BN(0)	 /*  if '1', enable backdoor read/write */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_NOECC	_BN(1)	 /*  if '1', do not do ECC on backdoor read/write */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_RD_REQ	_BN(2)	 /*  if '1', do read */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_WR_REQ	_BN(3)	 /*  if '1', do write */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_IMFU_SEL	_BN(4)	 /*  unit select, '0' local fifo, '1' imfu descriptor */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_LF_ADDR(x)	_B7(15,(x))	 /*  7 bit sram address for local fifo */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_IMFU_ADDR(x)	_B8(15,(x))	 /*  8 bit sram address for imfu descriptor */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_WR_ECC0(x)	_B8(23,(x))	 /*  8 bit write ECC for data bits 0 to 63 */
+#define  _DMA_LF_IMFU_DESC_BD_CTRL_WR_ECC1(x)	_B8(31,(x))	 /*  8 bit write ECC for data bits 64 to 127 */
+/*  i in the interval [0:3] */
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BACKDOOR_WR_DATA(i)   (_BGP_DCR_DMA+(0x83+(i)))  /* 128 bit backdoor write data */
+#define _BGP_DCR_DMA_ARRAY_BD_CTRL			(_BGP_DCR_DMA+0x87)   /*  fifo/counter array backdoor control */
+#define  _DMA_ARRAY_BD_CTRL_ENABLE		_BN(0)
+#define  _DMA_ARRAY_BD_CTRL_RD_SEL_IMFU_FIFO	_B2(2,0)	 /*  unit select for backdoor read */
+#define  _DMA_ARRAY_BD_CTRL_RD_SEL_IMFU_COUNTER	_B2(2,1)
+#define  _DMA_ARRAY_BD_CTRL_RD_SEL_RMFU_FIFO	_B2(2,2)
+#define  _DMA_ARRAY_BD_CTRL_RD_SEL_RMFU_COUNTER	_B2(2,3)
+#define  _DMA_ARRAY_BD_CTRL_WR_ECC(x)		_B7(15,(x))
+
+/*  ------------------------------------- */
+/*  ---- Torus Link Checker Control ----- */
+/*  ------------------------------------- */
+#define _BGP_DCR_DMA_TS_LINK_CHK_CTRL			(_BGP_DCR_DMA+0x88)
+#define  _DMA_TS_LINK_CHK_CTRL_SEL(x)		_B3(2,(x))	 /*  0 - xp, 1 - xm, 2 - yp, 3 - ym, 4 - zp, 5 - zm, 6, 7 disable */
+#define  _DMA_TS_LINK_CHK_CTRL_RW_ENABLE	_BN(8)		 /*  if 1, enable read/write to link checker internal sram */
+#define  _DMA_TS_LINK_CHK_CTRL_WR_REQ		_BN(12)
+#define  _DMA_TS_LINK_CHK_CTRL_RD_REQ		_BN(13)
+#define  _DMA_TS_LINK_CHK_CTRL_ADDR(x)		_B10(23,(x))
+#define  _DMA_TS_LINK_CHK_CTRL_WR_DATA(x)	_B8(31,(x))
+#define  _DMA_TS_LINK_CHK_BAD_OFFSET            (0)           /*  sram address where bad packet starts */
+#define  _DMA_TS_LINK_CHK_GOOD_OFFSET           (320)         /*  sram address where good packet starts */
+
+
+/*  -------------------- */
+/*  ---- Threshold ----- */
+/*  -------------------- */
+#define _BGP_DCR_DMA_CE_COUNT_THRESHOLD			(_BGP_DCR_DMA+0x89)  /*  correctable ecc error count threshold, reset to 0xFFFFFFFF */
+/*  default used when system comes out of reset, will have to be tuned */
+#define  _BGP_DCR_DMA_CE_COUNT_THRESHOLD_INIT  1
+
+/*  ---------------------------------- */
+/*  ---- Correctable error count ----- */
+/*  ---------------------------------- */
+/*  c in the interval [0:8]  count 0, count 1, ..., count 8 */
+#define _BGP_DCR_DMA_CE_COUNT(c)			(_BGP_DCR_DMA+(0x8A+(c)))
+#define _BGP_DCR_DMA_CE_COUNT_INJ_FIFO0			(_BGP_DCR_DMA+0x8A)
+#define _BGP_DCR_DMA_CE_COUNT_INJ_FIFO1			(_BGP_DCR_DMA+0x8B)
+#define _BGP_DCR_DMA_CE_COUNT_INJ_COUNTER		(_BGP_DCR_DMA+0x8C)
+#define _BGP_DCR_DMA_CE_COUNT_INJ_DESC                  (_BGP_DCR_DMA+0x8D)
+#define _BGP_DCR_DMA_CE_COUNT_REC_FIFO0                 (_BGP_DCR_DMA+0x8E)
+#define _BGP_DCR_DMA_CE_COUNT_REC_FIFO1                 (_BGP_DCR_DMA+0x8F)
+#define _BGP_DCR_DMA_CE_COUNT_REC_COUNTER               (_BGP_DCR_DMA+0x90)
+#define _BGP_DCR_DMA_CE_COUNT_LOCAL_FIFO0               (_BGP_DCR_DMA+0x91)
+#define _BGP_DCR_DMA_CE_COUNT_LOCAL_FIFO1               (_BGP_DCR_DMA+0x92)
+
+/*  upon termination, create RAS event if any of the above counts are greater than this value */
+#define _BGP_DCR_DMA_CE_TERM_THRESH  0
+
+/*  ----------------- */
+/*  ---- Status ----- */
+/*  ----------------- */
+/*  e in the interval [0:3]  error0, error1, ..., error 3 */
+#define _BGP_DCR_DMA_FATAL_ERROR(e)                     (_BGP_DCR_DMA+(0x93+(e)))
+
+/*  Below are error conditions most likely caused by software */
+#define _BGP_DCR_DMA_FATAL_ERROR0_WR0_MSB  _BN(4)    /*  pque wr0 msb not 0 */
+#define _BGP_DCR_DMA_FATAL_ERROR0_RD0_MSB  _BN(8)    /*  pque rd0 msb not 0 */
+#define _BGP_DCR_DMA_FATAL_ERROR0_WR1_MSB  _BN(12)   /*  pque wr1 msb not 0 */
+#define _BGP_DCR_DMA_FATAL_ERROR0_RD1_MSB  _BN(16)   /*  pque rd1 msb not 0 */
+
+#define _BGP_DCR_DMA_FATAL_ERROR1_REC_MAP  _BN(22)   /*   multiple bits set for the dcr rec fifo map */
+
+
+#define _BGP_DCR_DMA_FATAL_ERROR2_FIFO_SEL   _BN(14)  /*  fifo_sel_n error */
+#define _BGP_DCR_DMA_FATAL_ERROR2_FIFO_SEL_FORM  _BN(15)  /*  fifo_sel_n_form error */
+#define _BGP_DCR_DMA_FATAL_ERROR2_READ_RANGE _BN(25)  /*  read from address not in one of dcr address ranges */
+
+#define _BGP_DCR_DMA_FATAL_ERROR3_DPUT_SIZE   _BN(8)   /*  direct put packet had greater than 240 bytes */
+#define _BGP_DCR_DMA_FATAL_ERROR3_RGET_SIZE   _BN(9)   /*  remote get packet had greater than 240 bytes */
+#define _BGP_DCR_DMA_FATAL_ERROR3_MAX_ADDRESS _BN(18)  /*  write to address larger than counter max */
+#define _BGP_DCR_DMA_FATAL_ERROR3_WRITE_RANGE _BN(26)  /*  write to address not in one of dcr address ranges */
+
+#define _BGP_DCR_DMA_PQUE_WR0_BAD_ADDR                  (_BGP_DCR_DMA+0x97)
+#define _BGP_DCR_DMA_PQUE_RD0_BAD_ADDR                  (_BGP_DCR_DMA+0x98)
+#define _BGP_DCR_DMA_PQUE_WR1_BAD_ADDR                  (_BGP_DCR_DMA+0x99)
+#define _BGP_DCR_DMA_PQUE_RD1_BAD_ADDR                  (_BGP_DCR_DMA+0x9a)
+
+#define _BGP_DCR_DMA_MFU_STAT0                          (_BGP_DCR_DMA+0x9b)
+#define  _DMA_MFU_STAT0_IMFU_NOT_ENABLED_COUNTER_ID(x)	_G8((x), 7)		 /*  idma not enabled counter id */
+#define  _DMA_MFU_STAT0_IMFU_UNDERFLOW_COUNTER_ID(x)	_G8((x), 15)		 /*  idma underflow counter id */
+#define  _DMA_MFU_STAT0_IMFU_OVERFLOW_NB_ADDR(x)	_G16((x), 31)		 /*  idma netbus addr that caused counter overflow */
+#define _BGP_DCR_DMA_MFU_STAT1                          (_BGP_DCR_DMA+0x9c)
+#define  _DMA_MFU_STAT1_IMFU_CUR_FIFO_ID(x)		_G7((x), 7)		 /*  current fifo id that idma is working on */
+#define  _DMA_MFU_STAT1_RMFU_UNDERFLOW_COUNTER_ID(x)	_G8((x), 15)		 /*  rdma underflow counter id */
+#define  _DMA_MFU_STAT1_RMFU_OVERFLOW_NB_ADDR(x)	_G16((x), 31)		 /*  rdma netbus addr that caused counter overflow */
+#define _BGP_DCR_DMA_MFU_STAT2                          (_BGP_DCR_DMA+0x9d)
+#define  _DMA_MFU_STAT2_RMFU_FIFO_NE_OR_NA(x)		_GN((x), 0)		 /*  rdma fifo not enabled or not all_available */
+#define  _DMA_MFU_STAT2_RMFU_HDR_FIFO_NE_OR_NA(x)	_GN((x), 1)		 /*  rdma header fifo not enabled or not all_available */
+#define  _DMA_MFU_STAT2_RMFU_INJ_FIFO_NE_OR_NA(x)	_GN((x), 2)		 /*  rdma injection fifo for remote get not enabled or not all_available */
+#define  _DMA_MFU_STAT2_RMFU_COUNTER_NE(x)		_GN((x), 3)		 /*  rdma accessing not enabled counter */
+#define  _DMA_MFU_STAT2_RMFU_PKT_PID(x)			_G2((x), 7)		 /*  rdma receiving packet pid */
+#define  _DMA_MFU_STAT2_RMFU_FIFO_BIT(x)		_G8((x), 15)		 /*  rdma receiving packet fifo bit, only one bit should be set */
+										 /*  bit orders are xp, xm, yp, ym, zp, zm, hp, local */
+#define  _DMA_MFU_STAT2_RMFU_RGET_FIFO_ID(x)		_G8((x), 23)		 /*  rdma remote get (injection) fifo id */
+#define  _DMA_MFU_STAT2_RMFU_COUNTER_ID(x)		_G8((x), 31)		 /*  rdma direct put counter id */
+#define _BGP_DCR_DMA_L3_RD_ERROR_ADDR                   (_BGP_DCR_DMA+0x9e)
+#define _BGP_DCR_DMA_L3_WR_ERROR_ADDR                   (_BGP_DCR_DMA+0x9f)
+
+/*  i in the interval [0:3] */
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_RD_DATA(i)		(_BGP_DCR_DMA+(0xa0+(i)))
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_RD_ECC             (_BGP_DCR_DMA+0xa4)
+#define  _DMA_LF_IMFU_DESC_BD_RD_ECC_DWORD0(x)		_G8((x),23)		 /*  ecc for data bits 0 to 63 */
+#define  _DMA_LF_IMFU_DESC_BD_RD_ECC_DWORD1(x)		_G8((x),31)		 /*  ecc for data bits 64 to 127 */
+#define _BGP_DCR_DMA_ARRAY_RD_ECC                       (_BGP_DCR_DMA+0xa5)
+#define  _DMA_ARRAY_RD_ECC_WORD0(x)			_G7((x), 7)		 /*  word address offset 0 */
+#define  _DMA_ARRAY_RD_ECC_WORD1(x)			_G7((x), 15)		 /*  word address offset 1 */
+#define  _DMA_ARRAY_RD_ECC_WORD2(x)			_G7((x), 23)		 /*  word address offset 2 */
+#define  _DMA_ARRAY_RD_ECC_WORD3(x)			_G7((x), 31)		 /*  word address offset 3 */
+#define _BGP_DCR_DMA_TS_LINK_CHK_STAT                   (_BGP_DCR_DMA+0xa6)
+#define  _DMA_TS_LINK_CHK_STAT_PKT_CAPTURED(x)		_GN((x), 0) 		 /*  bad packet captured flag */
+#define  _DMA_TS_LINK_CHK_STAT_RECV_PIPE_FERR(x)	_GN((x), 1) 		 /*  receive pipe fatal error */
+#define  _DMA_TS_LINK_CHK_STAT_STATE(x)			_G4((x), 7) 		 /*  state machine state */
+#define  _DMA_TS_LINK_CHK_STAT_SRAM_ADDR(x)		_G10((x), 23) 		 /*  current sram read or write address */
+#define  _DMA_TS_LINK_CHK_STAT_SRAM_RD_DATA(x)		_G8((x), 31) 		 /*  sram read data */
+
+/*  ---- Debug ----- */
+/*  i in the interval [0:3] */
+#define _BGP_DCR_DMA_iFIFO_DESC_RD_FLAG(i)              (_BGP_DCR_DMA+(0xa7+(i)))
+/*  j in the interval [0:1] */
+#define _BGP_DCR_DMA_INTERNAL_STATE(j)                  (_BGP_DCR_DMA+(0xab+(j)))
+#define  _DMA_INTERNAL_STATE0_IMFU_SEL_STATE(x)		_G3((x), 2)
+#define  _DMA_INTERNAL_STATE0_IMFU_ARB_STATE(x)		_G5((x), 7)
+#define  _DMA_INTERNAL_STATE0_IMFU_FIFO_ARB_STATE(x)	_G5((x), 12)
+#define  _DMA_INTERNAL_STATE0_IMFU_CNT_ARB_STATE(x)	_G4((x), 16)
+#define  _DMA_INTERNAL_STATE0_RMFU_ARB_STATE(x)		_G5((x), 23)
+#define  _DMA_INTERNAL_STATE0_RMFU_FIFO_ARB_STATE(x)	_G4((x), 27)
+#define  _DMA_INTERNAL_STATE0_RMFU_CNT_ARB_STATE(x)	_G4((x), 31)
+
+#define  _DMA_INTERNAL_STATE1_PQUE_ARB_STATE(x)		_G3((x), 2)
+#define  _DMA_INTERNAL_STATE1_ICONU_SM_STATE(x)		_G4((x), 6)
+#define  _DMA_INTERNAL_STATE1_IFSU_SM_STATE(x)		_G3((x), 9)
+#define  _DMA_INTERNAL_STATE1_IDAU_L3RSM_STATE(x)	_G3((x), 12)
+#define  _DMA_INTERNAL_STATE1_IDAU_L3VSM_STATE(x)	_G3((x), 15)
+#define  _DMA_INTERNAL_STATE1_IDAU_TTSM_STATE(x)	_G3((x), 18)
+#define  _DMA_INTERNAL_STATE1_RCONU_SM_STATE(x)		_G4((x), 22)
+#define  _DMA_INTERNAL_STATE1_RFSU_SM_STATE(x)		_G3((x), 25)
+#define  _DMA_INTERNAL_STATE1_RDAU_QRSM_STATE(x)	_G3((x), 28)
+#define  _DMA_INTERNAL_STATE1_RDAU_L3SM_STATE(x)	_G3((x), 31)
+
+/*  values for _BGP_DCR_DMA_INTERNAL_STATE when all state machines are in the idle or wait state */
+#define _BGP_DCR_DMA_INTERNAL_STATE_0_IDLE               (0x21088111)
+#define _BGP_DCR_DMA_INTERNAL_STATE_1_IDLE               (0x22492249)
+
+#define _BGP_DCR_DMA_PQUE_POINTER                       (_BGP_DCR_DMA+0xad)
+#define  _DMA_PQUE_POINTER_WR0_BEGIN(x)			_G4((x),3)
+#define  _DMA_PQUE_POINTER_WR0_END(x)			_G4((x),7)
+#define  _DMA_PQUE_POINTER_RD0_BEGIN(x)			_G4((x),11)
+#define  _DMA_PQUE_POINTER_RD0_END(x)			_G4((x),15)
+#define  _DMA_PQUE_POINTER_WR1_BEGIN(x)			_G4((x),19)
+#define  _DMA_PQUE_POINTER_WR1_END(x)			_G4((x),23)
+#define  _DMA_PQUE_POINTER_RD1_BEGIN(x)			_G4((x),27)
+#define  _DMA_PQUE_POINTER_RD1_END(x)			_G4((x),31)
+#define _BGP_DCR_DMA_LOCAL_FIFO_POINTER                 (_BGP_DCR_DMA+0xae)
+#define  _DMA_LOCAL_FIFO_POINTER_BEGIN(x)		_G8((x),7)
+#define  _DMA_LOCAL_FIFO_POINTER_END(x)			_G8((x),15)
+#define  _DMA_LOCAL_FIFO_POINTER_END_OF_PKT(x)		_G8((x),23)
+#define _BGP_DCR_DMA_WARN_ERROR                         (_BGP_DCR_DMA+0xaf)
+
+/*  offset 0xb0 is reserved */
+
+/*  ---- Clears ----- */
+#define _BGP_DCR_DMA_CLEAR0                             (_BGP_DCR_DMA+0xb1)
+#define  _DMA_CLEAR0_IMFU_ARB_WERR		_BN(0)
+#define  _DMA_CLEAR0_IMFU_COUNTER_UNDERFLOW	_BN(1)
+#define  _DMA_CLEAR0_IMFU_COUNTER_OVERFLOW	_BN(2)
+#define  _DMA_CLEAR0_RMFU_COUNTER_UNDERFLOW	_BN(3)
+#define  _DMA_CLEAR0_RMFU_COUNTER_OVERFLOW	_BN(4)
+#define  _DMA_CLEAR0_RMFU_ARB_WERR		_BN(5)
+#define  _DMA_CLEAR0_PQUE_WR0_BEN_WERR		_BN(6)
+#define  _DMA_CLEAR0_PQUE_WR0_ADDR_CHK_WERR	_BN(7)
+#define  _DMA_CLEAR0_PQUE_RD0_ADDR_CHK_WERR	_BN(8)
+#define  _DMA_CLEAR0_PQUE_WR1_BEN_WERR		_BN(9)
+#define  _DMA_CLEAR0_PQUE_WR1_ADDR_CHK_WERR	_BN(10)
+#define  _DMA_CLEAR0_PQUE_RD1_ADDR_CHK_WERR	_BN(11)
+#define  _DMA_CLEAR0_PQUE_WR0_HOLD_BAD_ADDR	_BN(12)
+#define  _DMA_CLEAR0_PQUE_RD0_HOLD_BAD_ADDR	_BN(13)
+#define  _DMA_CLEAR0_PQUE_WR1_HOLD_BAD_ADDR	_BN(14)
+#define  _DMA_CLEAR0_PQUE_RD1_HOLD_BAD_ADDR	_BN(15)
+#define  _DMA_CLEAR0_IFIFO_ARRAY_UE0		_BN(16)
+#define  _DMA_CLEAR0_IFIFO_ARRAY_UE1		_BN(17)
+#define  _DMA_CLEAR0_ICOUNTER_ARRAY_UE		_BN(18)
+#define  _DMA_CLEAR0_IMFU_DESC_UE		_BN(19)
+#define  _DMA_CLEAR0_RFIFO_ARRAY_UE0		_BN(20)
+#define  _DMA_CLEAR0_RFIFO_ARRAY_UE1		_BN(21)
+#define  _DMA_CLEAR0_RCOUNTER_ARRAY_UE		_BN(22)
+#define  _DMA_CLEAR0_LOCAL_FIFO_UE0		_BN(23)
+#define  _DMA_CLEAR0_LOCAL_FIFO_UE1		_BN(24)
+
+#define _BGP_DCR_DMA_CLEAR1                             (_BGP_DCR_DMA+0xb2)
+#define  _DMA_CLEAR1_TS_LINK_CHK		_BN(0)
+
+
+#endif
diff --git a/drivers/net/bgp_collective/ppc450.h b/drivers/net/bgp_collective/ppc450.h
new file mode 100644
index 0000000..0f312cb
--- /dev/null
+++ b/drivers/net/bgp_collective/ppc450.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2007, 2008 International Business Machines
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#ifndef __DRIVERS__BLUEGENE__PPC450_H__
+#define __DRIVERS__BLUEGENE__PPC450_H__
+
+#include <linux/types.h>	/* u32, uint32_t */
+#include <linux/string.h>	/* memcpy */
+#include <linux/kernel.h>	/* BUG_ON, unlikely */
+
+/*  include asm instruction macros */
+/* #include <asm/ppc450.h> */
+
+/**********************************************************************
+ * DCR access wrapper
+ **********************************************************************/
+
+static inline uint32_t mfdcrx(uint32_t dcrn)
+{
+    uint32_t value;
+    asm volatile ("mfdcrx %0,%1": "=r" (value) : "r" (dcrn) : "memory");
+    return value;
+}
+
+static inline void mtdcrx(uint32_t dcrn, uint32_t value)
+{
+    asm volatile("mtdcrx %0,%1": :"r" (dcrn), "r" (value) : "memory");
+}
+
+/*  volatile 32-bit read */
+static inline uint32_t in_be32_nosync(uint32_t *vaddr)
+{
+    volatile uint32_t *va = (volatile uint32_t *) vaddr;
+    /* _bgp_mbar(); */
+    return *va;
+}
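+/*  Illustrative use (register names assumed from the BG/P DMA DCR header):
+ *
+ *      uint32_t stat = mfdcrx(_BGP_DCR_DMA_MFU_STAT0);
+ *      mtdcrx(_BGP_DCR_DMA_CE_COUNT_THRESHOLD,
+ *             _BGP_DCR_DMA_CE_COUNT_THRESHOLD_INIT);
+ */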
+
+
+/**********************************************************************
+ * Helper functions to access IO via double hummer
+ **********************************************************************/
+
+static inline void fpu_memcpy_16(void *dst, void *src)
+{
+    asm volatile("lfpdx 0,0,%0\n"
+		 "stfpdx 0,0,%1\n"
+		 :
+		 : "b"(src), "b"(dst)
+		 : "fr0", "memory");
+}
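+/*  Note: the lfpdx/stfpdx pair moves one 16-byte quadword through FPR0, so
+ *  both pointers must be 16-byte aligned or the access will take an alignment
+ *  exception; the helpers below add bounce-buffer fix-ups for unaligned
+ *  callers. */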
+
+static inline void out_be128(void *port, void *ptrval)
+{
+    u32 tmp[4] __attribute__((aligned(16)));
+
+    if ((u32)ptrval & 0xf) {
+	memcpy(tmp, ptrval, 16);
+	ptrval = tmp;
+    }
+
+    fpu_memcpy_16(port, ptrval);
+}
+
+static inline void outs_be128(void *port, void *src, unsigned num)
+{
+    u32 tmp[4] __attribute__((aligned(16)));
+
+     /*  port must be 16 byte aligned */
+    BUG_ON((u32)port & 0xf);
+
+    if (unlikely((u32)src & 0xf)) {
+	 /*  unaligned destination */
+	while(num--) {
+	    memcpy(tmp, src, 16);
+	    fpu_memcpy_16(port, tmp);
+	    src += 16;
+	}
+    } else {
+	while(num--) {
+	    fpu_memcpy_16(port, src);
+	    src += 16;
+	}
+    }
+}
+
+static inline void outs_zero128(void *port, unsigned num)
+{
+    static u32 zero[4] __attribute__((aligned(16))) = {0, };
+    BUG_ON((u32)port & 0xf);
+
+    while (num--)
+	out_be128(port, zero);
+}
+
+/*
+ * in string operation similar to x86: reads block of data from port
+ * into memory
+ */
+static inline void ins_be128(void *dest, void *port, unsigned num)
+{
+    u32 tmp[4] __attribute__((aligned(16)));
+
+     /*  port must be 16 byte aligned */
+    BUG_ON((u32)port & 0xf);
+
+    if ((u32)dest & 0xf)
+    {
+	 /*  unaligned destination */
+	while(num--) {
+	    fpu_memcpy_16(tmp, port);
+	    memcpy(dest, tmp, 16);
+	    dest += 16;
+	}
+    }
+    else
+    {
+	while(num--) {
+	    fpu_memcpy_16(dest, port);
+	    dest += 16;
+	}
+    }
+}
+
+static inline void in_be128(void *dest, void *port)
+{
+    char tmp[16] __attribute__((aligned(16)));
+    void *ptr = dest;
+
+    if ((u32)dest & 0xf)
+	ptr = tmp;
+
+    fpu_memcpy_16(ptr, port);
+
+    if ((u32)dest & 0xf)
+	memcpy(dest, tmp, 16);
+}
+
+#endif /* !__DRIVERS__BLUEGENE__PPC450_H__ */
diff --git a/drivers/net/bgp_e10000/Makefile b/drivers/net/bgp_e10000/Makefile
new file mode 100644
index 0000000..c33c97e
--- /dev/null
+++ b/drivers/net/bgp_e10000/Makefile
@@ -0,0 +1,5 @@
+# Makefile for BlueGene/P 10 GbE driver
+
+obj-$(CONFIG_BGP_E10000) += bgp_e10000.o
+
+bgp_e10000-objs	:= bgp_tomal.o bgp_emac.o bgp_e10000_main.o
diff --git a/drivers/net/bgp_e10000/bgp_e10000.h b/drivers/net/bgp_e10000/bgp_e10000.h
new file mode 100644
index 0000000..4049cb4
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_e10000.h
@@ -0,0 +1,175 @@
+/*
+ * bgp_e10000.h: common header file for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef _BGP_E10000_H
+#define _BGP_E10000_H
+
+#include <linux/proc_fs.h>
+#include <asm/io.h>
+
+#define DBG_LEVEL1		1
+#define DBG_LEVEL2		(DBG_LEVEL1 | 2)
+#define DBG_LEVEL3		(DBG_LEVEL2 | 4)
+#define DBG_E10000		8
+#define DBG_EMAC		16
+#define DBG_TOMAL		32
+#define DBG_XSGS		64
+#define DBG_DEVBUS		128
+#define DBG_NAPI		256
+#define DBG_SCATTERGATHER	512
+
+#define BGP_E10000_MIN_MTU 256
+#define BGP_E10000_MAX_MTU 9000
+#define BGP_E10000_FCS_SIZE 4
+
+
+#ifdef CONFIG_BGP_E10000_DBG
+#include <asm/udbg.h>
+#define PRINTK(detail, format, args...) \
+	do { \
+		if (((detail) & CONFIG_BGP_E10000_DBG_LEVEL) == (detail)) \
+			udbg_printf("%s: " format, __func__, ##args); \
+	} while (0)
+#else
+#define PRINTK(detail, format, args...) do { } while (0)
+#endif
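+/*  Example (illustrative; 'ch' is a hypothetical variable): with DBG_TOMAL
+ *  and DBG_LEVEL1 both set in CONFIG_BGP_E10000_DBG_LEVEL, the following
+ *  call is compiled in and emitted via udbg_printf():
+ *
+ *      PRINTK(DBG_TOMAL | DBG_LEVEL1, "rx refill, channel=%d\n", ch);
+ */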
+
+typedef unsigned char U8;
+typedef unsigned short U16;
+typedef unsigned int U32;
+typedef unsigned long long U64;
+
+
+typedef enum {
+	e10000_ras_none			= 0x00,
+	e10000_ras_hw_not_found		= 0x01,
+	e10000_ras_netdev_alloc_failure	= 0x02,
+	e10000_ras_netdev_reg_failure	= 0x03,
+	e10000_ras_mtu_invalid		= 0x04,
+	e10000_ras_tx_timeout		= 0x05,
+	e10000_ras_internal_error	= 0x07,
+	e10000_ras_hw_failure		= 0x09,
+	e10000_ras_link_error		= 0x0a,
+	e10000_ras_phy_reset_error	= 0x0b,
+	e10000_ras_emac_config_error	= 0x0c,
+	e10000_ras_link_loss		= 0x0d,
+
+	e10000_ras_max			= 0xff
+} e10000_ras_id;
+
+
+typedef struct _E10000_PROC_ENTRY {
+					char* name;
+					void* addr;
+					struct proc_dir_entry* entry;
+} E10000_PROC_ENTRY;
+
+
+
+/*  Generates a RAS event for ethernet. */
+void e10000_printr(U16 subComponent,
+	    	   U16 id,
+	    	   char* format,
+	    	   ...);
+
+
+static inline U32 mfdcrx(U32 dcrNum)
+{
+        U32 dcrVal = 0;
+
+        asm volatile("mfdcrx %0,%1": "=r" (dcrVal) : "r" (dcrNum) : "memory");
+
+        return dcrVal;
+}
+
+
+static inline void mtdcrx(U32 dcrNum,
+                          U32 dcrVal)
+{
+        asm volatile ("mtdcrx %0,%1": :"r" (dcrNum), "r" (dcrVal) : "memory");
+        isync();
+
+        return;
+}
+
+
+static inline void msync(void)
+{
+	asm volatile ("msync" : : : "memory");
+}
+
+
+static inline int e10000_proc_read(char* page,
+                   		   char** start,
+                   		   off_t off,
+				   int count,
+				   int* eof,
+				   void* data)
+{
+        int rc = 0;
+        int value;
+
+	 /*  Read the value of the associated address and print it. */
+	value = in_be32(data);
+        rc = snprintf(page, count, "%08x\n", value);
+
+        *eof = 1;
+
+        return rc;
+}
+
+
+static inline int e10000_proc_write(struct file* file,
+				    const char* buffer,
+				    unsigned long len,
+				    void* data)
+{
+        unsigned int value;
+        char valStr[128];
+        int strLen = sizeof(valStr)-1;
+
+        if (strLen > len)
+                strLen = len;
+        if (copy_from_user(valStr, buffer, strLen))
+                return -EFAULT;
+        else if (len) {
+		char* endp;
+
+                 /*  NULL terminate the string of digits and convert to its numeric value. */
+                if (valStr[strLen-1] == '\n')
+                        strLen--;
+                valStr[strLen] = '\0';
+                value = simple_strtoul(valStr, &endp, 0);
+
+		 /*  Write the value to the associated address. */
+		out_be32(data, value);
+        }
+
+        return len;
+}
+
+
+static inline struct proc_dir_entry* e10000_create_proc_entry(struct proc_dir_entry* dir,
+							      char* name,
+                          				      void* addr)
+{
+        struct proc_dir_entry* entry = create_proc_entry(name, S_IRUGO, dir);
+        if (entry) {
+                entry->nlink = 1;
+                entry->read_proc = e10000_proc_read;
+                entry->write_proc = e10000_proc_write;
+                entry->data = addr;
+        }
+
+        return entry;
+}
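+/*  Illustrative use (hypothetical 'dir' and 'regs' supplied by the caller):
+ *
+ *      e10000_create_proc_entry(dir, "mode0", (void*) ((U32) regs + 0x00));
+ *
+ *  reading the /proc file then returns the register contents and writing it
+ *  stores a new value. */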
+
+#endif
diff --git a/drivers/net/bgp_e10000/bgp_e10000_main.c b/drivers/net/bgp_e10000/bgp_e10000_main.c
new file mode 100644
index 0000000..b8d79d3
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_e10000_main.c
@@ -0,0 +1,568 @@
+/*
+ * bgp_e10000_main.c: net_device source for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <asm/reg_booke.h>
+#include <linux/proc_fs.h>
+#include <stdarg.h>
+#include <asm/bluegene_ras.h>
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+
+#include "bgp_e10000.h"
+#include "bgp_emac.h"
+#include "bgp_tomal.h"
+
+
+static int e10000_change_mtu(struct net_device*, int);
+static int e10000_do_ioctl(struct net_device*, struct ifreq*, int);
+static struct net_device_stats* e10000_get_stats(struct net_device*);
+static int e10000_hard_start_xmit(struct sk_buff*, struct net_device*);
+static int e10000_open(struct net_device*);
+//static void e10000_set_multicast_list(struct net_device*);
+static int e10000_stop(struct net_device*);
+static void e10000_tx_timeout(struct net_device*);
+static int e10000_set_mac_address(struct net_device* netDev, void* macAddr);
+static void e10000_link_test(unsigned long);
+
+static struct net_device* e10000NetDev;
+static struct timer_list e10000LinkTimer;
+static const struct net_device_ops e10000NetDevOps = {
+        .ndo_open               = e10000_open,
+        .ndo_stop               = e10000_stop,
+        .ndo_start_xmit         = e10000_hard_start_xmit,
+        .ndo_get_stats          = e10000_get_stats,
+        .ndo_set_mac_address    = e10000_set_mac_address,
+        .ndo_tx_timeout         = e10000_tx_timeout,
+        .ndo_change_mtu         = e10000_change_mtu,
+        .ndo_do_ioctl           = e10000_do_ioctl,
+};
+
+static BGP_Personality_t bgpers;
+static void* e10000DevMapAddr;
+static unsigned int e10000DevMapLen;
+
+static int __init e10000_init(void)
+{
+        int rc = 0;
+        TOMAL* tomal = NULL;
+	EMAC* emac = NULL;
+	struct proc_dir_entry* e10000Dir;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry\n");
+
+	 /*  Determine if Ethernet HW is present. */
+	bluegene_getPersonality((void*) &bgpers, sizeof(bgpers));
+	if (bgpers.Network_Config.RankInPSet) {   /*  No HW so exit. */
+		rc = -ENODEV;
+		goto end;
+	}
+
+	 /*  Allocate ethernet device(s). */
+	e10000NetDev = alloc_etherdev(sizeof(EMAC));
+        if (!e10000NetDev) {
+		e10000_printr(bg_subcomp_linux, e10000_ras_netdev_alloc_failure,
+			      "Failure allocating ethernet device.");
+                rc = -ENOMEM;
+                goto end;
+        }
+
+         /*  Create /proc directory. */
+        e10000Dir = proc_mkdir("driver/e10000", NULL);
+
+	 /*  Create mapping for TOMAL and XEMAC devices.  Since they are close in memory one mapping with */
+	 /*  a small hole in between will cover both.  Tell CNS where XEMAC is mapped. */
+	e10000DevMapLen = XEMAC_BASE_ADDRESS + sizeof(XEMACRegs) - TOMAL_BASE_ADDRESS;
+	e10000DevMapAddr = ioremap(TOMAL_BASE_ADDRESS, e10000DevMapLen);
+	if (!e10000DevMapAddr) {
+		rc = -ENODEV;
+		goto free_dev;
+	}
+	rc = bluegene_mapXEMAC(e10000DevMapAddr+(XEMAC_BASE_ADDRESS - TOMAL_BASE_ADDRESS));
+	if (rc) {
+		e10000_printr(bg_subcomp_linux, 0xff, "Failure registering XEMAC mapping with CNS.");
+		rc = -ENODEV;
+		goto unmap_dev;
+	}
+
+         /*  Allocate and initialize TOMAL device. */
+        tomal = tomal_init(e10000DevMapAddr, e10000NetDev, CONFIG_BGP_E10000_RXB, CONFIG_BGP_E10000_TXB, NULL,
+			   0, 0, TOMAL_IRQ0, TOMAL_IRQ1, e10000Dir);
+        if (IS_ERR(tomal)) {
+                rc = PTR_ERR(tomal);
+                goto unmap_dev;
+        }
+
+	 /*  Initialize XEMAC. */
+	e10000NetDev->irq = XEMAC_IRQ;
+	emac = (EMAC*) netdev_priv(e10000NetDev);
+        rc = emac_init((char*) e10000DevMapAddr + (XEMAC_BASE_ADDRESS - TOMAL_BASE_ADDRESS), emac, EMAC_TYPE_XEMAC,
+			tomal, 0, e10000NetDev, e10000Dir);
+        if (rc)
+                goto free_tomal;
+
+	 /*  Initialize network device operations. */
+	e10000NetDev->netdev_ops = &e10000NetDevOps;
+
+	 /*  Register the net_device. */
+	rc = register_netdev(e10000NetDev);
+	if (rc) {
+		e10000_printr(bg_subcomp_linux, e10000_ras_netdev_reg_failure,
+				"Failure registering net_device [%p].", e10000NetDev);
+		goto exit_emac;
+	}
+
+         /*  Configure EMAC. */
+        rc = emac_configure(emac);
+        if (rc) {
+		e10000_printr(bg_subcomp_e10000, e10000_ras_emac_config_error,
+				"EMAC configuration error. rc=%d", rc);
+		 /*  The net_device is already registered, so unregister it on this path. */
+		unregister_netdev(e10000NetDev);
+                goto exit_emac;
+        }
+
+	 /*  Initialize the link timer. */
+	setup_timer(&e10000LinkTimer, e10000_link_test, (unsigned long) e10000NetDev);
+
+	goto end;
+
+exit_emac:
+	emac_exit(emac);
+free_tomal:
+	tomal_exit(tomal);
+unmap_dev:
+	iounmap(e10000DevMapAddr);
+free_dev:
+	free_netdev(e10000NetDev);
+end:
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit rc=0x%x\n", rc);
+
+        return rc;
+}
+
+
+
+static int e10000_set_mac_address(struct net_device* netDev, void* macAddr)
+{
+	int rc = -EINVAL;
+	struct sockaddr* sockAddr = (struct sockaddr*) macAddr;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, macAddr=%p\n",
+		netDev, macAddr);
+
+	if (is_valid_ether_addr(sockAddr->sa_data)) {
+		EMAC* emac = (EMAC*) netdev_priv(netDev);
+		unsigned long flags;
+
+		memcpy(netDev->dev_addr, sockAddr->sa_data, netDev->addr_len);
+
+		spin_lock_irqsave(&emac->lock, flags);
+		rc = emac_set_mac_address(emac);
+		spin_unlock_irqrestore(&emac->lock, flags);
+	} else
+		rc = -EADDRNOTAVAIL;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+
+static int e10000_change_mtu(struct net_device* netDev,
+			     int newMTU)
+{
+	int rc = 0;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, newMTU=%d\n",
+	       netDev, newMTU);
+
+	if (newMTU < BGP_E10000_MIN_MTU || newMTU > BGP_E10000_MAX_MTU) {
+		e10000_printr(bg_subcomp_e10000, e10000_ras_mtu_invalid,
+				"Invalid MTU of [%d] specified. Valid MTU "
+				"values are [%d,%d].\n", newMTU, BGP_E10000_MIN_MTU,
+				BGP_E10000_MAX_MTU);
+		rc = -EINVAL;
+	} else if (netDev->mtu != newMTU && netif_running(netDev)) {
+/* #ifdef CONFIG_BGP_E10000_NAPI */
+/* 		netDev->weight = tomal->maxRxBuffers[channel]; */
+/* #endif */
+		netDev->mtu = newMTU;
+	}
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+static int e10000_do_ioctl(struct net_device* netDev,
+			   struct ifreq* req,
+			   int cmd)
+{
+	int rc = 0;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, req=%p, cmd=0x%x\n",
+	       netDev, req, cmd);
+
+//	printk(KERN_CRIT "IOCTL not supported yet\n");
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+static struct net_device_stats* e10000_get_stats(struct net_device* netDev)
+{
+	struct net_device_stats* stats = &((EMAC*) netdev_priv(netDev))->stats;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\nexit - stats=%p\n",
+		netDev, stats);
+
+	return stats;
+}
+#ifdef CONFIG_BGP_E10000_DBG
+int e10000_diag_count;
+/*  If the 'skb' has fragments (i.e. is a scatter-gather one), display them all and the base element too */
+static void diag_display_sk(struct sk_buff* skb)
+{
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+
+	if (skb->data_len >= 4096 || e10000_diag_count > 0) {
+		int f;
+
+		if (e10000_diag_count > 0)
+			e10000_diag_count -= 1;
+		printk(KERN_INFO "diag_display_sk skb=%p nr_frags=%d skb->data=%p skb->len=0x%08x skb->data_len=0x%08x e10000_diag_count=%d\n",
+		       skb, nr_frags, skb->data, skb->len, skb->data_len, e10000_diag_count);
+		for (f = 0; f < nr_frags; f++) {
+			struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+
+			printk(KERN_INFO " frags[%d]->(page=%p, page_offset=0x%08x, size=0x%08x)\n",
+			       f, frag->page, frag->page_offset, frag->size);
+		}
+	}
+}
+#endif
+static int e10000_hard_start_xmit(struct sk_buff* skb,
+				  struct net_device* netDev)
+{
+	int rc;
+	unsigned long flags;
+	EMAC* emac = netdev_priv(netDev);
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - skb=%p, netDev=%p\n",
+		skb, netDev);
+
+#ifdef CONFIG_BGP_E10000_DBG
+	if (DBG_SCATTERGATHER & CONFIG_BGP_E10000_DBG_LEVEL)
+		diag_display_sk(skb);
+#endif
+
+	spin_lock_irqsave(&emac->tomal->txLock[emac->channel], flags);
+	rc = tomal_xmit_tx_buffer(emac->tomal, emac->channel, skb);
+	if (likely(!rc)) {
+		emac->stats.tx_packets++;
+		emac->stats.tx_bytes += skb->len;
+		rc = NETDEV_TX_OK;
+		netDev->trans_start = jiffies;
+	} else {
+		netif_stop_queue(netDev);
+		rc = NETDEV_TX_BUSY;
+	}
+	spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], flags);
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+
+static int e10000_open(struct net_device* netDev)
+{
+	int rc = 0;
+	EMAC* emac = (EMAC*) netdev_priv(netDev);
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+
+	if (!emac->opened) {
+		U32 linkTimer;
+		U8 rxLink, txLink;
+		struct sockaddr sockAddr;
+
+                 /*  Set the MAC address for this interface. */
+		memcpy(sockAddr.sa_data, bgpers.Ethernet_Config.EmacID, netDev->addr_len);
+		e10000_set_mac_address(netDev, &sockAddr);
+
+                 /*  Acquire locks for EMAC and TOMAL. */
+                spin_lock(&emac->tomal->rxLock[emac->channel]);
+                spin_lock(&emac->tomal->txLock[emac->channel]);
+                spin_lock(&emac->lock);
+
+		emac->opened = 1;
+
+#ifndef CONFIG_BGP_E10000_EMAC_LOOPBACK
+		 /*  Reset TOMAL */
+		tomal_soft_reset(emac->tomal);
+
+	         /*  PHY reset. */
+		rc = bluegene_macResetPHY();
+		if (rc) {
+			e10000_printr(bg_subcomp_e10000, e10000_ras_phy_reset_error,
+					"%s: PHY reset error.", netDev->name);
+			spin_unlock(&emac->lock);
+			spin_unlock(&emac->tomal->txLock[emac->channel]);
+			spin_unlock(&emac->tomal->rxLock[emac->channel]);
+			goto exit;
+		}
+
+		 /*  Wait for the link to be ready.  We wait less time for a single ION so that */
+		 /*  we time out before the control system does. */
+		linkTimer = 240;
+		for (txLink = 0, rxLink = 0; linkTimer && (!txLink || !rxLink); linkTimer--) {
+			txLink = bluegene_macTestTxLink();
+			rxLink = bluegene_macTestRxLink();
+			mdelay(100);	 /*  poll every 100 ms */
+		}
+		printk(KERN_NOTICE "%s: Link status [RX%c,TX%c]\n", netDev->name,
+		       rxLink ? '+' : '-', txLink  ? '+' : '-');
+		if (!linkTimer) {
+                        e10000_printr(bg_subcomp_e10000, e10000_ras_link_error,
+                                        "%s: No link detected.", netDev->name);
+			spin_unlock(&emac->lock);
+			spin_unlock(&emac->tomal->txLock[emac->channel]);
+			spin_unlock(&emac->tomal->rxLock[emac->channel]);
+                        goto exit;
+		}
+#endif
+
+		 /*  Configure EMAC. */
+		rc = emac_configure(emac);
+		if (rc) {
+			e10000_printr(bg_subcomp_e10000, e10000_ras_emac_config_error,
+				      "EMAC configuration error.   rc=%d", rc);
+			spin_unlock(&emac->lock);
+			spin_unlock(&emac->tomal->txLock[emac->channel]);
+			spin_unlock(&emac->tomal->rxLock[emac->channel]);
+			goto exit;
+		}
+
+		 /*  Enable TX and RX for TOMAL and EMAC. */
+		tomal_rx_tx_enable(emac->tomal);
+		emac_rx_enable(emac);
+		emac_tx_enable(emac);
+
+		 /*  Enable IRQs. */
+		tomal_irq_enable(emac->tomal, emac->channel);
+		emac_irq_enable(emac);
+
+		 /*  Release the locks. */
+		spin_unlock(&emac->lock);
+		spin_unlock(&emac->tomal->txLock[emac->channel]);
+		spin_unlock(&emac->tomal->rxLock[emac->channel]);
+
+		 /*  Start the queues. */
+		netif_start_queue(netDev);
+
+		 /*  Start link timer. */
+		mod_timer(&e10000LinkTimer, jiffies + HZ);
+	}
+exit:
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+
+static void e10000_link_test(unsigned long data)
+{
+	struct net_device* netDev = (struct net_device*) data;
+	static unsigned int linkLossCount = 0;
+	u8 txLink = bluegene_macTestTxLink();
+	u8 rxLink = bluegene_macTestRxLink();
+
+	if (!txLink || !rxLink) {
+		 /*  Link gone.  Have we reached the threshold where we are going to send a fatal event? */
+		if (linkLossCount == 30)
+			e10000_printr(bg_subcomp_e10000, e10000_ras_link_error,
+					"%s: Link error detected. Link status [RX%c,TX%c]\n", netDev->name,
+                       			rxLink ? '+' : '-', txLink  ? '+' : '-');
+		else if (linkLossCount == 0)
+			 /*  Send non-fatal RAS when the link first disappears. */
+			e10000_printr(bg_subcomp_e10000, e10000_ras_link_loss,
+					"%s: Loss of link detected. Link status [RX%c,TX%c]\n", netDev->name,
+                                        rxLink ? '+' : '-', txLink  ? '+' : '-');
+
+		linkLossCount++;
+	} else
+		 /*  Link present.  Reset counter. */
+		linkLossCount = 0;
+
+	mod_timer(&e10000LinkTimer, jiffies + HZ);
+
+	return;
+}
+
+
+//static void e10000_set_multicast_list(struct net_device* netDev)
+//{
+//	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+//
+//	emac_set_multicast_list((EMAC*) netdev_priv(netDev));
+//
+//	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n");
+//
+//	return;
+//}
+
+
+static int e10000_stop(struct net_device* netDev)
+{
+	int rc = 0;
+	EMAC* emac = (EMAC*) netdev_priv(netDev);
+	unsigned long tomalRxFlags;
+	unsigned long tomalTxFlags;
+	unsigned long emacFlags;
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+
+	 /*  Stop the link timer first: del_timer_sync() may spin waiting for a
+	  *  running handler, so it must not be called with interrupts disabled
+	  *  and the locks held. */
+	del_timer_sync(&e10000LinkTimer);
+
+         /*  Acquire locks for EMAC and TOMAL. */
+        spin_lock_irqsave(&emac->tomal->rxLock[emac->channel], tomalRxFlags);
+	spin_lock_irqsave(&emac->tomal->txLock[emac->channel], tomalTxFlags);
+	spin_lock_irqsave(&emac->lock, emacFlags);
+
+	local_bh_disable();
+	netif_stop_queue(netDev);
+
+	emac->opened = 0;
+	emac_rx_disable(emac);
+	emac_tx_disable(emac);
+	emac_irq_disable(emac);
+	tomal_rx_tx_disable(emac->tomal);
+	tomal_irq_disable(emac->tomal, emac->channel);
+
+         /*  Release locks for EMAC and TOMAL. */
+	spin_unlock_irqrestore(&emac->lock, emacFlags);
+        spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], tomalTxFlags);
+        spin_unlock_irqrestore(&emac->tomal->rxLock[emac->channel], tomalRxFlags);
+
+        local_bh_enable();
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+static void e10000_tx_timeout(struct net_device* netDev)
+{
+	EMAC* emac = (EMAC*) netdev_priv(netDev);
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+
+	e10000_printr(bg_subcomp_e10000, e10000_ras_tx_timeout,
+			"Transmission timeout at %u, elapsed time %u\n",
+			(U32) jiffies, (U32)(jiffies - netDev->trans_start));
+	emac->stats.tx_errors++;
+
+	 /*  Attempt to reset the interface. */
+	e10000_stop(netDev);
+	e10000_open(netDev);
+
+	PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n");
+
+	return;
+}
+
+
+static void e10000_exit(void)
+{
+	EMAC* emac = netdev_priv(e10000NetDev);
+
+        PRINTK(DBG_E10000 | DBG_LEVEL2, "entry\n");
+
+	 /*  Unregister first so the stack stops calling into the driver. */
+	if (e10000NetDev)
+		unregister_netdev(e10000NetDev);
+
+	 /*  Allow the HW to clean up. */
+	if (emac) {
+		if (emac->tomal)
+			tomal_exit(emac->tomal);
+		emac_exit(emac);
+	}
+
+	 /*  Unmap HW. */
+	if (e10000DevMapAddr)
+		iounmap(e10000DevMapAddr);
+
+	 /*  Free the net_device. */
+	if (e10000NetDev)
+		free_netdev(e10000NetDev);
+
+        PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n");
+
+        return;
+}
+
+
+extern int bgWriteRasStr(unsigned int component,
+                          unsigned int subcomponent,
+                          unsigned int errCode,
+                          char*        str,
+                          unsigned int strLen);
+
+void e10000_printr(U16 subComponent,
+            	   U16 id,
+            	   char* format,
+            	   ...)
+{
+        va_list args;
+        int n;
+        char text[BG_RAS_DATA_MAX];
+
+        va_start(args, format);
+        n = vsnprintf(text, sizeof(text)-1, format, args);
+        va_end(args);
+	if (n < 0)
+		n = 0;
+	else if (n > (int) sizeof(text) - 1)
+		n = sizeof(text) - 1;	/* vsnprintf returns the untruncated length */
+
+	text[n] = '\0';
+	printk(KERN_WARNING "%s\n", text);
+	bgWriteRasStr(bg_comp_kernel, subComponent, id, text, 0);
+
+	return;
+}
+
+
+module_init(e10000_init);
+module_exit(e10000_exit);
+
+
+
+MODULE_DESCRIPTION("10Gb Ethernet Driver for BlueGene");
+MODULE_VERSION("2.0");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andrew Tauferner");
+
diff --git a/drivers/net/bgp_e10000/bgp_emac.c b/drivers/net/bgp_e10000/bgp_emac.c
new file mode 100644
index 0000000..1afa02e
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_emac.c
@@ -0,0 +1,282 @@
+/*
+ * bgp_emac.c: XEMAC device for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include "bgp_emac.h"
+#include "bgp_e10000.h"
+
+
+/*  XEMAC accessible through /proc/driver/e10000/xemac/hw. */
+static E10000_PROC_ENTRY emac_hw_proc_entry[] = {
+	{ "mode0",			(void*) 0x00,	NULL },
+	{ "mode1",			(void*) 0x04,	NULL },
+	{ "txMode0",			(void*) 0x08,	NULL },
+	{ "txMode1",			(void*) 0x0c, 	NULL },
+	{ "rxMode",			(void*) 0x10,	NULL },
+	{ "interruptStatus",		(void*) 0x14,	NULL },
+	{ "interruptStatusEnable",	(void*) 0x18, 	NULL },
+	{ "individualAddrH",		(void*) 0x1c, 	NULL },
+	{ "individualAddrL",		(void*) 0x20,	NULL },
+	{ "vlanTPID",			(void*) 0x24,	NULL },
+	{ "vlanTCI",			(void*) 0x28,	NULL },
+	{ "pauseTimerValue",		(void*) 0x2c,	NULL },
+	{ "individualAddrHashTable0",	(void*) 0x30,	NULL },
+	{ "individualAddrHashTable1",   (void*) 0x34,   NULL },
+	{ "individualAddrHashTable2",   (void*) 0x38,   NULL },
+	{ "individualAddrHashTable3",   (void*) 0x3c,   NULL },
+	{ "groupAddrHashTable0",	(void*) 0x40,	NULL },
+	{ "groupAddrHashTable1",        (void*) 0x44,   NULL },
+	{ "groupAddrHashTable2",        (void*) 0x48,   NULL },
+	{ "groupAddrHashTable3",        (void*) 0x4c,   NULL },
+	{ "lastSourceAddrH",		(void*) 0x50,	NULL },
+	{ "lastSourceAddrL",		(void*) 0x54,	NULL },
+	{ "interPacketGapValue",	(void*) 0x58,	NULL },
+	{ "staCtrl",			(void*) 0x5c,	NULL },
+	{ "txRequestThreshold",		(void*) 0x60,	NULL },
+	{ "rxLowHighWaterMark",		(void*) 0x64,	NULL },
+	{ "sopCommandMode",		(void*) 0x68, 	NULL },
+	{ "secondaryIndividualAddrH",	(void*) 0x6c,	NULL },
+	{ "secondaryIndividualAddrL",	(void*) 0x70,	NULL },
+	{ "txOctetsCounter1",		(void*) 0x74,	NULL },
+	{ "txOctetsCounter2",		(void*) 0x78,	NULL },
+	{ "rxOctetsCounter1",		(void*) 0x7c,	NULL },
+	{ "rxOctetsCounter2",		(void*) 0x80,	NULL },
+	{ "revisionID",			(void*) 0x84,	NULL },
+	{ "hwDebug",			(void*) 0x88,	NULL },
+	{ NULL,				0,		NULL }
+};
+
+
+
+static irqreturn_t emac_irq(int irq,
+			    void* data)
+{
+	struct net_device* netDev = (struct net_device*) data;
+	EMAC* emac = (EMAC*) netdev_priv(netDev);
+	U32 isr = in_be32(&emac->regs->interruptStatus);
+	irqreturn_t rc = IRQ_NONE;
+
+	if (irq == netDev->irq) {
+		if ((isr & XEMAC_IS_TXPE) ||  (isr & XEMAC_IS_DB) || (isr & XEMAC_IS_TE)) {
+			rc = IRQ_HANDLED;
+			emac->stats.tx_errors++;
+		}
+		if (isr & XEMAC_IS_RXPE) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+		}
+		if (isr & XEMAC_IS_TFEI) {
+			rc = IRQ_HANDLED;
+			emac->stats.tx_errors++;
+			emac->stats.tx_fifo_errors++;
+		}
+		if (isr & XEMAC_IS_RFFI) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+			emac->stats.rx_over_errors++;
+		}
+		if (isr & XEMAC_IS_OVR) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+			emac->stats.rx_over_errors++;
+		}
+		if ((isr & XEMAC_IS_PSF) || (isr & XEMAC_IS_RTF) || (isr & XEMAC_IS_IRE)) {   /*  pause or runt frame or in range error? */
+			rc = IRQ_HANDLED;
+		}
+		if (isr & XEMAC_IS_BDF) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+			emac->stats.rx_frame_errors++;
+		}
+		if (isr & XEMAC_IS_LF) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+		}
+		if (isr & XEMAC_IS_BFCS) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+			emac->stats.rx_crc_errors++;
+		}
+		if ((isr & XEMAC_IS_FTL) || (isr & XEMAC_IS_ORE)) {
+			rc = IRQ_HANDLED;
+			emac->stats.rx_errors++;
+			emac->stats.rx_length_errors++;
+		}
+
+		out_be32(&emac->regs->interruptStatus, isr);
+	}
+
+	if (rc != IRQ_HANDLED)
+		e10000_printr(bg_subcomp_xemac, emac_ras_irq_unknown,
+				"Spurious interrupt - irq=%d, isr=0x%08x.", irq, isr);
+
+	return rc;
+}
+
+int __init emac_init(void* devMapAddr,
+		     EMAC* emac,
+		     U32 type,
+		     TOMAL* tomal,
+		     U8 channel,
+		     struct net_device* netDev,
+		     struct proc_dir_entry* procDir)
+{
+	int rc = -EINVAL;
+
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p, type=%d, tomal=%p, netDev=%p\n", emac, type,
+		tomal, netDev);
+
+	emac->type = type;
+	switch (type) {
+		case EMAC_TYPE_XEMAC: {
+			emac->regs = (XEMACRegs*) devMapAddr;
+			if (!emac->regs) {
+				e10000_printr(bg_subcomp_xemac, emac_ras_ioremap_error,
+						"Failure mapping XEMAC registers.");
+				rc = -ENXIO;
+				goto out;
+			}
+
+			 /*  Create /proc/driver/e10000/xemac/hw */
+			if (procDir) {
+				emac->parentDir = procDir;
+				emac->emacDir = proc_mkdir("xemac", procDir);
+				if (emac->emacDir) {
+					emac->hwDir = proc_mkdir("hw", emac->emacDir);
+					if (emac->hwDir) {
+						E10000_PROC_ENTRY* entry = emac_hw_proc_entry;
+
+						while (entry->name) {
+							entry->entry = e10000_create_proc_entry(emac->hwDir, entry->name,
+												(void*) ((U32) emac->regs + (U32) entry->addr));
+							if (!entry->entry)
+								printk(KERN_EMERG "Failure creating /proc entry %s\n", entry->name);
+
+							entry++;
+						}
+					}
+				}
+			}
+			break;
+		}
+
+		default:
+			e10000_printr(bg_subcomp_xemac, e10000_ras_internal_error,
+					"Invalid EMAC type [%d].", type);
+			goto out;
+	}
+
+#ifndef CONFIG_BGP_E10000_EMAC_LOOPBACK
+	 /*  Initialize the PHY. */
+	emac->phy.phy_id = 0;
+	emac->phy.full_duplex = 1;
+	emac->phy.dev = netDev;
+#endif
+
+	 /*  Request IRQ. */
+	rc = request_irq(netDev->irq, emac_irq, IRQF_DISABLED, "BGP EMAC IRQ", (void*) netDev);
+	if (rc) {
+		e10000_printr(bg_subcomp_xemac, emac_ras_irq_not_available,
+				"Failure requesting IRQ [%d] - rc = %d", netDev->irq, rc);
+		goto out;
+	}
+
+	emac->tomal = tomal;
+	emac->channel = channel;
+	emac->netDev = netDev;
+	memset(&emac->stats, 0, sizeof(emac->stats));
+	spin_lock_init(&emac->lock);
+	emac->opened = 0;
+
+out:
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "exit rc=%d\n", rc);
+
+	return rc;
+}
+
+
+int emac_configure(EMAC* emac)
+{
+	int rc = 0;
+
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+	switch (emac->type) {
+		case EMAC_TYPE_XEMAC: {
+			XEMACRegs* reg = (XEMACRegs*) emac->regs;
+			U32 mode1 = XEMAC_MODE1_TRQ | XEMAC_MODE1_RFS8K |
+                                    XEMAC_MODE1_TFS8K | XEMAC_MODE1_JBEN |
+				    XEMAC_MODE1_PSEN | XEMAC_MODE1_IFEN |
+                                    XEMAC_MODE1_OPB133MHZ | 0x00001000;
+			U32 rxMode = XEMAC_RX_MODE_SPAD | XEMAC_RX_MODE_SFCS | XEMAC_RX_MODE_PMME |
+				XEMAC_RX_MODE_MAE | XEMAC_RX_MODE_IAE | XEMAC_RX_MODE_BAE | XEMAC_RX_MODE_LFD |
+				XEMAC_RX_MODE_RFAF_16_32;
+
+			 /*  We must accept multicast frames so that pause frames aren't discarded. */
+			 /*  This means that EMAC must have multicast mode enabled and promiscuous multicast  */
+			 /*  mode enabled.  */
+			if (emac->netDev->flags & IFF_PROMISC)
+				rxMode |= XEMAC_RX_MODE_PME;
+			out_be32(&reg->rxMode, rxMode);
+			out_be32(&reg->rxLowHighWaterMark, 0x00800100);
+			out_be32(&reg->pauseTimerValue, 0x1000);
+
+#ifdef CONFIG_BGP_E10000_EMAC_LOOPBACK
+			mode1 |= XEMAC_MODE1_LPEN;
+#else
+                        mode1 |= XEMAC_MODE1_SDR;
+#endif
+			out_be32(&reg->mode1, mode1);
+			out_be32(&reg->txMode1, 0x02200240);
+			out_be32(&reg->txRequestThreshold, 0x17000000);
+			break;
+		}
+	}
+
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+void emac_exit(EMAC* emac)
+{
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "entry\n");
+
+	 /*  Remove /proc entries. */
+	if (emac->emacDir) {
+		if (emac->hwDir) {
+			E10000_PROC_ENTRY* entry = emac_hw_proc_entry;
+
+			while (entry->name) {
+				if (entry->entry) {
+					remove_proc_entry(entry->entry->name, emac->emacDir);
+					entry->entry = NULL;
+				}
+				entry++;
+			}
+
+			remove_proc_entry(emac->hwDir->name, emac->emacDir);
+			emac->hwDir = NULL;
+		}
+		remove_proc_entry(emac->emacDir->name, emac->parentDir);
+		emac->emacDir = NULL;
+	}
+
+	 /*  Free the IRQ. */
+	free_irq(emac->netDev->irq, (void*) emac->netDev);
+
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "exit\n");
+
+	return;
+}
diff --git a/drivers/net/bgp_e10000/bgp_emac.h b/drivers/net/bgp_e10000/bgp_emac.h
new file mode 100644
index 0000000..769aa85
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_emac.h
@@ -0,0 +1,356 @@
+/*
+ * bgp_emac.h: XEMAC definition for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef _BGP_EMAC_H
+#define _BGP_EMAC_H
+
+#include <linux/netdevice.h>
+#include <linux/mii.h>
+#include <linux/delay.h>
+#include <linux/crc32.h>
+#include <linux/proc_fs.h>
+#include <asm/bluegene.h>
+#include <asm/bluegene_ras.h>
+
+#include "bgp_tomal.h"
+#include "bgp_e10000.h"
+
+
+#define XEMAC_IRQ_GROUP 9
+#define XEMAC_IRQ_GINT   0
+#define XEMAC_IRQ bic_hw_to_irq(XEMAC_IRQ_GROUP, XEMAC_IRQ_GINT)
+
+#define XEMAC_BASE_ADDRESS 0x720004000ULL
+
+
+
+typedef volatile struct _XEMACRegs {  /*  Offset Description */
+	U32 mode0;			 /*  00 mode register 0 */
+#define XEMAC_MODE0_RXIDL		0x80000000
+#define XEMAC_MODE0_TXIDL		0x40000000
+#define XEMAC_MODE0_SRST		0x20000000
+#define XEMAC_MODE0_TXEN		0x10000000
+#define XEMAC_MODE0_RXEN		0x08000000
+#define XEMAC_MODE0_WUEN		0x04000000
+        U32 mode1;                       /*  04 mode register 1 */
+#define XEMAC_MODE1_SDR                 0x80000000
+#define XEMAC_MODE1_LPEN		0x40000000
+#define XEMAC_MODE1_VLEN		0x20000000
+#define XEMAC_MODE1_IFEN		0x10000000
+#define XEMAC_MODE1_PSEN		0x08000000
+#define XEMAC_MODE1_RFS2K		0x00100000
+#define XEMAC_MODE1_RFS4K		0x00180000
+#define XEMAC_MODE1_RFS8K		0x00200000
+#define XEMAC_MODE1_RFS16K		0x00280000
+#define XEMAC_MODE1_RFS32K		0x00300000
+#define XEMAC_MODE1_RFS64K		0x00380000
+#define XEMAC_MODE1_TFS2K		0x00020000
+#define XEMAC_MODE1_TFS4K		0x00030000
+#define XEMAC_MODE1_TFS8K		0x00040000
+#define XEMAC_MODE1_TFS16K		0x00050000
+#define XEMAC_MODE1_TFS32K		0x00060000
+#define XEMAC_MODE1_TRQ			0x00008000
+#define XEMAC_MODE1_JBEN		0x00000800
+#define XEMAC_MODE1_OPB66MHZ		0x00000008
+#define XEMAC_MODE1_OPB83MHZ		0x00000010
+#define XEMAC_MODE1_OPB100MHZ		0x00000018
+#define XEMAC_MODE1_OPB133MHZ		0x00000020
+        U32 txMode0;                     /*  08 TX mode register 0 */
+#define XEMAC_TX_MODE0_GNP		0x80000000
+#define XEMAC_TX_MODE0_TFAE_2_4		0x00000001
+#define XEMAC_TX_MODE0_TFAE_4_8		0x00000002
+#define XEMAC_TX_MODE0_TFAE_8_16	0x00000003
+#define XEMAC_TX_MODE0_TFAE_16_32	0x00000004
+#define XEMAC_TX_MODE0_TFAE_32_64	0x00000005
+#define XEMAC_TX_MODE0_TFAE_64_128	0x00000006
+#define XEMAC_TX_MODE0_TFAE_128_256	0x00000007
+        U32 txMode1;                     /*  0C TX mode register 1 */
+        U32 rxMode;                      /*  10 RX mode register */
+#define XEMAC_RX_MODE_SPAD		0x80000000
+#define XEMAC_RX_MODE_SFCS		0x40000000
+#define XEMAC_RX_MODE_ARRF		0x20000000
+#define XEMAC_RX_MODE_ARFE		0x10000000
+#define XEMAC_RX_MODE_LFD		0x08000000
+#define XEMAC_RX_MODE_ARIE		0x04000000
+#define XEMAC_RX_MODE_PPF		0x02000000
+#define XEMAC_RX_MODE_PME		0x01000000
+#define XEMAC_RX_MODE_PMME		0x00800000
+#define XEMAC_RX_MODE_IAE		0x00400000
+#define XEMAC_RX_MODE_MIAE		0x00200000
+#define XEMAC_RX_MODE_BAE		0x00100000
+#define XEMAC_RX_MODE_MAE		0x00080000
+#define XEMAC_RX_MODE_PUME		0x00040000
+#define XEMAC_RX_MODE_SIAE		0x00020000
+#define XEMAC_RX_MODE_RFAF_2_4		0x00000001
+#define XEMAC_RX_MODE_RFAF_4_8		0x00000002
+#define XEMAC_RX_MODE_RFAF_8_16		0x00000003
+#define XEMAC_RX_MODE_RFAF_16_32	0x00000004
+#define XEMAC_RX_MODE_RFAF_32_64	0x00000005
+#define XEMAC_RX_MODE_RFAF_64_128	0x00000006
+        U32 interruptStatus;             /*  14 interrupt status register */
+#define XEMAC_IS_TXPE			0x20000000
+#define XEMAC_IS_RXPE			0x10000000
+#define XEMAC_IS_TFEI			0x08000000
+#define XEMAC_IS_RFFI			0x04000000
+#define XEMAC_IS_OVR			0x02000000
+#define XEMAC_IS_PSF			0x01000000
+#define XEMAC_IS_BDF			0x00800000
+#define XEMAC_IS_RTF			0x00400000
+#define XEMAC_IS_LF			0x00200000
+#define XEMAC_IS_BFCS			0x00080000
+#define XEMAC_IS_FTL			0x00040000
+#define XEMAC_IS_ORE			0x00020000
+#define XEMAC_IS_IRE			0x00010000
+#define XEMAC_IS_DB			0x00000100
+#define XEMAC_IS_TE			0x00000040
+#define XEMAC_IS_MMS			0x00000002
+#define XEMAC_IS_MMF			0x00000001
+        U32 interruptStatusEnable;       /*  18 interrupt status enable register */
+        U32 individualAddrH;             /*  1C bits 0-15 of main station unique address */
+        U32 individualAddrL;             /*  20 bits 16-47 of main station unique address */
+        U32 vlanTPID;                    /*  24 VLAN tag ID */
+        U32 vlanTCI;                     /*  28 VLAN TCI register */
+        U32 pauseTimerValue;             /*  2C pause timer register */
+        U32 individualAddrHashTable[4];  /*  30 individual addr. hash registers */
+        U32 groupAddrHashTable[4];       /*  40 group addr. hash register 1 */
+        U32 lastSourceAddrH;             /*  50 bits 0-15 of last source address */
+        U32 lastSourceAddrL;             /*  54 bits 16-47 of last source address */
+        U32 interPacketGapValue;         /*  58 inter packet gap register */
+        U32 staCtrl;                     /*  5C STA control register */
+#define XEMAC_STAC_MGO			0x00008000
+#define XEMAC_STAC_PHE			0x00004000
+#define XEMAC_STAC_IM			0x00002000
+#define XEMAC_STAC_MII_READ		0x00001000
+#define XEMAC_STAC_MII_WRITE		0x00000800
+#define XEMAC_STAC_MDIO_ADDRESS		0x00002000
+#define XEMAC_STAC_MDIO_WRITE		0x00002800
+#define XEMAC_STAC_MDIO_READ		0x00003800
+#define XEMAC_STAC_MDIO_READ_INC	0x00003000
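+/*  STA usage sketch (assumed from the bit names, not from a hardware spec): */
+/*  write an opcode such as XEMAC_STAC_MII_READ together with XEMAC_STAC_MGO */
+/*  and the PHY/register address fields into staCtrl, then poll until MGO */
+/*  clears; XEMAC_STAC_PHE reports a PHY error for the transaction. */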
+        U32 txRequestThreshold;          /*  60 TX request threshold register */
+#define XEMAC_TRT_64			0x00000000
+#define XEMAC_TRT_128			0x01000000
+#define XEMAC_TRT_192			0x02000000
+#define XEMAC_TRT_256			0x03000000
+        U32 rxLowHighWaterMark;          /*  64 RX high/low water mark register */
+        U32 sopCommandMode;              /*  68 SOP command mode register */
+        U32 secondaryIndividualAddrH;    /*  6C bits 0-15 of sec. individual addr. reg */
+        U32 secondaryIndividualAddrL;    /*  70 bits 16-47 of sec. individual addr. reg */
+        U32 txOctetsCounter1;            /*  74 bits 0-31 of total TX octets (read first) */
+        U32 txOctetsCounter2;            /*  78 bits 32-63 of total TX octets (read last) */
+        U32 rxOctetsCounter1;            /*  7C bits 0-31 of total RX octets (read first) */
+        U32 rxOctetsCounter2;            /*  80 bits 32-63 of total RX octets (read last) */
+        U32 revisionID;                  /*  84 revision ID */
+        U32 hwDbg;                       /*  88 hardware debug register */
+} XEMACRegs;
+
+
+
+
+typedef struct _EMAC {
+	U32			type;
+#define EMAC_TYPE_EMAC4		4
+#define EMAC_TYPE_XEMAC		10
+	XEMACRegs*		regs;
+	TOMAL*			tomal;
+	U8			channel;
+	struct mii_if_info	phy;
+	struct net_device*	netDev;
+	struct net_device_stats stats;
+	spinlock_t		lock;
+	U8			opened;
+	struct proc_dir_entry* 	parentDir;
+	struct proc_dir_entry* 	emacDir;
+	struct proc_dir_entry* 	hwDir;
+
+} EMAC;
+
+
+typedef enum {
+	emac_ras_none			= 0x00,
+	emac_ras_timeout		= 0x01,
+	emac_ras_ioremap_error		= 0x02,
+	emac_ras_irq_not_available	= 0x03,
+	emac_ras_sta_addr_error		= 0x04,
+	emac_ras_sta_read_error		= 0x05,
+	emac_ras_sta_write_error	= 0x06,
+	emac_ras_irq_unknown		= 0x07,
+
+	emac_ras_internal_error		= 0xfe,
+	emac_ras_max			= 0xff
+} emac_ras_id;
+
+typedef enum {
+	phy_ras_none			= 0x00,
+	phy_ras_timeout			= 0x01,
+	phy_ras_not_found		= 0x02,
+
+	phy_ras_max			= 0xff
+} phy_ras_id;
+
+
+int __init emac_init(void* devMapAddr,
+		     EMAC* emac,
+		     U32 type,
+		     TOMAL* tomal,
+		     U8 channel,
+		     struct net_device* netDev,
+		     struct proc_dir_entry* procDir);
+
+int emac_configure(EMAC* emac);
+
+
+
+
+static inline int emac_soft_reset(EMAC* emac)
+{
+	int rc = 0;
+	U32 i;
+
+        PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+	 /*  Set the reset bit and wait for it to clear. */
+	out_be32(&emac->regs->mode0, XEMAC_MODE0_SRST);
+	for (i = 200; (in_be32(&emac->regs->mode0) & XEMAC_MODE0_SRST) && i; i--)
+		mdelay(10);
+	if (!i) {
+		e10000_printr(bg_subcomp_xemac, emac_ras_timeout,
+				"XEMAC failed reset");
+		rc = -ETIME;
+	}
+
+	return rc;
+}
+
+
+
+static inline int emac_rx_enable(EMAC* emac)
+{
+	U32 reg = in_be32(&emac->regs->mode0);
+
+	out_be32(&emac->regs->mode0, reg | XEMAC_MODE0_RXEN);
+
+	return 0;
+}
+
+
+static inline int emac_rx_disable(EMAC* emac)
+{
+	U32 reg = in_be32(&emac->regs->mode0);
+
+        out_be32(&emac->regs->mode0, reg & ~XEMAC_MODE0_RXEN);
+
+        return 0;
+}
+
+
+static inline int emac_tx_enable(EMAC* emac)
+{
+	U32 reg = in_be32(&emac->regs->mode0);
+
+        out_be32(&emac->regs->mode0, reg | XEMAC_MODE0_TXEN);
+	reg = in_be32(&emac->regs->txMode0);
+	out_be32(&emac->regs->txMode0, reg | XEMAC_TX_MODE0_GNP);
+
+        return 0;
+}
+
+
+static inline int emac_tx_disable(EMAC* emac)
+{
+        U32 reg = in_be32(&emac->regs->mode0);
+
+        out_be32(&emac->regs->mode0, reg & ~XEMAC_MODE0_TXEN);
+
+        return 0;
+}
+
+static inline int emac_irq_enable(EMAC* emac)
+{
+	out_be32(&emac->regs->interruptStatusEnable, XEMAC_IS_TXPE | XEMAC_IS_RXPE |
+		 XEMAC_IS_TFEI | XEMAC_IS_RFFI | XEMAC_IS_OVR | XEMAC_IS_BDF |
+		 XEMAC_IS_RTF | XEMAC_IS_LF | XEMAC_IS_BFCS | XEMAC_IS_FTL |
+		 XEMAC_IS_ORE | XEMAC_IS_IRE | XEMAC_IS_DB | XEMAC_IS_TE);
+
+	return 0;
+}
+
+static inline int emac_irq_disable(EMAC* emac)
+{
+	out_be32(&emac->regs->interruptStatusEnable, 0);
+
+	return 0;
+}
+
+static inline int emac_set_mac_address(EMAC* emac)
+{
+        int rc = 0;
+
+        PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+        switch (emac->type) {
+        	case EMAC_TYPE_XEMAC: {
+                	XEMACRegs* reg = (XEMACRegs*) emac->regs;
+                        struct net_device* netDev = emac->netDev;
+
+                        out_be32(&reg->individualAddrH, netDev->dev_addr[0] << 8 |
+                                 netDev->dev_addr[1]);
+                        out_be32(&reg->individualAddrL, netDev->dev_addr[2] << 24 |
+                                 netDev->dev_addr[3] << 16 | netDev->dev_addr[4] << 8 |
+                                 netDev->dev_addr[5]);
+                        break;
+                }
+        }
+
+        PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+        return rc;
+}
+
+
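+/*  Build the 64-bit multicast group hash from the top six bits of each */
+/*  address CRC.  Worked example: if ether_crc() yields 42 (0x2a) in its */
+/*  top six bits, bit = 63 - 42 = 21, which lands in groupAddrHashTable[1] */
+/*  (21 >> 4) with mask 0x8000 >> (21 & 0x0f) = 0x0400. */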
+static inline int emac_set_multicast_list(EMAC* emac)
+{
+	int rc = 0;
+	XEMACRegs* regs = (XEMACRegs*) emac->regs;
+
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+	if (emac->netDev->flags & IFF_MULTICAST &&
+		emac->netDev->mc_count > 0) {
+		U16 groupAddrHashTable[4] = {0, 0, 0, 0};
+		struct dev_mc_list* dmi;
+
+		for (dmi = emac->netDev->mc_list; dmi; dmi = dmi->next) {
+			U32 crc = ether_crc(6, (char*) dmi->dmi_addr);
+			U32 bit = 63 - (crc >> 26);
+
+			groupAddrHashTable[bit >> 4] |=
+				0x8000 >> (bit & 0x0f);
+		}
+		out_be32(&regs->groupAddrHashTable[0], groupAddrHashTable[0]);
+		out_be32(&regs->groupAddrHashTable[1], groupAddrHashTable[1]);
+		out_be32(&regs->groupAddrHashTable[2], groupAddrHashTable[2]);
+		out_be32(&regs->groupAddrHashTable[3], groupAddrHashTable[3]);
+	}
+
+	PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+void emac_exit(EMAC* emac);
+
+
+
+
+#endif
diff --git a/drivers/net/bgp_e10000/bgp_tomal.c b/drivers/net/bgp_e10000/bgp_tomal.c
new file mode 100644
index 0000000..57cc90a
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_tomal.c
@@ -0,0 +1,1893 @@
+/*
+ * bgp_tomal.c: TOMAL device for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+
+
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+
+#include <asm/bluegene_ras.h>
+#include <asm/udbg.h>
+
+#include "bgp_e10000.h"
+#include "bgp_tomal.h"
+#include "bgp_emac.h"
+
+
+static RxDescSegment* tomal_alloc_rx_segment(U32 numDescriptors);
+static void tomal_free_rx_segment(RxDescSegment* segment);
+static TxDescSegment* tomal_alloc_tx_segment(U32 numDescriptors);
+static void tomal_free_tx_segment(TxDescSegment* segment);
+static irqreturn_t tomal_irq0(int irq, void* data);
+static irqreturn_t tomal_irq1(int irq, void* data);
+
+
+/*  TOMAL hardware accessible through /proc/driver/e10000/tomal/hw */
+static E10000_PROC_ENTRY tomal_hw_proc_entry[] = {
+        { "configurationCtrl",			(void*) 0x0000, NULL },
+	{ "revisionID",				(void*) 0x0060, NULL },
+	{ "packetDataEngineCtrl",		(void*) 0x0400, NULL },
+	{ "txNotificationCtrl",			(void*) 0x0600, NULL },
+	{ "txMinTimer",				(void*) 0x0610, NULL },
+	{ "txMaxTimer", 			(void*) 0x0620, NULL },
+	{ "txMaxFrameNum0",			(void*) 0x06c0, NULL },
+	{ "txMaxFrameNum1",			(void*) 0x07c0, NULL },
+	{ "txMinFrameNum0",			(void*) 0x06d0, NULL },
+	{ "txMinFrameNum1",                     (void*) 0x07d0, NULL },
+	{ "txFramePerServiceCtrl",		(void*) 0x0650, NULL },
+	{ "txHWCurrentDescriptorAddrH0",	(void*) 0x0660, NULL },
+	{ "txHWCurrentDescriptorAddrH1",        (void*) 0x0760, NULL },
+	{ "txHWCurrentDescriptorAddrL0",	(void*) 0x0670, NULL },
+	{ "txHWCurrentDescriptorAddrL1",	(void*) 0x0770, NULL },
+	{ "txPendingFrameCount0",		(void*) 0x0690, NULL },
+	{ "txPendingFrameCount1", 		(void*) 0x0790, NULL },
+	{ "txAddPostedFrames0",			(void*) 0x06a0, NULL },
+	{ "txAddPostedFrames1",			(void*) 0x07a0, NULL },
+	{ "txNumberOfTransmittedFrames0",	(void*) 0x06b0, NULL },
+	{ "txNumberOfTransmittedFrames1",       (void*) 0x07b0, NULL },
+	{ "txEventStatus0",			(void*) 0x06e0, NULL },
+	{ "txEventStatus1", 			(void*) 0x07e0, NULL },
+	{ "txEventMask0",			(void*) 0x06f0, NULL },
+	{ "txEventMask1", 			(void*) 0x07f0, NULL },
+	{ "rxNotificationCtrl",			(void*) 0x0f00, NULL },
+	{ "rxMinTimer",				(void*) 0x0f10, NULL },
+	{ "rxMaxTimer",				(void*) 0x0f20, NULL },
+	{ "rxMaxFrameNum0",			(void*) 0x1080, NULL },
+	{ "rxMaxFrameNum1", 			(void*) 0x1180, NULL },
+	{ "rxMinFrameNum0",			(void*) 0x1090, NULL },
+	{ "rxMinFrameNum1",                     (void*) 0x1190, NULL },
+	{ "rxHWCurrentDescriptorAddrH0",	(void*) 0x1020, NULL },
+	{ "rxHWCurrentDescriptorAddrH1",        (void*) 0x1120, NULL },
+	{ "rxHWCurrentDescriptorAddrL0",        (void*) 0x1030, NULL },
+	{ "rxHWCurrentDescriptorAddrL1",        (void*) 0x1130, NULL },
+	{ "rxAddFreeBytes0",			(void*) 0x1040, NULL },
+	{ "rxAddFreeBytes1",                    (void*) 0x1140, NULL },
+	{ "rxTotalBuffersSize0",		(void*) 0x1050, NULL },
+	{ "rxTotalBuffersSize1",                (void*) 0x1150, NULL },
+	{ "rxNumberOfReceivedFrames0",		(void*) 0x1060, NULL },
+	{ "rxNumberOfReceivedFrames1",          (void*) 0x1160, NULL },
+	{ "rxDroppedFramesCount0",		(void*) 0x1070, NULL },
+	{ "rxDroppedFramesCount1",              (void*) 0x1170, NULL },
+	{ "rxEventStatus0", 			(void*) 0x10a0, NULL },
+	{ "rxEventStatus1",                     (void*) 0x11a0, NULL },
+	{ "rxEventMask0", 			(void*) 0x10b0, NULL },
+	{ "rxEventMask1",                       (void*) 0x11b0, NULL },
+	{ "softwareNonCriticalErrorsStatus0",	(void*) 0x1800, NULL },
+	{ "softwareNonCriticalErrorsStatus1",   (void*) 0x1900, NULL },
+	{ "softwareNonCriticalErrorsEnable0",	(void*) 0x1810, NULL },
+	{ "softwareNonCriticalErrorsEnable1",   (void*) 0x1910, NULL },
+	{ "softwareNonCriticalErrorsMask0",	(void*) 0x1820, NULL },
+	{ "softwareNonCriticalErrorsMask1",     (void*) 0x1920, NULL },
+	{ "receiveDataBufferSpace",		(void*) 0x1900, NULL },
+	{ "transmitDataBuffer0FreeSpace",	(void*) 0x1910, NULL },
+	{ "transmitDataBuffer1FreeSpace",	(void*) 0x1920, NULL },
+	{ "rxMACStatus0",			(void*) 0x1b20, NULL },
+	{ "rxMACStatus1",			(void*) 0x1c20, NULL },
+	{ "rxMACStatusEnable0",			(void*) 0x1b30, NULL },
+	{ "rxMACStatusEnable1",                 (void*) 0x1c30, NULL },
+	{ "rxMACStatusMask0", 			(void*) 0x1b40, NULL },
+	{ "rxMACStatusMask1",                   (void*) 0x1c40, NULL },
+	{ "txMACStatus0",			(void*) 0x1b50, NULL },
+	{ "txMACStatus1",                       (void*) 0x1c50, NULL },
+	{ "txMACStatusEnable0",			(void*) 0x1b60, NULL },
+	{ "txMACStatusEnable1",                 (void*) 0x1c60, NULL },
+	{ "txMACStatusMask0",			(void*) 0x1b70, NULL },
+	{ "txMACStatusMask1",                   (void*) 0x1c70, NULL },
+	{ "hardwareErrorsStatus",		(void*) 0x1e00, NULL },
+	{ "hardwareErrorsEnable",		(void*) 0x1e10, NULL },
+	{ "hardwareErrorsMask",			(void*) 0x1e20, NULL },
+	{ "softwareCriticalErrorsStatus",	(void*) 0x1f00, NULL },
+	{ "softwareCriticalErrorsEnable",       (void*) 0x1f10, NULL },
+	{ "softwareCriticalErrorsMask",       	(void*) 0x1f20, NULL },
+	{ "receiveDescriptorBadCodeFEC",	(void*) 0x1f30, NULL },
+	{ "transmitDescriptorBadCodeFEC", 	(void*) 0x1f40, NULL },
+	{ "interruptStatus",			(void*) 0x1f80, NULL },
+	{ "interruptRoute",			(void*) 0x1f90, NULL },
+	{ "rxMACBadStatusCounter0",		(void*) 0x2060, NULL },
+	{ "rxMACBadStatusCounter1",             (void*) 0x2160, NULL },
+	{ "debugVectorsCtrl",			(void*) 0x3000, NULL },
+	{ "debugVectorsReadData",		(void*) 0x3010, NULL },
+	{ NULL,					(void*) 0, 	NULL }
+};
+
+
+/*  TOMAL software accessible through /proc/driver/e10000/tomal/sw */
+static E10000_PROC_ENTRY tomal_sw_proc_entry[] = {
+        { "rxMaxBuffers0",			NULL, 		NULL },
+	{ "rxMaxBuffers1",                      NULL, 		NULL },
+	{ "rxBufferSize0",			NULL,		NULL },
+	{ "rxBufferSize1",			NULL,		NULL },
+	{ "rxDescSegmentAddr0",			NULL,		NULL },
+	{ "rxDescSegmentAddr1",                 NULL,           NULL },
+	{ "rxOldDescSegmentAddr0",		NULL,		NULL },
+	{ "rxOldDescSegmentAddr1",              NULL,           NULL },
+	{ "txMaxBuffers0",			NULL,		NULL },
+	{ "txMaxBuffers1",                      NULL,           NULL },
+	{ "txPendingBuffers0",			NULL,		NULL },
+	{ "txPendingBuffers1",                  NULL,           NULL },
+	{ "txNumberOfTransmittedFrames0",	NULL,		NULL },
+	{ "txNumberOfTransmittedFrames1",       NULL,           NULL },
+	{ "txDescSegmentAddr0",			NULL,		NULL },
+	{ "txDescSegmentAddr1",                 NULL,           NULL },
+	{ "txOldDescSegmentAddr0",		NULL,		NULL },
+	{ "txOldDescSegmentAddr1",              NULL,           NULL },
+	{ "txFreeDescSegmentAddr0", 		NULL, 		NULL },
+	{ "txFreeDescSegmentAddr1",             NULL,           NULL },
+	{ "irq0",				NULL,		NULL },
+	{ "irq1",                               NULL,           NULL },
+	{ "numberOfNetrxDrops",                 NULL,           NULL },
+	{ "numberOfHwDrops0",                   NULL,           NULL },
+	{ "numberOfHwDrops1",                   NULL,           NULL },
+	{ "numberOfNotLast",                    NULL,           NULL },
+/* 	{ "txChecksumNONE",                     NULL,           NULL }, */
+/* 	{ "txChecksumPARTIAL",                  NULL,           NULL }, */
+/* 	{ "txChecksumUNNECESSARY",              NULL,           NULL }, */
+/* 	{ "txChecksumCOMPLETE",                 NULL,           NULL }, */
+	{ NULL,					NULL,		NULL }
+};
+
+
+/*  Allocate a single Rx descriptor segment with the specified number of descriptors. */
+static RxDescSegment* tomal_alloc_rx_segment(U32 numDescriptors)
+{
+        RxDescSegment* segment = NULL;
+        RxDesc* desc;
+        size_t size = numDescriptors * sizeof(RxDesc) + sizeof(BranchDesc);
+        dma_addr_t dmaHandle;
+
+         /*  Allocate descriptor storage. */
+        desc = (RxDesc*) dma_alloc_coherent(NULL, size, &dmaHandle, GFP_KERNEL);
+        if (desc) {
+		 /*  Clear the descriptors. */
+		memset((void*) desc, 0, size);
+
+                 /*  Allocate a segment. */
+                segment = kmalloc(sizeof(RxDescSegment), GFP_KERNEL);
+                if (segment) {
+                        segment->size = size;
+                        segment->dmaHandle = dmaHandle;
+                        segment->desc = desc;
+
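+                         /*  The branch descriptor sits immediately after the last RX */
+                         /*  descriptor; tomal_alloc_rx_segments() later points it at */
+                         /*  the next segment to close the ring. */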
+                        segment->branchDesc = (BranchDesc*) &desc[numDescriptors];
+                        segment->branchDesc->code = TOMAL_BRANCH_CODE;
+			segment->branchDesc->reserved = segment->branchDesc->nextDescAddrH = 0;
+			segment->branchDesc->nextDescAddrL = (U32) NULL;
+
+			 /*  Allocate storage for buffer pointers. */
+			segment->skb = (struct sk_buff**)
+				kmalloc(numDescriptors * sizeof(struct sk_buff*) +
+					sizeof(struct sk_buff*), GFP_KERNEL);
+			if (!segment->skb) {
+				kfree((void*) segment);
+				segment = NULL;
+				dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+			} else {
+				memset((void*) segment->skb, 0,
+					numDescriptors * sizeof(struct sk_buff*) + sizeof(struct sk_buff*));
+				segment->currDesc = segment->desc;
+				segment->currSkb = segment->skb;
+                        	segment->next = segment;
+			}
+                } else
+                        dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+        }
+
+        return segment;
+}
+
+
+/*  Allocate descriptor segment(s) until the specified number of Rx descriptors have been */
+/*  created. */
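+/*  Usage sketch (see tomal_configure() below): */
+/*      rc = tomal_alloc_rx_segments(tomal, channel, tomal->maxRxBuffers[channel]); */
+/*  On success the segments form a ring closed through their branch descriptors, */
+/*  and TOMAL's RX current-descriptor registers point at the ring. */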
+int tomal_alloc_rx_segments(TOMAL* tomal,
+			    U8 channel,
+			    U32 totalDescriptors)
+{
+	RxDescSegment* firstSegment = (RxDescSegment*) NULL;
+	RxDescSegment* prevSegment = (RxDescSegment*) NULL;
+	RxDescSegment* segment = (RxDescSegment*) NULL;
+	U32 numDescriptors = totalDescriptors;
+	U8 first = 1;
+	int rc;
+
+	 /*  Allocate RX segments until the indicated number of descriptors have been */
+	 /*  created. */
+	while (totalDescriptors && numDescriptors >= 1) {
+		 /*  Allocate an RX descriptor segment. */
+		segment = tomal_alloc_rx_segment(numDescriptors);
+		if (segment) {
+			 /*  If this was the first segment then remember it. */
+			if (first) {
+				firstSegment = prevSegment = segment;
+				first = 0;
+			}
+
+			 /*  Link the previous segment to the new segment. */
+			prevSegment->branchDesc->nextDescAddrL = (U32) segment->dmaHandle;
+			prevSegment->next = segment;
+
+			totalDescriptors -= numDescriptors;
+		} else {
+			 /*  Failure allocating a segment of the requested size.  Reduce the size. */
+			numDescriptors /= 2;
+		}
+	}
+
+	 /*  All segments created? */
+	if (!segment) {
+		RxDescSegment* nextSegment = NULL;
+
+		 /*  Free any segments that were allocated. */
+		segment = prevSegment = firstSegment;
+		while (segment) {
+			nextSegment = segment->next;
+			BUG_ON(nextSegment == segment);
+
+			tomal_free_rx_segment(segment);
+
+			segment = nextSegment;
+		}
+		tomal->rxDescSegment[channel] = (RxDescSegment*) NULL;
+
+		e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+				"Failure allocating RX descriptor segment - totalDescriptors=%d.",
+				totalDescriptors);
+		rc = -ENOMEM;
+	} else {
+		 /*  Link the last segment to the first. */
+		segment->branchDesc->nextDescAddrL = (U32) firstSegment->dmaHandle;
+		segment->next = firstSegment;
+
+		tomal->rxDescSegment[channel] = segment;
+		rc = 0;
+	}
+
+	 /*  Update TOMAL's view of the RX descriptors. */
+	out_be32(&tomal->regs[channel]->rxHWCurrentDescriptorAddrH, 0);
+	out_be32(&tomal->regs[channel]->rxHWCurrentDescriptorAddrL,
+		 (U32) tomal->rxDescSegment[channel]->dmaHandle);
+
+	tomal->oldRxSegment[channel] = tomal->rxDescSegment[channel];
+	tomal->oldRxSegment[channel]->currDesc = tomal->oldRxSegment[channel]->desc;
+	tomal->oldRxSegment[channel]->currSkb = tomal->oldRxSegment[channel]->skb;
+
+	return rc;
+}
+
+
+/*  Free the specified Rx descriptor segment. */
+static void tomal_free_rx_segment(RxDescSegment* segment)
+{
+	RxDesc* desc;
+	struct sk_buff** skb;
+
+	 /*  Look for any descriptors awaiting processing. */
+	for (desc = segment->desc, skb = segment->skb;
+	     desc && desc != (RxDesc*) segment->branchDesc; desc++, skb++) {
+		if (*skb) {
+			dma_unmap_single(NULL, desc->buffHeadAddrL,
+					 desc->postedLength, DMA_FROM_DEVICE);
+			dev_kfree_skb_any(*skb);
+			*skb = NULL;
+		}
+
+		desc->postedLength = 0;
+	}
+
+	 /*  Free SKB pointer storage. */
+	if (segment->skb)
+		kfree(segment->skb);
+
+	 /*  Free the descriptor storage. */
+	if (segment->desc)
+		dma_free_coherent(NULL, segment->size, (void*) segment->desc, segment->dmaHandle);
+
+	 /*  Free the segment. */
+	kfree((void*) segment);
+
+	return;
+}
+
+
+/*  Free all Rx descriptor segments. */
+void tomal_free_rx_segments(TOMAL* tomal,
+			    U8 channel)
+{
+	RxDescSegment* segment = tomal->rxDescSegment[channel];
+	RxDescSegment* startSegment = segment;
+	RxDescSegment* nextSegment;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+	while (segment) {
+		nextSegment = segment->next;
+
+		tomal_free_rx_segment(segment);
+		segment = nextSegment;
+
+		if (segment == startSegment)
+			break;
+	}
+	tomal->rxDescSegment[channel] = NULL;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n");
+
+	return;
+}
+
+
+/*  Free all Rx buffers. */
+int tomal_free_rx_buffers(TOMAL* tomal,
+			   U8 channel)
+{
+	int rc = 0;
+	RxDescSegment* segment = tomal->rxDescSegment[channel];
+	RxDescSegment* startSegment = segment;
+	RxDesc* desc;
+	struct sk_buff** skb;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+	while (segment) {
+		 /*  Look for any descriptors awaiting processing. */
+                for (desc = segment->desc, skb = segment->skb;
+			desc != (RxDesc*) segment->branchDesc; desc++, skb++) {
+                       	if (*skb) {
+                        	dma_unmap_single(NULL, desc->buffHeadAddrL,
+						   desc->postedLength, DMA_FROM_DEVICE);
+                                dev_kfree_skb_any(*skb);
+                                *skb = NULL;
+                       	}
+
+			desc->postedLength = 0;
+		}
+
+		segment = segment->next;
+		if (segment == startSegment)
+			break;
+        }
+
+	 /*  Force TOMAL's total buffer size register back to zero.  We do this by adding */
+	 /*  enough buffer space to make this 20 bit register wrap around. */
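+	 /*  Example: a reading of 0x000fff00 is within 0xffff of the 0x00100000 */
+	 /*  wrap point, so one write of 0x100 free bytes wraps it to zero; */
+	 /*  larger readings are first walked up in 0xffff-byte steps. */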
+	while (in_be32(&tomal->regs[channel]->rxTotalBufferSize) &&
+		(0x00100000 - in_be32(&tomal->regs[channel]->rxTotalBufferSize)) > 0x0000ffff)
+		out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0xffff);
+	if (in_be32(&tomal->regs[channel]->rxTotalBufferSize))
+		out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0x00100000 - in_be32(&tomal->regs[channel]->rxTotalBufferSize));
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+/*  Returns the number of RX buffers that are waiting to be processed.  An error is indicated */
+/*  by a negative value.  The caller should be holding the TOMAL lock for the specified channel. */
+int tomal_pending_rx_buffers(TOMAL* tomal,
+			     U8 channel)
+{
+	int rc = 0;
+	RxDescSegment* segment = tomal->rxDescSegment[channel];
+	RxDescSegment* startSegment = segment;
+	RxDesc* desc;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+	do {
+               	 /*  Look for any descriptors awaiting processing. */
+		for (desc = segment->desc; desc != (RxDesc*) segment->branchDesc; desc++)
+			if ((desc->status &  TOMAL_RX_LAST) && desc->totalFrameLength)
+				rc++;
+
+		segment = segment->next;
+	} while (segment != startSegment);
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+        return rc;
+}
+
+
+/*  Returns the number of TX buffers that are queued for transmission.  An error is indicated */
+/*  by a negative value.  The caller should be holding the TOMAL TX lock for the specified channel. */
+int tomal_pending_tx_buffers(TOMAL* tomal,
+                             U8 channel)
+{
+        int rc = 0;
+	TxDescSegment* segment = tomal->txDescSegment[channel];
+	TxDescSegment* startSegment = segment;
+	TxDesc* desc;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+        do {
+		 /*  Look for any descriptors awaiting processing. */
+                for (desc = segment->desc; desc != (TxDesc*) segment->branchDesc; desc++)
+                	if (desc->postedLength)
+                        	rc++;
+
+		segment = segment->next;
+        } while (segment != startSegment);
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+        return rc;
+}
+
+
+/*  Allocate a Tx descriptor segment with the specified number of descriptors. */
+static TxDescSegment* tomal_alloc_tx_segment(U32 numDescriptors)
+{
+        TxDescSegment* segment = NULL;
+        TxDesc* desc;
+        size_t size = numDescriptors * sizeof(TxDesc) + sizeof(BranchDesc);
+        dma_addr_t dmaHandle;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - numDescriptors=%d\n", numDescriptors);
+
+         /*  Allocate descriptor storage. */
+        desc = (TxDesc*) dma_alloc_coherent(NULL, size, &dmaHandle, GFP_KERNEL);
+        if (desc) {
+		 /*  Clear the descriptor storage. */
+		memset((void*) desc, 0, size);
+
+                 /*  Allocate a segment. */
+                segment = kmalloc(sizeof(TxDescSegment), GFP_KERNEL);
+                if (segment) {
+                        segment->size = size;
+                        segment->dmaHandle = dmaHandle;
+                        segment->desc = desc;
+
+                        segment->branchDesc = (BranchDesc*) &segment->desc[numDescriptors];
+                        segment->branchDesc->code = TOMAL_BRANCH_CODE;
+                        segment->branchDesc->reserved = segment->branchDesc->nextDescAddrH = 0;
+                        segment->branchDesc->nextDescAddrL = (U32) NULL;
+
+                         /*  Allocate storage for buffer pointers. */
+                        segment->skb = (struct sk_buff**)
+				kmalloc((numDescriptors+1) * sizeof(struct sk_buff*), GFP_KERNEL);
+                        if (!segment->skb) {
+                                dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+                                kfree((void*) segment);
+                                segment = NULL;
+                        } else {
+				memset((void*) segment->skb, 0,
+					(numDescriptors+1) * sizeof(struct sk_buff*));
+                                segment->oldIndex = segment->freeIndex = 0;
+                                segment->next = segment;  /*  by default point this segment at itself */
+                        }
+                } else
+                        dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+        }
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - segment=%p\n", segment);
+
+        return segment;
+}
+
+
+/*  Allocate Tx descriptor segment(s) until the specified number of descriptors have been created. */
+int tomal_alloc_tx_segments(TOMAL* tomal,
+                            U8 channel,
+                            U32 totalDescriptors)
+{
+        TxDescSegment* firstSegment = (TxDescSegment*) NULL;
+        TxDescSegment* prevSegment = (TxDescSegment*) NULL;
+        TxDescSegment* segment = (TxDescSegment*) NULL;
+        U32 numDescriptors = totalDescriptors;
+        U8 first = 1;
+        int rc;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d, totalDescriptors=%d\n", tomal,
+		channel, totalDescriptors);
+
+         /*  Allocate TX segments until the indicated number of descriptors have been */
+         /*  created. */
+        while (totalDescriptors && numDescriptors >= 1) {
+                 /*  Allocate a TX descriptor segment. */
+                segment = tomal_alloc_tx_segment(numDescriptors);
+                if (segment) {
+                         /*  If this was the first segment then remember it. */
+                        if (first) {
+                                firstSegment = prevSegment = segment;
+                                first = 0;
+                        }
+
+                         /*  Link the previous segment to the new segment. */
+                        prevSegment->branchDesc->nextDescAddrL = (U32) segment->dmaHandle;
+                        prevSegment->next = segment;
+
+                        totalDescriptors -= numDescriptors;
+                } else {
+                         /*  Failure allocating a segment of the requested size.  Reduce the size. */
+                        numDescriptors /= 2;
+                }
+        }
+
+         /*  All segments created? */
+        if (!segment) {
+                TxDescSegment* nextSegment = NULL;
+
+                 /*  Free any segments that were allocated. */
+                segment = prevSegment = firstSegment;
+                while (segment) {
+                        nextSegment = segment->next;
+                        BUG_ON(nextSegment == segment);
+
+                        tomal_free_tx_segment(segment);
+
+                        segment = nextSegment;
+                }
+                tomal->txDescSegment[channel] = (TxDescSegment*) NULL;
+
+		e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+				"TX descriptor allocation failure - totalDescriptors=%d.",
+				totalDescriptors);
+                rc = -ENOMEM;
+        } else {
+                 /*  Link the last segment to the first. */
+                segment->branchDesc->nextDescAddrL = (U32) firstSegment->dmaHandle;
+                segment->next = firstSegment;
+
+                tomal->txDescSegment[channel] = segment;
+                rc = 0;
+        }
+
+	 /*  Tell TOMAL where the descriptor storage is. */
+	out_be32(&tomal->regs[channel]->txHWCurrentDescriptorAddrH, 0);
+	out_be32(&tomal->regs[channel]->txHWCurrentDescriptorAddrL,
+		 (U32) tomal->txDescSegment[channel]->dmaHandle);
+	tomal->pendingTxBuffers[channel] = 0;
+	tomal->oldTxSegment[channel] = tomal->freeTxSegment[channel] = tomal->txDescSegment[channel];
+	tomal->freeTxSegment[channel]->freeIndex = tomal->freeTxSegment[channel]->oldIndex =
+		tomal->numberOfTransmittedFrames[channel] = 0;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+        return rc;
+}
+
+
+/*  Free all Tx descriptor segments. */
+void tomal_free_tx_segments(TOMAL* tomal,
+                            U8 channel)
+{
+        TxDescSegment* segment = tomal->txDescSegment[channel];
+        TxDescSegment* startSegment = segment;
+        TxDescSegment* nextSegment;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+        while (segment) {
+                nextSegment = segment->next;
+
+                tomal_free_tx_segment(segment);
+                segment = nextSegment;
+
+                if (segment == startSegment)
+                        break;
+        }
+        tomal->txDescSegment[channel] = NULL;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n");
+
+        return;
+}
+
+
+/*  Free the specified Tx segment. */
+void tomal_free_tx_segment(TxDescSegment* segment)
+{
+	TxDesc* desc;
+	struct sk_buff** skb;
+
+	 /*  Look for any descriptors with an associated buffer. */
+	for (desc = segment->desc, skb = segment->skb;
+		desc && desc != (TxDesc*) segment->branchDesc; desc++, skb++) {
+		if (*skb) {
+			dma_unmap_single(NULL, desc->buffHeadAddrL,
+					 desc->postedLength, DMA_TO_DEVICE);
+                        dev_kfree_skb_any(*skb);
+                        *skb = NULL;
+                }
+		desc->postedLength = 0;
+	}
+
+         /*  Free SKB pointer storage. */
+        if (segment->skb)
+                kfree(segment->skb);
+
+         /*  Free the descriptor storage. */
+        if (segment->desc)
+                dma_free_coherent(NULL, segment->size, (void*) segment->desc, segment->dmaHandle);
+
+         /*  Free the segment. */
+        kfree((void*) segment);
+
+        return;
+}
+
+
+
+/*  Free all Tx buffers. */
+void tomal_free_tx_buffers(TOMAL* tomal,
+			   U8 channel)
+{
+	TxDescSegment* segment = tomal->txDescSegment[channel];
+        TxDescSegment* startSegment = segment;
+        TxDesc* desc;
+        struct sk_buff** skb;
+
+	while (segment) {
+		 /*  Look for any descriptors with an associated buffer. */
+                for (desc = segment->desc, skb = segment->skb;
+			desc != (TxDesc*) segment->branchDesc; desc++, skb++) {
+                        if (*skb) {
+				dma_unmap_single(NULL, desc->buffHeadAddrL,
+                                                 desc->postedLength, DMA_FROM_DEVICE);
+                                dev_kfree_skb_any(*skb);
+                                *skb = NULL;
+                        }
+
+                        desc->postedLength = 0;
+                }
+
+                segment = segment->next;
+		if (segment == startSegment)
+			break;
+        }
+
+        return;
+}
+
+
+
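+/*  Reclaim buffers for frames that TOMAL has finished transmitting.  The */
+/*  callers (tomal_irq0/tomal_irq1) compute framesToProcess as the delta */
+/*  between the hardware txNumberOfTransmittedFrames counter and the */
+/*  driver's cached count. */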
+int tomal_process_tx_buffers(TOMAL* tomal,
+                             U8 channel,
+                             register U32 framesToProcess)
+{
+        register TxDescSegment* segment = tomal->oldTxSegment[channel];
+        register TxDesc* desc = &segment->desc[segment->oldIndex];
+        register int skbFrag = 0;
+	register int rc = 0;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+         /*  Process the non-served descriptors, starting with the oldest. */
+	tomal->numberOfTransmittedFrames[channel] += framesToProcess;
+        while (likely(framesToProcess)) {
+                 /*  Have we reached the end of the segment? */
+                if (unlikely(desc == (TxDesc*) segment->branchDesc)) {
+                         /*  Reset the oldest descriptor pointer and move the oldest segment ahead. */
+                        segment->oldIndex = 0;
+                        tomal->oldTxSegment[channel] = segment = segment->next;
+                        desc = segment->desc;
+                }
+
+                 /*  Process the current descriptor. */
+                PRINTK(DBG_TOMAL | DBG_LEVEL3, "xmit of buffer [%x] complete\n",
+                        desc->buffHeadAddrL);
+
+                if (likely(desc->code & TOMAL_TX_LAST)) {
+                         /*  Unmap the buffer.  Free the skb.  Check descriptor status.  Increment the */
+                         /*  transmitted frame count. */
+                        dma_unmap_single(NULL, desc->buffHeadAddrL, desc->postedLength, DMA_TO_DEVICE);
+                        dev_kfree_skb_irq(segment->skb[segment->oldIndex]);
+			segment->skb[segment->oldIndex] = NULL;
+                        skbFrag = 0;
+                       	framesToProcess--;
+                        if (unlikely(!(desc->wBStatus & TOMAL_TX_STATUS_GOOD)))
+                                ((EMAC*) netdev_priv(tomal->netDev[channel]))->stats.tx_errors++;
+                } else
+                         /*  We have a fragmented skb and the first buffer is a special */
+                         /*  case because we didn't map an entire page for it.  Unmap */
+                         /*  the buffer now. */
+                        if (!skbFrag) {
+                                dma_unmap_single(NULL, desc->buffHeadAddrL,
+                                                 desc->postedLength, DMA_TO_DEVICE);
+                                skbFrag = 1;
+                        } else
+                                 /*  Unmap the page that contains the current fragment. */
+                                dma_unmap_page(NULL, desc->buffHeadAddrL,
+                                                desc->postedLength, DMA_TO_DEVICE);
+
+                 /*  Advance to next descriptor. */
+                desc++;
+                segment->oldIndex++;
+		rc++;
+        }
+
+	tomal->pendingTxBuffers[channel] -= rc;
+
+         /*  Restart the TX counters. */
+        out_be32(&tomal->regs[0]->txNotificationCtrl, (channel ? TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 : TOMAL_TX_NOTIFY_CTRL_COUNTER_START0));
+
+        if (unlikely(netif_queue_stopped(tomal->netDev[channel]) &&
+		     (tomal->pendingTxBuffers[channel] + MAX_SKB_FRAGS + 1) < tomal->maxTxBuffers[channel]))
+                netif_wake_queue(tomal->netDev[channel]);
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+        return rc;
+}
+
+
+
+/*  Disable IRQs. */
+void tomal_irq_disable(TOMAL* tomal,
+			U8 channel)
+{
+         /*  Disable TX & RX MAC event and interrupt generation. */
+        out_be32(&tomal->regs[channel]->rxMACStatusEnable, 0);
+        out_be32(&tomal->regs[channel]->rxMACStatusMask, 0);
+        out_be32(&tomal->regs[channel]->txMACStatusEnable, 0);
+        out_be32(&tomal->regs[channel]->txMACStatusMask, 0);
+
+         /*  Disable HW error event and interrupt generation. */
+        out_be32(&tomal->regs[channel]->hwErrorsEnable, 0);
+        out_be32(&tomal->regs[channel]->hwErrorsMask, 0);
+
+         /*  Disable SW critical and non-critical error event and */
+         /*  interrupt generation. */
+        out_be32(&tomal->regs[channel]->swCriticalErrorsEnable, 0);
+        out_be32(&tomal->regs[channel]->swCriticalErrorsMask, 0);
+        out_be32(&tomal->regs[channel]->swNonCriticalErrorsEnable, 0);
+        out_be32(&tomal->regs[channel]->swNonCriticalErrorsMask, 0);
+
+         /*  Disable TX & RX event interrupts. */
+        out_be32(&tomal->regs[channel]->rxEventMask, 0);
+        out_be32(&tomal->regs[channel]->txEventMask, 0);
+
+        return;
+}
+
+
+/*  Enable IRQs and interrupt generation mechanisms. */
+void tomal_irq_enable(TOMAL* tomal,
+			U8 channel)
+{
+         /*  Enable TX & RX MAC event and interrupt generation. */
+        out_be32(&tomal->regs[channel]->rxMACStatusEnable, TOMAL_RX_MAC_XEMAC_MASK);
+        out_be32(&tomal->regs[channel]->rxMACStatusMask, TOMAL_RX_MAC_XEMAC_MASK);
+        out_be32(&tomal->regs[channel]->txMACStatusEnable, TOMAL_TX_MAC_XEMAC_MASK);
+        out_be32(&tomal->regs[channel]->txMACStatusMask, TOMAL_TX_MAC_XEMAC_MASK);
+
+         /*  Enable HW error event and interrupt generation. */
+        out_be32(&tomal->regs[channel]->hwErrorsEnable,
+		 TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_ORAPE |
+                 TOMAL_HW_ERRORS_IDBPE | TOMAL_HW_ERRORS_ODBPE);
+        out_be32(&tomal->regs[channel]->hwErrorsMask,
+		 TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_ORAPE |
+                 TOMAL_HW_ERRORS_IDBPE | TOMAL_HW_ERRORS_ODBPE);
+
+         /*  Enable SW critical and non-critical error event and */
+         /*  interrupt generation. */
+        out_be32(&tomal->regs[channel]->swCriticalErrorsEnable,
+		 TOMAL_SW_CRIT_ERRORS_TDBC | TOMAL_SW_CRIT_ERRORS_RDBC);
+        out_be32(&tomal->regs[channel]->swCriticalErrorsMask,
+		 TOMAL_SW_CRIT_ERRORS_TDBC | TOMAL_SW_CRIT_ERRORS_RDBC);
+        out_be32(&tomal->regs[channel]->swNonCriticalErrorsEnable,
+		 TOMAL_SW_NONCRIT_ERRORS_TPDBC |  TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+        out_be32(&tomal->regs[channel]->swNonCriticalErrorsMask,
+		 TOMAL_SW_NONCRIT_ERRORS_TPDBC |  TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+
+         /*  Enable TX & RX event interrupts. */
+        out_be32(&tomal->regs[channel]->rxEventMask, TOMAL_RX_EVENT);
+        out_be32(&tomal->regs[channel]->txEventMask, TOMAL_TX_EVENT);
+
+         /*  Enable TX counters. */
+        out_be32(&tomal->regs[0]->txNotificationCtrl,
+		 (channel ? TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 :
+                   TOMAL_TX_NOTIFY_CTRL_COUNTER_START0));
+
+         /*  Enable RX counters. */
+        out_be32(&tomal->regs[0]->rxNotificationCtrl,
+		 (channel ? TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 :
+                  TOMAL_RX_NOTIFY_CTRL_COUNTER_START0));
+
+        return;
+}
+
+
+/*  Handle IRQs for channel 0 and any IRQs not specific to any channel. */
+static irqreturn_t tomal_irq0(int irq,
+			      void* data)
+{
+	int rc = IRQ_NONE;
+	TOMAL* tomal = (TOMAL*) data;
+	EMAC* emac = (EMAC*) netdev_priv(tomal->netDev[0]);
+	U32 isr = in_be32(&tomal->regs[0]->interruptStatus);
+#ifdef CONFIG_BGP_E10000_NAPI
+	int pollScheduled = 0;
+#endif
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - irq=%d isr=%08x\n", irq, isr);
+
+	if (likely(irq == tomal->irq0)) {
+		if (isr & TOMAL_INTERRUPT_RX0) {
+#ifndef CONFIG_BGP_E10000_NAPI
+			int budget = tomal->maxRxBuffers[0];
+#endif
+			PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_RX0 - irq=%d isr=%08x\n", irq, isr);
+			spin_lock(&tomal->rxLock[0]);
+#ifdef CONFIG_BGP_E10000_NAPI
+			 /*  Disable further Rx interrupts. */
+			out_be32(&tomal->regs[0]->rxEventMask, 0);
+
+			 /*  Schedule Rx processing. */
+			napi_schedule(&(tomal->napi[0]));
+			pollScheduled = 1;
+#endif
+
+			 /*  Clear the RX interrupt. */
+			out_be32(&tomal->regs[0]->rxEventStatus, TOMAL_RX_EVENT);
+
+#ifndef CONFIG_BGP_E10000_NAPI
+			 /*  Process the buffers then allocate new ones. */
+			rc = tomal_poll(tomal->netDev[0], budget);
+			if (rc != 0)
+				printk(KERN_CRIT "Failure processing RX buffers [%d]\n", rc);
+#endif
+			spin_unlock(&tomal->rxLock[0]);
+			PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_RX0 - IRQ_HANDLED\n");
+			rc = IRQ_HANDLED;
+		}
+                if (isr & TOMAL_INTERRUPT_TX0) {
+                        spin_lock(&tomal->txLock[0]);
+
+			 /*  Clear any TX interrupt. */
+			out_be32(&tomal->regs[0]->txEventStatus, TOMAL_TX_EVENT);
+
+                         /*  Process the buffers that have been transmitted. */
+                        rc = tomal_process_tx_buffers(tomal, 0,
+						      in_be32(&tomal->regs[0]->txNumberOfTransmittedFrames)-tomal->numberOfTransmittedFrames[0]);
+			if (rc < 0)
+				printk(KERN_CRIT "Failure processing TX buffers [%d]\n", rc);
+
+                        spin_unlock(&tomal->txLock[0]);
+			rc = IRQ_HANDLED;
+                }
+		if (isr & TOMAL_INTERRUPT_TX_MAC_ERROR0) {
+			U32 status = in_be32(&tomal->regs[0]->txMACStatus);
+
+			PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_TX_MAC_ERROR0 [%08x]\n", status);
+
+			emac->stats.tx_errors++;
+
+			 /*  Clear the interrupt. */
+			out_be32(&tomal->regs[0]->txMACStatus, status);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_RX_MAC_ERROR0) {
+			U32 status = in_be32(&tomal->regs[0]->rxMACStatus);
+
+			PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_RX_MAC_ERROR0 [%08x]\n", status);
+
+			emac->stats.rx_errors++;
+
+                         /*  Clear the interrupt. */
+                        out_be32(&tomal->regs[0]->rxMACStatus, status);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0) {
+			U32 status = in_be32(&tomal->regs[0]->swNonCriticalErrorsStatus);
+#ifndef CONFIG_BGP_E10000_NAPI
+			int budget = tomal->maxRxBuffers[0];
+#else
+			U32 swNonCriticalErrorsMask;
+#endif
+
+			if (status & TOMAL_SW_NONCRIT_ERRORS_TPDBC) {
+				 /*  Checksum failed on requested frame. */
+				emac->stats.tx_errors++;
+			} else if (status & TOMAL_SW_NONCRIT_ERRORS_RTSDB) {
+				 /*  TOMAL has exhausted all the RX buffers. */
+				U32 hwdrops = in_be32(&tomal->regs[0]->rxDroppedFramesCount);
+				emac->stats.rx_dropped += hwdrops;
+				tomal->numberOfHwDrops0 += hwdrops;
+				out_be32(&tomal->regs[0]->rxDroppedFramesCount, 0);
+				emac->stats.rx_errors++;
+#ifndef CONFIG_BGP_E10000_NAPI
+				tomal_poll(tomal->netDev[0], budget);
+#else
+				 /*  Disable too short Rx buffer interrupt and schedule Rx processing. */
+				swNonCriticalErrorsMask = in_be32(&tomal->regs[0]->swNonCriticalErrorsMask);
+				out_be32(&tomal->regs[0]->swNonCriticalErrorsMask,
+					 swNonCriticalErrorsMask & ~TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+				PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 pollScheduled=%d\n",pollScheduled);
+				if (!pollScheduled)
+					napi_schedule(&(tomal->napi[0]));
+
+#endif
+			}
+			else
+				e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_noncrit_int,
+						"Unknown non-critical SW error [0x%08x].", status);
+
+			 /*  Clear the interrupt. */
+			out_be32(&tomal->regs[0]->swNonCriticalErrorsStatus, status);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_CRITICAL_ERROR) {
+			U32 swStatus = in_be32(&tomal->regs[0]->swCriticalErrorsStatus);
+			U32 hwStatus = in_be32(&tomal->regs[0]->hwErrorsStatus);
+
+			PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_CRITICAL_ERROR [SW=%08x, HW=%08x]\n",
+				swStatus, hwStatus);
+
+			 /*  Check for software errors. */
+			if (swStatus & TOMAL_SW_CRIT_ERRORS_TDBC)
+				emac->stats.tx_errors++;
+			else if (swStatus & TOMAL_SW_CRIT_ERRORS_RDBC)
+				emac->stats.rx_errors++;
+			else if (swStatus)
+				e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_critical_int,
+						"Unknown critical SW error [%08x].", swStatus);
+
+			 /*  Check for hardware errors. */
+			if (hwStatus & (TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_IDBPE))
+				emac->stats.rx_errors++;
+			else if (hwStatus & (TOMAL_HW_ERRORS_ORAPE | TOMAL_HW_ERRORS_ODBPE))
+				emac->stats.tx_errors++;
+			else if (hwStatus)
+				e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_critical_int,
+					"Unknown critical HW error [%08x].", hwStatus);
+
+			 /*  Clear the interrupt(s). */
+			out_be32(&tomal->regs[0]->hwErrorsStatus, hwStatus);
+			out_be32(&tomal->regs[0]->swCriticalErrorsStatus, swStatus);
+
+			 /*  Soft reset required here. */
+			tomal_soft_reset(tomal);
+			tomal_irq_enable(tomal, 0);
+
+			rc = IRQ_HANDLED;
+		}
+		if (rc != IRQ_HANDLED) {
+			e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+                                "Unhandled interrupt - irq=%d, isr=0x%08x, rc=%d",
+                                irq, isr, rc);
+		}
+	} else {
+		e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+				"Spurious interrupt - irq=%d, isr=0x%08x.",
+				irq, isr);
+	}
+
+	return rc;
+}
+
+/*  Handle IRQs for channel 1. */
+static irqreturn_t tomal_irq1(int irq,
+			      void* data)
+{
+	int rc = IRQ_NONE;
+	TOMAL* tomal = (TOMAL*) data;
+	EMAC* emac = (EMAC*) netdev_priv(tomal->netDev[1]);
+	U32 isr = in_be32(&tomal->regs[0]->interruptStatus);
+#ifdef CONFIG_BGP_E10000_NAPI
+	int pollScheduled = 0;
+#endif
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - irq=%d isr=%08x\n", irq, isr);
+
+	if (likely(irq == tomal->irq1)) {
+		if (isr & TOMAL_INTERRUPT_RX1) {
+#ifndef CONFIG_BGP_E10000_NAPI
+			int budget = tomal->maxRxBuffers[1];
+#endif
+			spin_lock(&tomal->rxLock[1]);
+
+#ifdef CONFIG_BGP_E10000_NAPI
+			 /*  Disable further Rx interrupts. */
+			out_be32(&tomal->regs[1]->rxEventMask, 0);
+
+			 /*  Schedule Rx processing. */
+			napi_schedule(&(tomal->napi[1]));
+			pollScheduled = 1;
+#endif
+
+			 /*  Clear the RX interrupt. */
+			out_be32(&tomal->regs[1]->rxEventStatus, TOMAL_RX_EVENT);
+
+#ifndef CONFIG_BGP_E10000_NAPI
+			 /*  Process the buffers then allocate new ones. */
+			rc = tomal_poll(tomal->netDev[1], budget);
+			if (rc != 0)
+				printk(KERN_CRIT "Failure processing RX buffers [%d]\n", rc);
+#endif
+			spin_unlock(&tomal->rxLock[1]);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_TX1) {
+			spin_lock(&tomal->txLock[1]);
+
+			 /*  Clear any TX interrupt. */
+			out_be32(&tomal->regs[1]->txEventStatus, TOMAL_TX_EVENT);
+
+			 /*  Process the buffers that have been transmitted. */
+			rc = tomal_process_tx_buffers(tomal, 1,
+						      in_be32(&tomal->regs[1]->txNumberOfTransmittedFrames) - tomal->numberOfTransmittedFrames[1]);
+			if (rc < 0)
+				printk(KERN_CRIT "Failure processing TX buffers [%d]\n", rc);
+
+			spin_unlock(&tomal->txLock[1]);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_TX_MAC_ERROR1) {
+			U32 status = in_be32(&tomal->regs[1]->txMACStatus);
+
+			PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_TX_MAC_ERROR1 [%08x]\n", status);
+
+			emac->stats.tx_errors++;
+
+			 /*  Clear the interrupt. */
+			out_be32(&tomal->regs[1]->txMACStatus, status);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_RX_MAC_ERROR1) {
+			U32 status = in_be32(&tomal->regs[1]->rxMACStatus);
+
+			PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_RX_MAC_ERROR1 [%08x]\n", status);
+
+			emac->stats.rx_errors++;
+
+			 /*  Clear the interrupt. */
+			out_be32(&tomal->regs[1]->rxMACStatus, status);
+			rc = IRQ_HANDLED;
+		}
+		if (isr & TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1) {
+			U32 status = in_be32(&tomal->regs[1]->swNonCriticalErrorsStatus);
+#ifndef CONFIG_BGP_E10000_NAPI
+			int budget = tomal->maxRxBuffers[1];
+#else
+			U32 swNonCriticalErrorsMask;
+#endif
+			if (status & TOMAL_SW_NONCRIT_ERRORS_TPDBC)
+				emac->stats.tx_errors++;
+			else if (status & TOMAL_SW_NONCRIT_ERRORS_RTSDB) {
+				 /*  TOMAL has exhausted all the RX buffers. */
+				U32 hwdrops = in_be32(&tomal->regs[1]->rxDroppedFramesCount);
+				emac->stats.rx_dropped += hwdrops;
+				tomal->numberOfHwDrops1 += hwdrops;
+				out_be32(&tomal->regs[1]->rxDroppedFramesCount, 0);
+				emac->stats.rx_errors++;
+#ifndef CONFIG_BGP_E10000_NAPI
+				tomal_poll(tomal->netDev[1], budget);
+#else
+				 /*  Disable 'too short Rx buffer' interrupt and schedule Rx processing. */
+				swNonCriticalErrorsMask = in_be32(&tomal->regs[1]->swNonCriticalErrorsMask);
+				out_be32(&tomal->regs[1]->swNonCriticalErrorsMask,
+					 swNonCriticalErrorsMask & ~TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+				if (!pollScheduled)
+					napi_schedule(&(tomal->napi[1]));
+#endif
+			} else
+				e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_noncrit_int,
+					      "Unknown non-critical SW error [0x%08x].", status);
+
+			 /*  Clear the interrupt. */
+			out_be32(&tomal->regs[1]->swNonCriticalErrorsStatus, status);
+			rc = IRQ_HANDLED;
+		}
+		if (rc != IRQ_HANDLED) {
+			e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+				      "Unhandled interrupt - irq=%d, isr=0x%08x, rc=%d",
+				      irq, isr, rc);
+                }
+        } else {
+		e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+			      "Spurious interrupt - irq=%d, isr=0x%08x.", irq, isr);
+	}
+
+	return rc;
+}
+
+
+/*  Configure TOMAL. */
+int tomal_configure(TOMAL* tomal)
+{
+	int rc = 0;
+	int c;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "entry - tomal=%p\n", tomal);
+
+	out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_RX_MAC0 |
+		 TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 |
+		  TOMAL_CFG_CTRL_TX_MAC1 | TOMAL_CFG_CTRL_PLB_FREQ_250);
+	out_be32(&tomal->regs[0]->consumerMemoryBaseAddr, 0);
+	out_be32(&tomal->regs[0]->packetDataEngineCtrl, TOMAL_PDE_CTRL_RX_PREFETCH1 |
+		 TOMAL_PDE_CTRL_TX_PREFETCH1);  /*  prefetch 1 descriptor */
+        out_be32(&tomal->regs[0]->interruptRoute, TOMAL_IRQ1_MASK);  /*  route #1 ints to TOE_PLB_INT[1] */
+	for (c = 0; c < TOMAL_MAX_CHANNELS; c++)
+		if (tomal->netDev[c]) {
+                         /*  Allocate RX descriptors. */
+                        rc = tomal_alloc_rx_segments(tomal, c, tomal->maxRxBuffers[c]);
+                        if (rc) {
+                                 /*  Failure allocating requested descriptors. */
+                                BUG_ON(rc);
+                        }
+
+			 /*  Allocate RX buffers and initialize RX descriptor info. */
+			tomal->oldRxSegment[c] = tomal->rxDescSegment[c];
+
+			rc = tomal_alloc_rx_buffers(tomal, c);
+			if (rc <= 0) {
+				if (c && tomal->netDev[0])
+					tomal_free_rx_buffers(tomal, 0);
+				break;
+			}
+			else
+				rc = 0;
+
+                         /*  Allocate TX descriptors and initialize TX descriptor info. */
+                        rc = tomal_alloc_tx_segments(tomal, c, tomal->maxTxBuffers[c]);
+                        if (rc) {
+                                 /*  Failure allocating requested descriptors. */
+                                printk(KERN_CRIT "Failure allocating %d TX descriptors.\n", tomal->maxTxBuffers[c]);
+                               	BUG_ON(rc);
+                        }
+                        tomal->pendingTxBuffers[c] = 0;
+			tomal->oldTxSegment[c] = tomal->freeTxSegment[c] = tomal->txDescSegment[c];
+			tomal->freeTxSegment[c]->freeIndex = tomal->freeTxSegment[c]->oldIndex =
+				tomal->numberOfTransmittedFrames[c] = tomal->numberOfReceivedFrames[c] = 0;
+
+			 /*  Initialize the timers and counters. */
+			out_be32(&tomal->regs[c]->txMinTimer, 255);
+			out_be32(&tomal->regs[c]->txMaxTimer, 255);
+			out_be32(&tomal->regs[c]->txMaxFrameNum, tomal->maxTxBuffers[c]);
+			out_be32(&tomal->regs[c]->txMinFrameNum, 255);
+			out_be32(&tomal->regs[c]->rxMinTimer, 255);
+			out_be32(&tomal->regs[c]->rxMaxTimer, 22);
+			out_be32(&tomal->regs[c]->rxMinFrameNum, 255);
+#ifdef CONFIG_BGP_E10000_NAPI
+			out_be32(&tomal->regs[c]->rxMaxFrameNum, 4);
+#else
+			out_be32(&tomal->regs[c]->rxMaxFrameNum, 64);
+#endif
+
+                         /*  Initialize spinlocks. */
+                        spin_lock_init(&tomal->rxLock[c]);
+			spin_lock_init(&tomal->txLock[c]);
+
+#ifdef CONFIG_BGP_E10000_NAPI
+			netif_napi_add(tomal->netDev[c], &(tomal->napi[c]), tomal_poll_napi, tomal->maxRxBuffers[c]);
+			napi_enable(&(tomal->napi[c]));
+#endif
+		}
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
+
+
+TOMAL* __init
+tomal_init(void* devMapAddr,
+	   struct net_device* netDev0,
+	   U32 rxTotalBufferSize0,
+	   U32 numTxBuffers0,
+	   struct net_device* netDev1,
+	   U32 rxTotalBufferSize1,
+	   U32 numTxBuffers1,
+	   int irq0,
+	   int irq1,
+	   struct proc_dir_entry* procDir)
+{
+	TOMAL* tomal;
+	int rc = 0;
+	int c;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - netDev0=%p, rxTotalBufferSize0=%d, "
+		"numTxBuffers0=%d, netDev1=%p, rxTotalBufferSize1=%d, "
+		"numTxBuffers1=%d, irq0=%d, irq1=%d, procDir=%p\n", netDev0, rxTotalBufferSize0,
+		numTxBuffers0, netDev1, rxTotalBufferSize1, numTxBuffers1, irq0, irq1, procDir);
+
+	 /*  Allocate tomal object. */
+	tomal = kmalloc(sizeof(TOMAL), GFP_KERNEL);
+	if (!tomal) {
+		e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+				"Failure allocating TOMAL device.");
+		rc = -ENOMEM;
+		goto end;
+	}
+	memset((void*) tomal, 0, sizeof(*tomal));
+
+	 /*  Map the TOMAL registers. */
+	tomal->regs[0] = (TOMALRegs*) devMapAddr;
+	if (!tomal->regs[0]) {
+		e10000_printr(bg_subcomp_tomal, tomal_ras_ioremap_error,
+				"Failure mapping TOMAL registers.");
+		rc = -ENXIO;
+		goto free_tomal;
+	}
+
+	 /*  Setup a register mapping for the second channel.  The registers that */
+	 /*  are specific to the second channel are located 0x100 bytes past the */
+	 /*  registers specific to the first channel.  Use this mapping for */
+	 /*  channel 1 specific registers only! */
+	tomal->regs[1] = (TOMALRegs*) (((U8*) tomal->regs[0]) + 0x100);
+
+	 /*  Register interrupt handlers.  TOMAL has two interrupt lines. */
+	tomal->irq0 = irq0;
+	tomal->irq1 = irq1;
+	rc = request_irq(tomal->irq0, tomal_irq0, IRQF_DISABLED, "TOMAL IRQ0", (void*) tomal);
+	if (!rc) {
+		rc = request_irq(tomal->irq1, tomal_irq1, IRQF_DISABLED, "TOMAL IRQ1", (void*) tomal);
+		if (rc) {
+			e10000_printr(bg_subcomp_tomal, tomal_ras_irq_unavailable,
+					"Unable to register IRQ - irq1=0x%08x.", irq1);
+			 /*  irq0 was registered but irq1 was not, so release irq0 */
+			 /*  here and unwind; a label that freed both would */
+			 /*  double-free irq0 and free the unregistered irq1. */
+			free_irq(tomal->irq0, tomal);
+			goto unmap_tomal_regs;
+		}
+	} else {
+		e10000_printr(bg_subcomp_tomal, tomal_ras_irq_unavailable,
+				"Unable to register IRQ - irq0=0x%08x.", irq0);
+		goto unmap_tomal_regs;
+	}
+
+	 /*  Create /proc/driver/e10000/tomal directory. */
+	tomal->parentDir = procDir;
+	if (procDir) {
+		tomal->tomalDir = proc_mkdir("tomal", procDir);
+		if (tomal->tomalDir) {
+			tomal->hwDir = proc_mkdir("hw", tomal->tomalDir);
+			if (tomal->hwDir) {
+				E10000_PROC_ENTRY* entry = tomal_hw_proc_entry;
+
+				while (entry->name) {
+					entry->entry = e10000_create_proc_entry(tomal->hwDir, entry->name, (void*)
+										((U32) entry->addr + (U32) tomal->regs[0]));
+					entry++;
+				}
+			}
+			tomal->swDir = proc_mkdir("sw", tomal->tomalDir);
+			if (tomal->swDir) {
+                                tomal_sw_proc_entry[0].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[0].name,
+								 (void*) &tomal->maxRxBuffers[0]);
+                                tomal_sw_proc_entry[1].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[1].name,
+								(void*) &tomal->maxRxBuffers[1]);
+                                tomal_sw_proc_entry[2].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[2].name,
+								(void*) &tomal->rxBufferSize[0]);
+                                tomal_sw_proc_entry[3].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[3].name,
+								(void*) &tomal->rxBufferSize[1]);
+                                tomal_sw_proc_entry[4].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[4].name,
+								(void*) &tomal->rxDescSegment[0]);
+                                tomal_sw_proc_entry[5].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[5].name,
+								(void*) &tomal->rxDescSegment[1]);
+                                tomal_sw_proc_entry[6].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[6].name,
+								(void*) &tomal->oldRxSegment[0]);
+                                tomal_sw_proc_entry[7].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[7].name,
+								(void*) &tomal->oldRxSegment[1]);
+                                tomal_sw_proc_entry[8].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[8].name,
+								(void*) &tomal->maxTxBuffers[0]);
+                                tomal_sw_proc_entry[9].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[9].name,
+								(void*) &tomal->maxTxBuffers[1]);
+                                tomal_sw_proc_entry[10].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[10].name,
+								(void*) &tomal->pendingTxBuffers[0]);
+                                tomal_sw_proc_entry[11].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[11].name,
+								(void*) &tomal->pendingTxBuffers[1]);
+                                tomal_sw_proc_entry[12].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[12].name,
+								(void*) &tomal->numberOfTransmittedFrames[0]);
+                                tomal_sw_proc_entry[13].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[13].name,
+								(void*) &tomal->numberOfTransmittedFrames[1]);
+                                tomal_sw_proc_entry[14].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[14].name,
+								(void*) &tomal->txDescSegment[0]);
+                                tomal_sw_proc_entry[15].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[15].name,
+								(void*) &tomal->txDescSegment[1]);
+                                tomal_sw_proc_entry[16].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[16].name,
+								(void*) &tomal->oldTxSegment[0]);
+                                tomal_sw_proc_entry[17].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[17].name,
+								(void*) &tomal->oldTxSegment[1]);
+                                tomal_sw_proc_entry[18].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[18].name,
+								(void*) &tomal->freeTxSegment[0]);
+                                tomal_sw_proc_entry[19].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[19].name,
+								(void*) &tomal->freeTxSegment[1]);
+                                tomal_sw_proc_entry[20].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[20].name,
+								(void*) &tomal->irq0);
+                                tomal_sw_proc_entry[21].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[21].name,
+								(void*) &tomal->irq1);
+                                tomal_sw_proc_entry[22].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[22].name,
+								(void*) &tomal->numberOfNetrxDrops);
+                                tomal_sw_proc_entry[23].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[23].name,
+								(void*) &tomal->numberOfHwDrops0);
+                                tomal_sw_proc_entry[24].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[24].name,
+								(void*) &tomal->numberOfHwDrops1);
+                                tomal_sw_proc_entry[25].entry =
+                                        e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[25].name,
+								(void*) &tomal->numberOfNotLast);
+/*                                 tomal_sw_proc_entry[22].entry = */
+/*                                         e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[22].name, */
+/* 								(void*) &tomal->count_tx_checksum_type[0]); */
+/*                                 tomal_sw_proc_entry[23].entry = */
+/*                                         e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[23].name, */
+/* 								(void*) &tomal->count_tx_checksum_type[1]); */
+/*                                 tomal_sw_proc_entry[24].entry = */
+/*                                         e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[24].name, */
+/* 								(void*) &tomal->count_tx_checksum_type[2]); */
+/*                                 tomal_sw_proc_entry[25].entry = */
+/*                                         e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[25].name, */
+/* 								(void*) &tomal->count_tx_checksum_type[3]); */
+			}
+		}
+	}
+
+	 /*  For each configured channel allocate descriptor segments and perform other initialization. */
+	tomal->netDev[0] = netDev0;
+	if (netDev0) {
+		tomal->rxBufferSize[0] = 9000 + ETH_HLEN + BGP_E10000_FCS_SIZE;
+		tomal->maxRxBuffers[0] = (rxTotalBufferSize0 <= TOMAL_RX_TOTAL_BUFFER_SIZE_MAX ? rxTotalBufferSize0 :
+					  TOMAL_RX_TOTAL_BUFFER_SIZE_MAX) / tomal->rxBufferSize[0] ;
+		tomal->maxTxBuffers[0] = numTxBuffers0;
+	}
+	tomal->netDev[1] = netDev1;
+	if (netDev1) {
+		tomal->rxBufferSize[1] = 9000 + ETH_HLEN + BGP_E10000_FCS_SIZE;
+		tomal->maxRxBuffers[1] = (rxTotalBufferSize1 <= TOMAL_RX_TOTAL_BUFFER_SIZE_MAX ? rxTotalBufferSize1 :
+					  TOMAL_RX_TOTAL_BUFFER_SIZE_MAX) / tomal->rxBufferSize[1];
+		tomal->maxTxBuffers[1] = numTxBuffers1;
+	}
+	for (c = 0; c < TOMAL_MAX_CHANNELS; c++) {
+		if (tomal->netDev[c]) {
+#ifdef CONFIG_BGP_E10000_IP_CHECKSUM
+			 /*  Tell the network stack that TOMAL performs IP checksum and */
+			 /*  that it can handle the transmission of scatter/gather data. */
+			tomal->netDev[c]->features |= (NETIF_F_SG | NETIF_F_IP_CSUM);
+#endif
+			tomal->netDev[c]->features |= (NETIF_F_HIGHDMA | NETIF_F_LLTX);
+
+		}
+	}
+	tomal_soft_reset(tomal);
+
+        goto end;
+
+unmap_tomal_regs:
+	tomal->regs[0] = NULL;
+
+free_tomal:
+	kfree((void*) tomal);
+
+end:
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return (rc ? ERR_PTR(rc) : tomal);
+}
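+
+/* tomal_init() hands back either a valid TOMAL pointer or an ERR_PTR()-encoded
+ * errno, so callers are expected to test the result with IS_ERR()/PTR_ERR().
+ * A minimal sketch of a hypothetical caller (names and arguments are
+ * illustrative only):
+ */
+#if 0
+	TOMAL* tomal = tomal_init(devMapAddr, netDev0, rxSize0, numTx0,
+				  netDev1, rxSize1, numTx1,
+				  TOMAL_IRQ0, TOMAL_IRQ1, procDir);
+	if (IS_ERR(tomal))
+		return PTR_ERR(tomal);
+#endif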
+
+
+/*  Allocate an SKB for each Rx descriptor that doesn't already reference one. */
+int tomal_alloc_rx_buffers(TOMAL* tomal,
+			   U8 channel)
+{
+	int rc = 0;
+	RxDescSegment* segment;
+	RxDesc* desc;
+	RxDesc* startDesc;
+	struct sk_buff** skb;
+	U32 bytesAlloced = 0;
+	U32 buffersAlloced = 0;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p channel=%d\n", tomal, channel);
+
+	segment = tomal->rxDescSegment[channel];
+	desc = segment->desc;
+	startDesc = desc;
+	skb = segment->skb;
+
+	 /*  Iterate over all descriptors and allocate a buffer to any */
+	 /*  descriptors that don't already point to a buffer. */
+	do {
+		 /*  Have we reached the end of the segment? */
+		if (desc == (RxDesc*) segment->branchDesc) {
+			 /*  Move the descriptor segment pointer to the next segment. */
+			segment = segment->next;
+			desc = segment->desc;
+			skb = segment->skb;
+			if (desc == startDesc)
+				 /*  We've been through all descriptors. */
+				break;
+		}
+
+		 /*  If this descriptor is unused then allocate a buffer here. */
+		if (!desc->postedLength) {
+			 /*  Allocate a buffer. */
+			*skb = alloc_skb(tomal->rxBufferSize[channel] + 16, GFP_ATOMIC);
+			if (*skb) {
+				skb_reserve(*skb, 2);
+
+				 /*  Point a descriptor at the buffer. */
+				desc->code = TOMAL_RX_DESC_CODE;
+				desc->postedLength = tomal->rxBufferSize[channel];
+				desc->status = 0;
+				desc->totalFrameLength = 0;
+				desc->buffHeadAddrH = 0;
+				desc->buffHeadAddrL =
+					dma_map_single(NULL, (*skb)->data,
+						       desc->postedLength,
+						       DMA_FROM_DEVICE);
+				BUG_ON(!desc->buffHeadAddrL);
+
+				bytesAlloced += desc->postedLength;
+				buffersAlloced++;
+			} else {
+				e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+						"Failure allocating SKB.");
+				break;
+			}
+		}
+
+		 /*  Advance to the next descriptor and buffer. */
+		desc++;
+		skb++;
+	} while (desc != startDesc);
+
+	 /*  Now tell TOMAL about all the buffers allocated. */
+	 /*  We can add up to 64K at a time for a maximum total of 1MB. */
+	while (bytesAlloced) {
+		U32 size = (bytesAlloced <= 0xffff ? bytesAlloced : 0xffff);
+
+		BUG_ON(in_be32(&tomal->regs[channel]->rxTotalBufferSize) + size > 0x100000);
+		out_be32(&tomal->regs[channel]->rxAddFreeBytes, size);
+		bytesAlloced -= size;
+	}
+
+	rc = (rc ? rc : buffersAlloced);
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
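+
+/* The rxAddFreeBytes register accepts at most 0xffff (65535) bytes per write,
+ * with a 1MB cap on the total outstanding buffer space; posting, say, 150000
+ * freshly allocated bytes therefore takes three writes (65535 + 65535 + 18930).
+ * The same chunking as in the loop above, restated as a stand-alone helper
+ * (illustrative only):
+ */
+#if 0
+static void tomal_post_free_bytes(TOMAL* tomal, U8 channel, U32 bytes)
+{
+	while (bytes) {
+		U32 chunk = (bytes <= 0xffff) ? bytes : 0xffff;
+		out_be32(&tomal->regs[channel]->rxAddFreeBytes, chunk);
+		bytes -= chunk;
+	}
+}
+#endif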
+
+
+/*  Receive frames until the indicated number of frames have been received or there are no more */
+/*  frames available. */
+#if defined(CONFIG_BGP_E10000_NAPI)
+int tomal_poll_napi(struct napi_struct * napi, int budget)
+{
+	struct net_device *netDev = napi->dev ;
+#else
+int tomal_poll(struct net_device *netDev, int budget)
+{
+#endif
+	int rc;
+	EMAC* emac = (EMAC*) netdev_priv(netDev);
+	TOMAL* tomal = emac->tomal;
+	U8 channel = emac->channel;
+	RxDescSegment* segment = tomal->oldRxSegment[channel];
+	register RxDesc* desc = segment->currDesc;
+	register struct sk_buff** skb = segment->currSkb;
+	register const U32 buffLen = tomal->rxBufferSize[channel];
+	register const U32 skbSize = buffLen + 16;
+	register U32 rxNumberOfReceivedFrames = in_be32(&tomal->regs[channel]->rxNumberOfReceivedFrames);
+	register U32 framesToProcess = rxNumberOfReceivedFrames - tomal->numberOfReceivedFrames[channel];
+	register U32 framesReceived = 0;
+	register U32 bytesPosted = 0;
+	register int quota = min(budget, (int) framesToProcess);
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "entry - netDev=%p, budget=%d\n", netDev, budget);
+
+/* #ifdef CONFIG_BGP_E10000_NAPI */
+/* 	// Determine receive quota. */
+/* 	if (quota > netDev->quota) */
+/* 		quota = netDev->quota; */
+/* #endif */
+
+         /*  Iterate over the RX descriptors, starting with the oldest, processing each */
+         /*  data buffer that has been received until the indicated number of frames */
+         /*  have been processed. */
+	while (likely((framesReceived < quota) && framesToProcess)) {
+                 /*  Is the current descriptor describing a valid frame? */
+		if (likely(desc->status & TOMAL_RX_LAST)) {
+			PRINTK(DBG_TOMAL | DBG_LEVEL3 | DBG_NAPI, "Received %d bytes to skb %p\n", desc->totalFrameLength, *skb);
+			if (likely((desc->status & TOMAL_RX_STATUS_CHECKSUM_VALID) &&
+				   (desc->status & TOMAL_RX_STATUS_IP_CHECKSUM_PASSED) &&
+				   (desc->status & TOMAL_RX_STATUS_TCP_UDP_CHECKSUM_PASSED)))
+				 /*  Valid checksum. */
+				(*skb)->ip_summed = CHECKSUM_UNNECESSARY;
+			else
+				(*skb)->ip_summed = CHECKSUM_NONE;
+			skb_put(*skb, desc->totalFrameLength);
+			(*skb)->dev = netDev;
+			(*skb)->protocol = eth_type_trans(*skb, netDev);
+#ifdef CONFIG_BGP_E10000_NAPI
+		        PRINTK(DBG_NAPI, "netif_receive_skb\n");
+			rc = netif_receive_skb(*skb);
+#else
+			rc = netif_rx(*skb);
+#endif
+			*skb = NULL;
+			if (likely(rc == NET_RX_SUCCESS)) {
+				framesReceived++;
+				emac->stats.rx_bytes += desc->totalFrameLength;
+			} else if (rc == NET_RX_DROP || rc == NET_RX_BAD) {
+				emac->stats.rx_dropped++;
+				tomal->numberOfNetrxDrops ++ ;
+			} else
+				emac->stats.rx_errors++;
+		} else {
+			tomal->numberOfNotLast++ ;
+		}
+
+		 /*  Make the current slot in the Rx ring usable again. */
+		if (likely(*skb == NULL)) {
+			*skb = alloc_skb(skbSize, GFP_ATOMIC);
+			if (likely(*skb)) {
+				skb_reserve(*skb, 2);  /*  align */
+				desc->buffHeadAddrL = dma_map_single(NULL, (*skb)->data, buffLen, DMA_FROM_DEVICE);
+				desc->postedLength = buffLen;
+				bytesPosted += desc->postedLength;
+			} else
+				desc->postedLength = desc->buffHeadAddrL = 0;
+		} else     /*  Reinitialize this descriptor */
+			bytesPosted += desc->postedLength;   /*  descriptor available again so repost */
+		desc->status = 0;
+
+		 /*  Post additional buffers to the device if we've accumulated enough. */
+		if (unlikely(bytesPosted >= 0xffff)) {
+			out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0xffff);
+			bytesPosted -= 0xffff;
+		}
+
+		skb++;
+		desc++;
+		framesToProcess--;
+
+		 /*  Have we reached the end of the segment? */
+		if (unlikely(desc->code != TOMAL_RX_DESC_CODE)) {
+			 /*  Move to the next segment. */
+			segment->currDesc = segment->desc;
+			segment->currSkb = segment->skb;
+			tomal->oldRxSegment[channel] = segment = segment->next;
+			desc = segment->currDesc;
+			skb = segment->currSkb;
+		}
+	}
+
+	 /*  Post any remaining buffers to the device. */
+	if (likely(bytesPosted))
+		out_be32(&tomal->regs[channel]->rxAddFreeBytes, bytesPosted);
+
+	 /*  Update segment information and statistics. */
+	segment->currDesc = desc;
+	segment->currSkb = skb;
+	emac->stats.rx_packets += framesReceived;
+	tomal->numberOfReceivedFrames[channel] = rxNumberOfReceivedFrames - framesToProcess;
+
+         /*  Reset the Rx notification mechanism. */
+        out_be32(&tomal->regs[0]->rxNotificationCtrl, (channel ? TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 : TOMAL_RX_NOTIFY_CTRL_COUNTER_START0));
+
+#ifdef CONFIG_BGP_E10000_NAPI
+/*         netDev->quota -= framesReceived; */
+        budget -= framesReceived;
+        if (framesReceived == quota) {
+                 /*  We processed all frames within the specified quota.  Reenable interrupts */
+		 /*  and tell the kernel that we received everything available. */
+		U32 swNonCriticalErrorsMask = in_be32(&tomal->regs[0]->swNonCriticalErrorsMask);
+	        PRINTK(DBG_NAPI, "napi_complete\n");
+		napi_complete(napi) ;
+                out_be32(&tomal->regs[channel]->rxEventMask, TOMAL_RX_EVENT);
+		if (!(swNonCriticalErrorsMask & TOMAL_SW_NONCRIT_ERRORS_RTSDB))
+			out_be32(&tomal->regs[0]->swNonCriticalErrorsMask,
+			 	 swNonCriticalErrorsMask | TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+                rc = 0;
+        } else
+		rc = 1;
+#else
+	rc = 0;
+#endif
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "exit - rc=%d\n", rc);
+
+        return rc;
+}
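+
+/* For reference, the generic NAPI poll contract on this kernel generation is:
+ * consume up to 'budget' frames, and call napi_complete() plus re-enable the
+ * device interrupt only once the ring is drained.  A stripped-down sketch of
+ * that contract; consume_rx_frames() and enable_rx_interrupts() are
+ * hypothetical stand-ins for the device-specific work done above:
+ */
+#if 0
+static int example_poll(struct napi_struct *napi, int budget)
+{
+	int done = consume_rx_frames(napi->dev, budget);
+
+	if (done < budget) {
+		napi_complete(napi);			/* no more work: stop polling */
+		enable_rx_interrupts(napi->dev);	/* let the IRQ re-arm NAPI */
+	}
+	return done;
+}
+#endif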
+
+/*  Locate the TCP or UDP checksum field within a frame, or return NULL if */
+/*  the frame is not TCP/UDP over IPv4.  The h_proto/ETH_P_IP comparison */
+/*  works without ntohs() only because Blue Gene/P is big-endian. */
+static inline U16 * frame_checksum_ptr(struct sk_buff* skb)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+	unsigned int eth_proto = eth->h_proto ;
+	struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+	unsigned int iphlen = 4*iph->ihl ;
+	struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) );
+	struct udphdr *udph = (struct udphdr *) ( ((char *)(iph)) + (iphlen) );
+	unsigned int ip_proto = iph->protocol ;
+	if( eth_proto == ETH_P_IP) {
+		if( ip_proto == IPPROTO_TCP) return &(tcph->check) ;
+		if( ip_proto == IPPROTO_UDP) return &(udph->check) ;
+	}
+	return NULL ;
+}
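+
+/* A portable form of the same protocol test would hedge against endianness
+ * with htons(); an illustrative fragment only, using the locals declared in
+ * frame_checksum_ptr() above:
+ */
+#if 0
+	if (eth->h_proto == htons(ETH_P_IP) && iph->protocol == IPPROTO_TCP)
+		return &tcph->check;
+#endif
+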
+/*  Transmit a frame. */
+/*  Caller should be holding the TOMAL lock for the specified channel. */
+int tomal_xmit_tx_buffer(TOMAL* tomal,
+			 U8 channel,
+                         struct sk_buff* skb)
+{
+	int rc = 0;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int f = -1;
+	TxDescSegment* segment = tomal->freeTxSegment[channel];
+	U32 framesToProcess;
+	U32 buffLen;
+	dma_addr_t buffAddr;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, skb=%p, channel=%d\n", tomal, skb, channel);
+
+	do {
+		 /*  Are we at the end of the segment?  (0x20 is the leading code */
+		 /*  byte of a branch descriptor; see TOMAL_BRANCH_CODE.) */
+		if (unlikely(segment->desc[segment->freeIndex].code == 0x20)) {
+			segment->freeIndex = 0;
+			tomal->freeTxSegment[channel] = segment = segment->next;
+		}
+
+		 /*  Point the next free descriptor(s) at the SKB buffer(s).  The first buffer is a special case. */
+		if (f < 0) {
+			 /*  The data is in the skb's data buffer. */
+			buffLen = skb->len - skb->data_len;
+			buffAddr = dma_map_single(NULL, skb->data, buffLen, DMA_TO_DEVICE);
+/* 			tomal->count_tx_checksum_type[skb->ip_summed] += 1 ; */
+#ifdef CONFIG_BGP_E10000_IP_CHECKSUM
+			 /*  When using the IO node as a router (collective --> ethernet ) frames are coming across marked CHECKSUM_COMPLETE */
+			 /*  even though I think they should be marked CHECKSUM_PARTIAL. Use the TOMAL checksumming hardware on the frames. */
+/* 			if (skb->ip_summed == CHECKSUM_PARTIAL) */
+			if( 1)
+			{
+				 /*  Generate IP checksum for this frame. */
+				U16 * frame_ck_ptr=frame_checksum_ptr(skb) ;
+				if( frame_ck_ptr ) *frame_ck_ptr = 0 ;
+/* 				if( frame_ck_ptr && frame_ck_ptr != (U16*)(skb->head+skb->csum_start + skb->csum_offset)) */
+/* 					{ */
+/* 						printk(KERN_INFO "(E) frame_ck_ptr=%p skb->head=%p skb->csum_start=%d skb->csum_offset=%d\n", */
+/* 								frame_ck_ptr,skb->head,skb->csum_start,skb->csum_offset) ; */
+/* 					} */
+/* 				*(U16*)(skb->head+skb->csum_start + skb->csum_offset) = 0; */
+				segment->desc[segment->freeIndex].command = TOMAL_TX_ENABLE_HW_CHECKSUM |
+						TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD;
+			} else  {
+				segment->desc[segment->freeIndex].command = TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD;
+			}
+#else
+			segment->desc[segment->freeIndex].command = TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD;
+#endif
+
+		} else {
+			struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+
+			 /*  Map the page that contains the current fragment. */
+			buffAddr = dma_map_page(NULL, frag->page, frag->page_offset, frag->size, DMA_TO_DEVICE);
+			buffLen = frag->size;
+		}
+
+		segment->desc[segment->freeIndex].wBStatus = 0;
+		segment->desc[segment->freeIndex].postedLength = buffLen;
+		segment->desc[segment->freeIndex].buffHeadAddrL = (U32) buffAddr;
+		segment->desc[segment->freeIndex].code = TOMAL_TX_DESC_CODE;
+		if (f == (nr_frags - 1)) {   /*  Last buffer? */
+			segment->desc[segment->freeIndex].code |=  TOMAL_TX_NOTIFY_REQ | TOMAL_TX_SIGNAL | TOMAL_TX_LAST;
+			segment->skb[segment->freeIndex] = skb;
+
+			 /*  Post buffer(s) for transmission. */
+			PRINTK(DBG_TOMAL | DBG_LEVEL3, "Enqueueing buffer 0x%08x for xmit, index=%d, desc=%p, len=%d, code=0x%x\n",
+				(U32) buffAddr, segment->freeIndex, &segment->desc[segment->freeIndex], segment->desc[segment->freeIndex].postedLength,
+				segment->desc[segment->freeIndex].code);
+			smp_wmb();
+			out_be32(&tomal->regs[channel]->txAddPostedFrames, 1);
+		}
+
+		 /*  Advance to the next free descriptor index. */
+		segment->freeIndex++;
+		f++;
+	} while (f < nr_frags);
+	tomal->pendingTxBuffers[channel] += f+1;
+
+	 /*  Clean up any buffers for frames that have been transmitted. */
+	framesToProcess = in_be32(&tomal->regs[channel]->txNumberOfTransmittedFrames) - tomal->numberOfTransmittedFrames[channel];
+	if (unlikely(framesToProcess > 32)) {
+		int bufsProcessed = tomal_process_tx_buffers(tomal, channel, framesToProcess);
+		if (unlikely(bufsProcessed < 0))
+			printk(KERN_WARNING "%s: Error processing TX buffers [%d]\n",
+				tomal->netDev[channel]->name, bufsProcessed);
+	}
+
+	 /*  Stop the queue if we lack the space to transmit another frame. */
+	if (unlikely((tomal->pendingTxBuffers[channel] + MAX_SKB_FRAGS + 1) >
+			tomal->maxTxBuffers[channel]))
+		netif_stop_queue(tomal->netDev[channel]);
+
+	tomal->netDev[channel]->trans_start = jiffies;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
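+
+/* Since tomal_xmit_tx_buffer() requires the caller to hold the per-channel TX
+ * lock, a transmit entry point would bracket it roughly as below.  This is an
+ * illustrative sketch; the real EMAC transmit path lives elsewhere in this
+ * patch:
+ */
+#if 0
+static int example_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	EMAC* emac = (EMAC*) netdev_priv(dev);
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&emac->tomal->txLock[emac->channel], flags);
+	rc = tomal_xmit_tx_buffer(emac->tomal, emac->channel, skb);
+	spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], flags);
+	return rc;
+}
+#endif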
+
+
+
+void tomal_exit(TOMAL* tomal)
+{
+	int c;
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry\n");
+
+	if (tomal) {
+		 /*  Release the interrupt handlers acquired in tomal_init(). */
+		free_irq(tomal->irq0, tomal);
+		free_irq(tomal->irq1, tomal);
+
+        	 /*  Free descriptor segments for each channel. */
+		for (c = 0; c < TOMAL_MAX_CHANNELS; c++) {
+			tomal_free_rx_segments(tomal, c);
+			tomal_free_tx_segments(tomal, c);
+
+			 /*  Unregister and free net_device */
+			if (tomal->netDev[c]) {
+				EMAC* emac = netdev_priv(tomal->netDev[c]);
+
+				 /*  Allow EMAC to cleanup. */
+				if (emac)
+					emac_exit(emac);
+
+				unregister_netdev(tomal->netDev[c]);
+				free_netdev(tomal->netDev[c]);
+			}
+       		}
+
+		 /*  Remove /proc entries. */
+		if (tomal->tomalDir) {
+			if (tomal->hwDir) {
+				E10000_PROC_ENTRY* entry = tomal_hw_proc_entry;
+
+				while (entry->name) {
+					if (entry->entry) {
+						remove_proc_entry(entry->entry->name, tomal->hwDir);
+						entry->entry = NULL;
+					}
+                                        entry++;
+				}
+
+				remove_proc_entry(tomal->hwDir->name, tomal->tomalDir);
+				tomal->hwDir = NULL;
+			}
+			if (tomal->swDir) {
+				E10000_PROC_ENTRY* entry = tomal_sw_proc_entry;
+				while (entry->name) {
+					if (entry->entry) {
+						remove_proc_entry(entry->entry->name, tomal->swDir);
+						entry->entry = NULL;
+					}
+					entry++;
+				}
+
+				remove_proc_entry(tomal->swDir->name, tomal->tomalDir);
+				tomal->swDir = NULL;
+			}
+
+			remove_proc_entry(tomal->tomalDir->name, tomal->parentDir);
+			tomal->tomalDir = NULL;
+		}
+
+		 /*  Free the TOMAL object. */
+		kfree((void*) tomal);
+        }
+
+        PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n");
+
+        return;
+}
+
+
+/*  Reset and reconfigure the TOMAL hardware and reinitialize Rx descriptors. */
+int tomal_soft_reset(TOMAL* tomal)
+{
+	int rc = 0;
+	int c;
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p\n", tomal);
+
+	 /*  Reset TOMAL and wait for it to finish. */
+	out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_SOFT_RESET);
+	for (c = 100; (in_be32(&tomal->regs[0]->configurationCtrl) & TOMAL_CFG_CTRL_SOFT_RESET) && c; c--)
+		mdelay(10);	 /*  up to 100 x 10ms; mdelay() suits millisecond-scale waits */
+	if (!c) {
+		e10000_printr(bg_subcomp_tomal, tomal_ras_timeout,
+				"TOMAL reset failure.");
+		rc = -ETIME;
+	} else {
+		 /*  Reset EMAC(s) and free any buffers. */
+		for (c = 0; c < TOMAL_MAX_CHANNELS; c++)
+			if (tomal->netDev[c]) {
+				 /*  Free any RX and TX buffers. */
+				tomal_free_rx_buffers(tomal, c);
+				tomal_free_tx_buffers(tomal, c);
+
+				 /*  Free descriptor segments */
+				tomal_free_rx_segments(tomal, c);
+				tomal_free_tx_segments(tomal, c);
+			}
+
+		 /*  Reconfigure TOMAL. */
+		rc = tomal_configure(tomal);
+	}
+
+	PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+	return rc;
+}
+
diff --git a/drivers/net/bgp_e10000/bgp_tomal.h b/drivers/net/bgp_e10000/bgp_tomal.h
new file mode 100644
index 0000000..d45ef58
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_tomal.h
@@ -0,0 +1,423 @@
+/*
+ * bgp_tomal.h: Definition of TOMAL device for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef _BGP_TOMAL_H
+#define _BGP_TOMAL_H
+
+#include <asm/io.h>
+#include <asm/bluegene.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+
+#include "bgp_e10000.h"
+
+#define TOMAL_MAX_CHANNELS 2
+
+
+#define TOMAL_RX_MAX_FRAME_NUM  10
+#define TOMAL_RX_MAX_TIMER      50
+
+
+#define TOMAL_IRQ_GROUP  8
+#define TOMAL_IRQ0_GINT  0
+#define TOMAL_IRQ1_GINT  1
+#define TOMAL_IRQ0 bic_hw_to_irq(TOMAL_IRQ_GROUP, TOMAL_IRQ0_GINT)
+#define TOMAL_IRQ1 bic_hw_to_irq(TOMAL_IRQ_GROUP, TOMAL_IRQ1_GINT)
+
+
+#define TOMAL_BASE_ADDRESS 0x720000000ULL
+typedef volatile struct _TOMALRegs {
+	U32 configurationCtrl;                   /*  0000 configuration control */
+#define TOMAL_CFG_CTRL_RX_MAC0			0x00800000
+#define TOMAL_CFG_CTRL_RX_MAC1			0x00400000
+#define TOMAL_CFG_CTRL_TX_MAC0			0x00200000
+#define TOMAL_CFG_CTRL_TX_MAC1			0x00100000
+#define TOMAL_CFG_CTRL_PLB_FREQ_250		0x00000000
+#define TOMAL_CFG_CTRL_PLB_FREQ_300		0x00040000
+#define TOMAL_CFG_CTRL_PLB_FREQ_350		0x00080000
+#define TOMAL_CFG_CTRL_PLB_FREQ_400		0x000c0000
+#define TOMAL_CFG_CTRL_PLB_M_POWER		0x00000080
+#define TOMAL_CFG_CTRL_SLEEP			0x00000002
+#define TOMAL_CFG_CTRL_SOFT_RESET		0x00000001
+	U32 reserved1[23];                       /*  0004 */
+	U32 revisionID;                          /*  0060 revision id */
+	U32 reserved2[103];                      /*  0064 */
+	U32 consumerMemoryBaseAddr;              /*  0200 consumer memory base address */
+	U32 reserved3[127];                      /*  0204 */
+	U32 packetDataEngineCtrl;                /*  0400 packet data engine control */
+#define TOMAL_PDE_CTRL_RX_PREFETCH8		0x00000030
+#define TOMAL_PDE_CTRL_RX_PREFETCH1		0x00000000
+#define TOMAL_PDE_CTRL_TX_PREFETCH8		0x00000003
+#define TOMAL_PDE_CTRL_TX_PREFETCH1		0x00000000
+	U32 reserved4[127];                      /*  0404 */
+	U32 txNotificationCtrl;                  /*  0600 TX notification control */
+#define TOMAL_TX_NOTIFY_CTRL_COUNTER_START0	0x00000020
+#define TOMAL_TX_NOTIFY_CTRL_COUNTER_START1	0x00000010
+	U32 reserved5[3];                        /*  0604 */
+	U32 txMinTimer;                          /*  0610 TX min timer */
+	U32 reserved6[3];                        /*  0614 */
+	U32 txMaxTimer;                          /*  0620 TX max timer */
+	U32 reserved7[11];                       /*  0624 */
+	U32 txFramePerServiceCtrl;               /*  0650 TX frame / service control */
+	U32 reserved8[3];                        /*  0654 */
+	U32 txHWCurrentDescriptorAddrH;          /*  0660 TX HW current desc. addr. High */
+	U32 reserved9[3];                        /*  0664 */
+	U32 txHWCurrentDescriptorAddrL;          /*  0670 TX HW current desc. addr. Low */
+	U32 reserved10[7];                       /*  0674 */
+	U32 txPendingFrameCount;                 /*  0690 TX pending frame count */
+#define TOMAL_MAX_TX_PENDING_FRAMES 216
+	U32 reserved11[3];                       /*  0694 */
+	U32 txAddPostedFrames;                   /*  06A0 TX add posted frames */
+	U32 reserved12[3];                       /*  06A4 */
+	U32 txNumberOfTransmittedFrames;         /*  06B0 TX number transmitted frames */
+	U32 reserved13[3];                       /*  06B4 */
+        U32 txMaxFrameNum;                       /*  06C0 TX max frame number */
+        U32 reserved14[3];                       /*  06C4 */
+        U32 txMinFrameNum;                       /*  06D0 TX min frame number */
+        U32 reserved15[3];                       /*  06D4 */
+        U32 txEventStatus;                       /*  06E0 TX event status */
+#define TOMAL_TX_EVENT				0x00000001
+        U32 reserved16[3];                       /*  06E4 */
+        U32 txEventMask;                         /*  06F0 TX event mask */
+        U32 reserved17[515];                     /*  06F4 */
+        U32 rxNotificationCtrl;                  /*  0F00 RX notification control */
+#define TOMAL_RX_NOTIFY_CTRL_COUNTER_START0     0x00000080
+#define TOMAL_RX_NOTIFY_CTRL_COUNTER_START1     0x00000040
+        U32 reserved18[3];                       /*  0F04 */
+        U32 rxMinTimer;                          /*  0F10 RX minimum timer */
+        U32 reserved19[3];                       /*  0F14 */
+        U32 rxMaxTimer;                          /*  0F20 RX maximum timer */
+        U32 reserved20[63];                      /*  0F24 */
+        U32 rxHWCurrentDescriptorAddrH;          /*  1020 RX HW current desc. addr. High */
+        U32 reserved21[3];                       /*  1024 */
+        U32 rxHWCurrentDescriptorAddrL;          /*  1030 RX HW current desc. addr. Low */
+        U32 reserved22[3];                       /*  1034 */
+        U32 rxAddFreeBytes;                      /*  1040 num bytes in RX buffers posted */
+        U32 reserved23[3];                       /*  1044 */
+        U32 rxTotalBufferSize;                   /*  1050 total size of buffers */
+#define TOMAL_RX_TOTAL_BUFFER_SIZE_MAX		0x00100000
+        U32 reserved24[3];                       /*  1054 */
+        U32 rxNumberOfReceivedFrames;            /*  1060 total frames received */
+        U32 reserved25[3];                       /*  1064 */
+        U32 rxDroppedFramesCount;                /*  1070 total frames dropped */
+        U32 reserved26[3];                       /*  1074 */
+        U32 rxMaxFrameNum;                       /*  1080 num frames RX to interrupt */
+        U32 reserved27[3];                       /*  1084 */
+        U32 rxMinFrameNum;                       /*  1090 num frames RX to int w/timer */
+        U32 reserved28[3];                       /*  1094 */
+        U32 rxEventStatus;                       /*  10A0 RX event status */
+#define TOMAL_RX_EVENT				0x00000001
+        U32 reserved29[3];                       /*  10A4 */
+        U32 rxEventMask;                         /*  10B0 RX event mask */
+        U32 reserved30[467];                     /*  10B4 */
+        U32 swNonCriticalErrorsStatus;           /*  1800 software noncritical error status */
+#define TOMAL_SW_NONCRIT_ERRORS_TPDBC		0x00000010
+#define TOMAL_SW_NONCRIT_ERRORS_RTSDB		0x00000001
+        U32 reserved31[3];                       /*  1804 */
+        U32 swNonCriticalErrorsEnable;           /*  1810 software noncritical error enable */
+        U32 reserved32[3];                       /*  1814 */
+        U32 swNonCriticalErrorsMask;             /*  1820 software noncritical error mask */
+        U32 reserved33[55];                      /*  1824 */
+        U32 rxDataBufferFreeSpace;               /*  1900 number free entries in RX buffer */
+        U32 reserved34[3];                       /*  1904 */
+        U32 txDataBuffer0FreeSpace;              /*  1910 num free entries in TX buffer */
+        U32 reserved35[3];                       /*  1914 */
+        U32 txDataBuffer1FreeSpace;              /*  1920 num free entries in TX buffer */
+        U32 reserved36[127];                     /*  1924 */
+        U32 rxMACStatus;                         /*  1B20 status from MAC for RX packets */
+#define TOMAL_RX_MAC_CODE_ERROR			0x00001000	 /*  XEMAC */
+#define TOMAL_RX_MAC_PARITY_ERROR		0x00000400	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_OVERRUN			0x00000200	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_PAUSE_FRAME		0x00000100	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_BAD_FRAME			0x00000080	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_RUNT_FRAME			0x00000040	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_SHORT_EVENT		0x00000020	 /*  EMAC4 */
+#define TOMAL_RX_MAC_ALIGN_ERROR		0x00000010	 /*  EMAC4 */
+#define TOMAL_RX_MAC_BAD_FCS			0x00000008	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_FRAME_TOO_LONG		0x00000004	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_OUT_RANGE_ERROR		0x00000002	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_IN_RANGE_ERROR		0x00000001	 /*  XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_XEMAC_MASK (TOMAL_RX_MAC_CODE_ERROR | \
+	TOMAL_RX_MAC_PARITY_ERROR | TOMAL_RX_MAC_OVERRUN | \
+	TOMAL_RX_MAC_PAUSE_FRAME | TOMAL_RX_MAC_BAD_FRAME | \
+	TOMAL_RX_MAC_RUNT_FRAME | TOMAL_RX_MAC_BAD_FCS | \
+	TOMAL_RX_MAC_FRAME_TOO_LONG | TOMAL_RX_MAC_OUT_RANGE_ERROR | \
+	TOMAL_RX_MAC_IN_RANGE_ERROR)
+        U32 reserved37[3];                       /*  1B24 */
+        U32 rxMACStatusEnable;                   /*  1B30 enable bits in rxMACStatus */
+        U32 reserved38[3];                       /*  1B34 */
+        U32 rxMACStatusMask;                     /*  1B40 mask bits in rxMACStatus */
+        U32 reserved39[3];                       /*  1B44 */
+        U32 txMACStatus;                         /*  1B50 status from MAC for TX packets */
+#define TOMAL_TX_MAC_LOCAL_FAULT		0x00001000	 /*  XEMAC */
+#define TOMAL_TX_MAC_REMOTE_FAULT	0x00000800	 /*  XEMAC */
+#define TOMAL_TX_MAC_BAD_FCS		0x00000200	 /*  EMAC4 */
+#define TOMAL_TX_MAC_PARITY_ERROR	0x00000100	 /*  XEMAC */
+#define TOMAL_TX_MAC_LOST_CARRIER	0x00000080	 /*  EMAC4 */
+#define TOMAL_TX_MAC_EXCESSIVE_DEFERRAL	0x00000040	 /*  EMAC4 */
+#define TOMAL_TX_MAC_EXCESSIVE_COLLISION	0x00000020	 /*  EMAC4 */
+#define TOMAL_TX_MAC_LATE_COLLISION	0x00000010	 /*  EMAC4 */
+#define TOMAL_TX_MAC_UNDERRUN		0x00000002	 /*  XEMAC/EMAC4 */
+#define TOMAL_TX_MAC_SQE			0x00000001	 /*  EMAC4 */
+#define TOMAL_TX_MAC_XEMAC_MASK (TOMAL_TX_MAC_LOCAL_FAULT | \
+	TOMAL_TX_MAC_REMOTE_FAULT | TOMAL_TX_MAC_PARITY_ERROR | \
+	TOMAL_TX_MAC_UNDERRUN)
+        U32 reserved40[3];                       /*  1B54 */
+        U32 txMACStatusEnable;                   /*  1B60 enable bits in txMACStatus */
+        U32 reserved41[3];                       /*  1B64 */
+        U32 txMACStatusMask;                     /*  1B70 mask bits in txMACStatus */
+        U32 reserved42[163];                     /*  1B74 */
+        U32 hwErrorsStatus;                      /*  1E00 hardware error status */
+#define TOMAL_HW_ERRORS_IRAPE			0x00000008
+#define TOMAL_HW_ERRORS_ORAPE			0x00000004
+#define TOMAL_HW_ERRORS_IDBPE			0x00000002
+#define TOMAL_HW_ERRORS_ODBPE			0x00000001
+        U32 reserved43[3];                       /*  1E04 */
+        U32 hwErrorsEnable;                      /*  1E10 enable bits in hwErrorsStatus */
+        U32 reserved44[3];                       /*  1E14 */
+        U32 hwErrorsMask;                        /*  1E20 mask bits in hwErrorsStatus */
+        U32 reserved45[55];                      /*  1E24 */
+        U32 swCriticalErrorsStatus;              /*  1F00 software critical error status */
+#define TOMAL_SW_CRIT_ERRORS_TDBC		0x00000002
+#define TOMAL_SW_CRIT_ERRORS_RDBC		0x00000001
+        U32 reserved46[3];                       /*  1F04 */
+        U32 swCriticalErrorsEnable;              /*  1F10 enable bits in swCriticalErrorsStatus */
+        U32 reserved47[3];                       /*  1F14 */
+        U32 swCriticalErrorsMask;                /*  1F20 mask bits in swCriticalErrorsStatus */
+        U32 reserved48[3];                       /*  1F24 */
+        U32 rxDescriptorBadCodeFEC;              /*  1F30 RX channel w/bad code descriptor */
+        U32 reserved49[3];                       /*  1F34 */
+        U32 txDescriptorBadCodeFEC;              /*  1F40 TX channel w/bad code descriptor */
+        U32 reserved50[15];                      /*  1F44 */
+        U32 interruptStatus;                     /*  1F80 interrupt status register */
+#define TOMAL_INTERRUPT_TX1                     0x00020000
+#define TOMAL_INTERRUPT_TX0                     0x00010000
+#define TOMAL_INTERRUPT_RX1                     0x00000200
+#define TOMAL_INTERRUPT_RX0                     0x00000100
+#define TOMAL_INTERRUPT_TX_MAC_ERROR1           0x00000080
+#define TOMAL_INTERRUPT_TX_MAC_ERROR0           0x00000040
+#define TOMAL_INTERRUPT_RX_MAC_ERROR1           0x00000020
+#define TOMAL_INTERRUPT_RX_MAC_ERROR0           0x00000010
+#define TOMAL_INTERRUPT_PLB_PARITY_ERROR        0x00000008
+#define TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1   0x00000004
+#define TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0   0x00000002
+#define TOMAL_INTERRUPT_CRITICAL_ERROR          0x00000001
+#define TOMAL_IRQ0_MASK (TOMAL_INTERRUPT_TX0 | TOMAL_INTERRUPT_RX0 | \
+	TOMAL_INTERRUPT_TX_MAC_ERROR0 | TOMAL_INTERRUPT_RX_MAC_ERROR0 | \
+	TOMAL_INTERRUPT_PLB_PARITY_ERROR | TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 | \
+	TOMAL_INTERRUPT_CRITICAL_ERROR)
+#define TOMAL_IRQ1_MASK (TOMAL_INTERRUPT_TX1 | TOMAL_INTERRUPT_RX1 | \
+	TOMAL_INTERRUPT_TX_MAC_ERROR1 |  TOMAL_INTERRUPT_RX_MAC_ERROR1 | \
+	TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1)
+        U32 reserved51[3];			 /*  1F84 */
+        U32 interruptRoute;                      /*  1F90 interrupt line routing */
+        U32 reserved52[51];                      /*  1F94 */
+        U32 rxMACBadStatusCounter;               /*  2060 num frames with errors in MAC */
+        U32 reserved53[999];                     /*  2064 */
+        U32 debugVectorsCtrl;                    /*  3000 */
+        U32 reserved54[3];                       /*  3004 */
+        U32 debugVectorsReadData;                /*  3010 */
+} TOMALRegs;
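+
+/* The register block sits at 36-bit physical address TOMAL_BASE_ADDRESS and is
+ * big-endian, hence the in_be32()/out_be32() accessors used throughout.  A
+ * sketch of how a devMapAddr argument for tomal_init() might be produced; the
+ * 0x4000 length is an assumption covering the highest offset listed above:
+ */
+#if 0
+	void* devMapAddr = ioremap(TOMAL_BASE_ADDRESS, 0x4000);
+	if (!devMapAddr)
+		return -ENOMEM;
+#endif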
+
+typedef volatile struct _RxDesc {
+        U16 code;
+#define TOMAL_RX_DESC_CODE 0x6000
+        U16 postedLength;
+        U16 status;
+#define TOMAL_RX_LAST                                   0x8000
+#define TOMAL_RX_STATUS_ENCODE_MASK                     0x03f0
+#define TOMAL_RX_STATUS_TCP_UDP_CHECKSUM_PASSED         0x0008
+#define TOMAL_RX_STATUS_IP_CHECKSUM_PASSED              0x0004
+#define TOMAL_RX_STATUS_CHECKSUM_VALID                  0x0002
+        U16 totalFrameLength;
+        U16 reserved;
+        U16 buffHeadAddrH;       /*  bits 16-31 of data buffer address */
+        U32 buffHeadAddrL;       /*  bits 32-63 of data buffer address */
+} RxDesc;
+
+
+typedef volatile struct _TxDesc {
+        U8  code;
+#define TOMAL_TX_DESC_CODE              0x60
+#define TOMAL_TX_SIGNAL                 0x04
+#define TOMAL_TX_NOTIFY_REQ             0x02
+#define TOMAL_TX_LAST                   0x01
+        U8  command;
+#define TOMAL_TX_ENABLE_HW_CHECKSUM     0x40
+#define TOMAL_TX_GENERATE_FCS           0x20
+#define TOMAL_TX_GENERATE_PAD           0x30  /*  GENERATE_FCS must also be set */
+#define TOMAL_TX_INSERT_SOURCE_ADDR     0x08
+#define TOMAL_TX_REPLACE_SOURCE_ADDR    0x04
+#define TOMAL_TX_INSERT_VLAN_TAG        0x02
+#define TOMAL_TX_REPLACE_VLAN_TAG       0x01
+        U16 postedLength;
+        U32  wBStatus;
+#define TOMAL_TX_STATUS_GOOD            0x00010000
+        U16 reserved;
+        U16 buffHeadAddrH;       /*  bits 16-31 of data buffer address */
+        U32 buffHeadAddrL;       /*  bits 32-63 of data buffer address */
+} TxDesc;
+
+
+typedef volatile struct _BranchDesc {
+        U64  code;
+#define TOMAL_BRANCH_CODE       0x2000000000000000ULL
+        U16 reserved;
+        U16 nextDescAddrH;       /*  bits 16-31 of next descriptor address */
+        U32 nextDescAddrL;       /*  bits 32-63 of next descriptor address (16 byte aligned) */
+} BranchDesc;
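+
+/* A branch descriptor closes each descriptor segment and redirects the
+ * hardware to the first descriptor of the next segment, so the segments chain
+ * into a ring.  Linking one segment to another might look like this
+ * (illustrative fragment; segA/segB are hypothetical segment pointers):
+ */
+#if 0
+	segA->branchDesc->code = TOMAL_BRANCH_CODE;
+	segA->branchDesc->nextDescAddrH = 0;
+	segA->branchDesc->nextDescAddrL = (U32) segB->dmaHandle;  /* must be 16-byte aligned */
+#endif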
+
+
+
+typedef struct _RxDescSegment {
+        RxDesc* desc;
+	RxDesc* currDesc;
+	struct sk_buff** skb;
+	struct sk_buff** currSkb;
+        dma_addr_t dmaHandle;
+        size_t size;
+        BranchDesc* branchDesc;
+        struct _RxDescSegment* next;
+} RxDescSegment;
+
+
+typedef struct _TxDescSegment {
+        TxDesc* desc;
+	U32 oldIndex;
+	U32 freeIndex;
+	struct sk_buff** skb;
+        dma_addr_t dmaHandle;
+        size_t size;
+        BranchDesc* branchDesc;
+        struct _TxDescSegment* next;
+} TxDescSegment;
+
+
+typedef struct _TOMAL {
+	 /*  Mapping of TOMAL's HW registers. */
+        TOMALRegs* regs[TOMAL_MAX_CHANNELS];
+
+	 /*  RX buffers, descriptors, and other data. */
+        U32 maxRxBuffers[TOMAL_MAX_CHANNELS];
+        U16 rxBufferSize[TOMAL_MAX_CHANNELS];
+        RxDescSegment* rxDescSegment[TOMAL_MAX_CHANNELS];
+        RxDescSegment* oldRxSegment[TOMAL_MAX_CHANNELS];  /*  oldest non-served RX desc segment */
+
+	 /*  TX descriptors and other data. */
+        U32 maxTxBuffers[TOMAL_MAX_CHANNELS];
+	U32 pendingTxBuffers[TOMAL_MAX_CHANNELS];
+	U32 numberOfTransmittedFrames[TOMAL_MAX_CHANNELS];
+	U32 numberOfReceivedFrames[TOMAL_MAX_CHANNELS];
+        TxDescSegment* txDescSegment[TOMAL_MAX_CHANNELS];
+        TxDescSegment* oldTxSegment[TOMAL_MAX_CHANNELS]; /*  oldest non-served TX desc segment */
+        TxDescSegment* freeTxSegment[TOMAL_MAX_CHANNELS];  /*  next free TX descriptor segment */
+
+	struct net_device* netDev[TOMAL_MAX_CHANNELS];
+	spinlock_t rxLock[TOMAL_MAX_CHANNELS];
+	spinlock_t txLock[TOMAL_MAX_CHANNELS];
+	struct napi_struct napi[TOMAL_MAX_CHANNELS] ; /* 2.6.27-ism for NAPI poll */
+	int irq0;
+	int irq1;
+	int count_tx_checksum_type[4] ;
+	struct proc_dir_entry* parentDir;
+	struct proc_dir_entry* tomalDir;
+	struct proc_dir_entry* hwDir;
+	struct proc_dir_entry* swDir;
+	U32 numberOfNetrxDrops ;
+	U32 numberOfHwDrops0 ;
+	U32 numberOfHwDrops1 ;
+	U32 numberOfNotLast ;
+
+} TOMAL;
+
+
+
+typedef enum {
+	tomal_ras_none 			= 0x00,
+	tomal_ras_timeout		= 0x01,
+	tomal_ras_alloc_error		= 0x02,
+	tomal_ras_spurious_irq		= 0x03,
+	tomal_ras_unknown_critical_int	= 0x04,
+	tomal_ras_unknown_noncrit_int	= 0x05,
+	tomal_ras_ioremap_error		= 0x06,
+	tomal_ras_irq_unavailable	= 0x07,
+
+	tomal_ras_max			= 0xff
+} tomal_ras_id;
+
+
+TOMAL* __init tomal_init(void* devMapAddr,
+			struct net_device* netDev0,
+			U32 rxTotalBufferSize0,
+			U32 numTxBuffers0,
+			struct net_device* netDev1,
+			U32 rxTotalBufferSize1,
+			U32 numTxBuffers1,
+			int irq0,
+			int irq1,
+			struct proc_dir_entry* procDir);
+
+int tomal_xmit_tx_buffer(TOMAL* tomal, U8 channel, struct sk_buff* skb);
+int tomal_alloc_rx_buffers(TOMAL* tomal, U8 channel);
+int tomal_free_rx_buffers(TOMAL* tomal, U8 channel);
+#if defined(CONFIG_BGP_E10000_NAPI)
+int tomal_poll_napi(struct napi_struct * napi, int budget);
+#else
+int tomal_poll(struct net_device *netDev, int budget);
+#endif
+int tomal_process_tx_buffers(TOMAL* tomal, U8 channel, U32 txNumTransmitDesc);
+void tomal_free_rx_segments(TOMAL* tomal, U8 channel);
+void tomal_free_tx_segments(TOMAL* tomal, U8 channel);
+void tomal_free_tx_buffers(TOMAL* tomal, U8 channel);
+int tomal_alloc_rx_segments(TOMAL* tomal, U8 channel, U32 numDescriptors);
+int tomal_alloc_tx_segments(TOMAL* tomal, U8 channel, U32 numDescriptors);
+
+int tomal_soft_reset(TOMAL* tomal);
+int tomal_configure(TOMAL* tomal);
+
+
+/*  Turns all RX & TX channels off. */
+static inline void tomal_rx_tx_disable(TOMAL* tomal)
+{
+	U32 ccr = in_be32(&tomal->regs[0]->configurationCtrl);
+
+	ccr &= ~(TOMAL_CFG_CTRL_RX_MAC0 | TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 |
+                  TOMAL_CFG_CTRL_TX_MAC1);
+        out_be32(&tomal->regs[0]->configurationCtrl, ccr);
+
+	return;
+}
+
+
+/*  Turns all RX & TX channels on. */
+static inline void tomal_rx_tx_enable(TOMAL* tomal)
+{
+	out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_RX_MAC0 |
+		 TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 | TOMAL_CFG_CTRL_TX_MAC1);
+
+	return;
+}
+
+void tomal_irq_enable(TOMAL* tomal, U8 channel);
+
+
+void tomal_irq_disable(TOMAL* tomal, U8 channel);
+
+
+int tomal_pending_rx_buffers(TOMAL* tomal, U8 channel);
+int tomal_pending_tx_buffers(TOMAL* tomal, U8 channel);
+
+void tomal_exit(TOMAL* tomal);
+
+
+#endif
diff --git a/drivers/net/bgp_memcpy/Makefile b/drivers/net/bgp_memcpy/Makefile
new file mode 100644
index 0000000..89f1451
--- /dev/null
+++ b/drivers/net/bgp_memcpy/Makefile
@@ -0,0 +1,7 @@
+# Makefile for the Blue Gene/P copy to/from user wrapper (bgp_memcpy)
+
+EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__ -save-temps
+
+bgp_memcpy-y := bgp_fpu_memcpy.o
+
+obj-$(CONFIG_WRAP_COPY_TOFROM_USER) += bgp_memcpy.o
diff --git a/drivers/net/bgp_memcpy/bgp_fpu_memcpy.c b/drivers/net/bgp_memcpy/bgp_fpu_memcpy.c
new file mode 100644
index 0000000..70c0844
--- /dev/null
+++ b/drivers/net/bgp_memcpy/bgp_fpu_memcpy.c
@@ -0,0 +1,936 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ *
+ * Description: Blue Gene/P low-level driver for copy_tofrom_user through the
+ * parallel floating point unit
+ *
+ *
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/pagemap.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+#include <common/bgp_bitnumbers.h>
+//#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+#include "../bgp_network/450_tlb.h"
+/*  Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */
+#define COMPILED_TRACEMASK (0xffffffff)
+/* #define COMPILED_TRACEMASK (k_t_error) */
+
+#include <linux/KernelFxLog.h>
+
+MODULE_DESCRIPTION("BG/P memory copy through parallel floating point registers");
+MODULE_LICENSE("GPL");
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+int bgp_fpu_memcpy_tracemask = k_t_error ;
+#define TRACEN(i,x...) KernelFxLog(bgp_fpu_memcpy_tracemask & (COMPILED_TRACEMASK & (i)),x)
+#else
+#define TRACEN(i,x...)
+#endif
+
+#include "bgp_memcpy.h"
+
+#if defined(ADVENTUROUS_COPY_OPTIONS)
+enum {
+  k_force_mask = 0 ,
+  k_enable_mask = 0 ,
+  k_inhibit_fpu_in_slih = 0
+};
+#else
+enum {
+  k_force_mask = 0 ,
+  k_enable_mask = 1 ,
+  k_inhibit_fpu_in_slih = 0
+};
+#endif
+
+enum {
+  k_page_shift = PAGE_SHIFT ,
+  k_page_size = 1 << k_page_shift ,
+  k_page_offset_mask = k_page_size-1 ,
+  k_fpu_alignment  = 16 ,
+  k_fpu_align_mask = k_fpu_alignment - 1
+} ;
+
+
+static int source_alignment_statistics[k_fpu_alignment] ;
+static int dest_alignment_statistics[k_fpu_alignment] ;
+static int mutual_alignment_statistics[k_fpu_alignment] ;
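+
+/* These histograms are presumably filled by bucketing each request's source,
+ * destination, and mutual (src XOR dst) offset within a 16-byte FPU line,
+ * along the lines of the fragment below; src/dest stand for the copy operands
+ * and are hypothetical here:
+ */
+#if 0
+	source_alignment_statistics[(unsigned long) src & k_fpu_align_mask] += 1;
+	dest_alignment_statistics[(unsigned long) dest & k_fpu_align_mask] += 1;
+	mutual_alignment_statistics[((unsigned long) src ^ (unsigned long) dest)
+				    & k_fpu_align_mask] += 1;
+#endif
+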
+struct ctl_table bgp_memcpy_table[] = {
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "use_dma",
+	                .data           = &bgp_memcpy_control.use_dma,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "verify_fpu",
+	                .data           = &bgp_memcpy_control.verify_fpu,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "verify_dma",
+	                .data           = &bgp_memcpy_control.verify_dma,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "use_fpu",
+	                .data           = &bgp_memcpy_control.use_fpu,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "dma_threshold",
+	                .data           = &bgp_memcpy_control.dma_threshold,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "fpu_threshold",
+	                .data           = &bgp_memcpy_control.fpu_threshold,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "faults_until_disable",
+	                .data           = &bgp_memcpy_control.faults_until_disable,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "cycles_per_packet",
+	                .data           = &bgp_memcpy_control.cycles_per_packet,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "rate_observe_report_count",
+	                .data           = &bgp_memcpy_control.rate_observe_report_count,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "handle_pagecrossing",
+	                .data           = &bgp_memcpy_control.handle_pagecrossing,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "fpu_handle_pagecrossing_read",
+	                .data           = &bgp_memcpy_control.fpu_handle_pagecrossing_read,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "fpu_handle_pagecrossing_write",
+	                .data           = &bgp_memcpy_control.fpu_handle_pagecrossing_write,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "mask",
+	                .data           = &bgp_memcpy_control.mask,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "assist_active",
+	                .data           = &bgp_memcpy_control.assist_active,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "statistics",
+	                .data           = &bgp_dma_memcpy_statistics,
+	                .maxlen         = k_copy_statistics*sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        } ,
+          {
+                  .ctl_name       = CTL_UNNUMBERED,
+                  .procname       = "source_alignment_statistics",
+                  .data           = &source_alignment_statistics,
+                  .maxlen         = k_fpu_alignment*sizeof(int),
+                  .mode           = 0644,
+                  .proc_handler   = &proc_dointvec
+          } ,
+          {
+                  .ctl_name       = CTL_UNNUMBERED,
+                  .procname       = "dest_alignment_statistics",
+                  .data           = &dest_alignment_statistics,
+                  .maxlen         = k_fpu_alignment*sizeof(int),
+                  .mode           = 0644,
+                  .proc_handler   = &proc_dointvec
+          } ,
+          {
+                  .ctl_name       = CTL_UNNUMBERED,
+                  .procname       = "mutual_alignment_statistics",
+                  .data           = &mutual_alignment_statistics,
+                  .maxlen         = k_fpu_alignment*sizeof(int),
+                  .mode           = 0644,
+                  .proc_handler   = &proc_dointvec
+          } ,
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+          {
+                  .ctl_name       = CTL_UNNUMBERED,
+                  .procname       = "tracemask",
+                  .data           = &bgp_fpu_memcpy_tracemask,
+                  .maxlen         = sizeof(int),
+                  .mode           = 0644,
+                  .proc_handler   = &proc_dointvec
+          } ,
+#endif
+	        { 0 },
+} ;
+
+static struct ctl_path memcpy_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "copy", .ctl_name = 0, },
+	{ },
+};
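+
+/* These tables are presumably registered during module init, exposing the
+ * tuning knobs under /proc/sys/bgp/copy/ (e.g. "echo 1 >
+ * /proc/sys/bgp/copy/use_fpu" re-enables the FPU path at runtime).  A minimal
+ * sketch of that registration; the actual call is outside this hunk:
+ */
+#if 0
+static struct ctl_table_header *bgp_memcpy_sysctl_header;
+
+static int __init bgp_memcpy_sysctl_init(void)
+{
+	bgp_memcpy_sysctl_header =
+		register_sysctl_paths(memcpy_ctl_path, bgp_memcpy_table);
+	return bgp_memcpy_sysctl_header ? 0 : -ENOMEM;
+}
+#endif
+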
+bgp_memcpy_control_t bgp_memcpy_control =
+	{
+		.use_dma = 0 ,
+		.use_fpu = 0 , // We suspect some kind of interaction with interrupts which makes it occasionally not work ...
+		.dma_threshold = 10000 ,
+		.fpu_threshold = 512 ,
+		.verify_dma = 0 ,
+		.verify_fpu = 0 ,
+		.cycles_per_packet = 20 ,
+		.rate_observe_report_count = 0xffffffff ,
+		.faults_until_disable = 1 ,
+		.handle_pagecrossing = 1 ,
+		.fpu_handle_pagecrossing_read = 0 ,
+		.fpu_handle_pagecrossing_write = 0 ,
+		.mask = 1 ,
+		.assist_active = 0
+	};
+
+unsigned int bgp_dma_memcpy_statistics[k_copy_statistics] ;
+
+
+static void cause_fallback(void)
+{
+	TRACEN(k_t_request,"Turning off DH memcpy") ;
+	bgp_memcpy_control.use_fpu = 0 ;
+	dma_memcpy_statistic(k_copy_cause_fallback) ;
+}
+enum {
+	k_diag_not_mapped=0
+/* 	k_diagnose=1 */
+};
+
+enum {
+	k_exploit_doublehummer = 1,
+	k_verify_doublehummer = 1,
+	k_fixup_faulty_memcpy=1,
+	k_premark=0 ,
+	k_map_write_check=0 ,
+	k_map_read_check=0 ,
+	k_disable_after_too_many_faults=1 ,
+	k_inhibit_crosspage_write = 1 , // Set to decline writes which cross a user-space page boundary
+	k_inhibit_crosspage_read = 1 // Set to decline reads which cross a user-space page boundary
+};
+static void report_faulty_memcpy(void * dest, const void * src, unsigned long size)
+{
+	unsigned int * di = (unsigned int *) dest ;
+	const unsigned int * si = (const unsigned int *) src ;
+	unsigned char * dc = (unsigned char *) (dest) ;
+	const unsigned char * sc = (const unsigned char *) (src) ;
+	unsigned int x ;
+	unsigned int faultwordcount = 0 ;
+	if( k_disable_after_too_many_faults)
+		{
+			int faults_to_go=bgp_memcpy_control.faults_until_disable-1 ;
+			if( faults_to_go <= 0 )
+				{
+					cause_fallback() ;
+				}
+			else
+				{
+					bgp_memcpy_control.faults_until_disable=faults_to_go ;
+				}
+		}
+	dma_memcpy_statistic(k_copy_verify_miscompares) ;
+	TRACEN(k_t_error,"dest=%p src=%p size=0x%08lx",dest,src,size) ;
+	for(x=0;x<size/sizeof(unsigned int);x+=1)
+		{
+			if( di[x] != si[x] )
+				{
+					TRACEN(k_t_error,"(E) x=0x%08x di+x=%p si+x=%p di[x]=0x%08x si[x]=0x%08x",
+							x,di+x,si+x,di[x],si[x]) ;
+					if( k_fixup_faulty_memcpy) di[x]=si[x] ;
+					faultwordcount += 1 ;
+				}
+		}
+	if( dc[size-3] != sc[size-3])
+		{
+			TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+					size-3,dc+size-3,sc+size-3,dc[size-3],sc[size-3]) ;
+			if( k_fixup_faulty_memcpy) dc[size-3]=sc[size-3] ;
+		}
+	if( dc[size-2] != sc[size-2])
+		{
+			TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+					size-2,dc+size-2,sc+size-2,dc[size-2],sc[size-2]) ;
+			if( k_fixup_faulty_memcpy) dc[size-2]=sc[size-2] ;
+		}
+	if( dc[size-1] != sc[size-1])
+		{
+			TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+					size-1,dc+size-1,sc+size-1,dc[size-1],sc[size-1]) ;
+			if( k_fixup_faulty_memcpy) dc[size-1]=sc[size-1] ;
+		}
+	TRACEN(k_t_error,"%d/%ld words incorrectly copied",faultwordcount,size/sizeof(unsigned int)) ;
+
+}
+/*  Check that a 'memcpy' was accurately done ... */
+static void verify_memcpy(void * dest, const void * src, unsigned long size)
+{
+	unsigned int * di = (unsigned int *) dest ;
+	const unsigned int * si = (const unsigned int *) src ;
+	unsigned char * dc = (unsigned char *) (dest) ;
+	const unsigned char * sc = (const unsigned char *) (src) ;
+	unsigned int q = di[0] ^ si[0] ;
+	unsigned int x ;
+	dma_memcpy_statistic(k_copy_verify_attempts) ;
+	TRACEN(k_t_fpucopy,"dest=%p src=%p size=0x%08lx di[0]=0x%08x si[0]=0x%08x",dest,src,size,di[0],si[0]) ;
+	for(x=1;x<size/sizeof(unsigned int);x+=1)
+		{
+			q |= di[x] ^ si[x] ;
+		}
+	q |= (dc[size-3] ^ sc[size-3]) |(dc[size-2] ^ sc[size-2]) |(dc[size-1] ^ sc[size-1]) ;
+	if(q) report_faulty_memcpy(dest,src,size) ;
+}
+
+typedef struct { unsigned char c[128] ; } miniblock ;
+
+#define nl "\n"
+/* Returns 0 for a good copy, 1 if an exception (unmapped storage) occurred */
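+/* The loop below moves one 128-byte 'miniblock' per iteration using the
+ * eight double-hummer quad registers fr0-fr7, interleaving the loads for
+ * the next miniblock with the stores of the current one so the load and
+ * store pipes stay busy.  The prologue (labels 100-115) primes the
+ * registers and the epilogue (labels 132-147) drains them, which is why
+ * the callers pass count = miniblocks - 1. */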
+static int doublehummer_copy_unroll(void  *to, const void *from, int count)
+{
+	int x1=0x10 ;
+	int x2=0x20 ;
+	int x3=0x30 ;
+	int x4=0x40 ;
+	int x5=0x50 ;
+	int x6=0x60 ;
+	int x7=0x70 ;
+	int x8=0x80 ;
+	int xa=0xa0 ;
+	int xc=0xc0 ;
+	int xe=0xe0 ;
+	int rc ;
+        asm  volatile (
+        		"mtctr %[count]" nl
+        		"100: lfpdx  0,0,%[src]" nl
+        		"101: lfpdx  2,%[index2],%[src]" nl
+        		"102: lfpdx  4,%[index4],%[src]" nl
+        		"103: lfpdx  6,%[index6],%[src]" nl
+        		"104: lfpdx  1,%[index1],%[src]" nl
+        		"105: lfpdx  3,%[index3],%[src]" nl
+        		"106: lfpdx  5,%[index5],%[src]" nl
+        		"107: lfpdx  7,%[index7],%[src]" nl
+        		"108: stfpdx 0,0        ,%[dst]" nl
+        		"109: lfpdx  0,%[index8],%[src]" nl
+        		"110: stfpdx 2,%[index2],%[dst]" nl
+        		"111: lfpdx  2,%[indexa],%[src]" nl
+        		"112: stfpdx 4,%[index4],%[dst]" nl
+        		"113: lfpdx  4,%[indexc],%[src]" nl
+        		"114: stfpdx 6,%[index6],%[dst]" nl
+        		"115: lfpdx  6,%[indexe],%[src]" nl
+        		"bdz 1f" nl
+
+        		"0:" nl
+        		"addi %[src],%[src],128" nl
+
+        		"116: stfpdx 1,%[index1],%[dst]" nl
+        		"117: lfpdx  1,%[index1],%[src]" nl
+        		"118: stfpdx 0,%[index8],%[dst]" nl
+        		"119: lfpdx  0,%[index8],%[src]" nl
+
+        		"120: stfpdx 3,%[index3],%[dst]" nl
+        		"121: lfpdx  3,%[index3],%[src]" nl
+        		"122: stfpdx 2,%[indexa],%[dst]" nl
+        		"123: lfpdx  2,%[indexa],%[src]" nl
+
+        		"124: stfpdx 5,%[index5],%[dst]" nl
+        		"125: lfpdx  5,%[index5],%[src]" nl
+        		"126: stfpdx 4,%[indexc],%[dst]" nl
+        		"127: lfpdx  4,%[indexc],%[src]" nl
+
+        		"128: stfpdx 7,%[index7],%[dst]" nl
+        		"129: lfpdx  7,%[index7],%[src]" nl
+        		"130: stfpdx 6,%[indexe],%[dst]" nl
+        		"addi %[dst],%[dst],128" nl
+        		"131: lfpdx  6,%[indexe],%[src]" nl
+
+        		"bdnz 0b" nl
+
+
+        		"1:" nl
+        		"addi %[src],%[src],128" nl
+
+        		"132: stfpdx 1,%[index1],%[dst]" nl
+        		"133: lfpdx  1,%[index1],%[src]" nl
+        		"134: stfpdx 0,%[index8],%[dst]" nl
+
+        		"135: stfpdx 3,%[index3],%[dst]" nl
+        		"136: lfpdx  3,%[index3],%[src]" nl
+        		"137: stfpdx 2,%[indexa],%[dst]" nl
+
+        		"138: stfpdx 5,%[index5],%[dst]" nl
+        		"139: lfpdx  5,%[index5],%[src]" nl
+        		"140: stfpdx 4,%[indexc],%[dst]" nl
+
+        		"141: stfpdx 7,%[index7],%[dst]" nl
+        		"142: lfpdx  7,%[index7],%[src]" nl
+        		"143: stfpdx 6,%[indexe],%[dst]" nl
+
+        		"addi %[dst],%[dst],128" nl
+        		"144: stfpdx 1,%[index1],%[dst]" nl
+        		"145: stfpdx 3,%[index3],%[dst]" nl
+        		"146: stfpdx 5,%[index5],%[dst]" nl
+        		"147: stfpdx 7,%[index7],%[dst]" nl
+/* Following section needed to handle exceptions (user code passing addresses which SEGV) */
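+/* Each '.long faultaddr,fixup' pair emitted into __ex_table below maps a
+ * fault at one of the numbered lfpdx/stfpdx labels to label 2, where rc is
+ * set to 1 and the copy is abandoned so the caller can fall back. */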
+        		"li %[rc],0" nl
+        		"b 3f" nl
+
+        		"2:" nl
+        		"li %[rc],1" nl
+        		"3:" nl
+        		".section __ex_table,\"a\"" nl
+
+        		".align	2" nl
+        		".long 100b,2b" nl
+         		".long 101b,2b" nl
+        		".long 102b,2b" nl
+        		".long 103b,2b" nl
+         		".long 104b,2b" nl
+        		".long 105b,2b" nl
+        		".long 106b,2b" nl
+         		".long 107b,2b" nl
+        		".long 108b,2b" nl
+        		".long 109b,2b" nl
+        		".long 110b,2b" nl
+         		".long 111b,2b" nl
+        		".long 112b,2b" nl
+        		".long 113b,2b" nl
+         		".long 114b,2b" nl
+        		".long 115b,2b" nl
+        		".long 116b,2b" nl
+         		".long 117b,2b" nl
+        		".long 118b,2b" nl
+        		".long 119b,2b" nl
+        		".long 120b,2b" nl
+         		".long 121b,2b" nl
+        		".long 122b,2b" nl
+        		".long 123b,2b" nl
+         		".long 124b,2b" nl
+        		".long 125b,2b" nl
+        		".long 126b,2b" nl
+         		".long 127b,2b" nl
+        		".long 128b,2b" nl
+        		".long 129b,2b" nl
+        		".long 130b,2b" nl
+         		".long 131b,2b" nl
+        		".long 132b,2b" nl
+        		".long 133b,2b" nl
+         		".long 134b,2b" nl
+        		".long 135b,2b" nl
+        		".long 136b,2b" nl
+         		".long 137b,2b" nl
+        		".long 138b,2b" nl
+        		".long 139b,2b" nl
+        		".long 140b,2b" nl
+         		".long 141b,2b" nl
+        		".long 142b,2b" nl
+        		".long 143b,2b" nl
+         		".long 144b,2b" nl
+        		".long 145b,2b" nl
+        		".long 146b,2b" nl
+         		".long 147b,2b" nl
+        		".text" nl
+
+        		: /* Outputs */
+        		  [rc] "=b" (rc)
+        		: /* Inputs */
+        		  [dst] "b" (to),
+        		  [src] "b" (from),
+        		  [count] "r" (count),
+        		  [index1] "b" (x1),
+        		  [index2] "b" (x2),
+        		  [index3] "b" (x3),
+        		  [index4] "b" (x4),
+        		  [index5] "b" (x5),
+        		  [index6] "b" (x6),
+        		  [index7] "b" (x7),
+        		  [index8] "b" (x8),
+        		  [indexa] "b" (xa),
+        		  [indexc] "b" (xc),
+        		  [indexe] "b" (xe)
+        		: /* Clobbers */
+        		  "memory", "ctr",
+        		  "fr0","fr1","fr2","fr3",
+        		  "fr4","fr5","fr6","fr7"
+        		) ;
+
+  return rc ;
+}
+/* Block store, using t0 and t1 as temporaries because we need to preserve the complete FPU context */
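+/* A sketch of the register discipline (assuming BG/P double-hummer
+ * mnemonics): stfdx/lfdx move the primary half of fr0 through *t0 and
+ * stfsdx/lfsdx move the secondary half through *t1, so fr0 is saved on
+ * entry and restored on exit and no other FP state is disturbed. */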
+static void doublehummer_store_quads(void *dest, int count, const double *v0, const double *v1, double *t0, double *t1)
+{
+        asm  volatile (
+            "stfdx 0,0,%[t0]" nl
+        		"lfdx  0,0,%[v0]" nl
+        		"stfsdx  0,0,%[t1]" nl
+            "lfsdx  0,0,%[v1]" nl
+            "mtctr %[count]" nl
+        		"0: stfpdx 0,0,%[dest]" nl
+        		"addi %[dest],%[dest],16" nl
+        		"bdnz 0b" nl
+            "lfdx 0,0,%[t0]" nl
+            "lfsdx  0,0,%[t1]" nl
+        		: /* Outputs */
+              "=m" (*t0),
+              "=m" (*t1)
+        		: /* Inputs */
+        		  [dest] "b" (dest),
+        		  [v0] "b" (v0),
+        		  [v1] "b" (v1),
+              [t0] "b" (t0),
+              [t1] "b" (t1),
+        		  [count] "r" (count)
+        		: /* Clobbers */
+        		  "memory", "ctr"
+        		) ;
+
+}
+
+/*  Try a 'doublehummer' memcpy, return 0 if we could and 1 if we couldn't */
+static int doublehummer_memcpy(void * dest, const void * src, unsigned long size)
+{
+	if( k_exploit_doublehummer)
+		{
+			unsigned int di = (unsigned int) dest ;
+			unsigned int si = (unsigned int) src ;
+			unsigned int mutual_alignment = (di - si) & k_fpu_align_mask ;
+			unsigned int source_alignment = si & k_fpu_align_mask ;
+			unsigned int precopy_size = source_alignment ? (k_fpu_alignment - source_alignment) : 0 ;
+			unsigned int miniblock_di = di + precopy_size ;
+			unsigned int miniblock_si = si + precopy_size ;
+			unsigned int miniblock_size = size - precopy_size ;
+			unsigned int miniblock_count=miniblock_size/sizeof(miniblock) ;
+			unsigned int size_floor=miniblock_count*sizeof(miniblock) ;
+			unsigned int size_tail = size - size_floor - precopy_size ;
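+			/* Worked example (assuming 16-byte quad alignment and
+			 * 128-byte miniblocks): si ending in ...5 gives
+			 * precopy_size = 11; with size = 1000 that leaves
+			 * miniblock_size = 989, miniblock_count = 7,
+			 * size_floor = 896 and size_tail = 93. */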
+			int rc ;
+			if( mutual_alignment )
+				{
+					dma_memcpy_statistic(k_copy_unaligned_rejects) ;
+					source_alignment_statistics[source_alignment] += 1 ;
+					dest_alignment_statistics[di & k_fpu_align_mask] += 1 ;
+					mutual_alignment_statistics[mutual_alignment] += 1 ;
+					return 1 ; // Alignment between source and destination not good enough
+				}
+      /* Using FPU in a FLIH is 'too hard' */
+      if(in_irq())
+        {
+          dma_memcpy_statistic(k_in_irq) ;
+          return 1 ;
+        }
+      /* Using the FPU in a SLIH should be OK now that we have an atomicity fix for a problem in giveup_fpu */
+      if(in_softirq())
+        {
+          dma_memcpy_statistic(k_in_softirq) ;
+          if(k_inhibit_fpu_in_slih ) return 1 ;
+        }
+			/* The source and dest are mutually aligned. Do we need a 1-15 byte pre-copy to get to quad alignment ? */
+			if( precopy_size )
+				{
+					rc = __real__copy_tofrom_user(dest, src, precopy_size) ;
+					if(rc)
+						{
+							dma_memcpy_statistic(k_precopy_segv_trap) ;
+							return 1 ;
+						}
+/* 					memcpy(dest,src,precopy_size) ; */
+				}
+
+			enable_kernel_fp() ;
+
+/*  The copy should work with interrupts enabled, but whenever I tried it there were occasional errors in copying. */
+/*  TODO: Diagnose why, fix, and run the copy without disabling. Same for the 'page copy' and 'page clear' later */
+      if(k_force_mask || ( k_enable_mask && bgp_memcpy_control.mask))
+        {
+          unsigned long flags ;
+          local_irq_save(flags) ;
+          rc = doublehummer_copy_unroll((void *)miniblock_di,(void *)miniblock_si,miniblock_count-1) ;
+          local_irq_restore(flags) ;
+        }
+      else
+        {
+          rc = doublehummer_copy_unroll((void *)miniblock_di,(void *)miniblock_si,miniblock_count-1) ;
+        }
+			if( rc )
+				{
+					dma_memcpy_statistic(k_copy_segv_trap) ;
+					return 1 ;
+				}
+
+			if( size_tail )
+				{
+					 /*  TODO: Fix up what happens if this causes a 'segv' */
+					rc = __real__copy_tofrom_user((void *)(miniblock_di+size_floor), (void *)(miniblock_si+size_floor), size_tail) ;
+					if(rc)
+						{
+							dma_memcpy_statistic(k_postcopy_segv_trap) ;
+							return 1 ;
+						}
+/* 					memcpy((void *)(miniblock_di+size_floor),(void *)(miniblock_si+size_floor),size_tail) ; */
+				}
+			if( k_verify_doublehummer && bgp_memcpy_control.verify_fpu)
+				{
+					verify_memcpy(dest,src,size) ;
+				}
+			return 0 ;
+		}
+	else
+		{
+			return 1 ;
+		}
+}
+
+static unsigned int operate_vcopy(unsigned long address, void * partner_vaddr, unsigned long size)
+{
+	TRACEN(k_t_detail,"address=0x%08lx partner_vaddr=%p size=0x%08lx",address,partner_vaddr,size) ;
+	return doublehummer_memcpy(partner_vaddr,(const void *)address,size) ;
+}
+
+
+static int all_pages_mapped_read(unsigned long address, unsigned long size)
+{
+	unsigned int start_page=(address >> k_page_shift) ;
+	unsigned int end_page=((address+size-1) >> k_page_shift) ;
+	unsigned int page_count = end_page-start_page+1 ;
+	unsigned int x ;
+	if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+	if( k_inhibit_crosspage_read && page_count > 1 && 0 == bgp_memcpy_control.fpu_handle_pagecrossing_read)
+		{
+			 /*  TODO: Should be able to handle page-crossings, but have seen kernel traps related to this */
+			dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ;
+			return 1 ;
+		}
+	 /*  Defend against the possibility that the user application has posted an unmapped address */
+	for(x=0;x<page_count;x+=1)
+		{
+			int pageInt ;
+			int __user * pageIntP = (int __user *) ((start_page+x) << k_page_shift)  ;
+			if( get_user(pageInt,pageIntP) )
+				{
+					TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+					if( k_diag_not_mapped)
+					{
+						tlb_t t ;
+						unsigned int r=v_to_r_maybe((void *)address, &t) ;
+						TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+						TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+						diagnose_tlb(&t) ;
+					}
+
+					return 1;
+				}
+
+		}
+	return 0 ;
+}
+static int all_pages_mapped_write(unsigned long address, unsigned long size)
+{
+	unsigned int start_page=(address >> k_page_shift) ;
+	unsigned int end_page=((address+size-1) >> k_page_shift) ;
+	unsigned int page_count = end_page-start_page+1 ;
+	unsigned int x ;
+/* 	int pageInt ; */
+	char __user * pageCharP = (char __user *) address ;
+	if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+	if( k_inhibit_crosspage_write && page_count > 1 && 0 == bgp_memcpy_control.fpu_handle_pagecrossing_write )
+		{
+			 /*  TODO: Should be able to handle page-crossings, but have seen kernel traps related to this */
+			dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ;
+			return 1 ;
+		}
+	if(put_user(0,pageCharP))
+		{
+			TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",(start_page << k_page_shift),start_page,page_count) ;
+			if( k_diag_not_mapped)
+			{
+				tlb_t t ;
+				unsigned int r=v_to_r_maybe((void *)address, &t) ;
+				TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",(start_page << k_page_shift),start_page,page_count) ;
+				TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+				diagnose_tlb(&t) ;
+			}
+
+			return 1;
+		}
+	 /*  Defend against the possibility that the user application has posted an unmapped address */
+	for(x=1;x<page_count;x+=1)
+		{
+/* 			int pageInt ; */
+			char __user * pageCharP = (char __user *) ((start_page+x) << k_page_shift)  ;
+/* 			put_user(current_injection_used, report) ; */
+			if( put_user(0,pageCharP) )
+				{
+					TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+					if( k_diag_not_mapped)
+					{
+						tlb_t t ;
+						unsigned int r=v_to_r_maybe((void *)address, &t) ;
+						TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+						TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+						diagnose_tlb(&t) ;
+					}
+
+					return 1;
+				}
+
+		}
+	return 0 ;
+}
+
+static int instrument_copy_user_address_by_touch(unsigned long address, unsigned long size,void * partner_vaddr)
+{
+
+	if( k_map_read_check && all_pages_mapped_read(address,size))
+		{
+			dma_memcpy_statistic(k_copy_source_rejects) ;
+			return 1 ;
+		}
+	if( k_map_write_check && all_pages_mapped_write((unsigned int) partner_vaddr,size))
+		{
+			dma_memcpy_statistic(k_copy_target_rejects) ;
+			return 1 ;
+		}
+
+	 /*  Looks like we can run the transfer with the FPU */
+	return operate_vcopy(address,partner_vaddr,size) ;
+
+}
+
+static int instrument_copy_tofrom_user(unsigned long to, unsigned long from, unsigned long size)
+{
+
+	int rc=1 ;
+	TRACEN(k_t_fpucopy,"(>)") ;
+	 /*  TODO: Check by touching and poking that all pages in 'to' and 'from' are appropriately mapped, before going into the hummer loop */
+	rc= instrument_copy_user_address_by_touch(from,size,(void *)to) ;
+	TRACEN(k_t_fpucopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+
+enum {
+	k_enable_dma_memcpy = 1 // TODO: Get DMA memcopy working, and enable it here
+};
+/* Returns 0 if the copy was accelerated, nonzero if the caller must fall back to the ordinary copier */
+extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void  *to,
+                const void __user *from, unsigned long size)
+{
+	if( k_premark && bgp_memcpy_control.verify_dma) memset(to,0x11,size) ; // Mark the memory so we know if we write it
+// No advantage has yet been seen from using the DMA unit to do 'memcpy'
+//#if defined(CONFIG_BLUEGENE_DMA_MEMCPY)
+//	if( k_enable_dma_memcpy && bgp_memcpy_control.use_dma)
+//		{
+//			if( bgp_memcpy_control.mask)
+//				{
+//					unsigned long flags ;
+//					unsigned long rc ;
+//					local_irq_save(flags) ;
+//					rc = bgp_dma_instrument_copy_tofrom_user(to, from, size) ;
+//					local_irq_restore(flags) ;
+//					return rc ;
+//				}
+//			else
+//				{
+//					return bgp_dma_instrument_copy_tofrom_user(to, from, size) ;
+//				}
+//		}
+//	else
+//#endif
+		{
+			dma_memcpy_statistic(k_copy_tofrom_user_calls) ;
+			if( size > 0 && bgp_memcpy_control.use_fpu && size >= bgp_memcpy_control.fpu_threshold )
+				{
+					unsigned long rc ;
+					TRACEN(k_t_fpucopy,"to=%p from=%p size=0x%08lx",to,from,size) ;
+					rc = instrument_copy_tofrom_user((unsigned long)to,(unsigned long)from,size) ;
+					dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ;
+					return rc ;
+				}
+			dma_memcpy_statistic(k_copy_size_rejects) ;
+			return 1 ; // Not copied; acceleration disabled or size under threshold
+		}
+}
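+/* Illustrative (not part of this patch): given the linker --wrap convention
+ * implied by __real__copy_tofrom_user, the CONFIG_WRAP_COPY_TOFROM_USER
+ * wrapper is expected to look roughly like this, falling back to the real
+ * copier whenever the accelerated path declines:
+ *
+ *   unsigned long __wrap__copy_tofrom_user(void *to,
+ *                   const void __user *from, unsigned long size)
+ *   {
+ *           if (bgp_fpu_instrument_copy_tofrom_user(to, from, size) == 0)
+ *                   return 0;
+ *           return __real__copy_tofrom_user(to, from, size);
+ *   }
+ */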
+
+#if defined(CONFIG_WRAP_COPY_TOFROM_USER)
+void copy_page(void  *to, void *from)
+{
+	TRACEN(k_t_fpucopy,"to=%p from=%p",to,from) ;
+	if(bgp_memcpy_control.assist_active )
+		{
+			unsigned int miniblock_count = k_page_size / sizeof(miniblock) ;
+			enable_kernel_fp() ;
+
+			if(k_force_mask || ( k_enable_mask && bgp_memcpy_control.mask))
+			  {
+		      unsigned long flags ;
+          local_irq_save(flags) ;
+          doublehummer_copy_unroll((void *)to,(void *)from,miniblock_count-1) ;
+          local_irq_restore(flags) ;
+			  }
+			else
+			  {
+			    doublehummer_copy_unroll((void *)to,(void *)from,miniblock_count-1) ;
+			  }
+		}
+	else
+		{
+			memcpy(to,from,k_page_size) ;
+		}
+
+}
+
+static const double v=0.0 ;
+void clear_pages(void *p, int order)
+{
+	TRACEN(k_t_fpucopy,"p=%p order=%d",p,order) ;
+	if(bgp_memcpy_control.assist_active )
+		{
+			unsigned int quadcount=(k_page_size/16) << order ;
+			double t0, t1 ;
+			enable_kernel_fp() ;
+/* 			double v=0.0 ; */
+      if(k_force_mask || ( k_enable_mask && bgp_memcpy_control.mask))
+        {
+          unsigned long flags ;
+          local_irq_save(flags) ;
+          doublehummer_store_quads(p,quadcount,&v,&v, &t0, &t1) ;
+          local_irq_restore(flags) ;
+        }
+      else
+        {
+          doublehummer_store_quads(p,quadcount,&v,&v, &t0, &t1) ;
+        }
+
+
+		}
+	else
+		{
+			memset(p,0,k_page_size << order)  ;
+		}
+
+
+}
+#endif
+
+static void __init
+bgp_fpu_register_memcpy_sysctl(void)
+{
+	register_sysctl_paths(memcpy_ctl_path,bgp_memcpy_table) ;
+	TRACEN(k_t_init, "memcpy sysctl registered") ;
+
+}
+
+void __init
+bgp_fpu_memcpy_init(void)
+  {
+    bgp_fpu_register_memcpy_sysctl() ;
+  }
+
+module_init(bgp_fpu_memcpy_init);
+
diff --git a/drivers/net/bgp_memcpy/bgp_memcpy.h b/drivers/net/bgp_memcpy/bgp_memcpy.h
new file mode 100644
index 0000000..9421e93
--- /dev/null
+++ b/drivers/net/bgp_memcpy/bgp_memcpy.h
@@ -0,0 +1,206 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver copy_tofrom_user using
+ * BlueGene-specific hardware
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_MEMCPY_H__
+#define __BGP_MEMCPY_H__
+
+
+typedef struct
+{
+  int use_dma ;
+  int use_fpu ;
+  int dma_threshold ; /* Use the BGP DMA unit for copy_tofrom_user this size or larger */
+  int fpu_threshold ; /* Use the BGP FPU for copy_tofrom_user this size or larger */
+  int verify_dma ; /* Whether to verify the copy (for diagnostics) */
+  int verify_fpu ; /* Whether to verify the copy (for diagnostics) */
+  int cycles_per_packet ; /* Estimate of number of cycles per packet, for local spin before looking at counters */
+  int faults_until_disable ; /* Number of faults until we disable acceleration */
+  int rate_observe_report_count ; /* Number of times out of 256 that the rate gets displayed */
+  int handle_pagecrossing ; /* Whether the DMA version should attempt to handle page-boundary-crossings */
+  int fpu_handle_pagecrossing_read ; /* Whether the FPU version should attempt to handle page-boundary-crossings on reads */
+  int fpu_handle_pagecrossing_write ;  /* Whether the FPU version should attempt to handle page-boundary-crossings on writes */
+  int mask ; /* Whether to mask interrupts */
+  int assist_active ; /* Whether to assist copypage and clearpages */
+  /* int trace_count ; */ /* Number of trace records to cut before stopping */
+} bgp_memcpy_control_t ;
+
+extern bgp_memcpy_control_t bgp_memcpy_control ;
+
+enum {
+	k_copy_cause_fallback ,
+  k_copy_verify_miscompares ,
+  k_in_irq ,
+  k_in_softirq ,
+
+  k_copy_verify_attempts ,
+  k_copy_tofrom_user_calls ,
+	k_copy_accelerate_successes ,
+	k_copy_accelerate_rejects ,
+
+	k_copy_size_rejects ,
+	k_copy_spanpage_rejects ,
+	k_copy_crosspage_limitation_rejects ,
+	k_copy_inconsistent_tlb_1_rejects ,
+
+	k_copy_inconsistent_tlb_2_rejects ,
+	k_copy_no_counter_rejects ,
+	k_copy_source_tlb_rejects ,
+	k_copy_target_tlb_rejects ,
+
+	k_copy_source_rejects ,
+	k_copy_target_rejects ,
+	k_copy_unaligned_rejects ,
+	k_copy_tlb_touches ,
+
+	k_copy_await_idle_zero ,
+	k_copy_await_idle_low ,
+	k_copy_await_idle_high ,
+	k_copy_inconsistent_tlb_1_info ,
+
+	k_copy_inconsistent_tlb_2_info ,
+	k_copy_segv_trap ,
+	k_precopy_segv_trap ,
+	k_postcopy_segv_trap ,
+
+	k_copy_statistics
+};
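+/* These enumerators index bgp_dma_memcpy_statistics[]; the whole array is
+ * exported read-write through the "statistics" sysctl registered in the
+ * memcpy driver, so the counters can be inspected (or zeroed) from user
+ * space. */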
+
+/* The underlying assembler copy function, returns 0 iff it copies all the data */
+extern unsigned long __real__copy_tofrom_user(void  *to,
+		const void __user *from, unsigned long size) ;
+
+extern unsigned int bgp_dma_memcpy_statistics[k_copy_statistics] ;
+static inline void dma_memcpy_statistic(unsigned int X)
+{
+	bgp_dma_memcpy_statistics[X] += 1 ;
+}
+
+extern unsigned long bgp_dma_instrument_copy_tofrom_user(void  *to,
+                const void *from, unsigned long size) ;
+extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void  *to,
+                const void *from, unsigned long size) ;
+
+enum
+{
+	k_diagnose = 1
+};
+/* Items to record about a copy op, for diagnosing faults */
+typedef struct
+{
+	const void * vaddr ;
+	unsigned int tlb_v ;
+	unsigned int pageid ;
+	unsigned int xlat ;
+	unsigned int attrib ;
+} tlb_t ;
+
+typedef struct
+{
+  void * to_vaddr ;
+  const void * from_vaddr ;
+  unsigned int size ;
+  tlb_t a_tlb ;
+  tlb_t b_tlb ;
+  unsigned int a_raddress ;
+  unsigned int b_raddress ;
+  unsigned int from_check_pre ;
+  unsigned int to_check_pre ;
+  unsigned int from_check_post ;
+  unsigned int to_check_post ;
+  unsigned int frag_index ;
+} copy_op_t ;
+
+static void diagnose_tlb(tlb_t *t)
+{
+	unsigned int t0=t->pageid ;
+	unsigned int t1=t->xlat ;
+	unsigned int t2=t->attrib ;
+	TRACEN(k_t_request,"vaddr=%p tlb_v=0x%08x %08x-%08x-%08x ts=%d tid=0x%02x epn=0x%08x rpn=0x%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d uxwr=%d sxwr=%d",
+			t->vaddr,t->tlb_v,t0,t1,t2,
+			(t0 & TLB0_TS) ? 1 : 0,
+			(t2 >> 22) & 0xff ,
+			TLB0_EPN_1K(t0),
+			TLB1_ERPN(t1),TLB1_RPN_1K(t1),
+			TLB_SIZES[(t0 & 0xF0) >> 4],
+			(t2 & TLB2_W) ? 1 : 0,
+			(t2 & TLB2_I) ? 1 : 0,
+			(t2 & TLB2_M) ? 1 : 0,
+			(t2 & TLB2_G) ? 1 : 0,
+			(t2 & TLB2_U0) ? 1 : 0,
+			(t2 & TLB2_U1) ? 1 : 0,
+			(t2 & TLB2_U2) ? 1 : 0,
+			(t2 & TLB2_U3) ? 1 : 0,
+			(t0 & TLB0_V) ? 1 : 0,
+			(t2 >> 3) & 7,
+			t2 & 7
+			) ;
+}
+static void diagnose_faulty_copy(copy_op_t *c)  __attribute__((unused)) ;
+static void diagnose_faulty_copy(copy_op_t *c)
+{
+	TRACEN(k_t_request,"from_vaddr=%p to_vaddr=%p size=0x%08x a_raddress=0x%08x b_raddress=0x%08x from_check_pre=0x%08x to_check_pre=0x%08x from_check_post=0x%08x to_check_post=0x%08x frag_index=%d",
+			c->from_vaddr,c->to_vaddr,c->size,c->a_raddress,c->b_raddress,c->from_check_pre,c->to_check_pre,c->from_check_post,c->to_check_post,c->frag_index) ;
+	diagnose_tlb(&c->a_tlb) ;
+	diagnose_tlb(&c->b_tlb) ;
+}
+
+/* Find the real store address for a virtual address, by looking at the TLB and causing a TLB miss if needed */
+static unsigned int v_to_r_maybe(const void * vaddr,tlb_t *t)
+{
+     unsigned int vaddr_int=(unsigned int)vaddr ;
+     int tlbx=search_tlb_v(vaddr_int) ;
+     int pageid=get_tlb_pageid(tlbx) ;
+     int xlat=get_tlb_xlat(tlbx) ;
+     int attrib=get_tlb_attrib(tlbx) ;
+     int tlbx1=search_tlb_v((unsigned int)vaddr) ;
+     if( k_diagnose)
+	     {
+		     t->vaddr = vaddr ;
+		     t->tlb_v = tlbx1 ;
+		     t->pageid = pageid ;
+		     t->xlat = xlat ;
+		     t->attrib = attrib ;
+	     }
+     if( (tlbx == tlbx1)    /* Translation didn't change under me due to e.g. interrupt */
+		     && ((pageid & TLB0_V) != 0) /* TLB is valid */
+		     && ((tlbx & 0x20000000) != 0) /* search_tlb_v sets this bit if it found a translation */
+		     )
+	     {
+			unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB
+			unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB
+			unsigned int result = (vaddr_int-epn) + rpn ;
+			TRACEN(k_t_dmacopy,"vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x",
+					vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ;
+			return result ;
+
+	     }
+     else
+	     {
+			TRACEN(k_t_dmacopy,"vaddr=%p tlbx=0x%08x pageid=0x%08x tlbx1=0x%08x unmapped",
+					vaddr,tlbx,pageid,tlbx1) ;
+		     return (unsigned int) -1 ; // Not mapped
+	     }
+}
+
+#endif
diff --git a/drivers/net/bgp_network/450_tlb.h b/drivers/net/bgp_network/450_tlb.h
new file mode 100644
index 0000000..67f04c9
--- /dev/null
+++ b/drivers/net/bgp_network/450_tlb.h
@@ -0,0 +1,121 @@
+/* Basic access functions for 'software TLBs' in powerpc 440/450 */
+#ifndef __450_tlb_h__
+#define __450_tlb_h__
+#include <asm/bluegene_ras.h>
+
+static inline int get_tlb_pageid(int tlbindex)
+  {
+	  int rc ;
+	   /*  PPC44x_TLB_PAGEID is 0 */
+	  asm volatile( "tlbre  %[rc],%[index],0"
+                    : [rc] "=r" (rc)
+                    : [index] "r" (tlbindex)
+                    ) ;
+	  return rc ;
+ }
+
+static inline int get_tlb_xlat(int tlbindex)
+  {
+	  int rc ;
+	   /*  PPC44x_TLB_XLAT is 1 */
+	  asm volatile( "tlbre  %[rc],%[index],1"
+                    : [rc] "=r" (rc)
+                    : [index] "r" (tlbindex)
+                    ) ;
+	  return rc ;
+ }
+
+static inline int get_tlb_attrib(int tlbindex)
+  {
+	  int rc ;
+	   /*  PPC44x_TLB_ATTRIB is 2 */
+	  asm volatile( "tlbre  %[rc],%[index],2"
+                    : [rc] "=r" (rc)
+                    : [index] "r" (tlbindex)
+                    ) ;
+	  return rc ;
+ }
+
+static inline int search_tlb(unsigned int vaddr)
+  {
+    int rc ;
+     /*  PPC44x_TLB_ATTRIB is 2 */
+    asm volatile( "tlbsx  %[rc],0,%[vaddr]"
+                    : [rc] "=r" (rc)
+                    : [vaddr] "r" (vaddr)
+                    ) ;
+    return rc ;
+ }
+
+//static inline int search_tlb_validity(unsigned int vaddr)
+//{
+//  int validity ;
+//  asm volatile( "tlbsx.  %[validity],0,%[vaddr]" "\n"
+//		    "mfcr %[validity]"
+//                  :
+//                    [validity] "=r" (validity)
+//                  : [vaddr] "r" (vaddr)
+//                  : "cc"
+//                  ) ;
+//  return validity ;
+//}
+
+
+static inline int search_tlb_v(unsigned int vaddr)
+  {
+    int rc ;
+    int tlbindex ;
+    int validity ;
+     /*  PPC44x_TLB_ATTRIB is 2 */
+    asm volatile( "tlbsx.  %[tlbindex],0,%[vaddr]" "\n"
+		    "mfcr %[validity]"
+                    : [tlbindex] "=r" (tlbindex),
+                      [validity] "=r" (validity)
+                    : [vaddr] "r" (vaddr)
+                    : "cc"
+                    ) ;
+//    tlbindex = search_tlb(vaddr) ;
+//    validity=search_tlb_validity(vaddr) ;
+    rc = (validity & 0x20000000) | (tlbindex & 0xefffffff) ; // 0x20000000 (CR0[EQ] from tlbsx.) set if found; low bits (bottom 6, really) give the index
+//    TRACEN(k_t_request,"vaddr=0x%08x tlbindex=0x%08x validity=0x%08x rc=0x%08x",vaddr,tlbindex,validity,rc) ;
+    return rc ;
+ }
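+/* Callers test bit 0x20000000 of the result for 'found' and mask the low
+ * bits for the TLB index; a minimal use (mirroring v_to_r_maybe):
+ *
+ *   int r = search_tlb_v(vaddr);
+ *   if (r & 0x20000000)
+ *           examine_tlb_index(r & 0x3f);   // hypothetical consumer
+ */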
+
+#define TLB0_EPN_1K(a)   ((a)&0xFFFFFC00)    /*   EA[ 0:21] */
+#define TLB0_V          _BN(22)              /*   Valid Bit */
+#define TLB0_TS         _BN(23)              /*   Translation Address Space */
+#define TLB0_SIZE(x)    _B4(27,x)            /*   Page Size */
+#define TLB1_ERPN(e)     _B4(31,e)           /*   Extended RPN: 4 MSb's of 36b Physical Address */
+#define TLB1_RPN_1K(p)   ((p)&0xFFFFFC00)    /*   RPN[ 0:21] */
+
+#define TLB2_FAR         _BN(10)             /*   Fixed Address Region */
+#define TLB2_WL1         _BN(11)             /*   Write-Thru L1        (when CCR1[L2COBE]=1) */
+#define TLB2_IL1I        _BN(12)             /*   Inhibit L1-I caching (when CCR1[L2COBE]=1) */
+#define TLB2_IL1D        _BN(13)             /*   Inhibit L1-D caching (when CCR1[L2COBE]=1) */
+#define TLB2_IL2I        _BN(14)             /*   see below (on normal C450: Inhibit L2-I caching (when CCR1[L2COBE]=1)) */
+#define TLB2_IL2D        _BN(15)             /*   see below (on normal C450: Inhibit L2-D caching (when CCR1[L2COBE]=1)) */
+#define TLB2_U0          _BN(16)             /*   see below (undefined/available on normal C450) */
+#define TLB2_U1          _BN(17)             /*   User 1: L1 Transient Enable */
+#define TLB2_U2          _BN(18)             /*   User 2: L1 Store WithOut Allocate */
+#define TLB2_U3          _BN(19)             /*   see below (on normal C450: User 3: L3 Prefetch Inhibit (0=Enabled, 1=Inhibited)) */
+#define TLB2_W           _BN(20)             /*   Write-Thru=1, Write-Back=0 */
+#define TLB2_I           _BN(21)             /*   Cache-Inhibited=1, Cacheable=0 */
+#define TLB2_M           _BN(22)             /*   Memory Coherence Required */
+#define TLB2_G           _BN(23)             /*   Guarded */
+#define TLB2_E           _BN(24)             /*   Endian: 0=Big, 1=Little */
+#define TLB2_UX          _BN(26)             /*   User       Execute Enable */
+#define TLB2_UW          _BN(27)             /*   User       Write   Enable */
+#define TLB2_UR          _BN(28)             /*   User       Read    Enable */
+#define TLB2_SX          _BN(29)             /*   Supervisor Execute Enable */
+#define TLB2_SW          _BN(30)             /*   Supervisor Write   Enable */
+#define TLB2_SR          _BN(31)             /*   Supervisor Read    Enable */
+
+/*  BGP Specific controls */
+#define TLB2_IL3I        (TLB2_IL2I)         /*  L3 Inhibit for Instruction Fetches */
+#define TLB2_IL3D        (TLB2_IL2D)         /*  L3 Inhibit for Data Accesses */
+#define TLB2_IL2         (TLB2_U0)           /*  U0 is L2 Prefetch Inhibit */
+#define TLB2_T           (TLB2_U1)           /*  U1 Transient Enabled is supported. */
+#define TLB2_SWOA        (TLB2_U2)           /*  U2 Store WithOut Allocate is supported. */
+#define TLB2_L2_PF_OPT   (TLB2_U3)           /*  U3 is L2 Optimistic Prefetch ("Automatic" when 0) */
+
+#endif
diff --git a/drivers/net/bgp_network/bgdiagnose.h b/drivers/net/bgp_network/bgdiagnose.h
new file mode 100644
index 0000000..be20521
--- /dev/null
+++ b/drivers/net/bgp_network/bgdiagnose.h
@@ -0,0 +1,183 @@
+/*
+ * bgdiagnose.h
+ *
+ * Diagnostic routines for 450/BGP bringup
+ *
+ */
+#ifndef __DRIVERS__NET__BLUEGENE__BGDIAGNOSE_H__
+#define __DRIVERS__NET__BLUEGENE__BGDIAGNOSE_H__
+/* #include <asm/bluegene.h> */
+
+#include <linux/kernel.h>
+/* #include <asm/bgp_personality.h> */
+#include <asm/bluegene_ras.h>
+#include "450_tlb.h"
+
+/* static BGP_Personality_t* bgp_personality ; */
+
+/* static void show_personality_kernel(BGP_Personality_Kernel_t * Kernel_Config) */
+/* { */
+/* 	printk(KERN_INFO "show_personality_kernel L1Config=0x%08x L2Config=0x%08x L3Config=0x%08x L3Select=0x%08x FreqMHz=%d NodeConfig=0x%08x\n", */
+/* 			Kernel_Config->L1Config, */
+/* 			Kernel_Config->L2Config, */
+/* 			Kernel_Config->L3Config, */
+/* 			Kernel_Config->L3Select, */
+/* 			Kernel_Config->FreqMHz, */
+/* 			Kernel_Config->NodeConfig) ; */
+/*  */
+/* } */
+/* static void show_personality(void) */
+/* { */
+/* //	bgp_personality = bgcns()->getPersonalityData(); */
+/* //	show_personality_kernel(&bgp_personality->Kernel_Config) ; */
+/* } */
+
+static const char* TLB_SIZES[] = {
+    "  1K",  /*  0 */
+    "  4K",
+    " 16K",
+    " 64K",
+    "256K",
+    "  1M",
+    "?-6?",
+    " 16M",
+    "?-8?",
+    "256M",
+    "  1G",
+    "?11?",
+    "?12?",
+    "?13?",
+    "?14?",
+    "?15?"
+};
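+/* Indexed by the 4-bit page-size field of TLB word 0, extracted below as
+ * (t0 & 0xF0) >> 4. */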
+
+#include "450_tlb.h"
+
+
+static void show_tlbs(unsigned int vaddr) __attribute__ ((unused)) ;
+static void show_tlbs(unsigned int vaddr) {
+
+    int i;
+    uint32_t t0, t1, t2;
+    int tlb_index = search_tlb(vaddr) ;
+    for (i = 0; i < 64; i++) {
+	    t0 = get_tlb_pageid(i) ;
+	    t1 = get_tlb_xlat(i) ;
+	    t2 = get_tlb_attrib(i) ;
+/* 	_bgp_mftlb(i,t0,t1,t2); */
+/* 	if (t0 & TLB0_V) { */
+	{
+	    printk(KERN_INFO
+		"TLB 0x%02x %08x-%08x-%08x EPN=%08x RPN=%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d\n",
+		i,
+		t0, t1, t2,
+		TLB0_EPN_1K(t0),
+		TLB1_ERPN(t1),TLB1_RPN_1K(t1),
+		TLB_SIZES[(t0 & 0xF0) >> 4],
+		(t2 & TLB2_W) ? 1 : 0,
+		(t2 & TLB2_I) ? 1 : 0,
+		(t2 & TLB2_M) ? 1 : 0,
+		(t2 & TLB2_G) ? 1 : 0,
+		(t2 & TLB2_U0) ? 1 : 0,
+		(t2 & TLB2_U1) ? 1 : 0,
+		(t2 & TLB2_U2) ? 1 : 0,
+		(t2 & TLB2_U3) ? 1 : 0,
+		(t0 & TLB0_V) ? 1 : 0
+		);
+	}
+    }
+    printk(KERN_INFO "vaddr=0x%08x tlb_index=%d\n", vaddr,tlb_index) ;
+}
+
+static void show_tlb_for_vaddr(unsigned int vaddr) __attribute__ ((unused)) ;
+static void show_tlb_for_vaddr(unsigned int vaddr)
+{
+	    int i = search_tlb(vaddr) & 0x3f ;
+	    uint32_t t0 = get_tlb_pageid(i) ;
+	    uint32_t t1 = get_tlb_xlat(i) ;
+	    uint32_t t2  = get_tlb_attrib(i) ;
+	    printk(KERN_INFO
+		"TLB 0x%02x %08x-%08x-%08x EPN=%08x RPN=%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d\n",
+		i,
+		t0, t1, t2,
+		TLB0_EPN_1K(t0),
+		TLB1_ERPN(t1),TLB1_RPN_1K(t1),
+		TLB_SIZES[(t0 & 0xF0) >> 4],
+		(t2 & TLB2_W) ? 1 : 0,
+		(t2 & TLB2_I) ? 1 : 0,
+		(t2 & TLB2_M) ? 1 : 0,
+		(t2 & TLB2_G) ? 1 : 0,
+		(t2 & TLB2_U0) ? 1 : 0,
+		(t2 & TLB2_U1) ? 1 : 0,
+		(t2 & TLB2_U2) ? 1 : 0,
+		(t2 & TLB2_U3) ? 1 : 0,
+		(t0 & TLB0_V) ? 1 : 0
+		);
+
+}
+static inline unsigned int move_from_spr(unsigned int sprNum)
+  {
+    unsigned long sprVal = 0;
+
+    asm volatile ("mfspr %0,%1\n" : "=r"(sprVal) : "i" (sprNum));
+
+    return sprVal;
+
+  }
+static inline void show_spr(unsigned int spr, const char *name)
+  {
+    printk(KERN_INFO "%s[%03x] = 0x%08x\n",name,spr, move_from_spr(spr)) ;
+  }
+
+static inline unsigned int move_from_dcr(unsigned int dcrNum)
+{
+  unsigned long dcrVal = 0;
+
+  asm volatile("mfdcrx %0,%1": "=r" (dcrVal) : "r" (dcrNum) : "memory");
+
+  return dcrVal;
+}
+
+static inline unsigned int move_from_msr(void)
+{
+  unsigned long msrVal = 0;
+
+  asm volatile("mfmsr %0" : "=r" (msrVal) : : "memory");
+
+  return msrVal;
+}
+
+static inline void show_msr(void)
+  {
+    printk(KERN_INFO "MSR = 0x%08x\n",move_from_msr()) ;
+  }
+
+static void show_dcr_range(unsigned int start, unsigned int length) __attribute__ ((unused)) ;
+static void show_dcr_range(unsigned int start, unsigned int length)
+  {
+    unsigned int x ;
+    for( x=0;x<length;x+=8 )
+      {
+        unsigned int dcrx=start+x ;
+        printk(KERN_INFO "dcr[%04x]=[%08x %08x %08x %08x %08x %08x %08x %08x]\n",
+            start+x,
+            move_from_dcr(dcrx),move_from_dcr(dcrx+1),move_from_dcr(dcrx+2),move_from_dcr(dcrx+3),
+            move_from_dcr(dcrx+4),move_from_dcr(dcrx+5),move_from_dcr(dcrx+6),move_from_dcr(dcrx+7)
+            ) ;
+      }
+  }
+static void show_sprs(void) __attribute__ ((unused)) ;
+static void show_sprs(void)
+{
+    show_msr() ;
+    show_spr(0x3b3,"CCR0") ;
+    show_spr(0x378,"CCR1") ;
+    show_spr(0x3b2,"MMUCR") ;
+    show_spr(0x39b,"RSTCFG") ;
+/*     show_dcr_range(0x500,32) ; // _BGP_DCR_L30 */
+/*     show_dcr_range(0x540,32) ; // _BGP_DCR_L31 */
+/*     show_dcr_range(0xd00,16) ; // _BGP_DCR_DMA */
+
+  }
+
+#endif
diff --git a/drivers/net/bgp_network/bgp_net_traceflags.h b/drivers/net/bgp_network/bgp_net_traceflags.h
new file mode 100644
index 0000000..439f5d0
--- /dev/null
+++ b/drivers/net/bgp_network/bgp_net_traceflags.h
@@ -0,0 +1,57 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for collective and torus
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_NET_TRACEFLAGS_H__
+#define __BGP_NET_TRACEFLAGS_H__
+
+enum {
+  k_t_general       = 0x01 ,
+  k_t_lowvol        = 0x02 ,
+  k_t_irqflow       = 0x04 ,
+  k_t_irqflow_rcv   = 0x08 ,
+  k_t_protocol      = 0x10 ,
+  k_t_detail        = 0x20 ,
+  k_t_fifocontents  = 0x40 ,
+  k_t_toruspkt      = 0x80 ,
+  k_t_bgcolpkt      = 0x80 ,
+  k_t_init          = 0x100 ,
+  k_t_request       = 0x200 ,
+  k_t_error         = 0x400 ,
+  k_t_sync          = 0x800 ,
+  k_t_api           = 0x1000 ,
+  k_t_diagnosis     = 0x2000 ,
+  k_t_congestion    = 0x4000 ,
+  k_t_startxmit     = 0x8000 ,
+  k_t_napi          = 0x10000 ,
+  k_t_scattergather = 0x20000 ,
+  k_t_flowcontrol   = 0x40000 ,
+  k_t_entryexit     = 0x80000 ,
+  k_t_dmacopy       = 0x100000 ,
+  k_t_fpucopy       = 0x200000 ,
+  k_t_sgdiag        = 0x400000 ,
+  k_t_sgdiag_detail = 0x800000 ,
+  k_t_inject_detail = 0x1000000 ,
+  k_t_userspace     = 0x2000000
+};
+
+#endif
diff --git a/drivers/net/bgp_statistics/Makefile b/drivers/net/bgp_statistics/Makefile
new file mode 100644
index 0000000..666c9b9
--- /dev/null
+++ b/drivers/net/bgp_statistics/Makefile
@@ -0,0 +1,4 @@
+# Makefile for BlueGene collective and torus driver
+
+
+obj-$(CONFIG_BGP_STATISTICS) += bgp_stats.o
diff --git a/drivers/net/bgp_statistics/bgp_stats.c b/drivers/net/bgp_statistics/bgp_stats.c
new file mode 100644
index 0000000..38175db
--- /dev/null
+++ b/drivers/net/bgp_statistics/bgp_stats.c
@@ -0,0 +1,259 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ *
+ * Description: Statistic collection for Blue Gene low-level driver for sockets over torus
+ *
+ *
+ ********************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/bootmem.h>
+
+#include <linux/alignment_histograms.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+static int  bgp_statistics_init    (void);
+static void bgp_statistics_cleanup (void);
+
+module_init(bgp_statistics_init);
+module_exit(bgp_statistics_cleanup);
+
+
+MODULE_DESCRIPTION("BG/P statistics driver");
+MODULE_LICENSE("GPL");
+
+#ifndef CTL_UNNUMBERED
+#define CTL_UNNUMBERED -2
+#endif
+
+/*  Parameters, statistics, and debugging */
+#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM)
+struct alignment_histogram al_histogram ;
+#endif
+
+static struct ctl_path bgp_statistics_ctl_path[] = {
+		{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "statistics", .ctl_name = 0, },
+/* 	{ .procname = "torus", .ctl_name = 0, }, */
+	{ },
+};
+
+#define CTL_PARAM_EXT(Name,Var)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &(Var),              \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dointvec       \
+  }
+
+#define CTL_PARAM_EXT_LL(Name,Var)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &(Var),              \
+          .maxlen         = 2*sizeof(int),       \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dointvec       \
+  }
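+/* For example, CTL_PARAM_EXT("ah_min", al_histogram.min_size_of_interest)
+ * expands to one ctl_table entry exposing that counter read-write as
+ * /proc/sys/bgp/statistics/ah_min (path per bgp_statistics_ctl_path above);
+ * the _LL variant doubles maxlen so proc_dointvec treats a long long as a
+ * pair of ints. */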
+
+
+struct ctl_table bgp_statistics_table[] = {
+#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM)
+        CTL_PARAM_EXT("ah_min",al_histogram.min_size_of_interest) ,
+        CTL_PARAM_EXT("sah0",AL_HISTOGRAM(src_alignment_histogram_crc,0)) ,
+        CTL_PARAM_EXT("sah1",AL_HISTOGRAM(src_alignment_histogram_crc,1)) ,
+        CTL_PARAM_EXT("sah2",AL_HISTOGRAM(src_alignment_histogram_crc,2)) ,
+        CTL_PARAM_EXT("sah3",AL_HISTOGRAM(src_alignment_histogram_crc,3)) ,
+        CTL_PARAM_EXT("sah4",AL_HISTOGRAM(src_alignment_histogram_crc,4)) ,
+        CTL_PARAM_EXT("sah5",AL_HISTOGRAM(src_alignment_histogram_crc,5)) ,
+        CTL_PARAM_EXT("sah6",AL_HISTOGRAM(src_alignment_histogram_crc,6)) ,
+        CTL_PARAM_EXT("sah7",AL_HISTOGRAM(src_alignment_histogram_crc,7)) ,
+        CTL_PARAM_EXT("sah8",AL_HISTOGRAM(src_alignment_histogram_crc,8)) ,
+        CTL_PARAM_EXT("sah9",AL_HISTOGRAM(src_alignment_histogram_crc,9)) ,
+        CTL_PARAM_EXT("saha",AL_HISTOGRAM(src_alignment_histogram_crc,10)) ,
+        CTL_PARAM_EXT("sahb",AL_HISTOGRAM(src_alignment_histogram_crc,11)) ,
+        CTL_PARAM_EXT("sahc",AL_HISTOGRAM(src_alignment_histogram_crc,12)) ,
+        CTL_PARAM_EXT("sahd",AL_HISTOGRAM(src_alignment_histogram_crc,13)) ,
+        CTL_PARAM_EXT("sahe",AL_HISTOGRAM(src_alignment_histogram_crc,14)) ,
+        CTL_PARAM_EXT("sahf",AL_HISTOGRAM(src_alignment_histogram_crc,15)) ,
+        CTL_PARAM_EXT("dah0",AL_HISTOGRAM(dst_alignment_histogram_crc,0)) ,
+        CTL_PARAM_EXT("dah1",AL_HISTOGRAM(dst_alignment_histogram_crc,1)) ,
+        CTL_PARAM_EXT("dah2",AL_HISTOGRAM(dst_alignment_histogram_crc,2)) ,
+        CTL_PARAM_EXT("dah3",AL_HISTOGRAM(dst_alignment_histogram_crc,3)) ,
+        CTL_PARAM_EXT("dah4",AL_HISTOGRAM(dst_alignment_histogram_crc,4)) ,
+        CTL_PARAM_EXT("dah5",AL_HISTOGRAM(dst_alignment_histogram_crc,5)) ,
+        CTL_PARAM_EXT("dah6",AL_HISTOGRAM(dst_alignment_histogram_crc,6)) ,
+        CTL_PARAM_EXT("dah7",AL_HISTOGRAM(dst_alignment_histogram_crc,7)) ,
+        CTL_PARAM_EXT("dah8",AL_HISTOGRAM(dst_alignment_histogram_crc,8)) ,
+        CTL_PARAM_EXT("dah9",AL_HISTOGRAM(dst_alignment_histogram_crc,9)) ,
+        CTL_PARAM_EXT("daha",AL_HISTOGRAM(dst_alignment_histogram_crc,10)) ,
+        CTL_PARAM_EXT("dahb",AL_HISTOGRAM(dst_alignment_histogram_crc,11)) ,
+        CTL_PARAM_EXT("dahc",AL_HISTOGRAM(dst_alignment_histogram_crc,12)) ,
+        CTL_PARAM_EXT("dahd",AL_HISTOGRAM(dst_alignment_histogram_crc,13)) ,
+        CTL_PARAM_EXT("dahe",AL_HISTOGRAM(dst_alignment_histogram_crc,14)) ,
+        CTL_PARAM_EXT("dahf",AL_HISTOGRAM(dst_alignment_histogram_crc,15)) ,
+        CTL_PARAM_EXT("rah0",AL_HISTOGRAM(rel_alignment_histogram_crc,0)) ,
+        CTL_PARAM_EXT("rah1",AL_HISTOGRAM(rel_alignment_histogram_crc,1)) ,
+        CTL_PARAM_EXT("rah2",AL_HISTOGRAM(rel_alignment_histogram_crc,2)) ,
+        CTL_PARAM_EXT("rah3",AL_HISTOGRAM(rel_alignment_histogram_crc,3)) ,
+        CTL_PARAM_EXT("rah4",AL_HISTOGRAM(rel_alignment_histogram_crc,4)) ,
+        CTL_PARAM_EXT("rah5",AL_HISTOGRAM(rel_alignment_histogram_crc,5)) ,
+        CTL_PARAM_EXT("rah6",AL_HISTOGRAM(rel_alignment_histogram_crc,6)) ,
+        CTL_PARAM_EXT("rah7",AL_HISTOGRAM(rel_alignment_histogram_crc,7)) ,
+        CTL_PARAM_EXT("rah8",AL_HISTOGRAM(rel_alignment_histogram_crc,8)) ,
+        CTL_PARAM_EXT("rah9",AL_HISTOGRAM(rel_alignment_histogram_crc,9)) ,
+        CTL_PARAM_EXT("raha",AL_HISTOGRAM(rel_alignment_histogram_crc,10)) ,
+        CTL_PARAM_EXT("rahb",AL_HISTOGRAM(rel_alignment_histogram_crc,11)) ,
+        CTL_PARAM_EXT("rahc",AL_HISTOGRAM(rel_alignment_histogram_crc,12)) ,
+        CTL_PARAM_EXT("rahd",AL_HISTOGRAM(rel_alignment_histogram_crc,13)) ,
+        CTL_PARAM_EXT("rahe",AL_HISTOGRAM(rel_alignment_histogram_crc,14)) ,
+        CTL_PARAM_EXT("rahf",AL_HISTOGRAM(rel_alignment_histogram_crc,15)) ,
+        CTL_PARAM_EXT("scah0",AL_HISTOGRAM(src_alignment_histogram_copy,0)) ,
+        CTL_PARAM_EXT("scah1",AL_HISTOGRAM(src_alignment_histogram_copy,1)) ,
+        CTL_PARAM_EXT("scah2",AL_HISTOGRAM(src_alignment_histogram_copy,2)) ,
+        CTL_PARAM_EXT("scah3",AL_HISTOGRAM(src_alignment_histogram_copy,3)) ,
+        CTL_PARAM_EXT("scah4",AL_HISTOGRAM(src_alignment_histogram_copy,4)) ,
+        CTL_PARAM_EXT("scah5",AL_HISTOGRAM(src_alignment_histogram_copy,5)) ,
+        CTL_PARAM_EXT("scah6",AL_HISTOGRAM(src_alignment_histogram_copy,6)) ,
+        CTL_PARAM_EXT("scah7",AL_HISTOGRAM(src_alignment_histogram_copy,7)) ,
+        CTL_PARAM_EXT("scah8",AL_HISTOGRAM(src_alignment_histogram_copy,8)) ,
+        CTL_PARAM_EXT("scah9",AL_HISTOGRAM(src_alignment_histogram_copy,9)) ,
+        CTL_PARAM_EXT("scaha",AL_HISTOGRAM(src_alignment_histogram_copy,10)) ,
+        CTL_PARAM_EXT("scahb",AL_HISTOGRAM(src_alignment_histogram_copy,11)) ,
+        CTL_PARAM_EXT("scahc",AL_HISTOGRAM(src_alignment_histogram_copy,12)) ,
+        CTL_PARAM_EXT("scahd",AL_HISTOGRAM(src_alignment_histogram_copy,13)) ,
+        CTL_PARAM_EXT("scahe",AL_HISTOGRAM(src_alignment_histogram_copy,14)) ,
+        CTL_PARAM_EXT("scahf",AL_HISTOGRAM(src_alignment_histogram_copy,15)) ,
+        CTL_PARAM_EXT("dcah0",AL_HISTOGRAM(dst_alignment_histogram_copy,0)) ,
+        CTL_PARAM_EXT("dcah1",AL_HISTOGRAM(dst_alignment_histogram_copy,1)) ,
+        CTL_PARAM_EXT("dcah2",AL_HISTOGRAM(dst_alignment_histogram_copy,2)) ,
+        CTL_PARAM_EXT("dcah3",AL_HISTOGRAM(dst_alignment_histogram_copy,3)) ,
+        CTL_PARAM_EXT("dcah4",AL_HISTOGRAM(dst_alignment_histogram_copy,4)) ,
+        CTL_PARAM_EXT("dcah5",AL_HISTOGRAM(dst_alignment_histogram_copy,5)) ,
+        CTL_PARAM_EXT("dcah6",AL_HISTOGRAM(dst_alignment_histogram_copy,6)) ,
+        CTL_PARAM_EXT("dcah7",AL_HISTOGRAM(dst_alignment_histogram_copy,7)) ,
+        CTL_PARAM_EXT("dcah8",AL_HISTOGRAM(dst_alignment_histogram_copy,8)) ,
+        CTL_PARAM_EXT("dcah9",AL_HISTOGRAM(dst_alignment_histogram_copy,9)) ,
+        CTL_PARAM_EXT("dcaha",AL_HISTOGRAM(dst_alignment_histogram_copy,10)) ,
+        CTL_PARAM_EXT("dcahb",AL_HISTOGRAM(dst_alignment_histogram_copy,11)) ,
+        CTL_PARAM_EXT("dcahc",AL_HISTOGRAM(dst_alignment_histogram_copy,12)) ,
+        CTL_PARAM_EXT("dcahd",AL_HISTOGRAM(dst_alignment_histogram_copy,13)) ,
+        CTL_PARAM_EXT("dcahe",AL_HISTOGRAM(dst_alignment_histogram_copy,14)) ,
+        CTL_PARAM_EXT("dcahf",AL_HISTOGRAM(dst_alignment_histogram_copy,15)) ,
+        CTL_PARAM_EXT("rcah0",AL_HISTOGRAM(rel_alignment_histogram_copy,0)) ,
+        CTL_PARAM_EXT("rcah1",AL_HISTOGRAM(rel_alignment_histogram_copy,1)) ,
+        CTL_PARAM_EXT("rcah2",AL_HISTOGRAM(rel_alignment_histogram_copy,2)) ,
+        CTL_PARAM_EXT("rcah3",AL_HISTOGRAM(rel_alignment_histogram_copy,3)) ,
+        CTL_PARAM_EXT("rcah4",AL_HISTOGRAM(rel_alignment_histogram_copy,4)) ,
+        CTL_PARAM_EXT("rcah5",AL_HISTOGRAM(rel_alignment_histogram_copy,5)) ,
+        CTL_PARAM_EXT("rcah6",AL_HISTOGRAM(rel_alignment_histogram_copy,6)) ,
+        CTL_PARAM_EXT("rcah7",AL_HISTOGRAM(rel_alignment_histogram_copy,7)) ,
+        CTL_PARAM_EXT("rcah8",AL_HISTOGRAM(rel_alignment_histogram_copy,8)) ,
+        CTL_PARAM_EXT("rcah9",AL_HISTOGRAM(rel_alignment_histogram_copy,9)) ,
+        CTL_PARAM_EXT("rcaha",AL_HISTOGRAM(rel_alignment_histogram_copy,10)) ,
+        CTL_PARAM_EXT("rcahb",AL_HISTOGRAM(rel_alignment_histogram_copy,11)) ,
+        CTL_PARAM_EXT("rcahc",AL_HISTOGRAM(rel_alignment_histogram_copy,12)) ,
+        CTL_PARAM_EXT("rcahd",AL_HISTOGRAM(rel_alignment_histogram_copy,13)) ,
+        CTL_PARAM_EXT("rcahe",AL_HISTOGRAM(rel_alignment_histogram_copy,14)) ,
+        CTL_PARAM_EXT("rcahf",AL_HISTOGRAM(rel_alignment_histogram_copy,15)) ,
+        CTL_PARAM_EXT("tagh0",AL_HISTOGRAM(tagged,0)) ,
+        CTL_PARAM_EXT("tagh1",AL_HISTOGRAM(tagged,1)) ,
+        CTL_PARAM_EXT("tagh2",AL_HISTOGRAM(tagged,2)) ,
+        CTL_PARAM_EXT("tagh3",AL_HISTOGRAM(tagged,3)) ,
+        CTL_PARAM_EXT("tagh4",AL_HISTOGRAM(tagged,4)) ,
+        CTL_PARAM_EXT("tagh5",AL_HISTOGRAM(tagged,5)) ,
+        CTL_PARAM_EXT("tagh6",AL_HISTOGRAM(tagged,6)) ,
+        CTL_PARAM_EXT("tagh7",AL_HISTOGRAM(tagged,7)) ,
+        CTL_PARAM_EXT("tagh8",AL_HISTOGRAM(tagged,8)) ,
+        CTL_PARAM_EXT("tagh9",AL_HISTOGRAM(tagged,9)) ,
+        CTL_PARAM_EXT("tagha",AL_HISTOGRAM(tagged,10)) ,
+        CTL_PARAM_EXT("taghb",AL_HISTOGRAM(tagged,11)) ,
+        CTL_PARAM_EXT("taghc",AL_HISTOGRAM(tagged,12)) ,
+        CTL_PARAM_EXT("taghd",AL_HISTOGRAM(tagged,13)) ,
+        CTL_PARAM_EXT("taghe",AL_HISTOGRAM(tagged,14)) ,
+        CTL_PARAM_EXT("taghf",AL_HISTOGRAM(tagged,15)) ,
+        CTL_PARAM_EXT_LL("qcopy",al_histogram.qcopybytes) ,
+        CTL_PARAM_EXT_LL("copy",al_histogram.copybytes) ,
+        CTL_PARAM_EXT_LL("copyshort",al_histogram.copybytesshort) ,
+        CTL_PARAM_EXT_LL("copymisalign",al_histogram.copybytesmisalign) ,
+        CTL_PARAM_EXT_LL("copybroke",al_histogram.copybytesbroke) ,
+        CTL_PARAM_EXT_LL("crcb",al_histogram.crcbytes) ,
+        CTL_PARAM_EXT_LL("csumpartial",al_histogram.csumpartialbytes) ,
+#endif
+        { 0 },
+};
+
+
+
+static struct ctl_table_header *bgp_statistics_sysctl_header ;
+static void register_statistics_sysctl(void)
+{
+	bgp_statistics_sysctl_header = register_sysctl_paths(bgp_statistics_ctl_path,bgp_statistics_table) ;
+}
+static int bgp_statistics_init(void)
+  {
+    register_statistics_sysctl() ;
+    return 0 ;
+  }
+
+static void bgp_statistics_cleanup (void)
+{
+	if( bgp_statistics_sysctl_header )
+		unregister_sysctl_table(bgp_statistics_sysctl_header) ;
+}
+
diff --git a/drivers/net/bgp_torus/Makefile b/drivers/net/bgp_torus/Makefile
new file mode 100644
index 0000000..e7c2496
--- /dev/null
+++ b/drivers/net/bgp_torus/Makefile
@@ -0,0 +1,9 @@
+# Makefile for BlueGene collective and torus driver
+
+EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__
+
+bgp_torus-y := bgp_dma_tcp_frames.o bgp_dma_tcp.o bgtornic.o torus.o bgp_dma_tcp_diagnose.o bgp_dma_ioctl.o
+#  bgp_dma_memcpy.o would need a significant amount of work to make it functional
+# bgp_torus-$(CONFIG_BLUEGENE_DMA_MEMCPY) += bgp_dma_memcpy.o
+
+obj-$(CONFIG_BGP_TORUS) += bgp_torus.o
diff --git a/drivers/net/bgp_torus/bgp_bic_diagnosis.h b/drivers/net/bgp_torus/bgp_bic_diagnosis.h
new file mode 100644
index 0000000..4ac45ed
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_bic_diagnosis.h
@@ -0,0 +1,75 @@
+/* These are defined by the hardware. */
+#define NR_BIC_GROUPS 15
+#define NR_BIC_GINTS 32
+#define NR_BIC_CPUS 4
+
+/* 4-bit target value for target register */
+#define BIC_TARGET_MASK (0xf)
+#define BIC_TARGET_TYPE_NORMAL (1<<2)
+#define BIC_TARGET_NORMAL(cpu) (BIC_TARGET_TYPE_NORMAL|(cpu))
+#define BIC_DEFAULT_CPU 0
+
+/* Define the layout of each group's registers.
+ * This layout should be 0x80 bytes long (including pad).
+ */
+struct bic_group_regs {
+  uint32_t status;      /* 0x00  RW */
+  uint32_t rd_clr_status;     /* 0x04  RO */
+  uint32_t status_clr;      /* 0x08  WO */
+  uint32_t status_set;      /* 0x0c  WO */
+  uint32_t target[4];     /* 0x10  RW */
+  uint32_t normal[NR_BIC_CPUS];   /* 0x20  RW */
+  uint32_t critical[NR_BIC_CPUS];   /* 0x30  RW */
+  uint32_t mcheck[NR_BIC_CPUS];   /* 0x40  RW */
+  uint32_t _pad[12];      /* 0x50     */
+};
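+/* Layout check: 4 status words (0x10) + target[4] (0x10) + the normal,
+ * critical and mcheck arrays (0x30) come to 0x50, and 12 pad words bring
+ * each group to the stated 0x80; 15 groups then place hier_normal at
+ * 0x780 in bic_regs below, matching the offsets in the comments. */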
+
+/* Define the layout of the interrupt controller mem mapped regs. */
+struct bic_regs {
+  struct bic_group_regs group[NR_BIC_GROUPS];   /* 0x000 */
+  uint32_t hier_normal[NR_BIC_CPUS];      /* 0x780 */
+  uint32_t hier_critical[NR_BIC_CPUS];      /* 0x790 */
+  uint32_t hier_mcheck[NR_BIC_CPUS];      /* 0x7a0 */
+};
+
+struct bic {
+        spinlock_t mask_lock;   /* could be finer grained if necessary */
+        struct bic_regs *regs;
+} ;
+
+extern volatile struct bic bic;
+
+/* void show_bic_regs(void) ; // diagnostic 'printk' of the BIC */
+static void show_bic_group(int g, volatile struct bic_group_regs* gp) __attribute__ ((unused)) ;
+static void show_bic_group(int g, volatile struct bic_group_regs* gp)
+{
+   printk(KERN_NOTICE "bic_group_regs[%d] status=%08x target=[%08x %08x %08x %08x]\n",g,gp->status, gp->target[0], gp->target[1], gp->target[2], gp->target[3]) ;
+   printk(KERN_NOTICE "bic_group_regs[%d] normal=[%08x %08x %08x %08x] critical=[%08x %08x %08x %08x] mcheck=[%08x %08x %08x %08x]\n",g, gp->normal[0], gp->normal[1], gp->normal[2], gp->normal[3], gp->critical[0],gp->critical[1],gp->critical[2],gp->critical[3],gp->mcheck[0],gp->mcheck[1],gp->mcheck[2],gp->mcheck[3]) ;
+}
+
+static void show_bic_regs(void) __attribute__ ((unused)) ;
+static void show_bic_regs(void)
+{
+  struct bic_regs * bic_regs = bic.regs ;
+  int g ;
+  for( g = 0 ; g < NR_BIC_GROUPS ; g += 1 )
+     {
+        show_bic_group(g,bic_regs->group+g) ;
+     }
+  printk(KERN_NOTICE "BIC hier_normal=%08x %08x %08x %08x\n",
+        bic_regs->hier_normal[0],
+        bic_regs->hier_normal[1],
+        bic_regs->hier_normal[2],
+        bic_regs->hier_normal[3]) ;
+  printk(KERN_NOTICE "BIC hier_critical=%08x %08x %08x %08x\n",
+        bic_regs->hier_critical[0],
+        bic_regs->hier_critical[1],
+        bic_regs->hier_critical[2],
+        bic_regs->hier_critical[3]) ;
+  printk(KERN_NOTICE "BIC hier_mcheck=%08x %08x %08x %08x\n",
+        bic_regs->hier_mcheck[0],
+        bic_regs->hier_mcheck[1],
+        bic_regs->hier_mcheck[2],
+        bic_regs->hier_mcheck[3]) ;
+
+}
diff --git a/drivers/net/bgp_torus/bgp_dma_ioctl.c b/drivers/net/bgp_torus/bgp_dma_ioctl.c
new file mode 100644
index 0000000..7936a6b
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_ioctl.c
@@ -0,0 +1,694 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *		'ioctl' and 'procfs' support
+ *
+ ********************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/bootmem.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+#include <net/tcp_hiatus.h>
+
+#include <spi/linux_kernel_spi.h>
+
+#include "bgp_dma_tcp.h"
+
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+
+/* #define TRUST_TORUS_CRC */
+
+#define SEND_SHORT_FRAMES_INLINE
+#define ENABLE_TUNING
+
+#define ENABLE_LEARNING_ADDRESSES
+
+#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI)
+/*  Select operation with linux 'dev->poll' */
+#define TORNIC_DEV_POLL
+
+/* #if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) */
+/* #define TORNIC_STEAL_POLL_CORE */
+/* #endif */
+
+#endif
+
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+extern int sysctl_tcp_force_nodelay ;
+extern int sysctl_tcp_permit_cwnd ;
+extern int sysctl_tcp_max_cwnd ;
+#endif
+
+int sysctl_bgp_torus_backlog_floor ;
+int bgp_dma_sockproto ;  /*  Used elsewhere to control whether we try accelerated sockets */
+
+extern int bgtornic_driverparm ;  /*  Parametrisation for bringup of 'tornic' device */
+
+extern unsigned long long printk_clock_aligner ; /* Value of timebase at global barrier */
+
+static int proc_dodcr(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos) ;
+
+static int proc_dodcr_c8b(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos) ;
+
+static int proc_dodcr(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+    int rc ;
+    TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+    dma_tcp_state.tuning_recfifo_threshold=mfdcrx(0xd3a) ;
+    rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+    mtdcrx(0xd3a,dma_tcp_state.tuning_recfifo_threshold) ;
+    TRACE("(<)") ;
+    return rc ;
+  }
+
+static int proc_dodcr_c8b(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+    int rc ;
+    dumptorusdcrs() ;
+    TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+    dma_tcp_state.tuning_dcr_c8b=mfdcrx(0xc8b) ;
+    rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+    mtdcrx(0xc8b,dma_tcp_state.tuning_dcr_c8b) ;
+    TRACE("(<)") ;
+    return rc ;
+  }
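+
+/* Both handlers follow the same pattern: mirror the DCR into the
+ * dma_tcp_state field before proc_dointvec() so that a read through
+ * /proc/sys shows live hardware state, then write the (possibly updated)
+ * value back so that a write through /proc/sys takes effect. */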
+
+
+
+static struct ctl_path bgp_torus_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "torus", .ctl_name = 0, },
+	{ },
+};
+
+#define CTL_PARAM(Name,Var)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &dma_tcp_state.Var , \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dointvec       \
+  }
+
+#define CTL_PARAM_DCR(Name,Var)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &dma_tcp_state.Var , \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dodcr       \
+  }
+
+#define CTL_PARAM_DCR_C8B(Name,Var)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &dma_tcp_state.Var , \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dodcr_c8b       \
+  }
+
+#define CTL_PARAM_HWFIFO(Name,Var)                      \
+  {                                              \
+          .ctl_name       = CTL_UNNUMBERED,      \
+          .procname       = Name ,               \
+          .data           = &dma_tcp_state.Var , \
+          .maxlen         = sizeof(int),         \
+          .mode           = 0644,                \
+          .proc_handler   = &proc_dohwfifo       \
+  }
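+
+/* For example, CTL_PARAM("eager_limit",eager_limit) below expands to a
+ * table entry exposing dma_tcp_state.eager_limit as the integer sysctl
+ * /proc/sys/bgp/torus/eager_limit, handled by proc_dointvec. */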
+
+struct ctl_table bgp_dma_table[] = {
+#if defined(USE_SKB_TO_SKB)
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "dma_rec_counters",
+	                .data           = bgp_dma_tcp_counter_copies,
+	                .maxlen         = DMA_NUM_COUNTERS_PER_GROUP*sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_do_dma_rec_counters
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "flow_counter",
+	                .data           = dma_tcp_state.flow_counter,
+	                .maxlen         = k_flow_counters*sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+#endif
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "printk_clock_aligner",
+                .data           = &printk_clock_aligner,
+                .maxlen         = sizeof(unsigned long long),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "tracemask",
+                .data           = &bgp_dma_tcp_tracemask,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "backlog_floor",
+                .data           = &sysctl_bgp_torus_backlog_floor,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "sockproto",
+                .data           = &bgp_dma_sockproto,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "ethem",
+                .data           = &bgp_dma_ethem,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "tornic_driverparm",
+                .data           = &bgtornic_driverparm,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+/*         { */
+/*                 .ctl_name       = CTL_UNNUMBERED, */
+/*                 .procname       = "tornic_count", */
+/*                 .data           = &bgp_tornic_count, */
+/*                 .maxlen         = sizeof(int), */
+/*                 .mode           = 0644, */
+/*                 .proc_handler   = &proc_dointvec */
+/*         }, */
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "tx_by_core",
+                .data           = dma_tcp_state.tx_by_core,
+                .maxlen         = 4*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "tx_in_use_count",
+                .data           = dma_tcp_state.tx_in_use_count,
+                .maxlen         = (k_injecting_directions+1)*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+#if defined(TRACK_LIFETIME_IN_FIFO)
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "max_lifetime_by_direction",
+                .data           = max_lifetime_by_direction,
+                .maxlen         = (k_injecting_directions)*sizeof(unsigned long long),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+#endif
+        CTL_PARAM("configured_quarter",configured_quarter) ,
+        CTL_PARAM("active_quarter",active_quarter) ,
+        CTL_PARAM("bluegene_tcp_is_built",bluegene_tcp_is_built) ,
+        CTL_PARAM("count_no_skbuff",count_no_skbuff) ,
+#if defined(USE_SKB_TO_SKB)
+        CTL_PARAM("eager_limit",eager_limit) ,
+#endif
+#if defined(CONFIG_BGP_STATISTICS)
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "reception_fifo_histogram",
+                .data           = reception_fifo_histogram,
+                .maxlen         = 33*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "reception_hi_watermark",
+                .data           = &reception_hi_watermark,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "rtt_histogram",
+                .data           = rtt_histogram,
+                .maxlen         = 33*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "transit_histogram",
+                .data           = transit_histogram,
+                .maxlen         = 33*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "resequence_histogram",
+                .data           = dma_tcp_state.resequence_histogram,
+                .maxlen         = k_concurrent_receives*sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "bytes_sent",
+                .data           = &dma_tcp_state.bytes_sent,
+                .maxlen         = sizeof(unsigned long long),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "bytes_received",
+                .data           = &dma_tcp_state.bytes_received,
+                .maxlen         = sizeof(unsigned long long),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+#endif
+
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_hiatus_counts",
+		.data		= tcp_hiatus_counts,
+		.maxlen		= k_tcp_hiatus_reasons*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_force_nodelay",
+		.data		= &sysctl_tcp_force_nodelay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_permit_cwnd",
+		.data		= &sysctl_tcp_permit_cwnd,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "tcp_max_cwnd",
+		.data		= &sysctl_tcp_max_cwnd,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif
+
+#if defined(ENABLE_TUNING)
+        CTL_PARAM("tuning_num_packets",tuning_num_packets) ,
+        CTL_PARAM("tuning_num_empty_passes",tuning_num_empty_passes) ,
+        CTL_PARAM("tuning_non_empty_poll_delay",tuning_non_empty_poll_delay) ,
+        CTL_PARAM("tuning_poll_after_enabling",tuning_poll_after_enabling) ,
+        CTL_PARAM("tuning_run_handler_on_hwi",tuning_run_handler_on_hwi) ,
+        CTL_PARAM("tuning_clearthresh_slih",tuning_clearthresh_slih) ,
+        CTL_PARAM("tuning_clearthresh_flih",tuning_clearthresh_flih) ,
+        CTL_PARAM("tuning_disable_in_dcr",tuning_disable_in_dcr) ,
+
+        CTL_PARAM("tuning_injection_hashmask",tuning_injection_hashmask) ,
+
+        CTL_PARAM_DCR("tuning_recfifo_threshold",tuning_recfifo_threshold) ,
+
+        CTL_PARAM("tuning_exploit_reversepropose",tuning_exploit_reversepropose) ,
+        CTL_PARAM("tuning_counters_per_source",tuning_counters_per_source) ,
+        CTL_PARAM("tuning_defer_skb_until_counter",tuning_defer_skb_until_counter) ,
+        CTL_PARAM("tuning_deliver_eagerly",tuning_deliver_eagerly) ,
+        CTL_PARAM("tuning_diagnose_rst",tuning_diagnose_rst) ,
+        CTL_PARAM("tuning_select_fifo_algorithm",tuning_select_fifo_algorithm) ,
+        CTL_PARAM("tuning_min_icsk_timeout",tuning_min_icsk_timeout) ,
+        CTL_PARAM("tuning_virtual_channel",tuning_virtual_channel) ,
+
+        CTL_PARAM("tuning_enable_siw_placement",tuning_enable_siw_placement) ,
+        CTL_PARAM("tuning_prep_dcmf",tuning_prep_dcmf) ,
+
+        CTL_PARAM_DCR_C8B("tuning_dcr_c8b",tuning_dcr_c8b) ,
+#endif
+#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS)
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "tcp_scattergather_frag_limit",
+                .data           = &tcp_scattergather_frag_limit,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+        },
+#endif
+
+#if defined(KEEP_TCP_FLAG_STATS)
+        CTL_PARAM("tcp_count_fin",tcp_received_flag_count[7]) ,
+        CTL_PARAM("tcp_count_syn",tcp_received_flag_count[6]) ,
+        CTL_PARAM("tcp_count_rst",tcp_received_flag_count[5]) ,
+        CTL_PARAM("tcp_count_psh",tcp_received_flag_count[4]) ,
+        CTL_PARAM("tcp_count_ack",tcp_received_flag_count[3]) ,
+        CTL_PARAM("tcp_count_urg",tcp_received_flag_count[2]) ,
+        CTL_PARAM("tcp_count_ece",tcp_received_flag_count[1]) ,
+        CTL_PARAM("tcp_count_cwr",tcp_received_flag_count[0]) ,
+#endif
+        { 0 },
+};
+
+static void __init
+register_torus_sysctl(dma_tcp_t *dma_tcp)
+{
+	dma_tcp->sysctl_table_header=register_sysctl_paths(bgp_torus_ctl_path,bgp_dma_table) ;
+	TRACEN(k_t_init, "sysctl_table_header=%p",dma_tcp->sysctl_table_header) ;
+
+}
+
+/*  feature for exploring all-to-all performance with a device in /dev */
+static int bgpdmatcp_add_device(int major, int minor, const char* name);
+static int bgpdmatcp_device_open(struct inode *inode, struct file *filp);
+static int bgpdmatcp_device_release(struct inode *inode, struct file * filp);
+static long bgpdmatcp_device_ioctl( struct file * filp,
+                               unsigned int  cmd,   unsigned long arg);
+enum {
+	k_bgpdmatcp_major = 126 ,
+	k_bgpdmatcp_minor_nums = 1
+} ;
+
+struct bgpdmatcp_dev
+{
+  int                  major,minor;        /* device major, minor */
+  struct task_struct* current;            /* process holding device */
+  int                  signum;             /* signal to send holding process */
+  wait_queue_head_t    read_wq;
+  int                  read_complete;
+  struct semaphore     sem;                /* interruptible semaphore */
+  struct cdev          cdev;               /* embedded character device */
+};
+
+
+static struct bgpdmatcp_dev bgpdmatcp_device;
+
+
+static struct file_operations bgpdmatcp_device_fops =
+	{
+	  .owner=   THIS_MODULE,
+	  .open=    bgpdmatcp_device_open,
+	  .read =   NULL,
+	  .write=   NULL,
+	  .poll=    NULL,
+	  .unlocked_ioctl=   bgpdmatcp_device_ioctl,
+	  .release= bgpdmatcp_device_release,
+	  .mmap=    NULL,
+	};
+
+
+static int bgpdmatcp_add_device(int major,
+			     int minor,
+			     const char* devname
+			     )
+{
+  int ret;
+  dev_t devno;
+  struct bgpdmatcp_dev* dev = &bgpdmatcp_device;
+
+  TRACEN(k_t_init,"devname=%s major=%d minor=%d",devname,major,minor) ;
+  /* initialize the struct */
+  init_MUTEX (&dev->sem);
+  dev->major  = major;
+  dev->minor  = minor;
+  init_waitqueue_head(&dev->read_wq);
+  dev->read_complete = 0;
+  devno=MKDEV(major,minor);
+
+  /* register the device numbers, i.e. make them visible in /proc/devices */
+  ret=register_chrdev_region(devno,1,(char *)devname);
+
+  if (ret) {
+	printk (KERN_WARNING "bgpdmatcp: couldn't register device (%d,%d) err=%d\n",
+	      major,minor,ret);
+	return ret;
+  }
+
+  /* add cdev */
+  cdev_init(&dev->cdev,&bgpdmatcp_device_fops);
+  dev->cdev.owner=THIS_MODULE;
+  dev->cdev.ops=&bgpdmatcp_device_fops;
+  ret=cdev_add(&dev->cdev,devno,1);
+  if (ret) {
+      printk(KERN_WARNING "bgpdmatcp: couldn't register device (%d,%d), err=%d\n",
+	     major,minor,ret);
+      return ret;
+  }
+
+  /* signal to pass to the owning process; should be altered using ioctl */
+  dev->signum=-1;
+
+
+  return 0;
+}
+
+
+static int bgpdmatcp_device_open (struct inode *inode, struct file *filp)
+{
+  struct bgpdmatcp_dev *dev=container_of(inode->i_cdev,struct bgpdmatcp_dev,cdev);
+
+  /* Ensure exclusive access while recording the owning process */
+  if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+  dev->current=current;
+  filp->private_data = (void*) dev;
+  up(&dev->sem);
+
+  return 0;
+}
+
+
+
+
+
+static int bgpdmatcp_device_release (struct inode *inode, struct file * filp)
+{
+  struct bgpdmatcp_dev *dev=(struct bgpdmatcp_dev *)filp->private_data;
+
+  /*Ensure exclusive access*/
+  if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
+
+  dev->current = NULL;
+  up(&dev->sem);
+
+  return 0;
+}
+
+/* Report the counts of how often a TCP write has stalled, by stall reason */
+static void bgp_dma_diag_report_hiatus_counts(int __user * report)
+{
+	if( copy_to_user(report,tcp_hiatus_counts,k_tcp_hiatus_reasons*sizeof(int)) )
+		{
+			TRACEN(k_t_error,"copy_to_user failed") ;
+		}
+}
+
+/* Report bytes read and bytes written over the torus */
+static void bgp_dma_diag_report_transfer_counts(int __user * report)
+{
+	unsigned long uncopied = copy_to_user(report,&dma_tcp_state.bytes_received,sizeof(unsigned long long)) ;
+	uncopied |= copy_to_user(report+sizeof(unsigned long long)/sizeof(int),&dma_tcp_state.bytes_sent,sizeof(unsigned long long)) ;
+	if( uncopied )
+		{
+			TRACEN(k_t_error,"copy_to_user failed") ;
+		}
+}
+
+
+enum {
+	k_ioctl_activate = 0 ,
+	k_ioctl_wait = 1 ,
+	k_ioctl_clearcount = 2 ,
+	k_ioctl_activate_minicube = 3 ,
+	k_ioctl_wait_sync = 4 ,
+	k_ioctl_activate_to_one = 5 ,
+	k_ioctl_report_tx_queue = 6 ,
+	k_ioctl_report_hiatus_counts = 7 ,
+	k_ioctl_report_bytes_transferred = 8
+};
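+/* Illustrative user-space usage; a sketch only, since the driver does not
+ * create a /dev node itself (one would be made by hand, e.g. with
+ * 'mknod /dev/bgpdmatcp c 126 0' to match k_bgpdmatcp_major above):
+ *
+ *   int fd = open("/dev/bgpdmatcp", O_RDWR) ;
+ *   int sendBytes = 1024 ;
+ *   ioctl(fd, k_ioctl_activate, &sendBytes) ;
+ *   int demandCount = 1 ;
+ *   int rc = ioctl(fd, k_ioctl_wait_sync, &demandCount) ;
+ *   // rc is 0 once the expected transfers complete; otherwise the call
+ *   // fails with EAGAIN
+ */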
+static long bgpdmatcp_device_ioctl (
+				struct file * filp,
+				unsigned int cmd,
+				unsigned long arg)
+{
+	TRACEN(k_t_detail, "cmd=%d arg=0x%08lx",cmd,arg) ;
+
+	switch (cmd) {
+		case k_ioctl_activate :
+			{
+				int sendBytes ;
+				if( get_user(sendBytes,(int __user *)arg) )
+					{
+						return -EFAULT ;
+					}
+				if( sendBytes <= k_injection_packet_size)
+					{
+						dma_tcp_transfer_activate_sync(sendBytes) ;
+					}
+/* 				else */
+/* 					{ */
+/* 						dma_tcp_transfer_activate(sendBytes) ; */
+/* 					} */
+			}
+			break ;
+/* #if 0 */
+/* 		case k_ioctl_wait : */
+/* 			{ */
+/* 				int demandCount ; */
+/* 				int rc ; */
+/* 				if( get_user(demandCount,(int __user *)arg) ) */
+/* 					{ */
+/* 						return -EFAULT ; */
+/* 					} */
+/* 				rc = dma_tcp_transfer_wait(demandCount) ; */
+/* 				return rc ? 0 : (-EAGAIN) ; */
+/* 			} */
+/* 			break ; */
+/* #endif */
+		case k_ioctl_wait_sync :
+			{
+				int demandCount ;
+				int rc ;
+				if( get_user(demandCount,(int __user *)arg) )
+					{
+						return -EFAULT ;
+					}
+				rc = dma_tcp_transfer_wait_sync(demandCount) ;
+				return rc ? 0 : (-EAGAIN) ;
+			}
+			break ;
+		case k_ioctl_clearcount :
+			dma_tcp_transfer_clearcount() ;
+			break ;
+/* #if 0 */
+/* 		case k_ioctl_activate_minicube : */
+/* 			{ */
+/* 				int sendBytes ; */
+/* 				if( get_user(sendBytes,(int __user *)arg) ) */
+/* 					{ */
+/* 						return -EFAULT ; */
+/* 					} */
+/* 				dma_tcp_transfer_activate_minicube(sendBytes) ; */
+/* 			} */
+/* 			break ; */
+/* 		case k_ioctl_activate_to_one : */
+/* 			{ */
+/* 				int sendBytes ; */
+/* 				unsigned int tg ; */
+/* 				if( get_user(sendBytes,(int __user *)arg) ) */
+/* 					{ */
+/* 						return -EFAULT ; */
+/* 					} */
+/* 				if( get_user(tg,(int __user *)(arg+sizeof(int))) ) */
+/* 					{ */
+/* 						return -EFAULT ; */
+/* 					} */
+/* 				dma_tcp_transfer_activate_to_one(sendBytes,tg) ; */
+/* 			} */
+/* 			break ; */
+/* #endif */
+		case k_ioctl_report_tx_queue :
+			bgp_dma_diag_report_transmission_queue((int __user *)arg) ;
+			break ;
+		case k_ioctl_report_hiatus_counts :
+			bgp_dma_diag_report_hiatus_counts((int __user *)arg) ;
+			break ;
+		case k_ioctl_report_bytes_transferred :
+			bgp_dma_diag_report_transfer_counts((int __user *)arg) ;
+			break ;
+	}
+  return 0;
+}
+
+void __init
+dma_tcp_devfs_procfs_init(dma_tcp_t * dma_tcp)
+{
+    bgpdmatcp_add_device(k_bgpdmatcp_major,0,"bgpdmatcp") ;
+    register_torus_sysctl(dma_tcp) ;
+}
+
diff --git a/drivers/net/bgp_torus/bgp_dma_memcpy.c b/drivers/net/bgp_torus/bgp_dma_memcpy.c
new file mode 100644
index 0000000..e916e78
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_memcpy.c
@@ -0,0 +1,1326 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: copy_tofrom_user using the BGP DMA hardware
+ *
+ *
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pagemap.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <asm/div64.h>
+#include <linux/vmalloc.h>
+#include <asm/atomic.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+
+/* #include "bglink.h" */
+#include <spi/linux_kernel_spi.h>
+
+
+#include "bgp_dma_tcp.h"
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+#include "../bgp_network/450_tlb.h"
+#include "bgp_memcpy.h"
+
+/* Machine memory geometry */
+enum {
+	k_l1_line_size = 32 ,
+	k_page_shift = PAGE_SHIFT ,
+	k_page_size = 1 << k_page_shift ,
+	k_page_offset_mask = k_page_size-1
+};
+/* How we are going to use the hardware */
+enum {
+	k_counters_per_core = 1 ,
+	k_spinlimit = 100000 ,
+	k_requires_fp = 0 ,
+	k_my_vc_for_adaptive = k_VC_anyway
+	/* 	k_my_vc_for_adaptive = k_VC_ordering */
+};
+/* What diagnostics/verification are we going to enable */
+enum {
+/* 	k_diagnose = 0 , */
+	k_diag_not_mapped = 1 ,
+	k_fromcheck_pre = 0 ,
+	k_fromcheck_post = 0,
+	k_tocheck_pre = 0,
+	k_tocheck_post = 0 ,
+	k_check_with_crc = 1 ,
+	k_flush_target_from_l1 = 0 ,
+	k_verify_dma = 1,
+	k_fixup_faulty_memcpy=1,
+	k_map_write_check=0 ,
+	k_disable_after_too_many_faults=1
+};
+
+/* Value to let the counter drain to when it is idle; we do not want '0' because that would raise an interrupt */
+enum {
+	k_counter_idle_value = 0x00000010
+};
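+
+/* The reception counter is preloaded with (size + k_counter_idle_value)
+ * and decremented by the DMA engine as payload arrives; the poll loop in
+ * await_copy_completion() then waits for it to drain back to the idle
+ * value rather than to zero. */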
+
+
+enum {
+  k_InjectionFifoGroupMemcpy = 1 ,
+  k_ReceptionCounterGroupMemcpy = 1
+};
+
+/* For putting an 'msync' in where we don't think we should need one, but it helps initial diagnostics */
+static inline void maybe_msync(void)
+{
+	_bgp_msync() ;
+}
+/* data cache block flush, evict the given line from L1 if it is there */
+static inline void dcbf(unsigned int a0,unsigned int a1)
+{
+	  asm volatile( "dcbf %[a0],%[a1]"
+                    :
+                    : [a0] "b" (a0), [a1] "b" (a1)
+                    ) ;
+}
+static inline void dcbf0(unsigned int a)
+{
+	  asm volatile( "dcbf 0,%[a]"
+                    :
+                    : [a] "b" (a)
+                    ) ;
+}
+static void flush_l1(void * address, unsigned int length)
+{
+	unsigned int address_int=(unsigned int) address ;
+	unsigned int address_end_int=address_int+length-1 ;
+	unsigned int line_start=address_int & ~(k_l1_line_size-1) ;
+	unsigned int line_end=address_end_int & ~(k_l1_line_size-1) ;
+	unsigned int line_count=(line_end-line_start)/k_l1_line_size + 1 ;
+	unsigned int x ;
+	unsigned int flush_address=line_start;
+	for(x=0;x<line_count;x+=1)
+		{
+			dcbf0(flush_address) ;
+			flush_address += k_l1_line_size ;
+		}
+}
+typedef struct {
+	unsigned int count ;
+	atomic_t in_use[k_counters_per_core] ;
+	unsigned int pad_to_line_size[(k_l1_line_size-k_counters_per_core-1)/sizeof(unsigned int)] ;
+} core_counter_allocation_t __attribute__((aligned(32)));
+
+static core_counter_allocation_t counter_allocation[k_injecting_cores] ;
+
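+/* Counter allocation: each core round-robins through its own
+ * k_counters_per_core counters. acquire_counter() grants a counter only
+ * when its in_use count steps from 0 to 1, and returns -1 otherwise, in
+ * which case the caller is expected to fall back to a plain memcpy. */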
+static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) ;
+static int acquire_counter(void)
+{
+	unsigned int this_core=smp_processor_id();
+	core_counter_allocation_t * cci = counter_allocation + this_core ;
+	unsigned int prev_count = cci->count++ ;
+	unsigned int counter_index = prev_count & (k_counters_per_core-1) ;
+	int in_use = atomic_inc_return(cci->in_use+counter_index) ;
+	int rc=(1 == in_use) ? (counter_index + this_core*k_counters_per_core) : -1 ;
+	dma_tcp_t * dma_tcp=&dma_tcp_state ;
+	TRACEN(k_t_dmacopy,"prev_count=0x%08x counter_index=%d in_use=%d rc=%d",prev_count,counter_index,in_use,rc) ;
+	if( 1 == in_use)
+		{
+			 DMA_CounterSetValueBaseMaxHw(dma_tcp->memcpyRecCounterGroup.counter[rc].counter_hw_ptr,k_counter_idle_value,0,0x0fffffff) ;
+			 show_injection_fifo_state(dma_tcp, rc) ;
+		}
+	return rc ;
+
+}
+static void release_counter(unsigned int counter)
+{
+	unsigned int counter_index=counter % k_counters_per_core ;
+	unsigned int core_index=counter / k_counters_per_core ;
+	core_counter_allocation_t * cci = counter_allocation + core_index ;
+	TRACEN(k_t_dmacopy,"counter=%d core_index=%d counter_index=%d in_use=%d",counter,core_index,counter_index,atomic_read(cci->in_use+counter_index)) ;
+	atomic_set(cci->in_use+counter_index,0) ;
+}
+
+static void cause_fallback(void)
+{
+	TRACEN(k_t_request,"Turning off DMA memcpy") ;
+	bgp_memcpy_control.use_dma = 0 ;
+	dma_memcpy_statistic(k_copy_cause_fallback) ;
+}
+
+static unsigned int find_real_address(const void * virtual_address)
+{
+	struct page *realpage = NULL ;
+	int res ;
+        /* Fault in and pin the single page so its physical address can be read */
+	down_read(&current->mm->mmap_sem);
+	res = get_user_pages(
+		current,
+		current->mm,
+		(unsigned long) virtual_address,
+		1, /* One page */
+		0, /* intent read */
+		0, /* don't force */
+		&realpage,
+		NULL);
+	up_read(&current->mm->mmap_sem);
+
+	TRACEN(k_t_dmacopy,"find_real_address virtual_address=%p res=%d page=%p pfn=0x%08lx real_address=0x%016llx",
+			virtual_address,res,realpage,page_to_pfn(realpage),page_to_phys(realpage)) ;
+
+	if( 1 == res) /* Number of pages mapped, should be 1 for this call */
+		{
+			unsigned int rc = page_to_phys(realpage) ;
+			put_page(realpage) ;
+			return rc ;
+		}
+	return 0 ;
+
+}
+
+static unsigned int v_to_r_maybe_show(const void * vaddr)
+{
+     unsigned int vaddr_int=(unsigned int)vaddr ;
+     int tlbx=search_tlb_v(vaddr_int) ;
+     int pageid=get_tlb_pageid(tlbx) ;
+     int xlat=get_tlb_xlat(tlbx) ;
+     int attrib=get_tlb_attrib(tlbx) ;
+     int tlbx1=search_tlb_v((unsigned int)vaddr) ;
+     if( (tlbx == tlbx1)    /* Translation didn't change under me due to e.g. interrupt */
+		     && ((pageid & TLB0_V) != 0) /* TLB is valid */
+		     && ((tlbx & 0x20000000) != 0) /* search_tlb_v sets this bit if it found a translation */
+		     )
+	     {
+			unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB
+			unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB
+			unsigned int result = (vaddr_int-epn) + rpn ;
+			TRACEN(k_t_request,"vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x",
+					vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ;
+			return result ;
+
+	     }
+     else
+	     {
+			TRACEN(k_t_request,"vaddr=%p tlbx=0x%08x pageid=0x%08x tlbx1=0x%08x unmapped",
+					vaddr,tlbx,pageid,tlbx1) ;
+			tlbx=search_tlb_v(vaddr_int) ;
+			pageid=get_tlb_pageid(tlbx) ;
+			xlat=get_tlb_xlat(tlbx) ;
+			attrib=get_tlb_attrib(tlbx) ;
+			tlbx1=search_tlb_v((unsigned int)vaddr) ;
+			{
+				unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB
+				unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB
+				unsigned int result = (vaddr_int-epn) + rpn ;
+				TRACEN(k_t_request,"retry vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x",
+						vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ;
+			}
+
+		     return (unsigned int) -1 ; // Not mapped
+	     }
+}
+
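+/* Translate a virtual address by searching the TLB, reading the
+ * translation twice: if the two reads disagree (e.g. an interrupt
+ * replaced the TLB entry in between), retry once and reject the copy if
+ * the result is still unstable. An unmapped address is touched with
+ * get_user() to fault it in, and the lookup is then retried. */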
+static unsigned int v_to_r(const void * vaddr, tlb_t *t)
+{
+	unsigned int rc=v_to_r_maybe(vaddr,t) ;
+	unsigned int rc2=v_to_r_maybe(vaddr,t) ;
+	if( rc != rc2)
+		{
+			dma_memcpy_statistic(k_copy_inconsistent_tlb_1_info) ;
+			rc=rc2 ;
+			rc2=v_to_r_maybe(vaddr,t) ;
+		}
+	if( rc != rc2)
+		{
+
+			dma_memcpy_statistic(k_copy_inconsistent_tlb_1_rejects) ;
+			TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_1",vaddr,rc,rc2) ;
+			return 0xffffffff ;
+		}
+	if( 0xffffffff == rc)  // Not mapped, touch the address and see what happens
+		{
+			unsigned int pageInt ;
+			int getrc = get_user(pageInt,(unsigned int __user *)vaddr ) ;
+			_bgp_msync() ;
+			if( getrc )
+				{
+					TRACEN(k_t_general,"Unmapped : %p",vaddr) ;
+					rc =(unsigned int) -1 ; // Not mapped
+				}
+			else
+				{
+					rc=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
+					rc2=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
+					if( rc != rc2)
+						{
+							dma_memcpy_statistic(k_copy_inconsistent_tlb_2_info) ;
+							rc=rc2 ;
+							rc2=v_to_r_maybe(vaddr,t) ;
+						}
+					if( rc != rc2)
+						{
+							dma_memcpy_statistic(k_copy_inconsistent_tlb_2_rejects) ;
+							TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_2",vaddr,rc,rc2) ;
+							return 0xffffffff ;
+						}
+					dma_memcpy_statistic(k_copy_tlb_touches) ;
+				}
+		}
+	return rc ;
+}
+static unsigned int v_to_r_write(const void * vaddr, tlb_t *t)
+{
+	unsigned int rc=v_to_r_maybe(vaddr,t) ;
+	unsigned int rc2=v_to_r_maybe(vaddr,t) ;
+	if( rc != rc2)
+		{
+			dma_memcpy_statistic(k_copy_inconsistent_tlb_1_info) ;
+			rc=rc2 ;
+			rc2=v_to_r_maybe(vaddr,t) ;
+		}
+	if( rc != rc2)
+		{
+
+			dma_memcpy_statistic(k_copy_inconsistent_tlb_1_rejects) ;
+			TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_1",vaddr,rc,rc2) ;
+			return 0xffffffff ;
+		}
+	if( 0xffffffff == rc)  // Not mapped, touch the address and see what happens
+		{
+			unsigned int pageInt =0;
+			int putrc = get_user(pageInt,(unsigned int __user *)vaddr ) ;
+			_bgp_msync() ;
+			if( putrc )
+				{
+					TRACEN(k_t_general,"Unmapped : %p",vaddr) ;
+					rc =(unsigned int) -1 ; // Not mapped
+				}
+			else
+				{
+					rc=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
+					rc2=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
+					if( rc != rc2)
+						{
+							dma_memcpy_statistic(k_copy_inconsistent_tlb_2_info) ;
+							rc=rc2 ;
+							rc2=v_to_r_maybe(vaddr,t) ;
+						}
+					if( rc != rc2)
+						{
+							dma_memcpy_statistic(k_copy_inconsistent_tlb_2_rejects) ;
+							TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_2",vaddr,rc,rc2) ;
+							return 0xffffffff ;
+						}
+					dma_memcpy_statistic(k_copy_tlb_touches) ;
+				}
+		}
+	return rc ;
+}
+static inline void create_dma_descriptor_memcpy(dma_tcp_t *dma_tcp,
+                int injection_counter,
+                int reception_counter,
+                dma_addr_t dataAddr,
+                int msglen,
+                unsigned int offset,
+                DMA_InjDescriptor_t *desc
+		)
+{
+	    int ret1 __attribute__((unused));
+	    TRACEN(k_t_dmacopy , "(>) memcpying injection_counter=%d reception_counter=%d dataAddr=0x%08llx msglen=0x%08x offset=0x%08x desc=%p",injection_counter,reception_counter,dataAddr,msglen,offset,desc);
+	    if( 0 == msglen)
+		    {
+			    TRACEN(k_t_error , "(E) zero length memcpying injection_counter=%d reception_counter=%d dataAddr=0x%08llx msglen=0x%08x offset=0x%08x desc=%p",injection_counter,reception_counter,dataAddr,msglen,offset,desc);
+		    }
+	    ret1 = DMA_LocalDirectPutDescriptor( desc,
+	                                     dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+	                                     injection_counter,  /*  inj counter id */
+	                                     dataAddr,        /*  send offset */
+	                                     k_ReceptionCounterGroupMemcpy,        /*  rec ctr grp */
+	                                     reception_counter,
+	                                     offset,        /*  reception offset */
+	                                     msglen          /*  message length */
+	                                     );
+
+	    TRACEN(k_t_dmacopy , "(<) ret1=%d",ret1);
+
+}
+
+static void diagnose_injection_fifo(DMA_InjFifo_t       *f_ptr)
+{
+	int  free_space_0 = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+ 		 				     0, /* Use shadow head */
+ 		 				     0);/* use shadow tail */
+	int  free_space_1 = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+ 		 				     1, /* Use hardware head */
+ 		 				     0);/* use shadow tail */
+	TRACEN(k_t_request,"free_space_0=0x%08x free_space_1=0x%08x",free_space_0,free_space_1) ;
+}
+
+static void diagnose_injection_fifo_by_id(
+		DMA_InjFifoGroup_t    *fg_ptr,
+		int                    fifo_id
+		)
+{
+	diagnose_injection_fifo(&fg_ptr->fifos[fifo_id]) ;
+}
+
+static inline int inject_dma_descriptor_memcpy(dma_tcp_t *dma_tcp,
+		                           unsigned int desired_fifo,
+		                           DMA_InjDescriptor_t *desc)
+  {
+    int ret __attribute__((unused));
+    TRACEN(k_t_dmacopy , "(>) injecting desired_fifo=%d desc=%p",desired_fifo,desc);
+    maybe_msync() ;
+    ret = DMA_InjFifoInjectDescriptorById( &dma_tcp->memcpyInjFifoGroupFrames,
+                                            dma_tcp->memcpyInjFifoFramesIds[desired_fifo],
+                                            desc );
+    maybe_msync() ;
+     if(ret != 1 )
+ 	    {
+ 		    TRACEN(k_t_error,"(!!!) ret=%d",ret) ;
+ 		    diagnose_injection_fifo_by_id(
+ 				    &dma_tcp->memcpyInjFifoGroupFrames,
+                                    dma_tcp->memcpyInjFifoFramesIds[desired_fifo]
+                                                                    ) ;
+
+ 	    }
+
+    TRACEN(k_t_general , "(<) ret=%d",ret);
+    return 1 ;
+
+  }
+static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) ;
+static int instrument_copy_user_address_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,const void * partner_vaddr,copy_op_t *c) ;
+
+typedef struct {
+	void * address ;
+	const void * partner_address ;
+	unsigned int size ;
+} memcpy_control;
+
+static unsigned int dma_copy_partial(dma_tcp_t * dma_tcp,unsigned int counter_index, memcpy_control * mc,copy_op_t *c)
+{
+	void * address = mc->address ;
+	const void * partner_address = mc->partner_address ;
+	unsigned int size = mc->size ;
+	unsigned int address_int = (unsigned int) address ;
+	unsigned int partner_address_int = (unsigned int ) partner_address ;
+
+	unsigned int address_offset=address_int & k_page_offset_mask ;
+	unsigned int partner_address_offset=partner_address_int & k_page_offset_mask ;
+	unsigned int lim_address=min(size,k_page_size-address_offset) ;
+	unsigned int lim_partner_address=min(size,k_page_size-partner_address_offset) ;
+	unsigned int lim_size=min(lim_address,lim_partner_address) ;
+	if( k_diagnose) c->frag_index += 1;
+
+	TRACEN(k_t_dmacopy,"address=%p partner_address=%p size=0x%08x lim_size=0x%05x",
+			address,partner_address,size,lim_size) ;
+
+	mc->address = address+lim_size ;
+	mc->partner_address = partner_address+lim_size ;
+	mc->size = size-lim_size ;
+
+	return instrument_copy_user_address_within_page(dma_tcp,counter_index,address,lim_size,partner_address,c) ;
+}
+
+/* return 0 iff the range described fits within one page */
+static int crosses_page_boundary(const void * address, unsigned int size)
+{
+	unsigned int a=(unsigned int) address ;
+	unsigned int ae = a+size-1 ;
+	return (ae >> k_page_shift ) - (a >> k_page_shift) ;
+}
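+/* e.g. with 4KB pages, address 0x0ffc and size 8 give an end address of
+ * 0x1003, so the result is (0x1 - 0x0) = 1: the range straddles a page
+ * boundary. */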
+static unsigned int dma_copy_full_singlepage(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address,const void * partner_address,unsigned int size,copy_op_t *c)
+{
+	unsigned int rc ;
+	TRACEN(k_t_dmacopy,"(>) address=%p partner_address=%p size=0x%08x",
+			address,partner_address,size) ;
+	rc=instrument_copy_user_address_within_page(dma_tcp,counter_index,address,size,partner_address,c) ;
+	TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+static unsigned int dma_copy_full(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address,const void * partner_address,unsigned int size,copy_op_t *c)
+{
+	unsigned int rc=0 ;
+	memcpy_control mc ;
+	TRACEN(k_t_dmacopy,"(>) address=%p partner_address=%p size=0x%08x",
+			address,partner_address,size) ;
+	mc.address=address ;
+	mc.partner_address=partner_address ;
+	mc.size=size ;
+	while(mc.size != 0 && rc == 0)
+		{
+			rc |= dma_copy_partial(dma_tcp,counter_index,&mc,c) ;
+		}
+	TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+static unsigned int dma_copy_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index, unsigned int real_address, unsigned int partner_real_address, unsigned int size,copy_op_t *c)
+{
+	unsigned int full_frame_count=size / k_torus_link_payload_size ;
+	unsigned int full_frame_size = full_frame_count * k_torus_link_payload_size ;
+	unsigned int trailing_frame_size = size - full_frame_size ;
+	unsigned int rc=0 ;
+
+	DMA_InjDescriptor_t desc ;
+	TRACEN(k_t_dmacopy,"(>) counter_index=%d real_address=0x%08x partner_real_address=0x%08x size=0x%05x full_frame_count=%d full_frame_size=0x%08x trailing_frame_size=0x%08x",
+			counter_index,real_address,partner_real_address,size,full_frame_count,full_frame_size,trailing_frame_size) ;
+	if( k_requires_fp)
+		{
+			enable_kernel_fp() ;
+		}
+	if( full_frame_size > 0 )
+		{
+			create_dma_descriptor_memcpy(dma_tcp,0,counter_index,partner_real_address,full_frame_size,real_address,&desc) ;
+			inject_dma_descriptor_memcpy(dma_tcp,counter_index,&desc) ;
+			rc = 1 ;
+		}
+	if( trailing_frame_size > 0 )
+		{
+			show_injection_fifo_state(dma_tcp,counter_index) ;
+			create_dma_descriptor_memcpy(dma_tcp,0,counter_index,partner_real_address+full_frame_size,trailing_frame_size,real_address + full_frame_size,&desc) ;
+			inject_dma_descriptor_memcpy(dma_tcp,counter_index,&desc) ;
+			rc+=1 ;
+		}
+	return rc ;
+}
+
+static void spin_idle(unsigned int idlecount)
+{
+	unsigned int x ;
+	for(x=0;x<idlecount;x+=1)
+		{
+			asm volatile("nop;");
+		}
+}
+
+/* Engage in least-squares regression to estimate data rates */
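+/* For samples (x_i, y_i) the accumulated sums feed the usual normal
+ * equations: det = S1*Sxx - Sx^2, slope numerator m0 = S1*Sxy - Sx*Sy,
+ * intercept numerator m1 = Sxx*Sy - Sx*Sxy; q0 = m0/det and q1 = m1/det
+ * are then the fitted rate and offset reported below. */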
+dma_statistic_t bgp_dma_rate ;
+static void rate_observe(dma_statistic_t * st,int x,int y)
+{
+  int s1 = st->s1 + 1;
+  int sx = st->sx + x;
+  long long int sxx = st->sxx + x*x ;
+  int sy = st->sy + y ;
+  long long int sxy = st->sxy + x*y ;
+
+
+  st->s1 = s1 ;
+  st->sx = sx ;
+  st->sxx = sxx ;
+  st->sy = sy ;
+  st->sxy = sxy ;
+
+  if( ((s1 >> 1) & 0xff ) <= bgp_memcpy_control.rate_observe_report_count ) /* Sample a few */
+	  {
+		  long long det=s1*sxx-((long long)sx)*sx ;
+		  long long m0 = s1*sxy - ((long long)sx)*sy ;
+		  long long m1 = sxx*sy -sx*sxy ;
+		  unsigned long long q0 = m0 ;
+		  unsigned long long q1 = m1 ;
+		  unsigned int uidet = det ;
+		  if( uidet != 0)
+			  {
+				  do_div(q0,uidet) ;
+				  do_div(q1,uidet) ;
+			  }
+		  else
+			  {
+				  q0 = 0 ;
+				  q1 = 0 ;
+			  }
+
+		  TRACEN(k_t_request,"x=%d y=%d s1=%d sx=%d sxx=%lld sy=%d sxy=%lld det=%lld m0=%lld m1=%lld q0=%lld q1=%lld",
+				  x,y,s1,sx,sxx,sy,sxy,det,m0,m1,q0,q1) ;
+	  }
+
+}
+static int await_copy_completion(dma_tcp_t * dma_tcp,unsigned int counter_index, unsigned int size )
+{
+	int rc=0 ;
+       unsigned int  fifo_current_head =
+	(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+       unsigned int fifo_initial_head = fifo_current_head ;
+       unsigned int  fifo_tail =
+	(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+       unsigned int spincount = 0 ;
+       unsigned int initial_rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
+       unsigned int idlecount=bgp_memcpy_control.cycles_per_packet*size/256 ;
+	TRACEN(k_t_dmacopy,"(>) counter_index=%d size=0x%08x fifo_current_head=0x%08x fifo_tail=0x%08x initial_rec_counter_val=%d idlecount=%d",
+			counter_index,size,fifo_current_head,fifo_tail,initial_rec_counter_val,idlecount) ;
+	show_injection_fifo_state(dma_tcp,counter_index) ;
+	spin_idle(idlecount) ;
+	maybe_msync() ;
+	{
+		int rec_counter_after_idle=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
+		int rec_counter_val = rec_counter_after_idle ;
+		if( rec_counter_after_idle > 0)
+			  {
+				  rate_observe(&bgp_dma_rate, 0,0) ;
+				  rate_observe(&bgp_dma_rate, idlecount,initial_rec_counter_val-rec_counter_after_idle) ;
+			  }
+/* 		while(fifo_current_head != fifo_tail && spincount < k_spinlimit ) */
+/* 			{ */
+/* 			       fifo_current_head = */
+/* 				(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; */
+/* 	//			fifo_current_tail = */
+/* 	//			(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; */
+/* 				spincount += 1 ; */
+/* 			} */
+		while( rec_counter_val > k_counter_idle_value && spincount < k_spinlimit )
+			{
+				maybe_msync() ;
+				rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
+				spincount += 1 ;
+			}
+		maybe_msync() ;
+		DMA_CounterSetDisableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ;
+	       fifo_current_head =
+		(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+		{
+/* 		unsigned int rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; */
+		dma_memcpy_statistic((0==spincount) ? k_copy_await_idle_zero : ((1==spincount) ? k_copy_await_idle_high : k_copy_await_idle_low)) ;
+		TRACEN(k_t_dmacopy,
+				"size=0x%08x fifo_initial_head=0x%08x fifo_current_head=0x%08x fifo_tail=0x%08x initial_rec=%d after_idle=%d rec=%d spincount=%d idlecount=%d",
+				size,fifo_initial_head,fifo_current_head,fifo_tail,initial_rec_counter_val,rec_counter_after_idle,rec_counter_val,spincount,idlecount) ;
+		if( fifo_current_head != fifo_tail || rec_counter_val != k_counter_idle_value)
+			{
+				rc=1 ;
+				TRACEN(k_t_error,"(E) fifo_current_head=0x%08x fifo_tail=0x%08x spincount=%d rec_counter_val=%d",
+						fifo_current_head,fifo_tail,spincount,rec_counter_val) ;
+			}
+		TRACEN(k_t_dmacopy,"(<) rc=%d fifo_current_head=0x%08x fifo_tail=0x%08x spincount=%d rec_counter_val=%d",rc,fifo_current_head,fifo_tail,spincount,rec_counter_val) ;
+		}
+	}
+	return rc ;
+}
+
+static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index)
+{
+       unsigned int  fifo_current_head =
+	(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+       unsigned int  fifo_current_tail =
+	(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+       unsigned int rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
+       unsigned int rec_counter_base=DMA_CounterGetBaseHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr) ;
+       unsigned int rec_counter_max=DMA_CounterGetMaxHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr) ;
+       unsigned int enabled=DMA_CounterGetEnabled(&dma_tcp->memcpyRecCounterGroup,0) ;
+	TRACEN(k_t_dmacopy,"counter_index=%d fifo_current_head=0x%08x fifo_current_tail=0x%08x rec_counter_val=0x%08x base=0x%08x max=0x%08x enabled=0x%08x",
+			counter_index,fifo_current_head,fifo_current_tail,rec_counter_val,rec_counter_base,rec_counter_max,enabled) ;
+
+}
+
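+/* One step of a Galois LFSR over GF(2): shift the seed left one bit and
+ * XOR in the CRC-32 (IEEE 802.3) polynomial when the bit shifted out was
+ * set. Iterating this yields a pseudo-random binary sequence. */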
+static inline int next_prbs(int seed)
+{
+	int ncmask = seed >> 31 ;  /*  0x00000000 or 0xffffffff */
+	return (seed << 1) ^ (0x04C11DB7 & ncmask) ;   /*  CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */
+}
+
+static inline unsigned int rc_revise(unsigned int X0, unsigned int X1)
+{
+	if(k_check_with_crc)
+		{
+			return next_prbs(X0) ^ X1 ;
+		}
+	else
+		{
+			return X0+X1 ;
+		}
+
+}
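+/* Fold a region of memory into a single word, either with the CRC-style
+ * feedback above or by simple summation, so that source and destination
+ * can be compared cheaply before and after a DMA copy. */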
+static unsigned int region_check_int(const unsigned int * ai, unsigned int intcount)
+{
+	unsigned int x ;
+	unsigned int rc=0 ;
+	for(x=0;x<intcount;x+=1)
+		{
+			rc=rc_revise(rc,*(ai++)) ;
+		}
+	return rc ;
+
+}
+static unsigned int region_check(const void * addr, unsigned int size)
+{
+	const unsigned int * ai = (const unsigned int *) addr ;
+	unsigned int intcount = size/sizeof(int) ;
+	unsigned int tailcount = size % sizeof(int) ;
+	unsigned int rc = region_check_int(ai,intcount) ;
+	if(tailcount )
+		{
+			const unsigned char * ac = (const unsigned char *) addr ;
+			unsigned int tail = (ac[size-3] << 16) | (ac[size-2] << 8) | ac[size-1] ;
+			rc=rc_revise(rc,tail) ;
+		}
+	return rc ;
+
+}
+static void report_faulty_memcpy(void * dest, const void * src, unsigned long size,copy_op_t *c)
+{
+	unsigned int * di = (unsigned int *) dest ;
+	const unsigned int * si = (const unsigned int *) src ;
+	unsigned char * dc = (unsigned char *) (dest) ;
+	const unsigned char * sc = (const unsigned char *) (src) ;
+	unsigned int x ;
+	unsigned int faultwordcount = 0 ;
+	unsigned int zsourcecount = 0 ;
+	v_to_r_maybe_show(dest) ;
+	v_to_r_maybe_show(src) ;
+	c->to_check_post=region_check(dest,size) ;
+	if( k_disable_after_too_many_faults)
+		{
+			int faults_to_go=bgp_memcpy_control.faults_until_disable-1 ;
+			if( faults_to_go <= 0 )
+				{
+					cause_fallback() ;
+				}
+			else
+				{
+					bgp_memcpy_control.faults_until_disable=faults_to_go ;
+				}
+		}
+	dma_memcpy_statistic(k_copy_verify_miscompares) ;
+	TRACEN(k_t_error,"dest=%p src=%p size=0x%08lx",dest,src,size) ;
+	for(x=0;x<size/sizeof(unsigned int);x+=1)
+		{
+			unsigned int sx = si[x] ;
+			unsigned int dx = di[x] ;
+			zsourcecount += (0 == sx) ;
+			if( dx != sx )
+				{
+					if( faultwordcount < 10 )
+						{
+							TRACEN(k_t_error,"(E) x=0x%08x di+x=%p si+x=%p di[x]=0x%08x si[x]=0x%08x",
+									x,di+x,si+x,dx,sx) ;
+						}
+					if( k_fixup_faulty_memcpy) di[x]=sx ;
+					faultwordcount += 1 ;
+				}
+		}
+	if( dc[size-3] != sc[size-3])
+		{
+			TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+					size-3,dc+size-3,sc+size-3,dc[size-3],sc[size-3]) ;
+			if( k_fixup_faulty_memcpy) dc[size-3]=sc[size-3] ;
+		}
+	if( dc[size-2] != sc[size-2])
+		{
+			TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+					size-2,dc+size-2,sc+size-2,dc[size-2],sc[size-2]) ;
+			if( k_fixup_faulty_memcpy) dc[size-2]=sc[size-2] ;
+		}
+	if( dc[size-1] != sc[size-1])
+		{
+			TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+					size-1,dc+size-1,sc+size-1,dc[size-1],sc[size-1]) ;
+			if( k_fixup_faulty_memcpy) dc[size-1]=sc[size-1] ;
+		}
+	TRACEN(k_t_error,"%d/%ld words incorrectly copied, %d sourcewords were zero",faultwordcount,size/sizeof(unsigned int),zsourcecount) ;
+	v_to_r_maybe_show(dest) ;
+	v_to_r_maybe_show(src) ;
+	show_stack(NULL,0) ;
+	c->from_check_post=region_check(src,size) ;
+	diagnose_faulty_copy(c) ;
+}
+/*  Check that a 'memcpy' was accurately done ... */
+static int verify_memcpy(void * dest, const void * src, unsigned long size,copy_op_t *c)
+{
+	unsigned int * di = (unsigned int *) dest ;
+	const unsigned int * si = (const unsigned int *) src ;
+	unsigned char * dc = (unsigned char *) (dest) ;
+	const unsigned char * sc = (const unsigned char *) (src) ;
+	unsigned int q = di[0] ^ si[0] ;
+	unsigned int x ;
+	dma_memcpy_statistic(k_copy_verify_attempts) ;
+	TRACEN(k_t_dmacopy,"dest=%p src=%p size=0x%08lx di[0]=0x%08x si[0]=0x%08x",dest,src,size,di[0],si[0]) ;
+	for(x=1;x<size/sizeof(unsigned int);x+=1)
+		{
+			q |= *(++di) ^ *(++si) ;
+		}
+	q |= (dc[size-3] ^ sc[size-3]) |(dc[size-2] ^ sc[size-2]) |(dc[size-1] ^ sc[size-1]) ;
+	if(q) report_faulty_memcpy(dest,src,size,c) ;
+	return q ;
+}
+
+static int instrument_copy_user_address_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,const void * partner_vaddr,copy_op_t *c)
+{
+	unsigned int addr_int =(unsigned int) address ;
+	unsigned int start_page=(addr_int >> k_page_shift) ;
+	unsigned int end_page=((addr_int+size-1) >> k_page_shift) ;
+	unsigned int partner_address=(unsigned int) partner_vaddr ;
+	unsigned int partner_start_page=(partner_address >> k_page_shift) ;
+	unsigned int partner_end_page=((partner_address+size-1) >> k_page_shift) ;
+	TRACEN(k_t_dmacopy,"counter_index=%d address=%p size=0x%08lx partner_vaddr=%p start_page=0x%08x end_page=0x%08x partner_start_page=0x%08x partner_end_page=0x%08x",
+			counter_index,address,size,partner_vaddr,start_page,end_page,partner_start_page,partner_end_page) ;
+	maybe_msync() ;
+	if( end_page == start_page && partner_end_page == partner_start_page)
+		{
+			unsigned int real_address=v_to_r( address,&c->a_tlb) ;
+			unsigned int real_address_tablewalk=find_real_address(address) ;
+			unsigned int partner_real_address=v_to_r_write(partner_vaddr,&c->b_tlb) ;
+			unsigned int partner_real_address_tablewalk=find_real_address(partner_vaddr) ;
+			TRACEN(k_t_dmacopy,"address=%p real_address=0x%08x r_a_tablewalk=0x%08x partner_vaddr=%p partner_real_address=0x%08x p_r_a_tablewalk=0x%08x",address,real_address,real_address_tablewalk,partner_vaddr,partner_real_address,partner_real_address_tablewalk) ;
+			if( k_diagnose)
+				{
+					c->a_raddress=real_address ;
+					c->b_raddress=partner_real_address ;
+				}
+			if( 0xffffffff != real_address && 0xffffffff != partner_real_address)
+				{
+					unsigned int injection_count ;
+					TRACEN(k_t_dmacopy,"address=%p real_address=0x%08x r_a_tablewalk=0x%08x partner_vaddr=%p partner_real_address=0x%08x p_r_a_tablewalk=0x%08x",address,real_address,real_address_tablewalk,partner_vaddr,partner_real_address,partner_real_address_tablewalk) ;
+					if( k_flush_target_from_l1)
+						{
+							flush_l1(address,size) ;
+						}
+					injection_count=dma_copy_within_page(dma_tcp,counter_index,real_address,partner_real_address,size,c) ;
+					return 0 ;
+
+				}
+			if( 0xffffffff == real_address ) dma_memcpy_statistic(k_copy_source_tlb_rejects) ;
+			if( 0xffffffff == partner_real_address ) dma_memcpy_statistic(k_copy_target_tlb_rejects) ;
+			return 1 ;
+		}
+	dma_memcpy_statistic(k_copy_spanpage_rejects) ;
+	return 1 ;  // At least one of the addresses wasn't mapped, or things spanned a page boundary
+
+}
+
+static int instrument_copy_user_address(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,dma_addr_t partner_addr, const void * partner_vaddr,copy_op_t *c)
+{
+	int rc = dma_copy_full(dma_tcp,counter_index,address,partner_vaddr,size,c) ;
+	if( 0 == rc)
+		{
+			rc = await_copy_completion(dma_tcp,counter_index,size) ;
+		}
+	if( 0 == rc && k_verify_dma && bgp_memcpy_control.verify_dma)
+		{
+			rc = verify_memcpy(address,partner_vaddr,size,c) ;
+			if(rc)
+				{
+					TRACEN(k_t_error,"trapped") ;
+				}
+		}
+	return rc ;
+}
+static int instrument_copy_user_address_singlepage(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,dma_addr_t partner_addr, const void * partner_vaddr,copy_op_t *c)
+{
+	int rc = dma_copy_full_singlepage(dma_tcp,counter_index,address,partner_vaddr,size,c) ;
+	if( 0 == rc)
+		{
+			rc = await_copy_completion(dma_tcp,counter_index,size) ;
+		}
+	if( 0 == rc && k_verify_dma && bgp_memcpy_control.verify_dma)
+		{
+			rc = verify_memcpy(address,partner_vaddr,size,c) ;
+			if(rc)
+				{
+					TRACEN(k_t_error,"trapped") ;
+				}
+		}
+	return rc ;
+}
+static int instrument_copy_user(void * to, const void * from, unsigned long size,unsigned int counter_index,copy_op_t *c)
+{
+	dma_tcp_t * dma_tcp=&dma_tcp_state ;
+	dma_addr_t fromAddr = dma_map_single(NULL, (void *)from, size, DMA_TO_DEVICE);
+	int rc ;
+	TRACEN(k_t_dmacopy,"(>)") ;
+	maybe_msync() ;
+	DMA_CounterSetValueHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,size+k_counter_idle_value) ;
+	 show_injection_fifo_state(dma_tcp, counter_index) ;
+	DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ;
+	 show_injection_fifo_state(dma_tcp, counter_index) ;
+	maybe_msync() ;
+        DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, dma_tcp->injCounterId,  0xffffffff );
+        _bgp_msync() ;
+	rc= instrument_copy_user_address(dma_tcp,counter_index,to,size,fromAddr,(void *)from,c) ;
+	TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+static int instrument_copy_user_singlepage(void * to, const void * from, unsigned long size,unsigned int counter_index,copy_op_t *c)
+{
+	dma_tcp_t * dma_tcp=&dma_tcp_state ;
+	dma_addr_t fromAddr = dma_map_single(NULL, (void *)from, size, DMA_TO_DEVICE);
+	int rc ;
+	TRACEN(k_t_dmacopy,"(>)") ;
+	maybe_msync() ;
+	 show_injection_fifo_state(dma_tcp, counter_index) ;
+	DMA_CounterSetValueHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,size+k_counter_idle_value) ;
+	 show_injection_fifo_state(dma_tcp, counter_index) ;
+	DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ;
+	 show_injection_fifo_state(dma_tcp, counter_index) ;
+	maybe_msync() ;
+        DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, dma_tcp->injCounterId,  0xffffffff );
+        _bgp_msync() ;
+	rc= instrument_copy_user_address_singlepage(dma_tcp,counter_index,to,size,fromAddr,from,c) ;
+	TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+static int instrument_copy_tofrom_user(void * to, const void * from, unsigned long size,copy_op_t *c)
+{
+	int rc=1 ;
+	int counter_index=acquire_counter() ;
+	TRACEN(k_t_dmacopy,"(>) to=%p from=%p size=0x%08lx counter_index=%d",to,from,size,counter_index) ;
+	if( counter_index >= 0)
+		{
+			rc= instrument_copy_user(to,from,size,counter_index,c) ;
+			release_counter(counter_index) ;
+		}
+	else
+		{
+			dma_memcpy_statistic(k_copy_no_counter_rejects) ;
+		}
+	TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+
+static int instrument_copy_tofrom_user_singlepage(void *to, const void * from, unsigned long size,copy_op_t *c)
+{
+	int rc=1 ;
+	int counter_index=acquire_counter() ;
+	TRACEN(k_t_dmacopy,"(>) to=%p from=%p size=0x%08lx counter_index=%d",to,from,size,counter_index) ;
+	if( counter_index >= 0)
+		{
+			rc= instrument_copy_user_singlepage(to,from,size,counter_index,c) ;
+			release_counter(counter_index) ;
+		}
+	else
+		{
+			dma_memcpy_statistic(k_copy_no_counter_rejects) ;
+		}
+	TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+	return rc ;
+}
+
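+/*  The DMA engine cannot take a page fault, so before handing a copy to it we probe */
+/*  one location in every page of the user buffer; a fault here sends the copy back */
+/*  to the normal software path. Note the inverted sense of the names: these return */
+/*  0 when every page is mapped and 1 on the first unmapped page. */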
+static int all_pages_mapped_read(unsigned long address, unsigned long size)
+{
+	unsigned int start_page=(address >> k_page_shift) ;
+	unsigned int end_page=((address+size) >> k_page_shift) ;
+	unsigned int page_count = end_page-start_page+1 ;
+	unsigned int x ;
+	if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+	 /*  Defend against the possibility that the user application has posted an unmapped address */
+	for(x=0;x<page_count;x+=1)
+		{
+			int pageInt ;
+			int __user * pageIntP = (int __user *) ((start_page+x) << k_page_shift)  ;
+			if( get_user(pageInt,pageIntP) )
+				{
+					TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+					if( k_diag_not_mapped)
+					{
+						tlb_t t ;
+						unsigned int r=v_to_r_maybe((void *)address, &t) ;
+						TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+						TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+						diagnose_tlb(&t) ;
+					}
+
+					return 1;
+				}
+
+		}
+	return 0 ;
+}
+
+static int all_pages_mapped_write(unsigned long address, unsigned long size)
+{
+	unsigned int start_page=(address >> k_page_shift) ;
+	unsigned int end_page=((address+size) >> k_page_shift) ;
+	unsigned int page_count = end_page-start_page+1 ;
+	unsigned int x ;
+/* 	int pageInt ; */
+	char __user * pageCharP = (char __user *) address ;
+	if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+	if(put_user(0,pageCharP))
+		{
+			TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",(start_page << k_page_shift),start_page,page_count,is_kernel_addr(start_page << k_page_shift)) ;
+			if( k_diag_not_mapped)
+			{
+				tlb_t t ;
+				unsigned int r=v_to_r_maybe((void *)address, &t) ;
+				TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+				TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+				diagnose_tlb(&t) ;
+			}
+
+			return 1;
+		}
+	 /*  Defend against the possibility that the user application has posted an unmapped address */
+	for(x=1;x<page_count;x+=1)
+		{
+/* 			int pageInt ; */
+			char __user * pageCharP = (char __user *) ((start_page+x) << k_page_shift)  ;
+/*  TODO: Fix this up against the possibility of 0..2 bytes at the start of the last page */
+			if( put_user(0,pageCharP) )
+				{
+					TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+					if( k_diag_not_mapped)
+					{
+						tlb_t t ;
+						unsigned int r=v_to_r_maybe((void *)address, &t) ;
+						TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+						TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+						diagnose_tlb(&t) ;
+					}
+
+					return 1;
+				}
+
+		}
+	return 0 ;
+}
+
+/* Returns 0 if we DMA-copied things, nonzero if we couldn't */
+extern unsigned long bgp_dma_instrument_copy_tofrom_user(void  *to,
+                const void *from, unsigned long size)
+{
+	TRACEN(k_t_general,"to=%p from=%p size=0x%08lx",to,from,size) ;
+	dma_memcpy_statistic(k_copy_tofrom_user_calls) ;
+	if( size > 0 && size >= bgp_memcpy_control.dma_threshold )
+		{
+			copy_op_t c ;
+			TRACEN(k_t_dmacopy,"to=%p from=%p size=0x%08lx",to,from,size) ;
+			if( all_pages_mapped_read((unsigned long) from,size))
+				{
+					dma_memcpy_statistic(k_copy_source_rejects) ;
+					return 1 ;
+				}
+			if( k_map_write_check && all_pages_mapped_write((unsigned long) to,size))
+				{
+					dma_memcpy_statistic(k_copy_target_rejects) ;
+					return 1 ;
+				}
+			if( k_diagnose)
+				{
+					c.to_vaddr=to ;
+					c.from_vaddr=(void *)from ;
+					c.size=size ;
+					c.frag_index=0 ;
+					c.from_check_post = 0xffffffff ;
+					c.to_check_pre = 0xffffffff ;
+					c.to_check_post = 0xffffffff ;
+					if(k_fromcheck_pre)
+						{
+							c.from_check_pre=region_check((void *)from,size) ;
+						}
+					else
+						{
+							c.from_check_pre = 0xffffffff ;
+						}
+					if(k_tocheck_pre)
+						{
+							c.to_check_pre=region_check(to,size) ;
+						}
+					else
+						{
+							c.to_check_pre = 0xffffffff ;
+						}
+				}
+
+
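+			/*  Diagnostic invariant for a correct copy: the source checksum is unchanged */
+			/*  across the transfer and the destination checksum ends up equal to it; any */
+			/*  pairwise mismatch below is treated as a faulty copy and diagnosed. */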
+			if( crosses_page_boundary(from,size) || crosses_page_boundary(to,size))
+				{
+					if( bgp_memcpy_control.handle_pagecrossing)
+						{
+
+							unsigned long rc= instrument_copy_tofrom_user(to,from,size,&c) ;
+							dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ;
+							TRACEN(k_t_dmacopy,"rc=%ld",rc) ;
+							if(k_diagnose && 0 == rc )
+								{
+									if(k_fromcheck_post)
+										{
+											c.from_check_post=region_check((void *)from,size) ;
+										}
+									if(k_tocheck_post)
+										{
+											c.to_check_post=region_check(to,size) ;
+										}
+									if( (k_fromcheck_pre && k_fromcheck_post && c.from_check_post != c.from_check_pre)
+										||
+										(k_fromcheck_pre && k_tocheck_post && c.from_check_pre != c.to_check_post)
+										||
+										(k_fromcheck_post && k_tocheck_post && c.from_check_post != c.to_check_post)
+										)
+										{
+											diagnose_faulty_copy(&c) ;
+											return 1 ;
+										}
+								}
+							return rc ;
+						}
+					else
+						{
+							dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ;
+							return 1 ;
+						}
+				}
+			else
+				{
+					{
+						unsigned long rc= instrument_copy_tofrom_user_singlepage(to,from,size,&c) ;
+						dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ;
+						TRACEN(k_t_dmacopy,"rc=%ld",rc) ;
+						if(k_diagnose && 0 == rc )
+							{
+								if(k_fromcheck_post)
+									{
+										c.from_check_post=region_check((void *)from,size) ;
+									}
+								if(k_tocheck_post)
+									{
+										c.to_check_post=region_check(to,size) ;
+									}
+								if( (k_fromcheck_pre && k_fromcheck_post && c.from_check_post != c.from_check_pre)
+									||
+									(k_fromcheck_pre && k_tocheck_post && c.from_check_pre != c.to_check_post)
+									||
+									(k_fromcheck_post && k_tocheck_post && c.from_check_post != c.to_check_post)
+									)
+									{
+										diagnose_faulty_copy(&c) ;
+										return 1 ;
+									}
+							}
+
+						return rc ;
+					}
+
+				}
+		}
+	dma_memcpy_statistic(k_copy_size_rejects) ;
+	return 1 ; // Not copied, size under threshold
+
+}
+
+static struct ctl_table dma_memcpy_table[] = {
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "counter_allocation_0",
+	                .data           = counter_allocation+0,
+	                .maxlen         = sizeof(core_counter_allocation_t),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "counter_allocation_1",
+	                .data           = counter_allocation+1,
+	                .maxlen         = sizeof(core_counter_allocation_t),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "counter_allocation_2",
+	                .data           = counter_allocation+2,
+	                .maxlen         = sizeof(core_counter_allocation_t),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "counter_allocation_3",
+	                .data           = counter_allocation+3,
+	                .maxlen         = sizeof(core_counter_allocation_t),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        { 0 },
+} ;
+
+static struct ctl_path dma_memcpy_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "dmacopy", .ctl_name = 0, },
+	{ },
+};
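+/*  These show up as /proc/sys/bgp/dmacopy/counter_allocation_<n>, one tunable per */
+/*  injecting core. */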
+
+static void __init
+bgp_dma_memcpy_init_counter_allocation(void)
+{
+	unsigned int core_index ;
+	register_sysctl_paths(dma_memcpy_ctl_path,dma_memcpy_table) ;
+	for(core_index=0;core_index<k_injecting_cores;core_index+=1)
+		{
+			core_counter_allocation_t * cci = counter_allocation + core_index ;
+			unsigned int counter_index ;
+			cci->count =  0;
+			for(counter_index=0;counter_index<k_counters_per_core;counter_index+=1)
+				{
+					atomic_set(cci->in_use+counter_index,0) ;
+				}
+
+		}
+	TRACEN(k_t_init,"counter_allocation initialised") ;
+}
+
+/*  This gets driven in the FLIH when a DMA interrupt occurs */
+static void dummyCounterZeroHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+	TRACEN(k_t_error,"(>) Unexpected interrupt" );
+	TRACEN(k_t_error,"(<)" );
+}
+
+/* 'copyin/out' via the BGP DMA is believed functional, but it has not proved useful: copying via the parallel FP */
+/* registers appears to run faster, even in cases where that wipes out the L1 cache. The code is left here in case */
+/* someone wants to try improving it, and to indicate which sections of the BGP DMA unit (injection fifo and */
+/* reception counters) are needed to make it work. */
+void __init
+bgp_dma_memcpyInit(dma_tcp_t * dma_tcp)
+{
+	bgp_dma_memcpy_init_counter_allocation() ;
+    {
+      int counter_index ;
+      for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1  )
+        {
+              dma_tcp->memcpyInjFifoFramesPri[ counter_index ] = 0 ;
+              dma_tcp->memcpyInjFifoFramesLoc[ counter_index ] = 1 ;
+              dma_tcp->memcpyInjFifoFramesIds[ counter_index ] = counter_index ;
+              dma_tcp->memcpyInjFifoFramesMap[ counter_index ] = 0;  /*  'memcpy' injector not connected to torus */
+        }
+    }
+    {
+      int ret = DMA_InjFifoGroupAllocate( k_InjectionFifoGroupMemcpy,
+          k_injecting_cores,   /*  num inj fifos */
+                                  dma_tcp->memcpyInjFifoFramesIds,
+                                  dma_tcp->memcpyInjFifoFramesPri,
+                                  dma_tcp->memcpyInjFifoFramesLoc,
+                                  dma_tcp->memcpyInjFifoFramesMap,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  & dma_tcp->memcpyInjFifoGroupFrames );
+
+      TRACEN(k_t_init,"(=)DMA_InjFifoGroupAllocate rc=%d", ret );
+
+      if( 0 == ret)
+    {
+      int counter_index ;
+      for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1  )
+        {
+        	TRACEN(k_t_init,"fg_ptr=%p fifo_id=%d va_start=%p va_head=%p va_end=%p",
+        			&dma_tcp->memcpyInjFifoGroupFrames,
+        			dma_tcp->memcpyInjFifoFramesIds[counter_index],
+        			dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,
+        			dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,
+        			dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo+1
+        			) ;
+        	{
+              int ret = DMA_InjFifoInitById( &dma_tcp->memcpyInjFifoGroupFrames,
+                  dma_tcp->memcpyInjFifoFramesIds[counter_index],
+                  dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,
+                  dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,   /*  head */
+                  dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo+1   /*  end */
+                                 );
+
+              dma_tcp->idma.idma_core[counter_index].memcpy_fifo_initial_head =
+                (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+              TRACEN(k_t_init,"(=)DMA_InjFifoInitById rc=%d initial_head=0x%08x", ret , dma_tcp->idma.idma_core[counter_index].memcpy_fifo_initial_head);
+        	}
+        }
+    }
+     /*  Set up a reception counter for 'memcpy' */
+        {
+           /*  Initialize reception counter group */
+          int ret  __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Reception,
+        		  k_ReceptionCounterGroupMemcpy,  /*  group number */
+              DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                                dma_tcp->memcpyRecCntrSubgrps,
+/*  TODO: Not really taking interrupts from this counter group, but maybe it has to be coherent ? */
+//       	                                0,   /*  target core for interrupts */
+//       	                                NULL, /* Not planning to take interrupts from memcpy counters */
+                                2,   /*  target core for interrupts */
+                                dummyCounterZeroHandler,
+                                NULL,
+                                NULL,
+                                & dma_tcp->memcpyRecCounterGroup );
+          TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret );
+        }
+/* 		    { */
+/* 		      int counter_index ; */
+/* 		      for( counter_index=0; counter_index< DMA_NUM_COUNTERS_PER_GROUP; counter_index += 1  ) */
+/* 			      { */
+/* 				      DMA_CounterSetDisableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; */
+/* 				      DMA_CounterSetValueBaseMaxHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,k_counter_idle_value,0,0xffffffff) ; */
+/* 			      } */
+/* 			_bgp_msync() ; */
+/* //		      for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1  ) */
+/* //			      { */
+/* //					DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; */
+/* //			      } */
+/* 			_bgp_msync() ; */
+/* 		    } */
+
+
+
+}
+}
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp.c b/drivers/net/bgp_torus/bgp_dma_tcp.c
new file mode 100644
index 0000000..cb0ded8
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp.c
@@ -0,0 +1,1069 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ * Intent: Send a 'request block' to the partner's memory FIFO
+ *         Partner initiates a 'remote read' from me
+ *         Partner sends a 'response block' to my FIFO to say the data is transferred
+ *
+ ********************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/bootmem.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+#include <net/tcp_hiatus.h>
+
+#include <spi/linux_kernel_spi.h>
+#include <asm/bgcns.h>
+
+#include "bgp_dma_tcp.h"
+
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+
+/* #define TRUST_TORUS_CRC */
+
+#define SEND_SHORT_FRAMES_INLINE
+#define ENABLE_TUNING
+
+#define ENABLE_LEARNING_ADDRESSES
+
+#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI)
+/*  Select operation with linux 'dev->poll' */
+#define TORNIC_DEV_POLL
+
+/* #if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) */
+/* #define TORNIC_STEAL_POLL_CORE */
+/* #endif */
+
+#endif
+
+
+/* #define REQUIRES_DUMPMEM */
+
+/* #if defined(CONFIG_BLUEGENE_TORUS_TRACE) */
+/* int bgp_dma_tcp_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+int bgp_dma_tcp_tracemask = k_t_init | k_t_request | k_t_error | k_t_congestion ; // | k_t_scattergather ;
+/* int bgp_dma_tcp_tracemask = k_t_init | k_t_request | k_t_error | k_t_congestion |k_t_irqflow|k_t_irqflow_rcv; */
+/* int bgp_dma_tcp_tracemask = 0xffffffff ; */
+/* int bgp_dma_tcp_tracemask =  k_t_request | k_t_error ; */
+/* #endif */
+
+/* extern int sysctl_somaxconn ; // listening socket backlog, will want to increase this to allow at least 'n' SYNs per node in the block */
+/* #define DEBUG_CLEAR_SKB */
+
+//extern int bgp_dma_irq ;  /*  Interrupt number that the torus is using */
+
+enum {
+	k_fifo_irq = 124 ,  /*  Linux interrupt number for 'fifo threshold crossing' interrupt */
+	k_rec_counter_irq = 132   /*  Linux interrupt number for 'reception counter hit zero' interrupt */
+};
+
+enum {
+	k_find_source_of_rst_flags = 1 /* Whether to enable making a fuss about the source of a 'rst' frame */
+};
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR)
+#define TORNIC_TORUS_AFFINITY
+#endif
+
+enum {
+  k_TorusAffinityCPU =
+#if defined(TORNIC_TORUS_AFFINITY)
+	  2
+#else
+	  0
+#endif
+};
+
+extern cpumask_t cpu_nouser_map;   /*  Added to support 'steal' of core prior to long-running softirq */
+
+int  __init
+dma_tcp_module_init    (void);
+/* void __exit dma_tcp_module_cleanup (void); */
+
+/* module_init(dma_tcp_module_init); */
+/* module_exit(dma_tcp_module_cleanup); */
+
+#if defined(CONFIG_BGP_STATISTICS)
+int rtt_histogram[33] ;
+int transit_histogram[33] ;
+#endif
+
+
+MODULE_DESCRIPTION("BG/P sockets over torus DMA driver");
+MODULE_LICENSE("GPL");
+
+
+#define TCP_DMA_NAME  "tcp_bgp_dma"
+#ifndef CTL_UNNUMBERED
+#define CTL_UNNUMBERED -2
+#endif
+
+/*  Routines related to interrupt management from bgp_bic.c */
+void bic_disable_irq(unsigned int irq) ;   /*  Intended to be called from a FLIH to indicate that this interrupt will not fire again */
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;  /*  Intended to indicate which core will take the next interrupt of this type. Doesn't explicitly enable, but other async things may enable it */
+void bic_unmask_irq(unsigned int irq) ;  /*  Explicitly enable this interrupt */
+
+
+
+#define ENABLE_TIMESTAMP_TRACKING
+enum {
+  k_FLIH_Entry ,
+  k_FLIH_Exit ,
+  k_SLIH_Entry ,
+  k_SLIH_Exit ,
+  k_Poll_Entry ,
+  k_Poll_Exit ,
+  k_Enable ,
+  k_CouldEnable ,
+  k_Quantity
+};
+
+static char *timestamp_names[] = {
+    "k_FLIH_Entry" ,
+    "k_FLIH_Exit" ,
+    "k_SLIH_Entry" ,
+    "k_SLIH_Exit" ,
+    "k_Poll_Entry" ,
+    "k_Poll_Exit" ,
+    "k_Enable" ,
+    "k_CouldEnable"
+};
+
+typedef struct {
+  unsigned int hi ;
+  unsigned int lo ;
+} timestamp_t ;
+
+#if defined(ENABLE_TIMESTAMP_TRACKING)
+enum {
+  k_TimestampRingSize = 8
+};
+
+typedef struct {
+  unsigned int current_index ;
+  timestamp_t timestamp[k_TimestampRingSize] ;
+} timestamp_ring_t;
+
+static timestamp_ring_t timestamp_ring[k_Quantity] ;
+#endif
+
+static void record_timestamp(unsigned int x)
+  {
+#if defined(ENABLE_TIMESTAMP_TRACKING)
+    unsigned int tbhi = get_tbu();
+    unsigned int tblo = get_tbl();
+    unsigned int tbhi2 = get_tbu();
+    unsigned int tblo2 = ( tbhi == tbhi2 ) ? tblo : 0 ;
+    timestamp_ring_t *tr = timestamp_ring+x ;
+    unsigned int cx=tr->current_index ;
+    unsigned int cxm=cx&(k_TimestampRingSize-1) ;
+    tr->timestamp[cxm].hi = tbhi2 ;
+    tr->timestamp[cxm].lo = tblo2 ;
+    TRACEN(k_t_detail,"Timestamp %s[%d] = 0x%08x%08x",timestamp_names[x],cx,tbhi2,tblo2) ;
+    tr->current_index=cx+1 ;
+#endif
+  }
+
+static void show_timestamps(void)
+  {
+#if defined(ENABLE_TIMESTAMP_TRACKING)
+    int x ;
+    TRACEN(k_t_detail,"(>)") ;
+    for(x=0;x<k_Quantity;x+=1)
+      {
+        timestamp_ring_t *tr = timestamp_ring+x ;
+        unsigned int cx=tr->current_index ;
+        int q ;
+        for(q=-k_TimestampRingSize;q<0 ; q+=1)
+          {
+            unsigned int cxm=(cx+q)&(k_TimestampRingSize-1) ;
+            TRACEN(k_t_request,"Timestamp %s[%03d] = 0x%08x%08x",timestamp_names[x],q,tr->timestamp[cxm].hi,tr->timestamp[cxm].lo) ;
+          }
+      }
+    TRACEN(k_t_detail,"(<)") ;
+#endif
+  }
+
+static void init_tuning(dma_tcp_t *dma_tcp)
+  {
+#if defined(CONFIG_BLUEGENE_TCP)
+    dma_tcp->bluegene_tcp_is_built = 1 ;
+#else
+    dma_tcp->bluegene_tcp_is_built = 0 ;
+#endif
+    dma_tcp->tuning_num_packets = 0x7fffffff ;  /*  up from '1', used 16 at one time */
+#if defined(KEEP_TCP_FLAG_STATS)
+    dma_tcp->tcp_received_flag_count[0] = 0 ;
+    dma_tcp->tcp_received_flag_count[1] = 0 ;
+    dma_tcp->tcp_received_flag_count[2] = 0 ;
+    dma_tcp->tcp_received_flag_count[3] = 0 ;
+    dma_tcp->tcp_received_flag_count[4] = 0 ;
+    dma_tcp->tcp_received_flag_count[5] = 0 ;
+    dma_tcp->tcp_received_flag_count[6] = 0 ;
+    dma_tcp->tcp_received_flag_count[7] = 0 ;
+#endif
+#if defined(TORNIC_DEV_POLL)
+#if defined(TORNIC_STEAL_POLL_CORE)
+     /*     dma_tcp->tuning_num_empty_passes = 1000000 ; // Try 1 second 'spin' if no data coming */
+    dma_tcp->tuning_num_empty_passes = 5000 ;  /*  Try 5 millisecond 'spin' if no data coming if we have a whole core for it */
+    dma_tcp->tuning_non_empty_poll_delay = 850 ;
+#else
+     /*  Sharing a core, but with 'poll' NAPI */
+    dma_tcp->tuning_num_empty_passes = 1 ;  /*  Try 10 microsecond 'spin' if no data coming if we are sharing core with app */
+    dma_tcp->tuning_non_empty_poll_delay = 1 ;
+#endif
+#else
+     /*  'interrupts' NAPI */
+    dma_tcp->tuning_num_empty_passes = 1 ;  /*  Try 10 microsecond 'spin' if no data coming if we are sharing core with app */
+    dma_tcp->tuning_non_empty_poll_delay = 1 ;
+#endif
+    dma_tcp->tuning_poll_after_enabling = 1 ;  /*  changed from 0 on 20080619 */
+    dma_tcp->tuning_run_handler_on_hwi = 0 ;  /*  was 1 */
+    dma_tcp->tuning_clearthresh_slih = 1 ;  /*  = 0 , whether to clear the 'threshold crossed' bit in the slih */
+    dma_tcp->tuning_clearthresh_flih = 0 ;  /*  = 0 , whether to clear the 'threshold crossed' bit in the flih */
+    dma_tcp->tuning_disable_in_dcr = 1 ;  /*  = 1, whether to toggle the DCR interrupt enable/disable */
+    dma_tcp->tuning_exploit_reversepropose = 1 ;  /*  which way to run the propose/accept protocol */
+    dma_tcp->tuning_counters_per_source = 0 ;  /*  Max reception counters to commit per source node (0 indicates the 'shareout' algorithm) */
+    dma_tcp->tuning_min_icsk_timeout = 200 ;  /*  Push TCP timeout on torus up to 200 jiffies, we think we have a reliable network ... */
+    dma_tcp->tuning_injection_hashmask = 3 ;  /*  = 3, whether to mask down the number of injection fifos per direction */
+    dma_tcp->tuning_virtual_channel = k_VC_anyway ; /* Select adaptive routing at boot time */
+  }
+
+dma_tcp_t dma_tcp_state ;
+
+
+/* void __exit */
+/* dma_tcp_module_cleanup (void) */
+/* { */
+//   /*  nothing to do */
+/* } */
+
+
+
+/* #if defined(CONFIG_BLUEGENE_TCP) */
+#if 1
+static int bgp_dma_tcp_poll(dma_tcp_t *) ;
+static int bgp_dma_tcp_poll(dma_tcp_t *dma_tcp)
+{
+/*  Values when I inherited the code, now taken from 'tuning params' */
+/*   int num_packets = 1; // received packets one by one */
+/*   int num_empty_passes = 512; */
+/*   int non_empty_poll_delay = 850; */
+/*  Other values I have tried */
+/*   int num_packets = 100; */
+/*   int num_empty_passes = 0; */
+/*   int non_empty_poll_delay = 0; */
+/*   int num_packets = 100; // received packets 100 at a time */
+/*   int num_empty_passes = 5; */
+/*   int non_empty_poll_delay = 10; */
+/*   dumpmem(dma_tcp_state.receptionFIFO,128,"Reception memory FIFO") ; */
+
+  int ret ;
+  TRACEN(k_t_irqflow, "(>) tuning_num_packets=%d tuning_num_empty_passes=%d tuning_non_empty_poll_delay=%d",
+      dma_tcp->tuning_num_packets,dma_tcp->tuning_num_empty_passes,dma_tcp->tuning_non_empty_poll_delay );
+  dma_tcp->device_stats = bgtornet_stats() ;
+  ret = DMA_RecFifoPollNormalFifoById( dma_tcp->tuning_num_packets,
+               recFifoId,
+               dma_tcp->tuning_num_empty_passes,
+               dma_tcp->tuning_non_empty_poll_delay,
+               dma_tcp->recFifoGroup,
+               bgp_dma_tcp_empty_fifo_callback);
+  touch_softlockup_watchdog() ;  /*  If we get a continuous stream of packets, we do not really want the softlockup watchdog to bark */
+  TRACEN(k_t_irqflow, "(<) ret=%d",ret );
+/*   ASSERT( ret >= 0 ); */
+  return ret;
+}
+
+
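+/*  These DCRs appear to hold per-group interrupt-enable masks: 0xd71+group for */
+/*  'fifo threshold crossed' and 0xd7a+group for 'reception counter hit zero'. */
+/*  Writing 0 masks the cause; the enable paths re-arm it with the same patterns */
+/*  that trip_missed_interrupt() reads back to detect a missed interrupt. */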
+static void recfifo_disable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    mtdcrx(0xd71+dma_tcp_ReceptionFifoGroup(&dma_tcp_state),0) ;
+  }
+
+static void recfifo_enable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    record_timestamp(k_Enable) ;
+    mtdcrx(0xd71+dma_tcp_ReceptionFifoGroup(&dma_tcp_state),0x80000000 >> (8*dma_tcp_ReceptionFifoGroup(&dma_tcp_state))) ;
+  }
+
+static void reccounter_disable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    mtdcrx(0xd7a+dma_tcp_ReceptionCounterGroup(&dma_tcp_state),0) ;
+  }
+
+static void reccounter_enable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    record_timestamp(k_Enable) ;
+    mtdcrx(0xd7a+dma_tcp_ReceptionCounterGroup(&dma_tcp_state),0xffffffff) ;
+  }
+
+static void dma_tcp_slih_handler(unsigned long dummy)
+  {
+    int ret;
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    unsigned int is_up=dma_tcp->is_up ;
+    record_timestamp(k_SLIH_Entry) ;
+
+    TRACEN(k_t_irqflow,"(>)" );
+    enable_kernel_fp() ;
+     /*  Clear the 'threshold crossed' flag so we don't automatically reinterrupt */
+    DMA_RecFifoSetClearThresholdCrossed( dma_tcp_state.recFifoGroup,
+                 0x80000000 >> (8*dma_tcp_ReceptionFifoGroup(&dma_tcp_state)),
+                 0 );
+    ret = bgp_dma_tcp_poll(dma_tcp);
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+    if(is_up)
+      {
+        mod_timer(&dma_tcp->torus_missed_interrupt_timer, jiffies+200) ;  /*  Cause timer interrupt after 2000ms if things don't stay alive ... temp while diagnosing problem ... */
+      }
+#endif
+    record_timestamp(k_SLIH_Exit) ;
+#if !defined(TORNIC_DEV_POLL)
+    recfifo_enable() ;
+    if(is_up)
+      {
+        reccounter_enable() ;
+      }
+#endif
+    TRACEN(k_t_irqflow,"(<)" );
+  }
+
+static void trip_missed_interrupt(dma_tcp_t *dma_tcp)
+{
+	unsigned int fifo_dcr = mfdcrx(0xd71) ;
+	unsigned int counter_dcr = mfdcrx(0xd7a) ;
+	struct bic_regs * bic_regs = bic.regs ;
+	unsigned int target_2_3 = bic_regs->group[2].target[3] ;
+	unsigned int target_3_0 = bic_regs->group[3].target[0] ;
+	unsigned int notEmpty = DMA_RecFifoGetNotEmpty(dma_tcp->recFifoGroup,0) ;
+	unsigned int thresholdCrossed = DMA_RecFifoGetThresholdCrossed(dma_tcp->recFifoGroup,0) ;
+	if( fifo_dcr != 0x80000000 || counter_dcr != 0xffffffff || target_2_3 != 0x00006000 || target_3_0 != 0x00006000 || notEmpty != 0 )
+		{
+			TRACEN(k_t_general,"maybe missed interrupt fifo_dcr=0x%08x counter_dcr=0x%08x target_2_3=0x%08x target_3_0=0x%08x notEmpty=0x%08x thresholdCrossed=0x%08x",
+					fifo_dcr,counter_dcr,target_2_3,target_3_0,notEmpty,thresholdCrossed) ;
+			dma_tcp_slih_handler(0) ;
+		}
+}
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+static void dma_tcp_missed_interrupt(unsigned long dummy)
+{
+	    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	    unsigned int is_up=dma_tcp->is_up ;
+	TRACEN(k_t_irqflow,"(>) is_up=%d",is_up) ;
+	if(is_up )
+	  {
+      trip_missed_interrupt(dma_tcp) ;
+      mod_timer(&dma_tcp->torus_missed_interrupt_timer, jiffies+10) ;  /*  Cause timer interrupt after 100ms if things don't stay alive ... temp while diagnosing problem ... */
+	  }
+	TRACEN(k_t_irqflow,"(<)") ;
+}
+#endif
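+/*  Ticket handshake for poll requests: the handler copies 'req' into 'ack', so a */
+/*  requester that bumped dma_ticket_req elsewhere can tell when a complete poll */
+/*  pass has run since its request. */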
+static volatile int dma_ticket_req ;
+static volatile int dma_ticket_ack ;
+
+void dma_tcp_poll_handler(void)
+  {
+    int cur_ticket_req = dma_ticket_req ;
+    record_timestamp(k_Poll_Entry) ;
+
+    dma_ticket_ack = cur_ticket_req ;
+    TRACEN(k_t_irqflow,"dma_tcp_poll_handler: cur_ticket_req=%d (>)",cur_ticket_req );
+    dma_tcp_slih_handler(0) ;
+    TRACEN(k_t_irqflow,"dma_tcp_poll_handler: cur_ticket_req=%d (<)",cur_ticket_req );
+    record_timestamp(k_Poll_Exit) ;
+  }
+
+void dma_tcp_rx_enable(void)
+  {
+	  unsigned long flags ;
+    TRACEN(k_t_irqflow,"(>)" );
+    record_timestamp(k_CouldEnable) ;
+    recfifo_enable() ;
+    reccounter_enable() ;
+    bic_set_cpu_for_irq(k_fifo_irq+dma_tcp_ReceptionFifoGroup(&dma_tcp_state),k_TorusAffinityCPU) ;
+    bic_set_cpu_for_irq(k_rec_counter_irq+dma_tcp_ReceptionCounterGroup(&dma_tcp_state),k_TorusAffinityCPU) ;
+     /*  Both interrupts unmasked before we take one to avoid the chance of an interrupt after the first */
+     /*   which (?) could go round the loop and 'do the wrong thing' with respect to napi and enabling the second */
+     /*   while trying to run the napi poll */
+    local_irq_save(flags) ;
+    bic_unmask_irq(k_fifo_irq+dma_tcp_ReceptionFifoGroup(&dma_tcp_state)) ;
+    bic_unmask_irq(k_rec_counter_irq+dma_tcp_ReceptionCounterGroup(&dma_tcp_state)) ;
+    local_irq_restore(flags) ;
+     /*  If we get here and there's an 'interrupt cause' in the DCRs, we have missed an interrupt. Trace it and fire the SLIH. */
+    trip_missed_interrupt(&dma_tcp_state ) ;
+    TRACEN(k_t_irqflow,"(<)" );
+
+  }
+
+static DECLARE_TASKLET(dma_tcp_slih, dma_tcp_slih_handler,0) ;
+
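+/*  Interrupt flow: the FLIH below masks the fifo and counter interrupts and hands */
+/*  the work to NAPI via bgtornet_rx_schedule(); the SLIH (dma_tcp_slih_handler) */
+/*  then drains the reception fifo and, when built without 'dev->poll', re-arms */
+/*  the interrupt enables when it is done. */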
+/*  This gets driven in the FLIH when a DMA interrupt occurs */
+static void receiveFLIH(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+  TRACEN(k_t_irqflow,"(>) FLIH dma_tcp_state.active_quarter=%i",dma_tcp_state.active_quarter );
+  record_timestamp(k_FLIH_Entry) ;
+  bic_disable_irq(k_fifo_irq+dma_tcp_ReceptionFifoGroup(&dma_tcp_state)) ;
+  bic_disable_irq(k_rec_counter_irq+dma_tcp_ReceptionCounterGroup(&dma_tcp_state)) ;
+  bgtornet_rx_schedule() ;
+  record_timestamp(k_FLIH_Exit) ;
+  TRACEN(k_t_irqflow,"(<) FLIH" );
+}
+
+static void receiveCommHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+	TRACEN(k_t_irqflow,"(>)" );
+        recfifo_disable() ;
+	receiveFLIH(arg1,arg2,arg3,arg4) ;
+	TRACEN(k_t_irqflow,"(<)" );
+}
+
+/*  This gets driven in the FLIH when a DMA interrupt occurs */
+static void receiveCounterZeroHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+	TRACEN(k_t_irqflow,"(>)" );
+        reccounter_disable() ;
+	receiveFLIH(arg1,arg2,arg3,arg4) ;
+	TRACEN(k_t_irqflow,"(<)" );
+}
+
+
+static int unknownActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           )
+  {
+    unsigned int SW_Arg __attribute__ ((unused)) =packet_ptr->SW_Arg ;
+    unsigned int Func_Id __attribute__ ((unused)) =packet_ptr->Func_Id ;
+    unsigned int x __attribute__ ((unused)) =SW_Arg >> 16 ;
+    unsigned int y __attribute__ ((unused)) =( SW_Arg >> 8) & 0xff ;
+    unsigned int z __attribute__ ((unused)) =SW_Arg & 0xff ;
+    TRACEN(k_t_error,"(!!!) %08x %02x (%02x,%02x,%02x) payload_ptr=%p payload_bytes=%d", SW_Arg,Func_Id,x,y,z,payload_ptr, payload_bytes );
+    return 0 ;
+  }
+
+/* static char reception_fifo_buffer[k_desired_reception_memory_fifo_size] __attribute__ ((__aligned__(32))) ; */
+/*  We need a reception FIFO; we are prepared to compromise on its size */
+static void __init
+dma_tcp_setup_reception_fifo(dma_tcp_t *dma_tcp)
+  {
+    unsigned int allocation_size=k_desired_reception_memory_fifo_size ;
+    void * allocation_address=local_permanent_alloc(k_desired_reception_memory_fifo_size) ;
+    dma_tcp->receptionfifo = allocation_address ;
+    dma_tcp->receptionfifoSize = allocation_size ;
+     /*  Must get a memory FIFO area, and it must be L1-aligned */
+    BUG_ON(allocation_address == NULL) ;
+    BUG_ON(0 != (0x1f & (int)allocation_address)) ;
+    if( allocation_address != NULL )
+      {
+        memset(allocation_address, 0xcc, allocation_size) ;
+      }
+    TRACEN(k_t_init,"reception_fifo address=%p length=%d=0x%08x",allocation_address,allocation_size,allocation_size) ;
+  }
+
+#endif
+
+
+//void __init
+//bgp_fpu_register_memcpy_sysctl(void) ;
+
+enum
+{
+	k_enable_dma_memcpy = 1
+} ;
+
+//int bluegene_globalBarrier_nonBlocking(unsigned int channel, int reset, unsigned int timeoutInMillis) ;
+//
+//extern unsigned long long printk_clock_aligner ;
+///* Determine the offset of the 'local' timebase from a 'common' time signal as per global barrier */
+//static void __init
+//align_timebase(void)
+//  {
+//    int rc0 ;
+//    int rc1 = -1 ;
+//    unsigned long flags ;
+//    unsigned long long tb ;
+//    local_irq_save(flags) ;
+//    rc0 = bluegene_globalBarrier_nonBlocking(3,1,1000 ) ;
+//    if( rc0 == BGCNS_RC_CONTINUE ) rc1 = bluegene_globalBarrier_nonBlocking(3,0,1000 ) ;
+//    tb = get_tb() ;
+//    printk_clock_aligner = tb ;
+//    TRACEN(k_t_init,"rc0=%d rc1=%d tb=0x%016llx",rc0,rc1,tb) ;
+//    local_irq_restore(flags) ;
+//  }
+
+static void __init
+dma_tcp_init(dma_tcp_t *dma_tcp, BGP_Personality_t *pers)
+  {
+    int compute_node_count = pers->Network_Config.Xnodes*pers->Network_Config.Ynodes*pers->Network_Config.Znodes ;
+    int i_am_compute_node= ( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank ) ;
+    TRACEN(k_t_init,"(>) PAGE_SHIFT=%d PAGE_SIZE=%lu", PAGE_SHIFT, PAGE_SIZE );
+//    bgp_fpu_register_memcpy_sysctl() ;
+    init_tuning(dma_tcp) ;
+    dma_tcp->location.coordinate[0] = pers->Network_Config.Xcoord;
+    dma_tcp->location.coordinate[1] = pers->Network_Config.Ycoord;
+    dma_tcp->location.coordinate[2] = pers->Network_Config.Zcoord;
+    dma_tcp->extent.coordinate[0]  = pers->Network_Config.Xnodes;
+    dma_tcp->extent.coordinate[1]  = pers->Network_Config.Ynodes;
+    dma_tcp->extent.coordinate[2]  = pers->Network_Config.Znodes;
+    dma_tcp->configured_quarter = 0 ;
+    dma_tcp->node_count = compute_node_count ;
+    dma_tcp->node_slot_mask = (compute_node_count )-1 ;
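+    /*  (count-1) works as a slot mask only when the node count is a power of two, */
+    /*  an assumption the fls()-based xbits/ybits/zbits calculations below share. */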
+
+    dma_tcp->SW_Arg = (pers->Network_Config.Xcoord << 16)
+                   | (pers->Network_Config.Ycoord << 8)
+                   | (pers->Network_Config.Zcoord) ;
+    dma_tcp->src_key = dma_tcp->location.coordinate[0]*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2]
+                      +dma_tcp->location.coordinate[1]*dma_tcp->extent.coordinate[2]
+                      +dma_tcp->location.coordinate[2] ;
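+    /*  src_key linearises the (x,y,z) coordinate into a node index in row-major */
+    /*  order: x*Ynodes*Znodes + y*Znodes + z. */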
+
+    dma_tcp->xbits = fls(pers->Network_Config.Xnodes)-1 ;
+    dma_tcp->ybits = fls(pers->Network_Config.Ynodes)-1 ;
+    dma_tcp->zbits = fls(pers->Network_Config.Znodes)-1 ;
+    /* YKT BGP seems wired so that no partition less than 8x8x8 is a torus in any dimension */
+    dma_tcp->is_torus_x = (pers->Network_Config.Xnodes >= 8 && pers->Network_Config.Ynodes >= 8 && pers->Network_Config.Znodes >= 8) ;
+    dma_tcp->is_torus_y = dma_tcp->is_torus_x ;
+    dma_tcp->is_torus_z = dma_tcp->is_torus_x ;
+    dma_tcp->block_id = pers->Network_Config.BlockID & 0x00ffffff ;
+    dma_tcp->i_am_compute_node = i_am_compute_node ;
+    TRACEN(k_t_init,"SW_Arg=0x%08x rank=%d=0x%08x src_key=0x%08x xbits=%d ybits=%d zbits=%d ",
+		    dma_tcp->SW_Arg, pers->Network_Config.Rank, pers->Network_Config.Rank, dma_tcp->src_key,
+		    dma_tcp->xbits,dma_tcp->ybits,dma_tcp->zbits );
+
+    if( 0 == dma_tcp->mtu)
+      {
+        bgp_dma_tcp_set_mtu(dma_tcp, 64996) ;
+      }
+
+#if defined(TORUS_RECEIVE_WITH_SLIH)
+#else
+    skb_queue_head_init(&dma_tcp->skb_pool) ;
+    skb_queue_head_init(&dma_tcp->skb_list_free) ;
+#endif
+    {
+	    int core ;
+	    for( core=0; core<k_injecting_cores; core += 1)
+		    {
+			    int desired_fifo ;
+			    for(desired_fifo=0;desired_fifo<k_injecting_directions;desired_fifo+=1)
+			    spin_lock_init(&dma_tcp->dirInjectionLock[core*k_injecting_directions+desired_fifo]) ;
+		    }
+    }
+
+#if defined(TORUS_RECEIVE_WITH_SLIH)
+#else
+    tasklet_schedule(&pool_filler_slih) ;
+#endif
+
+#if defined(CONFIG_BLUEGENE_TCP)
+     /*  Only compute nodes are torus-capable ... */
+    if( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank )
+      {
+        dma_tcp_setup_reception_fifo(dma_tcp) ; // Need this 'early' (before ifup) in case of needing to allocate a lot of physically contiguous memory
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+        setup_timer(&dma_tcp->torus_missed_interrupt_timer,dma_tcp_missed_interrupt,0) ;
+#endif
+        dma_tcp_frames_init(dma_tcp) ;
+      }
+#endif
+    dma_tcp_devfs_procfs_init(dma_tcp) ;
+    TRACEN(k_t_init,"(<)" );
+  }
+
+void dma_tcp_ifup(dma_tcp_t *dma_tcp, BGP_Personality_t *pers)
+  {
+    TRACEN(k_t_init,"(>)" );
+#if defined(CONFIG_BLUEGENE_TCP)
+     /*  Only compute nodes are torus-capable ... */
+    if( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank )
+      {
+        dma_tcp->active_quarter = dma_tcp->configured_quarter & 3 ;
+        dma_tcp->is_up = 1 ;
+//        align_timebase() ;
+        {
+    int subX ;
+    for(subX=0;subX<DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP;subX +=1)
+      {
+        dma_tcp->injCntrSubgrps[ subX ] = subX ;
+        dma_tcp->recCntrSubgrps[ subX ] = subX ;
+      }
+        }
+
+       /*  register a receive function for 'unrecognised' memfifo packets */
+        DMA_RecFifoRegisterRecvFunction(unknownActor, dma_tcp, 1, 0);
+
+        dma_tcp->recMap.threshold[0] = dma_tcp->receptionfifoSize/16;    /*  'threshold crossed' interrupt once the fifo is 1/16 full */
+        {
+          int i ;
+          for(i=0;i<4;i+=1)
+            {
+              int j ;
+              for(j=0;j<7;j+=1)
+                {
+                  dma_tcp->recMap.ts_rec_map[i][j] = 8*dma_tcp_ReceptionFifoGroup(dma_tcp) ;
+                }
+            }
+        }
+        {
+            int ret  __attribute__ ((unused)) = DMA_RecFifoSetMap( &dma_tcp->recMap );  /*  fifo 0 will receive packets from everywhere */
+
+            TRACEN(k_t_init,"(=)DMA_RecFifoSetMap rc=%d", ret );
+        }
+       /*  Register functions for 'frames' style access */
+        dma_tcp_frames_ifup(dma_tcp) ;
+
+         /*  set up rec fifo group */
+        dma_tcp->recFifoGroup = DMA_RecFifoGetFifoGroup( dma_tcp_ReceptionFifoGroup(dma_tcp), 0, receiveCommHandler, NULL, NULL, NULL, NULL );
+
+
+        TRACEN(k_t_init,"(=)DMA_RecFifoGetFifoGroup dma_tcp->recFifoGroup=%p", dma_tcp->recFifoGroup );
+
+         /*  initialize rec fifo */
+        {
+        int ret  __attribute__ ((unused)) = DMA_RecFifoInitById ( dma_tcp->recFifoGroup,
+            recFifoId,
+            dma_tcp->receptionfifo,                 /*  fifo start */
+            dma_tcp->receptionfifo,                 /*  fifo head */
+            dma_tcp->receptionfifo+dma_tcp->receptionfifoSize    /*  fifo end */
+                                );
+        TRACEN(k_t_init,"(=)DMA_RecFifoInitById rc=%d", ret );
+        }
+        TRACEN(k_t_general, "(=)(I) testdma: CounterGroupAllocate");
+
+        {
+         /*  Initialize injection counter group */
+        int ret  __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Injection,
+                              dma_tcp_InjectionCounterGroup(dma_tcp),  /*  group number */
+                              DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                              dma_tcp->injCntrSubgrps,
+                              0,   /*  target core for interrupts */
+                              NULL,
+                              NULL,
+                              NULL,
+                              & dma_tcp->injCounterGroup );
+
+        TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret );
+        }
+        memset(dma_tcp->inj_skbs,0,DMA_NUM_COUNTERS_PER_GROUP*sizeof(struct sk_buff *)) ;
+
+         /*  enable the counter */
+        {
+          int ret;
+          DMA_CounterSetEnableById( & dma_tcp->injCounterGroup,0) ;
+          ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup,0,0xffffffff) ;
+          TRACEN(k_t_general, "(=)(I) testdma: DMA_CounterSetValueWideOpenById ret=%d",ret) ;
+
+        }
+
+#if defined(CONFIG_WRAP_COPY_TOFROM_USER) && defined(CONFIG_BLUEGENE_DMA_MEMCPY)
+         /*  TODO: Investigate why 'dma_memcpy' needed to be initialised before 'dma_tcp counters' */
+        if( k_enable_dma_memcpy)  bgp_dma_memcpyInit(dma_tcp) ;
+#endif
+       {
+          /*  Initialize reception counter group */
+         int ret  __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Reception,
+             dma_tcp_ReceptionCounterGroup(dma_tcp),  /*  group number */
+             DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                               dma_tcp->recCntrSubgrps,
+                               k_TorusAffinityCPU,   /*  target core for interrupts */
+                               receiveCounterZeroHandler,
+                               NULL,
+                               NULL,
+                               & dma_tcp->recCounterGroup );
+         TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret );
+       }
+       memset(dma_tcp->recCntrInUse,0,DMA_NUM_COUNTERS_PER_GROUP) ;
+       memset(dma_tcp->rcv_skbs,0,DMA_NUM_COUNTERS_PER_GROUP*sizeof(struct sk_buff *)) ;
+       dma_tcp->qtyFreeRecCounters = 64 ;
+       dma_tcp->scanRecCounter = 0 ;
+       dma_tcp->framesDisposed = 0 ;
+       atomic_set(&dma_tcp->framesProposed, 0 ) ;
+      }
+#endif
+    TRACEN(k_t_init,"(<)" );
+  }
+
+// Currently there is no implementation of Kernel_CounterGroupFree, so we cannot free the hardware used by the eth-on-torus
+enum {
+  k_has_counter_group_free = 0
+};
+static void dma_tcp_ifdown(dma_tcp_t *dma_tcp)
+  {
+    TRACEN(k_t_init,"(>)" );
+    dma_tcp->is_up = 0 ;
+    dma_tcp_frames_ifdown(dma_tcp) ;
+    if( k_has_counter_group_free)
+      {
+        {
+           /*  Free reception counter group */
+          int ret  __attribute__ ((unused)) = DMA_CounterGroupFree(
+              dma_tcp_ReceptionCounterGroup(dma_tcp),  /*  group number */
+              DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                                dma_tcp->recCntrSubgrps,
+                                & dma_tcp->recCounterGroup );
+          TRACEN(k_t_init,"(=)DMA_CounterGroupFree rc=%d", ret );
+        }
+        /*  disable the injection counter */
+       {
+         DMA_CounterSetDisableById( & dma_tcp->injCounterGroup,0) ;
+       }
+       {
+        /*  Free injection counter group */
+       int ret  __attribute__ ((unused)) = DMA_CounterGroupFree(
+                             dma_tcp_InjectionCounterGroup(dma_tcp),  /*  group number */
+                             DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                             dma_tcp->injCntrSubgrps,
+                             & dma_tcp->injCounterGroup );
+
+       TRACEN(k_t_init,"(=)DMA_CounterGroupFree rc=%d", ret );
+       }
+      }
+    else
+      {
+        TRACEN(k_t_request,"(!!!) No implementation of counter group free") ;
+      }
+    TRACEN(k_t_init,"dma_tcp->tuning_prep_dcmf=%d",dma_tcp->tuning_prep_dcmf) ;
+    if( dma_tcp->tuning_prep_dcmf)
+      {
+        TRACEN(k_t_init,"Getting ready for DCMF use of torus") ;
+        memset(&dma_tcp->recMap,0,sizeof(dma_tcp->recMap)) ;
+      }
+
+    TRACEN(k_t_init,"(<)" );
+  }
+void bgp_torus_set_mtu(unsigned int mtu)
+  {
+    bgp_dma_tcp_set_mtu(&dma_tcp_state, mtu) ;
+  }
+
+int __init
+dma_tcp_module_init(void)
+{
+  int ret = 0;
+
+  BGP_Personality_t pers;
+
+  bluegene_getPersonality(&pers, sizeof(pers));
+
+  dma_tcp_init(&dma_tcp_state, &pers) ;
+
+ TRACEN(k_t_init, "(I)initDMA finished ret:%d",ret);
+  return ret;
+}
+
+static void fix_retransmit_timeout(struct sk_buff *skb)
+{
+	dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	struct sock *sk = skb->sk ;
+	if( sk)
+	  {
+      unsigned int family=sk->sk_family ;
+      struct inet_sock *inet = inet_sk(sk) ;
+      struct inet_connection_sock *icsk = inet_csk(sk) ;
+      int is_icsk = inet->is_icsk ;
+      TRACEN(k_t_detail,"skb=%p sk=%p sk_family=0x%04x is_icsk=%d",skb,sk,family,is_icsk) ;
+      if( AF_INET == family && is_icsk )
+        {
+          TRACEN(k_t_detail,"icsk_timeout-jiffies=%lu icsk_rto=%u",icsk->icsk_timeout-jiffies,icsk->icsk_rto) ;
+          if( icsk->icsk_rto < dma_tcp->tuning_min_icsk_timeout )
+            {
+              icsk->icsk_rto=dma_tcp->tuning_min_icsk_timeout ;
+            }
+        }
+	  }
+}
+
+
+int bgp_dma_tcp_send_and_free( struct sk_buff *skb )
+{
+	int rc ;
+	if( k_find_source_of_rst_flags && dma_tcp_state.tuning_diagnose_rst )
+		{
+			struct ethhdr *eth = (struct ethhdr *)skb->data;
+		        unsigned int h_proto =  eth->h_proto ;
+			if( ETH_P_IP == h_proto )
+				{
+				        struct iphdr *iph = (struct iphdr *)(eth+1) ;
+				        if(IPPROTO_TCP == iph->protocol )
+				        	{
+				        		struct tcphdr *tcph = (struct tcphdr *)(iph+1) ;
+				        		if( tcph->rst)
+				        			{
+				        				TRACEN(k_t_request,"RST on frame to [%02x:%02x:%02x]",
+				        						eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ;
+				        				show_stack(NULL,0) ; /* Stack back-chain may help explain why it was sent */
+
+				        			}
+				        	}
+
+				}
+
+		}
+	fix_retransmit_timeout(skb) ;
+	rc = bgp_dma_tcp_send_and_free_frames(skb) ;
+	return rc ;
+}
+
+/*  Test if we think a socket is affected by torus congestion. Do this by looking to see if anything is in any software transmit FIFO */
+unsigned int bgp_torus_congestion(struct sock *sk)
+  {
+    unsigned int core ;
+    unsigned int direction ;
+    struct inet_connection_sock *icskp = inet_csk(sk) ;
+    struct inet_sock *inet = inet_sk(sk);
+    unsigned int daddr=inet->daddr ;
+    dma_tcp_t *dma_tcp=&dma_tcp_state ;
+    struct sk_buff *skb = skb_peek(&sk->sk_write_queue) ;
+
+    if( dma_tcp->i_am_compute_node
+        )
+      {
+        if( NULL == skb )
+          {
+            /*  skb is NULL on this path, so the trace must not dereference it */
+            TRACEN(k_t_congestion,"sk=%p skb=%p ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d q-empty-retransmit",
+                sk, skb,
+                daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                icskp->icsk_retransmits, icskp->icsk_rto
+                ) ;
+            return 0 ;
+          }
+        if( 0 == skb->len)
+          {
+            TRACEN(k_t_general,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d ack-transmit",
+                sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+                daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                icskp->icsk_retransmits, icskp->icsk_rto
+                ) ;
+            return 0 ;
+          }
+#if defined(USE_SKB_TO_SKB)
+        {
+		unsigned int framesProposed=atomic_read(&dma_tcp->framesProposed) ;
+		unsigned int framesDisposed=dma_tcp->framesDisposed ;
+		if( framesProposed != framesDisposed)
+			{
+				TRACEN(k_t_general,
+					    "sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u propose=0x%08x disp=0x%08x\n",
+				    sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+				    daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+				    framesProposed,framesDisposed
+				    ) ;
+			      return 1 ;
+
+			}
+        }
+#endif
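+        /*  A software injection fifo is empty when its head equals its tail; */
+        /*  anything still queued in any core/direction fifo is reported as */
+        /*  torus congestion. */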
+        for( core=0; core<k_injecting_cores; core += 1)
+           {
+             for( direction=0;direction<k_injecting_directions; direction+=1)
+                {
+                  unsigned int  fifo_current_head =
+                   (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ;
+                  unsigned int  fifo_current_tail =
+                   (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ;
+                if( fifo_current_head != fifo_current_tail)
+                  {
+                    TRACEN(k_t_general,
+                		    "sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u core=%d direction=%d fifo_current_head=0x%08x fifo_current_tail=0x%08x\n",
+                        sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+                        daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                        core,direction,
+                        fifo_current_head,fifo_current_tail
+                        ) ;
+                  return 1 ;
+                  }
+              }
+           }
+      }
+
+    if( skb )  /*  skb can be NULL when we fall through on a non-compute node */
+      {
+        TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d retransmit",
+            sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+            daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+            icskp->icsk_retransmits, icskp->icsk_rto
+            ) ;
+      }
+/*     if( icskp->icsk_rto < 300) */
+/* 	    { */
+/* 		    icskp->icsk_rto = icskp->icsk_rto << 1 ; */
+/* 		    return 1 ; */
+/* 	    } */
+    return 0 ;
+  }
+
+void analyse_retransmit(struct sock *sk, struct sk_buff *skb)
+  {
+    if( skb && skb->len>0 )           /*  Need a SKB, and if len=0 then it's an ACK with no data */
+      {
+        struct inet_sock *inet = inet_sk(sk);
+        struct inet_connection_sock *icsk = inet_csk(sk);
+        unsigned int daddr=inet->daddr ;
+        unsigned int daddr_b0 = daddr >> 24 ;
+        if( daddr_b0 == 11 || daddr_b0 == 12 )  /*  BGP fabric is 11.*.*.* and 12.*.*.* , only interested in those */
+          {
+            TRACEN(k_t_congestion,"(I) sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d resending (BGP)",
+                sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+                daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,icsk->icsk_retransmits, icsk->icsk_rto) ;
+          }
+      }
+
+  }
+
+
+/*  Seem to have picked up a half-implemented feature. Dummy it. */
+DMA_CounterAppSegment_t *DMA_CounterAppSegmentArray;
+int DMA_CounterInitAppSegments(void) { return 0 ; }
+
+void dma_tcp_set_port(unsigned int port)   // Intended for configuring which quarter of the BGP DMA unit to use
+  {
+    TRACEN(k_t_request,"(><) port=0x%08x",port) ;
+    if( port > 0)
+      {
+        dma_tcp_state.configured_quarter = (port-1) & 3 ;
+      }
+  }
+void dma_tcp_open(void)  // 'ifconfig up' handler
+  {
+    BGP_Personality_t pers;
+    TRACEN(k_t_request,"(>) ifconfig up") ;
+    bluegene_getPersonality(&pers, sizeof(pers));
+    dma_tcp_ifup(&dma_tcp_state, &pers) ;
+    TRACEN(k_t_request,"(<) ifconfig up") ;
+  }
+void dma_tcp_close(void) // 'ifconfig down' handler
+  {
+    TRACEN(k_t_request,"(>) ifconfig down") ;
+    dma_tcp_ifdown(&dma_tcp_state) ;
+    TRACEN(k_t_request,"(<) ifconfig down") ;
+  }
+
+void set_siw_placement_callback(dma_addr_t (*siw_placement_callback)(struct sk_buff *skb))
+  {
+    TRACEN(k_t_init,"siw_placement_callback=%p",siw_placement_callback) ;
+#if defined(ENABLE_SIW_PLACEMENT)
+    dma_tcp_state.siw_placement_callback=siw_placement_callback ;
+#endif
+  }
+EXPORT_SYMBOL(set_siw_placement_callback) ;
+void show_personality(void) ;
+void show_sprs(void) ;
+/*  Issue a diagnostic op at the DMA layer */
+void torus_diag(int op)
+  {
+    BGP_Personality_t pers;
+    TRACES("(>)op=%d",op) ;
+
+    bluegene_getPersonality(&pers, sizeof(pers));
+    switch(op)
+    {
+    case 0:
+      show_bic_regs() ;
+      break ;
+    case 1:
+#if defined(CONFIG_BLUEGENE_TCP)
+      if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank )
+        {
+        	tasklet_schedule(&dma_tcp_slih);
+        }
+#endif
+      break ;
+    case 2:
+      if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank )
+        {
+        	dumpdmadcrs(k_t_request) ;
+        }
+      break ;
+    case 3:
+#if defined(CONFIG_BLUEGENE_TCP)
+      if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank )
+        {
+	      dumpRecFifoGroup(dma_tcp_state.recFifoGroup)  ;
+	      show_timestamps() ;
+	      bgp_dma_tcp_display_pending_slots(&dma_tcp_state,dma_tcp_state.node_count) ;
+        }
+#endif
+      break ;
+    case 4:
+/*       show_state() ; // kernel threads and their stacks */
+      break ;
+    case 5:
+/*       show_tlbs() ; // This core's current TLBs */
+/*       show_sprs() ; // Core special-purpose regs relevant to debugging */
+/*       show_personality() ; // Items from the 'personality' from microcode */
+      break ;
+    case 6:
+/* #if defined(USE_SKB_TO_SKB) */
+/*       bgp_dma_diag_reissue_rec_counters(&dma_tcp_state) ; */
+/* #endif */
+      break ;
+    case 7:
+#if defined(USE_SKB_TO_SKB)
+      dma_tcp_show_reception(&dma_tcp_state) ;
+#endif
+      break ;
+    default:
+      ;
+    }
+    TRACES("(<)") ;
+  }
+
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp.h b/drivers/net/bgp_torus/bgp_dma_tcp.h
new file mode 100644
index 0000000..d9dd5f5
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp.h
@@ -0,0 +1,1660 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_DMA_TCP_H__
+#define __BGP_DMA_TCP_H__
+#include <linux/bootmem.h>
+#include <asm/div64.h>
+#include <linux/timer.h>
+#include <linux/sysctl.h>
+#include <asm/atomic.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+extern int bgp_dma_tcp_tracemask ;
+
+/*  Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */
+/* #define COMPILED_TRACEMASK (0xffffffff-k_t_irqflow-k_t_irqflow_rcv-k_t_detail-k_t_fifocontents-k_t_toruspkt) */
+#define COMPILED_TRACEMASK (0xffffffff)
+/* #define COMPILED_TRACEMASK (k_t_error) */
+
+/* #define TORNIC_DIAGNOSE_TLB */
+#include <linux/KernelFxLog.h>
+/*  'XTRACEN' would be a dummied-out trace statement */
+#define XTRACEN(i,x...)
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+#define TRACING(i) (bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)))
+#define TRACE(x...)    KernelFxLog(bgp_dma_tcp_tracemask & k_t_general,x)
+#define TRACE1(x...)   KernelFxLog(bgp_dma_tcp_tracemask & k_t_lowvol,x)
+#define TRACE2(x...)   KernelFxLog(bgp_dma_tcp_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)),x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#else
+#define TRACING(x) 0
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+#if defined(CONFIG_BLUEGENE_TCP)
+#define ENABLE_FRAMES
+#endif
+
+#define AUDIT_FRAME_HEADER
+
+#define KEEP_TCP_FLAG_STATS
+
+#define BARRIER_WITH_IOCTL
+/* #define EXERCISE_WITH_IOCTL */
+
+void bgp_dma_diag_report_transmission_queue(int __user * report) ;
+
+#if defined(BARRIER_WITH_IOCTL)
+void dma_tcp_transfer_activate_sync(int sendBytes) ;
+int dma_tcp_transfer_wait_sync(int demandCount) ;
+void dma_tcp_transfer_clearcount(void) ;
+#endif
+
+#if defined(EXERCISE_WITH_IOCTL)
+void dma_tcp_transfer_activate(int sendBytes) ;
+void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg) ;
+void dma_tcp_transfer_activate_minicube(int sendBytes) ;
+int dma_tcp_transfer_wait(int demandCount) ;
+#endif
+
+/*  Whether we want a 'watchdog' on torus arrivals */
+#define HAS_MISSED_INTERRUPT_TIMER
+
+/*  Adaptive routing controls. */
+/*  USE_ADAPTIVE_ROUTING builds a runtime capable of it; lower the value in /sys/module/bgp_torus/parameters/bgp_dma_adaptive_frame_limit to get frames sent that way */
+/*  INITIAL_ADAPTIVE_ROUTING sets things that way at boot (and may set params up so that attempted deterministic routing isn't actually deterministic) */
+#if defined(CONFIG_BGP_TORUS_ADAPTIVE_ROUTING)
+#define USE_ADAPTIVE_ROUTING
+#define RESEQUENCE_ARRIVALS
+#define INITIAL_ADAPTIVE_ROUTING
+#endif
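+/*  Usage sketch (inferred from the comment above, not verified against the */
+/*  injection path): frames larger than the limit appear to be the ones sent */
+/*  adaptively, so e.g. */
+/*    echo 2048 > /sys/module/bgp_torus/parameters/bgp_dma_adaptive_frame_limit */
+/*  would lower the threshold and route any frame over 2048 bytes adaptively. */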
+
+/*  Support for skbuff-to-skbuff DMA */
+#define USE_SKB_TO_SKB
+
+/*  What to use the 'dest-key' in the linkhdr for. Timestamping looks good ... */
+/* #define ENABLE_LATENCY_TRACKING */
+/* #define TRACK_SEQUENCE */
+/* #define ENABLE_PROGRESS_TRACKING */
+
+#define TORUS_RECEIVE_WITH_SLIH
+
+/* #define TORUS_WITH_SIGNATURES */
+
+/* Whether to support a soft-Iwarp data placement callback */
+#define ENABLE_SIW_PLACEMENT
+
+/*  Diagnostic options */
+enum {
+	k_allow_interrupts_while_injecting = 0 , /*  Select this for profiling injection */
+	k_async_free = 1 ,  /*  Set this to allow timer-based freeing of skbuffs where the DMA has completed */
+	k_dumpmem_diagnostic = 0 ,
+	k_scattergather_diagnostic = 0 ,
+	k_verify_target = 0 ,  /*  Whether to firewall-check that the target is reachable */
+	k_detail_stats = 0 , /*  Whether to collect detailed statistics */
+	k_counter_flow_control = 1 , /*  Whether to flow-control by limiting the number of reception counters allocated to a single source */
+	k_force_eager_flow = 0 , /* Whether to start up with everything running 'eager' protocol (no 'rendezvous') */
+	k_abbreviate_headlen = 1 , /* Whether to abbreviate the DMA transfer of 'head' in respect of the FIFO transfer */
+	/* TODO: after testing that it works (on busy machines) , we should always take the 'deferral' path */
+	k_allow_defer_skb_for_counter = 1, /* Whether to allow deferring allocation of a 'full-size' skb until a reception counter is available */
+	k_verify_ctlen = 1 , /* Whether to check that the length in the IP header matches the skbuff structure */
+	k_configurable_virtual_channel = 1  /* Whether to allow runtime configuration of the virtual channel to use */
+};
+
+
+
+enum {
+  numInjCounters = 1 ,
+  recFifoId = 0
+//  k_InjectionFifoGroup = 0 ,
+//  k_ReceptionFifoGroup = 0 ,
+//  k_InjectionCounterGroup = 0 ,
+//  k_ReceptionCounterGroup = 0 ,
+};
+
+/*  We handle fragmented skbuffs if they are presented. The receive side doesn't need to know; */
+/*  the send side injects additional 'direct put' descriptors as needed. */
+/*  The bytes on the wire might be split slightly differently between cells, but on the receive side this */
+/*  is all handled by hardware. */
+enum {
+	k_support_scattergather = 1  /*  Whether we support a 'scattergather' skbuff */
+};
+
+/*  At one time, we ran per-core injection, to try to minimise the locking requirement. This is now changed to */
+/*  per-destination injection, to try to minimise out-of-order delivery. */
+enum {
+ k_injecting_cores = 4 ,
+ k_skb_controlling_directions = 7 ,  /*  'directions' where we want to free skbuffs when sent */
+#if defined(USE_SKB_TO_SKB)
+ k_injecting_directions = 8 ,  /*  6 real directions, a 'taxi' for single packet messages, and a 'propose/accept stream' */
+#else
+ k_injecting_directions = 7 ,  /*  6 real directions, a 'taxi' for single packet messages */
+#endif
+};
+
+/*  Following section for 'packets' style */
+enum {
+  k_torus_skb_alignment = 16 ,
+  k_torus_link_payload_size = 240
+};
+
+enum {
+  k_idma_descriptor_size = 32 ,
+  k_injection_packet_size = 240
+} ;
+
+enum {
+/*	k_concurrent_receives = 32  */ /*  Number of frames-in-flight we can handle from a source (in respect of adaptive routing) */
+ 	k_concurrent_receives = 128 /* Number of frames-in-flight we can handle from a source (in respect of adaptive routing) */
+};
+
+static inline void * local_permanent_alloc(unsigned int size)
+  {
+    void *result =  kmalloc(size, GFP_KERNEL) ;
+    TRACEN(k_t_general,"size=0x%08x result=%p",size,result) ;
+    return result ;
+  }
+
+/*  Using these when we are statically allocating buffers, or using alloc_bootmem_low */
+enum {
+  k_idma_descriptor_count = 16384,  /*  Design choice */
+  k_injection_packet_count = 16384  /*  Matches IDMA descriptor count, to keep tagging simple */
+   /*   k_injection_packet_count = (1<<22)/k_injection_packet_size // 4 megabytes of 'runway' */
+};
+
+enum {
+  k_memcpy_idma_descriptor_count = 64,  /*  Design choice */
+};
+
+typedef struct {
+  char buffer[k_idma_descriptor_size*k_memcpy_idma_descriptor_count] ;
+} memcpy_packet_injection_memoryfifo_t __attribute__((aligned(16)));
+
+typedef struct {
+  char buffer[k_idma_descriptor_size*k_idma_descriptor_count] ;
+} packet_injection_memoryfifo_t __attribute__((aligned(16)));
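+/*  Size arithmetic for the two FIFO types above: the main injection FIFO is */
+/*  k_idma_descriptor_size*k_idma_descriptor_count = 32*16384 bytes = 512kB, */
+/*  one per (core,direction) pair; the memcpy FIFO is 32*64 bytes = 2kB per core. */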
+
+typedef struct {
+  int tailx[k_injection_packet_count] ;
+} packet_injection_tag_t ;
+
+typedef struct {
+	struct sk_buff * skb_array[k_injection_packet_count] ;
+} packet_skb_array_t ;
+
+static inline packet_injection_memoryfifo_t * allocate_packet_injection_memoryfifo(unsigned int core, unsigned int direction)
+  {
+    packet_injection_memoryfifo_t * rc = local_permanent_alloc(sizeof(packet_injection_memoryfifo_t)) ;
+      BUG_ON(rc == NULL) ;
+    XTRACEN(k_t_init,"allocate_packet_injection_memoryfifo core=%d direction=%d rc=%p",
+        core, direction, rc ) ;
+    BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ;  /*  Need 32-byte alignment */
+    return rc ;
+  }
+
+static inline memcpy_packet_injection_memoryfifo_t * allocate_memcpy_packet_injection_memoryfifo(unsigned int core)
+  {
+	  memcpy_packet_injection_memoryfifo_t * rc = local_permanent_alloc(sizeof(memcpy_packet_injection_memoryfifo_t)) ;
+      BUG_ON(rc == NULL) ;
+    TRACEN(k_t_general,"allocate_memcpy_packet_injection_memoryfifo core=%d rc=%p",
+        core, rc ) ;
+    BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ;  /*  Need 32-byte alignment */
+    return rc ;
+  }
+
+static inline packet_injection_tag_t * allocate_packet_injection_tag(unsigned int core, unsigned int direction)
+  {
+    packet_injection_tag_t * rc = kmalloc(sizeof(packet_injection_tag_t),GFP_KERNEL) ;
+    BUG_ON(rc == NULL) ;
+    XTRACEN(k_t_init,"allocate_packet_injection_tag core=%d direction=%d rc=%p",
+        core, direction, rc ) ;
+    BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ;  /*  Need 32-byte alignment */
+    return rc ;
+  }
+
+static inline packet_skb_array_t * allocate_packet_skb_array(unsigned int core, unsigned int direction)
+  {
+	  packet_skb_array_t * rc = kmalloc(sizeof(packet_skb_array_t),GFP_KERNEL) ;
+    BUG_ON(rc == NULL) ;
+    XTRACEN(k_t_init,"allocate_packet_skb_array core=%d direction=%d rc=%p",
+        core, direction, rc ) ;
+    memset(rc,0,sizeof(packet_skb_array_t)) ;
+    return rc ;
+  }
+
+enum {
+  k_idma_frame_count = 16384  /*  Design choice */
+};
+
+typedef struct {
+#if defined(ENABLE_PACKETS) || defined(ENABLE_FRAMES)
+  packet_injection_memoryfifo_t * idma_fifo ;
+  packet_injection_tag_t * idma_tag ;
+  packet_skb_array_t * idma_skb_array ;
+  unsigned int fifo_head_index ;
+  unsigned int fifo_tail_index ;
+  unsigned int buffer_head_index ;
+  unsigned int buffer_tail_index ;
+  unsigned int fifo_initial_head ;
+  unsigned int packets_injected_count ;
+  unsigned int injection_vacant ;
+  unsigned int injection_high_watermark ;
+#endif
+#if defined(ENABLE_FRAMES)
+  struct sk_buff_head frame_queue ;
+#endif
+} idma_direction_t ;
+
+static inline void allocate_idma_direction(idma_direction_t * idma_direction,unsigned int core, unsigned int direction)
+  {
+#if defined(ENABLE_PACKETS) || defined(ENABLE_FRAMES)
+    idma_direction->idma_fifo = allocate_packet_injection_memoryfifo(core, direction) ;
+    idma_direction->idma_tag = allocate_packet_injection_tag(core,direction) ;
+    idma_direction->idma_skb_array = allocate_packet_skb_array(core,direction) ;
+    idma_direction->fifo_head_index = 0 ;
+    idma_direction->fifo_tail_index = 0 ;
+    idma_direction->buffer_head_index = 0 ;
+    idma_direction->buffer_tail_index = 0 ;
+    idma_direction->injection_vacant = 0 ;
+    idma_direction->injection_high_watermark = 0 ;
+    idma_direction->packets_injected_count = 0 ;
+#endif
+#if defined(ENABLE_FRAMES)
+    skb_queue_head_init(&idma_direction->frame_queue)  ;
+#endif
+  }
+
+typedef struct {
+  idma_direction_t idma_direction[k_injecting_directions] ;
+  memcpy_packet_injection_memoryfifo_t *memcpy_packet_fifo ;
+  unsigned int memcpy_packet_fifo_head_index ;
+  unsigned int memcpy_packet_fifo_tail_index ;
+  unsigned int memcpy_fifo_initial_head ;
+} idma_core_t ;
+
+static inline void allocate_idma_core(idma_core_t * idma_core,unsigned int core)
+  {
+    int direction ;
+    for( direction=0 ; direction<k_injecting_directions;direction+=1 )
+      {
+        allocate_idma_direction(idma_core->idma_direction+direction, core, direction) ;
+      }
+    idma_core->memcpy_packet_fifo=allocate_memcpy_packet_injection_memoryfifo(core) ;
+  }
+
+typedef struct {
+  idma_core_t idma_core[k_injecting_cores] ;
+} idma_t ;
+
+static inline void allocate_idma(idma_t * idma)
+  {
+    int core ;
+    for( core=0 ; core<k_injecting_cores;core+=1 )
+      {
+        allocate_idma_core(idma->idma_core+core, core) ;
+      }
+  }
+
+/*  'per-slot' structures for demultiplexing received torus messages. */
+/*  we are no longer running 1 slot per possibly-sending core, i.e. 4 per node in the partition; now running 1 per node */
+/*  Get/set methods because for 'large' machines we might need bigger tables than can be kmalloced in one go */
+#if defined(ENABLE_LATENCY_TRACKING)
+
+typedef struct {
+  unsigned long long s1 ;
+  unsigned long long sx ;
+  unsigned long long sxx ;
+  unsigned int xmin ;
+  unsigned int xmax ;
+} rcv_statistic_t ;
+
+static void rcv_statistic_clear(rcv_statistic_t *t)
+  {
+    t->s1 = 0;
+    t->sx = 0;
+    t->sxx = 0 ;
+    t->xmin = 0xffffffff ;
+    t->xmax = 0 ;
+  }
+static void rcv_statistic_observe(rcv_statistic_t *t, unsigned int x)
+  {
+    unsigned long long ullx = x ;
+    unsigned long long ullxx = ullx*ullx ;
+    t->s1 += 1 ;
+    t->sx += x ;
+    t->sxx += ullxx ;
+    if( x<t->xmin ) t->xmin=x ;
+    if( x>t->xmax ) t->xmax=x ;
+  }
+static unsigned int rcv_statistic_mean(rcv_statistic_t *t)
+  {
+    unsigned long long s1=t->s1 ;
+    unsigned long long sx=t->sx ;
+    unsigned long long rc = sx ;
+    do_div(rc,(unsigned int)s1) ;
+    TRACEN(k_t_detail,"sx=0x%08x%08x s1=0x%08x%08x mean=%u",
+        (unsigned int)(sx>>32),(unsigned int)sx,
+        (unsigned int)(s1>>32),(unsigned int)s1,(unsigned int)rc) ;
+    return (unsigned int)rc ;
+  }
+static unsigned int rcv_statistic_variance(rcv_statistic_t *t, unsigned int m)
+  {
+    unsigned long long s1=t->s1 ;
+    unsigned long long sx=t->sx ;
+    unsigned long long sxx=t->sxx ;
+    unsigned long long mm=m ;
+    unsigned long long vv =  sxx - s1*mm*mm ;  /*  sum(x^2) - n*mean^2, so vv/n below is the variance */
+    unsigned long long rc=vv ;
+    do_div(rc,(unsigned int)s1) ;
+    TRACEN(k_t_detail,"sxx=0x%08x%08x sx=0x%08x%08x s1=0x%08x%08x mm=0x%08x%08x vv=0x%08x%08x variance=%u",
+        (unsigned int)(sxx>>32),(unsigned int)sxx,
+        (unsigned int)(sx>>32),(unsigned int)sx,
+        (unsigned int)(s1>>32),(unsigned int)s1,
+        (unsigned int)(mm>>32),(unsigned int)mm,
+        (unsigned int)(vv>>32),(unsigned int)vv,
+        (unsigned int)rc) ;
+    return (unsigned int)rc ;
+  }
+#endif
+/*  TODO: Can this be condensed ? Should be a 'char * payload' and a 'char * payload_alert', down to 8 bytes */
+/*   or could even be a 28-bit address (since we know 16-byte alignment) and a 4-bit count so we treat things */
+/*   in more detail every 16 packets or when the frame is done if sooner */
+/*  TODO: also: maybe the injector should flag the last packet of a frame with a different function ? */
+typedef struct  {
+  unsigned char * payload ;
+  unsigned char * payload_alert ;
+  unsigned int expect ;
+  int lastcell ;
+  unsigned int proposals_active ;
+  struct sk_buff_head proposals_pending_flow ;
+#if defined(USE_ADAPTIVE_ROUTING)
+  struct sk_buff * skb_per_conn[k_concurrent_receives] ;
+#if defined(RESEQUENCE_ARRIVALS)
+  struct sk_buff * skb_pending_resequence[k_concurrent_receives] ;
+  unsigned int conn_id_pending_delivery ;
+#endif
+#endif
+#if defined(ENABLE_LATENCY_TRACKING)
+  rcv_statistic_t latency ;
+  unsigned int basetime ;
+#endif
+#if defined(ENABLE_PROGRESS_TRACKING)
+  unsigned long long timestamp ;
+#endif
+} rcv_per_slot_t ;
+
+typedef struct {
+  unsigned int  partner_ip_address ;
+  unsigned int  partner_xyz ;
+} learned_address_entry ;
+
+typedef struct {
+  rcv_per_slot_t * rcv_per_slot_vector ;
+  struct sk_buff ** skb_per_slot_vector ;
+} rcv_t ;
+
+static inline char * get_rcv_payload(rcv_t *rcv, unsigned int slot_index)
+  {
+    return rcv->rcv_per_slot_vector[slot_index].payload ;
+  }
+
+static inline void set_rcv_payload(rcv_t *rcv, unsigned int slot_index, char * payload )
+  {
+    rcv->rcv_per_slot_vector[slot_index].payload = payload ;
+  }
+
+static inline unsigned int get_proposals_active(rcv_t *rcv, unsigned int slot_index)
+  {
+    return rcv->rcv_per_slot_vector[slot_index].proposals_active ;
+  }
+
+static inline void set_proposals_active(rcv_t *rcv, unsigned int slot_index, unsigned int proposals_active )
+  {
+    rcv->rcv_per_slot_vector[slot_index].proposals_active = proposals_active ;
+  }
+
+static inline char * get_rcv_payload_alert(rcv_t *rcv, unsigned int slot_index)
+  {
+    return rcv->rcv_per_slot_vector[slot_index].payload_alert ;
+  }
+
+static inline void set_rcv_payload_alert(rcv_t *rcv, unsigned int slot_index, char * payload_alert )
+  {
+    rcv->rcv_per_slot_vector[slot_index].payload_alert = payload_alert ;
+  }
+
+static inline unsigned int get_rcv_expect(rcv_t *rcv, unsigned int slot_index)
+  {
+    return rcv->rcv_per_slot_vector[slot_index].expect ;
+  }
+
+static inline void set_rcv_expect(rcv_t *rcv, unsigned int slot_index, unsigned int expect)
+  {
+    rcv->rcv_per_slot_vector[slot_index].expect = expect ;
+  }
+
+static inline int get_rcv_lastcell(rcv_t *rcv, unsigned int slot_index)
+  {
+    return rcv->rcv_per_slot_vector[slot_index].lastcell ;
+  }
+
+static inline void set_rcv_lastcell(rcv_t *rcv, unsigned int slot_index, int lastcell)
+  {
+    rcv->rcv_per_slot_vector[slot_index].lastcell = lastcell ;
+  }
+
+static inline struct sk_buff * get_rcv_skb(rcv_t *rcv, unsigned int slot_index)
+  {
+    return rcv->skb_per_slot_vector[slot_index] ;
+  }
+
+static inline void set_rcv_skb(rcv_t *rcv, unsigned int slot_index, struct sk_buff * skb)
+  {
+    rcv->skb_per_slot_vector[slot_index] = skb ;
+  }
+
+static inline void init_pending_flow(rcv_t *rcv, unsigned int slot_index)
+{
+	skb_queue_head_init(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ;
+}
+
+static inline void enq_pending_flow(rcv_t *rcv, unsigned int slot_index, struct sk_buff * skb)
+{
+	skb_queue_tail(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow,skb) ;
+}
+
+static inline struct sk_buff * deq_pending_flow(rcv_t *rcv, unsigned int slot_index)
+{
+	return skb_dequeue(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ;
+}
+
+static inline unsigned int count_pending_flow(rcv_t *rcv, unsigned int slot_index)
+{
+	return skb_queue_len(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ;
+}
+
+#if defined(USE_ADAPTIVE_ROUTING)
+static inline struct sk_buff * get_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id)
+{
+	return rcv->rcv_per_slot_vector[slot_index].skb_per_conn[conn_id & (k_concurrent_receives-1)] ;
+}
+
+static void set_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb) __attribute__((unused)) ;
+static void set_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb)
+{
+	rcv->rcv_per_slot_vector[slot_index].skb_per_conn[conn_id & (k_concurrent_receives-1)] = skb ;
+}
+#if defined(RESEQUENCE_ARRIVALS)
+  static inline struct sk_buff * get_rcv_skb_pending_resequence(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id)
+  {
+	  return rcv->rcv_per_slot_vector[slot_index].skb_pending_resequence[conn_id & (k_concurrent_receives-1)] ;
+  }
+  static inline void set_rcv_skb_pending_resequence(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb)
+  {
+	  rcv->rcv_per_slot_vector[slot_index].skb_pending_resequence[conn_id & (k_concurrent_receives-1)] = skb;
+  }
+  static inline int get_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index)
+  {
+	  return rcv->rcv_per_slot_vector[slot_index].conn_id_pending_delivery ;
+  }
+  static void set_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id) __attribute__((unused)) ;
+  static void set_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id)
+  {
+	  rcv->rcv_per_slot_vector[slot_index].conn_id_pending_delivery=conn_id ;
+  }
+
+#endif
+
+#endif
+
+static inline unsigned long long get_timestamp(rcv_t *rcv, unsigned int slot_index)
+  {
+#if defined(ENABLE_PROGRESS_TRACKING)
+    return rcv->rcv_per_slot_vector[slot_index].timestamp ;
+#else
+    return 0 ;
+#endif
+  }
+
+static inline void set_timestamp(rcv_t *rcv, unsigned int slot_index, unsigned long long timestamp)
+  {
+#if defined(ENABLE_PROGRESS_TRACKING)
+    rcv->rcv_per_slot_vector[slot_index].timestamp=timestamp ;
+#endif
+  }
+
+enum {
+	k_slots_per_node = 1 ,  /*  down from 4 ... */
+	k_connids_per_node = 128  /*  Number of conn-ids we track per node on the sending side */
+};
+static inline void allocate_rcv(rcv_t *rcv, unsigned int node_count)
+  {
+    rcv->rcv_per_slot_vector = kmalloc(k_slots_per_node*node_count*sizeof(rcv_per_slot_t), GFP_KERNEL) ;
+    BUG_ON(NULL == rcv->rcv_per_slot_vector) ;
+    memset(rcv->rcv_per_slot_vector,0,k_slots_per_node*node_count*sizeof(rcv_per_slot_t)) ;
+    rcv->skb_per_slot_vector = kmalloc(k_slots_per_node*node_count*sizeof(struct sk_buff *), GFP_KERNEL) ;
+    BUG_ON(NULL == rcv->skb_per_slot_vector) ;
+    memset(rcv->skb_per_slot_vector,0,k_slots_per_node*node_count*sizeof(struct sk_buff *)) ;  /*  zero the skb pointers, as for rcv_per_slot_vector above */
+    {
+	    unsigned int slot ;
+	    for(slot=0;slot<node_count;slot+=1)
+		    {
+			    init_pending_flow(rcv,slot) ;
+		    }
+    }
+  }
+
+#if defined(USE_ADAPTIVE_ROUTING)
+
+extern ulong bgp_dma_adaptive_frame_limit ;
+
+typedef struct {
+	atomic_t * conn_id ;
+#if defined(USE_SKB_TO_SKB)
+	struct sk_buff **skb ;
+#endif
+} tx_t ;
+
+static inline void init_tx_conn_id(tx_t *tx, unsigned int slot_index)
+{
+	atomic_set(tx->conn_id+slot_index,0xffffffff) ;
+}
+
+static inline void allocate_tx(tx_t *tx, unsigned int node_count)
+  {
+    tx->conn_id = kmalloc(k_slots_per_node*node_count*sizeof(atomic_t), GFP_KERNEL) ;
+    BUG_ON(NULL == tx->conn_id) ;
+    {
+	    int x ;
+	    for(x=0;x<node_count;x+=1)
+		    {
+			    init_tx_conn_id(tx,x) ;
+		    }
+    }
+#if defined(USE_SKB_TO_SKB)
+    tx->skb = kmalloc(k_connids_per_node*node_count*sizeof(struct sk_buff *),GFP_KERNEL) ;
+    BUG_ON(NULL == tx->skb) ;
+    memset(tx->skb,0,k_connids_per_node*node_count*sizeof(struct sk_buff *)) ;
+#endif
+  }
+
+static inline unsigned int take_tx_conn_id(tx_t *tx, unsigned int slot_index)
+{
+	unsigned int rc= atomic_inc_return(tx->conn_id+slot_index) ;
+	TRACEN(k_t_general,"slot_index=0x%08x conn_id=0x%08x",slot_index,rc) ;
+	return rc ;
+}
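+/*  Note on the 0xffffffff initialisation in init_tx_conn_id: the first */
+/*  atomic_inc_return for a slot wraps to 0, so conn_ids count 0,1,2,... and */
+/*  are masked by (k_connids_per_node-1) wherever they index the skb table. */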
+#if defined(USE_SKB_TO_SKB)
+static inline struct sk_buff * get_tx_skb(tx_t *tx, unsigned int slot_index, unsigned int conn_id)
+{
+	return tx->skb[slot_index*k_connids_per_node+(conn_id & (k_connids_per_node-1))] ;
+}
+static inline void set_tx_skb(tx_t *tx, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb)
+{
+	tx->skb[slot_index*k_connids_per_node+(conn_id & (k_connids_per_node-1))] = skb ;
+}
+
+#endif
+
+#endif
+
+/*  End of 'packets' style section */
+enum {
+  k_desired_reception_memory_fifo_size =
+#if defined(CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT)
+    1 << (CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT)
+#else
+    1 << 22   /*  Try 4MB as a static region, if not set externally */
+/*     1 << 20  // Try 1MB as a static region, if not set externally */
+#endif
+} ;
+enum {
+  k_metadata_injection_memory_fifo_size = 4096 ,
+  k_bulk_injection_memory_fifo_size = 4096
+};
+
+typedef struct {
+  char buffer[k_metadata_injection_memory_fifo_size] ;
+} metadata_injection_memoryfifo_t ;
+
+typedef struct {
+  char buffer[k_bulk_injection_memory_fifo_size] ;
+} bulk_injection_memoryfifo_t ;
+
+
+#if defined(BARRIER_WITH_IOCTL)
+enum {
+	k_diag_target_data_size = 1<<20 ,  /*  Aim up to 1MB ... */
+	k_diag_packet_count = k_diag_target_data_size/k_injection_packet_size ,  /*  Rounding down for packets ... */
+};
+typedef struct {
+	char buffer[k_diag_target_data_size] ;
+} diag_block_buffer_t ;
+
+static inline diag_block_buffer_t * allocate_diag_block_buffer(void)
+{
+	diag_block_buffer_t * result = kmalloc(k_diag_target_data_size,GFP_KERNEL) ;
+	BUG_ON(NULL == result) ;
+	return result ;
+}
+
+static inline unsigned int * allocate_shuffle_vector(unsigned int xe, unsigned int ye, unsigned int ze)
+{
+	unsigned int * result = kmalloc(xe*ye*ze*sizeof(unsigned int),GFP_KERNEL) ;
+	BUG_ON(NULL == result) ;
+	return result ;
+}
+#endif
+
+
+enum {
+  k_Dimensionality = 3
+};
+
+typedef struct {
+  unsigned char coordinate[k_Dimensionality] ;
+} torusLocation_t ;
+
+typedef enum {
+	k_send_propose_rpc ,
+	k_act_propose_rpc ,
+	k_send_accept_rpc ,
+	k_act_accept_rpc ,
+
+	k_defer_accept_rpc_counters ,
+	k_defer_accept_rpc_nodeflow ,
+	k_send_eager ,
+	k_receive_eager ,
+
+	k_no_reception_counter ,
+	k_parked ,
+	k_scattergather ,
+	k_receive_incomplete ,
+
+	k_headlength_zero ,
+	k_fraglength_zero ,
+	k_accept_audit_fail ,
+	k_receive_audit_fail ,
+
+	k_counted_length_mismatch ,
+	k_reordered ,
+	k_queue_filled_propose_fifo ,
+
+	k_siw_placement_hit ,
+	k_siw_placement_miss ,
+
+	k_flow_counters
+} flowpoint_e ;
+
+#if defined(CONFIG_BGP_STATISTICS)
+extern int reception_fifo_histogram[33] ;
+extern int reception_hi_watermark ;
+extern int rtt_histogram[33] ;
+extern int transit_histogram[33] ;
+#endif
+
+enum {
+	k_pending_rcv_skb_classes = 6
+};
+typedef struct {
+	struct sk_buff_head pending_rcv_skbs ; /* List of sk_buffs awaiting a reception counter */
+	unsigned int outstanding_counters ; /* Number of counters awaiting completion in this direction */
+} bgp_dma_balancer_direction ;
+typedef struct {
+	bgp_dma_balancer_direction b[k_pending_rcv_skb_classes] ;
+} bgp_dma_balancer ;
+typedef struct {
+  torusLocation_t location ;
+  torusLocation_t extent ;
+   /*  Number of bits required to represent a node in each torus dimension */
+  unsigned int xbits ;
+  unsigned int ybits ;
+  unsigned int zbits ;
+  /* Which quarter of the DMA unit we should use */
+  unsigned int active_quarter ; /* 0 .. 3 */
+  unsigned int is_up ; // Whether the interface is 'up'
+
+    DMA_RecFifoGroup_t * recFifoGroup;
+     rcv_t rcvdemux ;  /*  Reception demultiplex */
+#if defined(USE_ADAPTIVE_ROUTING)
+     tx_t tx_mux ;  /*  Transmission multiplexer (conn_ids by slot) */
+#endif
+     unsigned int node_count ;  /*  Total number of nodes in the block */
+     unsigned int node_slot_mask ;  /*  ((node_count << 2)-1) , for bit-masking to firewall check received data */
+#ifdef ENABLE_PACKETS
+    DMA_InjFifoGroup_t   injFifoGroupPackets;
+    int injFifoPacketsIds[ k_injecting_cores*k_injecting_directions ];
+    int proto_issue_packets ;
+
+     /*  End of packets-style interface */
+#endif
+    idma_t idma ;  /*  Injection DMA buffering */
+#ifdef ENABLE_PACKETS
+    unsigned short int injFifoPacketsPri[ k_injecting_cores*k_injecting_directions ] ;
+    unsigned short int injFifoPacketsLoc[ k_injecting_cores*k_injecting_directions ] ;
+    unsigned char      injFifoPacketsMap[ k_injecting_cores*k_injecting_directions ] ;
+#endif
+    struct sk_buff_head inj_queue[k_injecting_directions] ;   /* Lists of skb's queued because DMA buffers have no space */
+    unsigned int packets_received_count ;
+    struct timer_list runway_check_timer ;
+    struct timer_list transmission_free_skb_timer ;
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+    struct timer_list torus_missed_interrupt_timer ;
+#endif
+#ifdef ENABLE_FRAMES
+    DMA_InjFifoGroup_t   injFifoGroupFrames;
+    int injFifoFramesIds[ k_injecting_cores*k_injecting_directions ];
+    int proto_issue_frames_single ;
+#if defined(USE_ADAPTIVE_ROUTING)
+    int proto_issue_frames_adaptive ;
+#endif
+#if defined(USE_SKB_TO_SKB)
+    int proto_transfer_propose ;
+    int eager_limit ;  /*  frames larger than this to be sent with skb-to-skb DMA */
+    int flow_counter[k_flow_counters] ;
+#endif
+#if defined(BARRIER_WITH_IOCTL)
+    int proto_issue_diag_sync ;
+    diag_block_buffer_t * diag_block_buffer ;
+    unsigned int * shuffle_vector ;
+    unsigned int shuffle_seed ;
+    int prev_tbl ;
+    unsigned int timing_histogram_buckets[33] ;
+#endif
+    unsigned short int injFifoFramesPri[ k_injecting_cores*k_injecting_directions ] ;
+    unsigned short int injFifoFramesLoc[ k_injecting_cores*k_injecting_directions ] ;
+    unsigned char      injFifoFramesMap[ k_injecting_cores*k_injecting_directions ] ;
+#endif
+
+    DMA_CounterGroup_t   injCounterGroup;
+    DMA_CounterGroup_t   recCounterGroup;
+
+    void * receptionfifo ;
+    unsigned int receptionfifoSize ;
+
+    unsigned int mtu ;
+    unsigned int max_packets_per_frame ;
+
+    DMA_RecFifoMap_t recMap;   /*  rec fifo map structure */
+
+
+
+#if defined(USE_SKB_TO_SKB)
+    int injCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ;
+    int recCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ;
+    char recCntrInUse [ DMA_NUM_COUNTERS_PER_GROUP ] ;
+    int qtyFreeRecCounters ;
+    int scanRecCounter ;
+    struct sk_buff * inj_skbs[DMA_NUM_COUNTERS_PER_GROUP] ;
+    struct sk_buff * rcv_skbs[DMA_NUM_COUNTERS_PER_GROUP] ;
+    unsigned int slot_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ;
+    unsigned char conn_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ;
+    int rcv_timestamp[DMA_NUM_COUNTERS_PER_GROUP] ;
+    int rcv_checked_time ;
+    bgp_dma_balancer balancer ;
+    atomic_t framesProposed ;
+    unsigned int framesDisposed ;
+#endif
+#if defined(ENABLE_SIW_PLACEMENT)
+    dma_addr_t (*siw_placement_callback)(struct sk_buff *skb) ;
+#endif
+
+    unsigned short int memcpyInjFifoFramesPri[ k_injecting_cores ] ;
+    unsigned short int memcpyInjFifoFramesLoc[ k_injecting_cores ] ;
+    unsigned char      memcpyInjFifoFramesMap[ k_injecting_cores ] ;
+    DMA_InjFifoGroup_t   memcpyInjFifoGroupFrames;
+    int memcpyInjFifoFramesIds[ k_injecting_cores ];
+    DMA_CounterGroup_t   memcpyRecCounterGroup;
+    int memcpyRecCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ;
+
+    int proto_diagnose ; /* 'diagnose' frame to software reception FIFO */
+
+    unsigned int SW_Arg ;  /*  'Software Arg', we send our {x,y,z} */
+    unsigned int src_key ;  /*  'source key', we send rank */
+
+
+    spinlock_t dirInjectionLock[k_injecting_cores*k_injecting_directions] ;  /*  serialise access to injection FIFOs */
+
+    void * previousActor ;  /*  FIFO address of previous Actor, for detecting replays */
+
+
+     /*  sysctl entries */
+    struct ctl_table_header * sysctl_table_header ;
+/* Statistics */
+
+    struct net_device_stats * device_stats ;
+    unsigned int count_no_skbuff ;
+    unsigned int tx_by_core[4] ;
+    unsigned int tx_in_use_count[k_injecting_directions+1] ;
+#if defined(KEEP_TCP_FLAG_STATS)
+    unsigned int tcp_received_flag_count[8] ;
+#endif
+/*  Tuning parameters */
+    int tuning_num_packets ;  /*  = 1 , number of packets to process per poll call */
+    int tuning_num_empty_passes ;  /*  = 512 , number of times to spin before returning */
+    int tuning_non_empty_poll_delay ;  /*  = 850 , number of cycles to spin between looks at the FIFO */
+    int tuning_poll_after_enabling ;  /*  = 1 , whether to poll again after enabling for interrupts */
+    int tuning_run_handler_on_hwi ;  /*  = 1 , whether to run the handler on FIFO hardware interrupts (as well as rDMA ones) */
+    int tuning_clearthresh_slih ;  /*  = 1 , whether to clear the 'threshold crossed' bit in the slih */
+    int tuning_clearthresh_flih ;  /*  = 1 , whether to clear the 'threshold crossed' bit in the flih */
+    int tuning_disable_in_dcr ;  /*  = 1, whether to toggle the DCR interrupt enable/disable */
+    int tuning_injection_hashmask ;  /*  = 3, whether to mask down the number of injection FIFOs in use per direction */
+
+    int tuning_recfifo_threshold ;  /*  for moving to/from DCR */
+    int tuning_dcr_c8b ;  /*  for moving to/from DCR */
+    int tuning_enable_hwfifo ;  /*  For registering/unregistering 'hardware FIFO' interrupts */
+
+    int tuning_exploit_reversepropose ;  /*  Whether to try the 'reverse propose' protocol */
+    int tuning_counters_per_source ;  /*  How many reception counters to commit per source node */
+    int tuning_defer_skb_until_counter ; /* Whether to defer sk_buff allocation until a reception counter is available */
+    int tuning_deliver_eagerly ; /* Whether to skip the 'resequence arrivals' step */
+    int tuning_diagnose_rst ; /* Whether to cut trace records when being asked to send a TCP segment with a 'rst' */
+
+    int tuning_select_fifo_algorithm ; /* Which FIFO selection algorithm to use (head-of-line block minimisation) */
+
+    int tuning_min_icsk_timeout ;  /*  What to push ICSK retransmit timeout up to if we find it low */
+
+    int tuning_virtual_channel ; /* Which virtual channel to use (i.e. whether to force deterministic routing) */
+
+    int tuning_enable_siw_placement ; /* Whether to allow siw to call for direct placement */
+
+    int tuning_prep_dcmf ;/* Whether to get ready for DCMF at 'ifconfig down' time */
+
+  unsigned int block_id ;
+  unsigned char i_am_compute_node ;
+  unsigned char bluegene_tcp_is_built ;
+  unsigned char is_torus_x ;
+  unsigned char is_torus_y ;
+  unsigned char is_torus_z ;
+  unsigned char last_queue_picked ;
+#if defined(CONFIG_BGP_STATISTICS)
+  unsigned int resequence_histogram[k_concurrent_receives] ;
+  unsigned long long  bytes_sent ;
+  unsigned long long  bytes_received ;
+#endif
+  unsigned int configured_quarter ;
+} dma_tcp_t ;
+
+// Intended to allow the 'quarter' of the DMA hardware to be chosen at runtime before 'ifconfig up'
+static inline int dma_tcp_InjectionFifoGroup(dma_tcp_t  * dma_tcp)
+  {
+    return dma_tcp->active_quarter ;
+  }
+static inline int dma_tcp_ReceptionFifoGroup(dma_tcp_t  * dma_tcp)
+  {
+    return dma_tcp->active_quarter ;
+  }
+static inline int dma_tcp_InjectionCounterGroup(dma_tcp_t  * dma_tcp)
+  {
+    return dma_tcp->active_quarter ;
+  }
+static inline int dma_tcp_ReceptionCounterGroup(dma_tcp_t  * dma_tcp)
+  {
+    return dma_tcp->active_quarter ;
+  }
+
+typedef enum {
+  k_VC_ordering = DMA_PACKET_VC_BN ,   /*  virtual channel to use when we want to order things, 'Bubble Normal' */
+  k_VC_anyway = DMA_PACKET_VC_D0       /*  virtual channel to use otherwise ... 'Dynamic 0' */
+} VC_e ;
+
+static inline unsigned int virtual_channel(dma_tcp_t *dma_tcp, VC_e channel_hint)
+{
+	return k_configurable_virtual_channel ? dma_tcp->tuning_virtual_channel : channel_hint ;
+}
+
+static inline void instrument_flow(dma_tcp_t *dma_tcp,flowpoint_e flowpoint)
+{
+	dma_tcp->flow_counter[flowpoint] += 1 ;
+}
+
+static inline unsigned int flow_count(dma_tcp_t *dma_tcp,flowpoint_e flowpoint)
+{
+	return dma_tcp->flow_counter[flowpoint] ;
+}
+
+extern dma_tcp_t dma_tcp_state ;
+
+void bgp_dma_tcp_display_pending_slots(dma_tcp_t * dma_tcp, unsigned int nodecount ) ;
+void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) ;
+
+void bgp_dma_tcp_empty_fifo_callback(void) ;
+
+extern void bluegene_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;
+extern void bluegene_bic_disable_irq(unsigned int irq) ;
+
+int bgnet_receive_torus(struct sk_buff * skb) ;
+int bgtornet_receive_torus(struct sk_buff * skb) ;
+struct net_device_stats *bgtornet_stats(void) ;
+
+void bgtornet_rx_schedule(void) ;
+
+
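+/*  'Wide open' means the counter's base/max window is set to span all of */
+/*  physical memory (pa_base=0, pa_max=0xffffffff), so the DMA hardware's */
+/*  range check can never reject a transfer against this counter. */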
+static inline int DMA_CounterSetValueWideOpen(
+                                          DMA_Counter_t *c_sw,
+                                          unsigned int   value
+                                         )
+{
+  unsigned int pa_base=0, pa_max=0xffffffff;
+  SPI_assert( c_sw != NULL );
+  c_sw->pa_base = pa_base;
+  c_sw->pa_max = pa_max;
+
+  /*
+   * Write the value, base, and max to the hardware counter
+   */
+  DMA_CounterSetValueBaseMaxHw(c_sw->counter_hw_ptr,
+                               value,
+                               pa_base,
+                               pa_max);
+
+  return (0);
+}
+
+static inline  int DMA_CounterSetValueWideOpenById(
+                                     DMA_CounterGroup_t *cg_ptr,
+                                     int                 counter_id ,
+                                     unsigned int   value
+                                    )
+  {
+    int rc;
+
+    SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+    SPI_assert( cg_ptr != NULL );
+    SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+             _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+    rc = DMA_CounterSetValueWideOpen( &cg_ptr->counter[counter_id], value ) ;
+
+     /*  Note: it is assumed that the above function call performs an MBAR */
+
+    return rc;
+
+  }
+
+/*  Choose a transmission FIFO for a stream. This is 'approximately' the deterministic routing algorithm */
+/*  (I think it is 'exactly' the deterministic routing algorithm, with the possible exception of what the hardware will do */
+/*   if you send a packet to something half-way-round in one of the torus dimensions) */
+/*  Return -1 if it is an attempted 'self-send'; this has to be done as a local DMA or a memcpy, not as a torus op */
+static int select_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x, unsigned int y, unsigned int z) __attribute__ ((unused)) ;
+static inline int sign_extend(int d, unsigned int bb)
+{
+	return (d << (32-bb)) >> (32-bb) ;
+}
+static inline int resolve_direction(int d, unsigned int is_torus, unsigned int bb,  int v0, int v1)
+{
+	if( is_torus) d = sign_extend(d,bb) ;
+	return (d<0) ? v1 : v0 ;
+}
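+/*  Worked example (assuming a torus axis of 8 nodes, so bb=xbits=3): a raw */
+/*  displacement d=7 sign-extends to -1, so resolve_direction() returns v1 and */
+/*  the frame takes 1 hop across the wrap instead of 7 hops the long way. */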
+static int select_transmission_fifo_v(dma_tcp_t *dma_tcp, unsigned int x0,unsigned int x, unsigned int y0,unsigned int y, unsigned int z0,unsigned int z)
+  {
+	  switch(dma_tcp->tuning_select_fifo_algorithm)
+	  {
+		  case 0:
+		  case 1:
+			  {
+				  int dx = x0-x ;
+				  int dy = y0-y ;
+				  int dz = z0-z ;
+				  if( dx != 0 ) return resolve_direction(dx, dma_tcp->is_torus_x,dma_tcp->xbits, 1, 0) ;
+				  if( dy != 0 ) return resolve_direction(dy, dma_tcp->is_torus_y,dma_tcp->ybits, 3, 2) ;
+				  return resolve_direction(dz,dma_tcp->is_torus_z,dma_tcp->zbits, 5, 4) ;
+			  }
+		  default:
+			  /*   rank modulo 6 ... */
+			  	  return ((x<<(dma_tcp->ybits+dma_tcp->zbits)) | (y<<(dma_tcp->zbits)) | (z)) % 6 ;
+
+	  }
+  }
+
+static int select_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x, unsigned int y, unsigned int z)
+{
+	return select_transmission_fifo_v(dma_tcp,dma_tcp->location.coordinate[0],x,dma_tcp->location.coordinate[1],y,dma_tcp->location.coordinate[2],z) ;
+}
+
+/*  Report the transmission FIFO that a remote node will use to reach this node */
+static int report_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x0, unsigned int y0, unsigned int z0) __attribute__ ((unused)) ;
+static int report_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x0, unsigned int y0, unsigned int z0)
+{
+	return select_transmission_fifo_v(dma_tcp,x0,dma_tcp->location.coordinate[0],y0,dma_tcp->location.coordinate[1],z0,dma_tcp->location.coordinate[2]) ;
+}
+
+
+
+int handleSocketsRecvMsgActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           ) ;
+int handleSocketsRecvMsgCompletedActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           ) ;
+int handleSocketsBufferActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           ) ;
+
+
+#ifdef ENABLE_PACKETS
+void dma_tcp_packets_init(dma_tcp_t *dma_tcp) ;
+int bgp_dma_tcp_send_and_free_packets( struct sk_buff *skb
+                    ) ;
+void dma_tcp_packets_show_counts(dma_tcp_t *dma_tcp) ;
+
+#endif
+#ifdef ENABLE_FRAMES
+void dma_tcp_frames_init(dma_tcp_t *dma_tcp) ;
+void dma_tcp_frames_ifup(dma_tcp_t *dma_tcp) ;
+void dma_tcp_frames_ifdown(dma_tcp_t *dma_tcp) ;
+int bgp_dma_tcp_send_and_free_frames( struct sk_buff *skb
+                    ) ;
+#endif
+
+/*  ethem codings are ... */
+/*  0 : run things on the tree */
+/*  1 : run things with 'actors' and DMA to/from SKBUFFs */
+/*  2 : run things with 'messages' between memory FIFOs */
+/*  3 : send both (1) and (2), for bringup. */
+/*  until it's working correctly, we will deliver the '1' eth frames and discard the '2' eth frames at the receiver. */
+/*  Additionally we can set a '4' bit, which will send packets over the tree; */
+/*   so we could set '6' and get a working tree drive, and 'messages' flows to go through the motions on a prototype driver without any 'actors' flows */
+
+extern int bgp_dma_ethem ;
+
+/**********************************************************************
+ * DCR access wrapper
+ **********************************************************************/
+
+static inline uint32_t mfdcrx(uint32_t dcrn)
+{
+    uint32_t value;
+    asm volatile ("mfdcrx %0,%1": "=r" (value) : "r" (dcrn) : "memory");
+    return value;
+}
+
+static inline void mtdcrx(uint32_t dcrn, uint32_t value)
+{
+    asm volatile("mtdcrx %0,%1": :"r" (dcrn), "r" (value) : "memory");
+}
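+/*  These wrap the 4xx indirect-DCR instructions; dumpdmadcrs() below uses */
+/*  mfdcrx to read the torus DMA DCRs at 0xd00..0xdff, eight per trace line. */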
+
+
+static void dumpdmadcrs(unsigned int tracelevel) __attribute__ ((unused)) ;
+static void dumpdmadcrs(unsigned int tracelevel)
+  {
+    int x ;
+    for(x=0xd00; x<=0xdff ; x += 8 )
+      {
+        int d0 __attribute__ ((unused)) = mfdcrx(x) ;
+        int d1 __attribute__ ((unused)) = mfdcrx(x+1) ;
+        int d2 __attribute__ ((unused)) = mfdcrx(x+2) ;
+        int d3 __attribute__ ((unused)) = mfdcrx(x+3) ;
+        int d4 __attribute__ ((unused)) = mfdcrx(x+4) ;
+        int d5 __attribute__ ((unused)) = mfdcrx(x+5) ;
+        int d6 __attribute__ ((unused)) = mfdcrx(x+6) ;
+        int d7 __attribute__ ((unused)) = mfdcrx(x+7) ;
+        TRACEN(tracelevel,"Torus DMA dcrs 0x%04x %08x %08x %08x %08x %08x %08x %08x %08x",
+            x,d0,d1,d2,d3,d4,d5,d6,d7
+            ) ;
+      }
+  }
+
+static void dumptorusdcrs(void) __attribute__ ((unused)) ;
+static void dumptorusdcrs(void)
+  {
+    int x ;
+    for(x=0xc80; x<=0xc8f ; x += 8 )
+      {
+        int d0 __attribute__ ((unused)) = mfdcrx(x) ;
+        int d1 __attribute__ ((unused)) = mfdcrx(x+1) ;
+        int d2 __attribute__ ((unused)) = mfdcrx(x+2) ;
+        int d3 __attribute__ ((unused)) = mfdcrx(x+3) ;
+        int d4 __attribute__ ((unused)) = mfdcrx(x+4) ;
+        int d5 __attribute__ ((unused)) = mfdcrx(x+5) ;
+        int d6 __attribute__ ((unused)) = mfdcrx(x+6) ;
+        int d7 __attribute__ ((unused)) = mfdcrx(x+7) ;
+        TRACEN(k_t_request,"Torus control dcrs 0x%04x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+            x,d0,d1,d2,d3,d4,d5,d6,d7
+            ) ;
+      }
+  }
+
+#if defined(REQUIRES_DUMPMEM)
+static inline char cfix(char x) __attribute__ ((unused)) ;
+static void dumpmem(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+static void dumpframe(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+
+static inline char cfix(char x)
+  {
+    return ( x >= 0x20 && x < 0x80 ) ? x : '.' ;
+  }
+static void dumpmem(const void *address, unsigned int length, const char * label)
+  {
+    int x ;
+    TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"(>)Memory dump length=0x%08x: %s",length,label) ;
+    for (x=0;x<length;x+=32)
+      {
+        int *v __attribute__ ((unused)) = (int *)(address+x) ;
+        char *c __attribute__ ((unused)) = (char *)(address+x) ;
+        TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"%p: %08x %08x %08x %08x %08x %08x %08x %08x %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",
+            v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],
+            cfix(c[0]),cfix(c[1]),cfix(c[2]),cfix(c[3]),
+            cfix(c[4]),cfix(c[5]),cfix(c[6]),cfix(c[7]),
+            cfix(c[8]),cfix(c[9]),cfix(c[10]),cfix(c[11]),
+            cfix(c[12]),cfix(c[13]),cfix(c[14]),cfix(c[15]),
+            cfix(c[16]),cfix(c[17]),cfix(c[18]),cfix(c[19]),
+            cfix(c[20]),cfix(c[21]),cfix(c[22]),cfix(c[23]),
+            cfix(c[24]),cfix(c[25]),cfix(c[26]),cfix(c[27]),
+            cfix(c[28]),cfix(c[29]),cfix(c[30]),cfix(c[31])
+                    ) ;
+      }
+    TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"(<)Memory dump") ;
+  }
+
+static void dumpframe(const void *address, unsigned int length, const char * label)
+  {
+    int x ;
+    unsigned int limlen = (length>1024) ? 1024 : length ;
+    TRACEN(k_t_fifocontents,"(>)ethframe dump length=%d: %s",length,label) ;
+    for (x=0;x<limlen;x+=32)
+      {
+        int *v __attribute__ ((unused)) = (int *)(address+x) ;
+        char *c __attribute__ ((unused)) = (char *)(address+x) ;
+        TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",
+            v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],
+            cfix(c[0]),cfix(c[1]),cfix(c[2]),cfix(c[3]),
+            cfix(c[4]),cfix(c[5]),cfix(c[6]),cfix(c[7]),
+            cfix(c[8]),cfix(c[9]),cfix(c[10]),cfix(c[11]),
+            cfix(c[12]),cfix(c[13]),cfix(c[14]),cfix(c[15]),
+            cfix(c[16]),cfix(c[17]),cfix(c[18]),cfix(c[19]),
+            cfix(c[20]),cfix(c[21]),cfix(c[22]),cfix(c[23]),
+            cfix(c[24]),cfix(c[25]),cfix(c[26]),cfix(c[27]),
+            cfix(c[28]),cfix(c[29]),cfix(c[30]),cfix(c[31])
+                    ) ;
+      }
+    TRACEN(k_t_fifocontents,"(<)ethframe dump") ;
+  }
+#else
+static inline void dumpmem(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+static inline void dumpmem(const void *address, unsigned int length, const char * label)
+  {
+  }
+static void dumpframe(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+static void dumpframe(const void *address, unsigned int length, const char * label)
+  {
+  }
+#endif
+
+static void dumpRecFifoGroup(DMA_RecFifoGroup_t * recFifoGroup) __attribute__ ((unused)) ;
+static void dumpRecFifoGroup(DMA_RecFifoGroup_t * recFifoGroup)
+  {
+    TRACEN(k_t_request,"(>)recFifoGroup=%p",recFifoGroup) ;
+    if( recFifoGroup != NULL )
+      {
+        TRACEN(k_t_request,"group_id=%d num_normal_fifos=%d num_hdr_fifos=%d mask=%08x status_ptr=%p",
+            recFifoGroup->group_id,recFifoGroup->num_normal_fifos,recFifoGroup->num_hdr_fifos,recFifoGroup->mask,recFifoGroup->status_ptr
+            ) ;
+        TRACEN(k_t_request,"not_empty=%08x%08x available=%08x%08x threshold_crossed=%08x%08x",
+            recFifoGroup->status_ptr->not_empty[0],recFifoGroup->status_ptr->not_empty[1],
+            recFifoGroup->status_ptr->available[0],recFifoGroup->status_ptr->available[1],
+            recFifoGroup->status_ptr->threshold_crossed[0],recFifoGroup->status_ptr->threshold_crossed[1]
+            ) ;
+        TRACEN(k_t_request,"fifos[0] global_fifo_id=%d type=%d num_packets_processed_since_moving_fifo_head=%d",
+            recFifoGroup->fifos[0].global_fifo_id,
+            recFifoGroup->fifos[0].type,
+            recFifoGroup->fifos[0].num_packets_processed_since_moving_fifo_head
+        ) ;
+        TRACEN(k_t_request,"fifos[0] fifo_hw_ptr=%p free_space=%08x fifo_size=%08x pa_start=%08x va_start=%p va_head=%p va_tail=%p va_end=%p %s",
+            recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr,
+            recFifoGroup->fifos[0].dma_fifo.free_space,
+            recFifoGroup->fifos[0].dma_fifo.fifo_size,
+            recFifoGroup->fifos[0].dma_fifo.pa_start,
+            recFifoGroup->fifos[0].dma_fifo.va_start,
+            recFifoGroup->fifos[0].dma_fifo.va_head,
+            recFifoGroup->fifos[0].dma_fifo.va_tail,
+            recFifoGroup->fifos[0].dma_fifo.va_end,
+            (recFifoGroup->fifos[0].dma_fifo.free_space != recFifoGroup->fifos[0].dma_fifo.fifo_size) ? "!!!" : ""
+        ) ;
+        if( recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr != NULL )
+          {
+            TRACEN(k_t_request,"hwfifos[0] pa_start=%08x pa_end=%08x pa_head=%08x pa_tail=%08x %s",
+                recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_start,
+                recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_end,
+                recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_head,
+                recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_tail,
+                (recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_head != recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_tail) ? "!!!" : ""
+                ) ;
+          }
+      }
+    TRACEN(k_t_request,"(<)") ;
+
+  }
+
+static void dumpInjFifoGroup(DMA_InjFifoGroup_t * injFifoGroup) __attribute__ ((unused)) ;
+static void dumpInjFifoGroup(DMA_InjFifoGroup_t * injFifoGroup)
+  {
+    TRACEN(k_t_request,"(>)injFifoGroup=%p",injFifoGroup) ;
+    if( injFifoGroup != NULL )
+      {
+        DMA_InjFifoStatus_t *injStatus = injFifoGroup->status_ptr ;
+        int x ;
+        TRACEN(k_t_request,"status_ptr=%p permissions=0x%08x group_id=%d",
+            injFifoGroup->status_ptr, injFifoGroup->permissions, injFifoGroup->group_id) ;
+        if( injStatus)
+          {
+            unsigned int available = injStatus->available ;
+            TRACEN(k_t_request,"status not_empty=0x%08x available=0x%08x threshold_crossed=0x%08x activated=0x%08x",
+                injStatus->not_empty, available, injStatus->threshold_crossed, injStatus->activated
+            ) ;
+            for( x=0; x<DMA_NUM_INJ_FIFOS_PER_GROUP; x+=1)
+              {
+                if( (0x80000000 >> x) & available)
+                  {
+                    DMA_InjFifo_t *fifo=injFifoGroup->fifos+x ;
+                    DMA_FifoHW_t *hw_ptr = fifo->dma_fifo.fifo_hw_ptr ;
+                    if( fifo->occupiedSize)
+                      {
+                    TRACEN(k_t_request, " fifos[%d] fifo_id=%d desc_count=0x%08x%08x occupiedSize=0x%08x priority=%d local=%d ts_inj_map=0x%02x %s",
+                        x, fifo->fifo_id, (unsigned int)(fifo->desc_count >> 32),(unsigned int)(fifo->desc_count), fifo->occupiedSize, fifo->priority, fifo->local, fifo->ts_inj_map,
+                        (fifo->occupiedSize) ? "!!!" : ""
+                    ) ;
+                      }
+                    if( fifo->dma_fifo.va_head != fifo->dma_fifo.va_tail)
+                      {
+                    TRACEN(k_t_request," fifos[%d] fifo_hw_ptr=%p free_space=%08x fifo_size=%08x pa_start=%08x va_start=%p va_head=%p va_tail=%p va_end=%p",
+                        x,
+                        hw_ptr,
+                        fifo->dma_fifo.free_space,
+                        fifo->dma_fifo.fifo_size,
+                        fifo->dma_fifo.pa_start,
+                        fifo->dma_fifo.va_start,
+                        fifo->dma_fifo.va_head,
+                        fifo->dma_fifo.va_tail,
+                        fifo->dma_fifo.va_end
+                    ) ;
+                      }
+                    if( hw_ptr)
+                      {
+                        if( hw_ptr->pa_head != hw_ptr->pa_tail)
+                          {
+                        TRACEN(k_t_request," hwfifos[%d] pa_start=%08x pa_end=%08x pa_head=%08x pa_tail=%08x %s",
+                            x,
+                            hw_ptr->pa_start,
+                            hw_ptr->pa_end,
+                            hw_ptr->pa_head,
+                            hw_ptr->pa_tail,
+                            (hw_ptr->pa_head != hw_ptr->pa_tail) ? "!!!" : ""
+                            ) ;
+                          }
+                      }
+                  }
+              }
+          }
+      }
+    TRACEN(k_t_request,"(<)") ;
+  }
+
+static void bgp_dma_tcp_set_mtu(dma_tcp_t *dma_tcp, unsigned int mtu) __attribute__ ((unused)) ;
+static void bgp_dma_tcp_set_mtu(dma_tcp_t *dma_tcp, unsigned int mtu)
+  {
+    unsigned int max_packets_per_frame=(mtu+k_torus_link_payload_size-1) / k_torus_link_payload_size ;
+    unsigned int max_packets_per_frame2=(mtu+k_injection_packet_size-1) / k_injection_packet_size ;
+    unsigned int mtu1=max_packets_per_frame * k_torus_link_payload_size + k_torus_skb_alignment ;
+    unsigned int mtu2=max_packets_per_frame2 * k_injection_packet_size + k_torus_skb_alignment ;
+    dma_tcp->max_packets_per_frame = max_packets_per_frame ;
+    dma_tcp->mtu = (mtu1>mtu2) ? mtu1 : mtu2 ;
+  }
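+/*  Worked example: mtu=1500 rounds up to 7 link payloads of 240 bytes, so the */
+/*  stored mtu becomes 7*240 + k_torus_skb_alignment = 1696; with the injection */
+/*  packet and link payload sizes both 240, the two candidate values coincide. */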
+
+/*  Test if we think a socket is affected by torus congestion */
+unsigned int bgp_torus_congestion(struct sock *sk) ;
+
+
+static inline unsigned int stack_pointer(void)
+{
+    uint32_t value;
+    asm volatile ("mr %0,1": "=r" (value) );
+    return value;
+}
+
+/*  Fragment reassembly control for 'frames' */
+/*
+ * When the first packet of a frame arrives, examine the eth and ip headers to allocate a skbuff which will have
+ * enough data for the frame. Arrange to assemble the first fragment into the data area.
+ *
+ * When the last packet of a fragment arrives, we know whether the frame is complete. If it is a one-frag frame,
+ * hand it off.
+ */
+
+typedef struct
+{
+	unsigned int frame_size ;   /*  IP frame size, from IP header */
+	unsigned int frag_size ;   /*  fragment size */
+	unsigned int frag_pad_head ;  /*  Displacement of first byte of first fragment from alignment */
+	unsigned int fragment_index ;  /*  Index of fragment, starts at 0 */
+	unsigned int bytes_accounted_for ;  /*  Number of bytes accounted for, including the current fragment */
+	unsigned char * frag_base ;  /*  Where to pack this frag down to */
+	unsigned char * frag_data ;  /*  First byte free after current fragment is received */
+	unsigned char * frag_payload ;  /*  Aligned address to drop first packet of next fragment into skb */
+} fragment_reassembler;
+
+static inline fragment_reassembler * frag_re(struct sk_buff *skb)
+{
+	return (fragment_reassembler *) &(skb->cb) ;
+}
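+/*  The overlay above relies on the eight 4-byte fields fitting in skb->cb */
+/*  (48 bytes in this kernel); at 32 bytes it fits, but any field added to */
+/*  fragment_reassembler should re-check that bound. */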
+
+void dma_tcp_show_reception(dma_tcp_t * dma_tcp) ;
+
+int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos) ;
+extern int bgp_dma_tcp_counter_copies[DMA_NUM_COUNTERS_PER_GROUP] ;
+static void show_dma_descriptor(DMA_InjDescriptor_t *d) __attribute((unused)) ;
+static void show_dma_descriptor(DMA_InjDescriptor_t *d)
+{
+	unsigned int * di = (unsigned int *) d ;
+	TRACEN(k_t_request,"DMA_InjDescriptor_t(0x%08x 0x%08x 0x%08x 0x%08x (0x%08x 0x%08x 0x%08x 0x%08x))",
+			d->word1, d->word2, d->base_offset, d->msg_length, d->hwHdr.word0, d->hwHdr.word1, d->hwHdr.word2, d->hwHdr.word3) ;
+	TRACEN(k_t_request,"prefetch_only=%d local_copy=%d",(di[0] >> 1)& 1,di[0] & 1) ;
+}
+
+typedef struct
+{
+	long long int sxx ;
+	long long int sxy ;
+/* 	long long int m0 ; */
+/* 	long long int m1 ; */
+/* 	long long int det ; */
+	int s1 ;
+	int sx ;
+	int sy ;
+} dma_statistic_t ;
+extern dma_statistic_t bgp_dma_rate ;
+
+enum {
+  k_injCounterId = 0 // Injection counter number to use
+} ;
+
+/*  Support for freeing 'a few' skbuffs each time we go around, once their outbound DMA is complete */
+enum {
+	k_skb_group_count = 8
+};
+typedef struct {
+	unsigned int count ;
+	struct sk_buff * group[k_skb_group_count] ;
+} skb_group_t ;
+static void skb_group_init(skb_group_t * skb_group) __attribute__((unused)) ;
+static void skb_group_init(skb_group_t * skb_group)
+{
+	skb_group->count = 0 ;
+}
+
+
+static void skb_group_add(skb_group_t * skb_group, struct sk_buff * skb) __attribute__((unused)) ;
+static void skb_group_add(skb_group_t * skb_group, struct sk_buff * skb)
+{
+	unsigned int count=skb_group->count ;
+	if( count < k_skb_group_count )
+		{
+			skb_group->group[count] = skb ;
+			TRACEN(k_t_general,"Queueing skb_group->group[%d]=%p for free",count,skb) ;
+			skb_group->count = count+1 ;
+		}
+	else
+		{
+			TRACEN(k_t_error,"Overrunning queue of skbs to free skb=%p",skb) ;
+			dev_kfree_skb(skb) ;
+		}
+}
+static void skb_group_free(skb_group_t * skb_group) __attribute__((unused)) ;
+static void skb_group_free(skb_group_t * skb_group)
+{
+	unsigned int count=skb_group->count ;
+	unsigned int index ;
+	struct sk_buff ** skb_array=skb_group->group ;
+	BUG_ON(count > k_skb_group_count) ;
+	if( count > k_skb_group_count) count=k_skb_group_count ;
+	for(index=0;index<count;index+=1)
+		{
+			TRACEN(k_t_general,"freeing skb_array[%d]=%p",index,skb_array[index]) ;
+			if( skb_array[index])
+				{
+					dev_kfree_skb(skb_array[index]) ;
+					skb_array[index]=NULL ;
+				}
+		}
+}
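+/*  Typical use of the skb_group machinery (a sketch): batch up skbs whose outbound DMA has
+ *  completed while holding the injection lock, then free them after unlocking:
+ *
+ *      skb_group_t g ;
+ *      skb_group_init(&g) ;
+ *       ... skb_group_add(&g, skb) under the lock, or skb_group_queue() for a FIFO range ...
+ *      skb_group_free(&g) ;
+ *
+ *  bgp_dma_diag_drive_sync_at() in bgp_dma_tcp_diagnose.c follows this pattern.
+ */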
+
+static void skb_group_queue_seq(skb_group_t * group, struct sk_buff ** skb_array, unsigned int count
+#if defined(TRACK_LIFETIME_IN_FIFO)
+		       , unsigned int core, unsigned int desired_fifo, unsigned long long now, unsigned int x
+#endif
+		)
+{
+	unsigned int index ;
+
+	for( index=0 ; index<count; index+=1)
+		{
+			if( skb_array[index])
+				{
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					struct sk_buff *skb=skb_array[index] ;
+					unsigned long long lifetime_in_fifo = now - *(unsigned long long *) skb_array[index]->cb ;
+					TRACEN(k_t_detail ,"core=%d desired_fifo=%d lifetime=0x%016llx",core, desired_fifo,lifetime_in_fifo) ;
+					if( skb->len >= 4096 && desired_fifo < k_injecting_directions && lifetime_in_fifo > max_lifetime_by_direction[desired_fifo])
+						{
+							max_lifetime_by_direction[desired_fifo] = lifetime_in_fifo ;
+						}
+					if( skb->len >= 4096 && lifetime_in_fifo > 0x7fffffff)
+						{
+							struct sock   *sk=skb->sk ;
+							struct inet_sock *inet = inet_sk(sk);
+							struct inet_connection_sock *icsk = inet_csk(sk);
+							unsigned int daddr=inet->daddr ;
+							unsigned int flags = TCP_SKB_CB(skb)->flags ;
+						        TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u x=%d in-fifo-time=0x%016llx",
+						            sk, skb, skb->data, skb->len, flags,
+						            daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+						            x+index,
+						            lifetime_in_fifo
+						             ) ;
+						}
+#endif
+					skb_group_add(group,skb_array[index]) ;
+					skb_array[index] = NULL ;
+				}
+		}
+}
+static void skb_group_queue(skb_group_t * group, struct sk_buff ** skb_array, unsigned int start, unsigned int count
+#if defined(TRACK_LIFETIME_IN_FIFO)
+        		       , unsigned int core, unsigned int desired_fifo, unsigned long long now
+#endif
+		) __attribute__ ((unused)) ;
+static void skb_group_queue(skb_group_t * group, struct sk_buff ** skb_array, unsigned int start, unsigned int count
+#if defined(TRACK_LIFETIME_IN_FIFO)
+        		       , unsigned int core, unsigned int desired_fifo, unsigned long long now
+#endif
+		)
+{
+	TRACEN(k_t_detail , "Queuing skbs for freeing start=%d count=%d", start, count) ;
+	if( start+count <= k_injection_packet_count)
+		{
+			skb_group_queue_seq(group,skb_array+start, count
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					        		       , core, desired_fifo, now, 0
+#endif
+			) ;
+		}
+	else
+		{
+			skb_group_queue_seq(group,skb_array+start, k_injection_packet_count-start
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					        		       , core, desired_fifo, now,0
+#endif
+					) ;
+			skb_group_queue_seq(group,skb_array, count - (k_injection_packet_count-start)
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					        		       , core, desired_fifo, now,k_injection_packet_count-start
+#endif
+					)  ;
+		}
+
+}
+
+/*  We will be using the injection machinery as circular buffers; this is the 'circle' function */
+static inline unsigned int packet_mod(unsigned int index)
+  {
+    return index & (k_injection_packet_count-1) ;
+  }
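+/*  Valid only while k_injection_packet_count is a power of two: the AND implements 'mod'.
+ *  For illustration, with a count of 256, packet_mod(260) == 4, and an unsigned wrap such
+ *  as packet_mod(2-4) == 254 still gives the right circular distance. */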
+
+/*  Hash intended to minimise 'needless' spins when several cores try to inject at the same time -- no longer spread fully, since it is best not to overtake on a path */
+static inline int injection_group_hash(dma_tcp_t *dma_tcp,int x,int y, int z)
+{
+/* 	return 0 ; */
+	return ( x/2 + y/2 + z/2 ) & 3 & (dma_tcp->tuning_injection_hashmask);
+}
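+/*  For illustration: all eight nodes of an aligned 2x2x2 sub-cube hash to the same group
+ *  ((0,0,0) and (1,1,1) give 0, (2,0,0) gives 1), and a tuning_injection_hashmask of 0
+ *  collapses every target onto group 0 for fully serialised injection. */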
+
+#if defined(BARRIER_WITH_IOCTL)
+
+static inline void timing_histogram(dma_tcp_t * dma_tcp)
+{
+	int current_tbl=get_tbl() ;
+	int delta_tbl=current_tbl-dma_tcp->prev_tbl ;
+	dma_tcp->timing_histogram_buckets[fls(delta_tbl)] += 1 ;
+	dma_tcp->prev_tbl = current_tbl ;
+
+}
+#endif
+
+
+static inline int wrapped_DMA_InjFifoInjectDescriptorById(
+		DMA_InjFifoGroup_t    *fg_ptr,
+		int                    fifo_id,
+		DMA_InjDescriptor_t   *desc
+		)
+{
+	int rc ;
+	rc = DMA_InjFifoInjectDescriptorById(fg_ptr,fifo_id,desc) ;
+	return rc ;
+}
+
+
+
+/* #define AUDIT_HEADLEN */
+/* #define TRACK_LIFETIME_IN_FIFO */
+
+typedef struct
+{
+	DMA_InjDescriptor_t desc ;
+#if defined(TRACK_LIFETIME_IN_FIFO)
+	unsigned long long injection_timestamp ;
+#endif
+#if defined(AUDIT_HEADLEN)
+	unsigned short tot_len ;
+#endif
+	char free_when_done ;
+} frame_injection_cb ;
+extern unsigned int tot_len_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ; // TODO: fix the name if we leave it extern ...
+
+#if defined(AUDIT_FRAME_HEADER)
+typedef struct {
+	struct ethhdr eth ;
+	struct iphdr iph ;
+} frame_header_t ;
+extern frame_header_t all_headers_in_counters[DMA_NUM_COUNTERS_PER_GROUP] ; // TODO: fix the name if we leave it extern ...
+#endif
+
+static void dma_tcp_show_reception_one(dma_tcp_t * dma_tcp, unsigned int x, unsigned int counter_value)  __attribute__((unused)) ;
+static void dma_tcp_show_reception_one(dma_tcp_t * dma_tcp, unsigned int x, unsigned int counter_value)
+{
+	struct sk_buff *skb=dma_tcp->rcv_skbs[x] ;
+	if( skb)
+		{
+			  struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+			  unsigned int eth_proto = eth->h_proto ;
+
+			  struct iphdr *iph = (struct iphdr *) (eth+1) ;
+			  unsigned int tot_len=iph->tot_len ;
+			  unsigned int saddr=iph->saddr ;
+			  if( tot_len != tot_len_for_rcv[x])
+				  {
+					  TRACEN(k_t_error,"(!!!) tot_len trampled") ;
+				  }
+
+			  TRACEN(k_t_request,"(---) skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d slot=0x%08x conn_id=0x%02x tot_len_for_rcv=0x%04x counter_value=0x%04x",
+					  skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff, dma_tcp->slot_for_rcv[x], dma_tcp->conn_for_rcv[x], tot_len_for_rcv[x],counter_value
+					                                                                                                                                           ) ;
+			  dumpmem(skb->data,0x42,"eth-ip-tcp header") ;
+			  show_dma_descriptor((DMA_InjDescriptor_t *)&skb->cb) ;
+#if defined(AUDIT_FRAME_HEADER)
+			if(memcmp(skb->data,((char *)(all_headers_in_counters+x)),32))
+				{
+					  TRACEN(k_t_request,"(!!!) header not as first seen") ;
+					  dumpmem(skb->data-14,sizeof(frame_header_t),"header-now") ;
+					  dumpmem(all_headers_in_counters+x,sizeof(frame_header_t),"header-in-propose") ;
+
+				}
+#endif
+		}
+	else
+		{
+			TRACEN(k_t_error|k_t_request,"(E) x=%d Counter in use but no skb !",x) ;
+		}
+
+}
+
+void dma_tcp_set_port(unsigned int port) ;  // Intended for configuring which quarter of the BGP DMA unit to use
+void dma_tcp_open(void) ; // 'ifconfig up' handler
+void dma_tcp_close(void) ; // 'ifconfig down' handler
+
+void dma_tcp_diagnose_init(dma_tcp_t *dma_tcp) ;
+
+void __init
+bgp_dma_memcpyInit(dma_tcp_t *dma_tcp) ;
+
+void __init
+dma_tcp_devfs_procfs_init(dma_tcp_t *dma_tcp) ;
+
+#if defined(TRACK_LIFETIME_IN_FIFO)
+extern unsigned long long max_lifetime_by_direction[k_injecting_directions] ;
+#endif
+
+#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS)
+extern int tcp_scattergather_frag_limit  ;
+#endif
+
+typedef struct { unsigned char c[240] ; } torus_frame_payload ;
+
+#endif
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c b/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c
new file mode 100644
index 0000000..07b0aa9
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c
@@ -0,0 +1,707 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ *
+ * Intent: Carry torus packets as messages into memory FIFOs, and interpret them
+ *          as eth frames for TCP
+ *         Later on, add token-based flow control with a view to preventing
+ *          congestion collapse as the machine gets larger and the loading gets higher
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+
+/* #include "bglink.h" */
+#include <spi/linux_kernel_spi.h>
+
+#include <asm/time.h>
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+/* #define CRC_CHECK_FRAMES */
+#define VERIFY_TARGET
+/* #define SIDEBAND_TIMESTAMP */
+#include "bgp_dma_tcp.h"
+
+
+
+
+/* void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) */
+/* { */
+/* 	unsigned int x; */
+/* 	for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) */
+/* 		{ */
+/* 			struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */
+/* 			if( skb) */
+/* 				{ */
+/* 					frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; */
+/* 					TRACEN(k_t_general,"Redriving x=%d skb=%p",x,skb) ; */
+/* 					inject_dma_descriptor_propose_accept(dma_tcp,0,&ficb->desc) ; */
+/* 				} */
+/* 		} */
+/* } */
+
+static inline void show_tx_skbs(tx_t *tx, unsigned int node_count)
+{
+	unsigned int slot_index ;
+	unsigned int conn_id ;
+	unsigned int tx_skb_count = 0 ;
+	for(slot_index=0;slot_index<node_count;slot_index += 1)
+		{
+			for( conn_id=0;conn_id < k_connids_per_node;conn_id += 1)
+				{
+					struct sk_buff * skb=get_tx_skb(tx,slot_index,conn_id) ;
+					if(skb)
+						{
+							  struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+							  struct iphdr *iph = (struct iphdr *) (eth+1) ;
+							  unsigned int tot_len=iph->tot_len ;
+							  unsigned int daddr=iph->daddr ;
+							  tx_skb_count += 1 ;
+
+							TRACEN(k_t_request,"(---) slot_index=0x%08x conn_id=0x%02x skb=%p tot_len=0x%04x daddr=%d.%d.%d.%d",
+									slot_index,conn_id,skb,tot_len,daddr>>24, (daddr >> 16) & 0xff,(daddr >> 8) & 0xff, daddr & 0xff) ;
+						}
+				}
+		}
+	TRACEN(k_t_request,"tx_skb_count=%d",tx_skb_count) ;
+}
+
+void dma_tcp_show_reception(dma_tcp_t * dma_tcp)
+{
+	    int x ;
+	    int slot ;
+	    unsigned int inUseCount = 0 ;
+	    TRACEN(k_t_request,"rec hitZero 0x%08x 0x%08x",DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,0),DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,1)) ;
+	    for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
+		    {
+			    bgp_dma_tcp_counter_copies[x] = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+x) ;
+			    if( bgp_dma_tcp_counter_copies[x] != 0 || dma_tcp->recCntrInUse[x] != 0)
+				    {
+					    inUseCount += 1 ;
+				    TRACEN(k_t_request,"rec_counter[0x%02x] value=0x%08x inUse=%d", x,bgp_dma_tcp_counter_copies[x],dma_tcp->recCntrInUse[x]) ;
+				    if(dma_tcp->recCntrInUse[x])
+					    {
+						    dma_tcp_show_reception_one(dma_tcp,x,bgp_dma_tcp_counter_copies[x]) ;
+					    }
+				    }
+		    }
+	    TRACEN(k_t_request,"inUseCount=%d",inUseCount) ;
+	    show_tx_skbs(&dma_tcp->tx_mux,dma_tcp->node_count) ;
+	    TRACEN(k_t_request,"skb_queue_len(pending_rcv_skbs)=%d",skb_queue_len(&dma_tcp->balancer.b[0].pending_rcv_skbs)) ;
+	    {
+		    struct sk_buff *skb = skb_peek(&dma_tcp->balancer.b[0].pending_rcv_skbs) ;
+		    if(skb)
+			    {
+
+					  struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+					  unsigned int eth_proto = eth->h_proto ;
+
+					  struct iphdr *iph = (struct iphdr *) (eth+1) ;
+					  unsigned int tot_len=iph->tot_len ;
+					  unsigned int saddr=iph->saddr ;
+					  TRACEN(k_t_request,"skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d",skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff ) ;
+			    }
+
+	    }
+	    for( slot=0;slot<dma_tcp->node_count; slot+=1)
+		    {
+			    unsigned int proposals_active=get_proposals_active(&dma_tcp->rcvdemux,slot) ;
+			    unsigned int count_pending_f=count_pending_flow(&dma_tcp->rcvdemux,slot) ;
+			    unsigned int located_counters=0 ;
+			    if( proposals_active || count_pending_f )
+				    {
+					    TRACEN(k_t_request,"slot=0x%08x proposals_active=%d count_pending_flow=%d",slot,proposals_active,count_pending_f) ;
+				    }
+			    for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
+				    {
+					    struct sk_buff *skb=dma_tcp->rcv_skbs[x] ;
+					    if ( skb && slot == dma_tcp->slot_for_rcv[x] )
+						    {
+							    located_counters += 1 ;
+						    }
+				    }
+			    if( located_counters + count_pending_f != proposals_active || ( 0 == located_counters && count_pending_f > 0 ))
+				    {
+					    TRACEN(k_t_request|k_t_error,"(E) slot=0x%08x located_counters=%d count_pending_f=%d proposals_active=%d",
+							    slot,located_counters,count_pending_f,proposals_active) ;
+				    }
+
+		    }
+}
+
+int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	    int rc ;
+	    dma_tcp_show_reception(&dma_tcp_state ) ;
+	    TRACEN(k_t_entryexit,"(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+	    rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+	    TRACEN(k_t_entryexit,"(<)") ;
+	    return rc ;
+
+}
+
+/*  Routine to report how full the outgoing FIFOs are */
+void bgp_dma_diag_report_transmission_queue(int __user * report)
+  {
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    unsigned int core ;
+    TRACEN(k_t_general,"report=%p",report) ;
+    for( core=0 ; core<k_injecting_cores; core += 1)
+	    {
+		    unsigned int desired_fifo ;
+			   for(desired_fifo=0; desired_fifo<k_injecting_directions; desired_fifo += 1 )
+			   {
+			       unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ;
+			       unsigned int  fifo_current_head =
+			        (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+			       unsigned int  fifo_current_tail =
+			        (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+			       unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+			       unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+			       unsigned int current_injection_used=packet_mod(tailx-headx) ;
+			       put_user(current_injection_used, report) ;
+			       report += 1 ;
+			       TRACEN(k_t_detail,"core=%d desired_fifo=%d current_injection_used=%d",core,desired_fifo,current_injection_used) ;
+
+			   }
+
+
+	    }
+    put_user(dma_tcp->qtyFreeRecCounters, report) ;
+    report += 1 ;
+    put_user(flow_count(dma_tcp,k_send_propose_rpc)-flow_count(dma_tcp,k_act_accept_rpc), report) ;
+    report += 1 ;
+    put_user(flow_count(dma_tcp,k_act_propose_rpc)-flow_count(dma_tcp,k_send_accept_rpc), report) ;
+  }
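+/*  Layout of the report filled in above: one word per (core,direction) pair giving the
+ *  number of 32-byte descriptors still queued in that injection FIFO (the '>> 5' turns byte
+ *  offsets into descriptor counts), then three trailing words: free reception counters,
+ *  proposes sent but not yet accepted, and proposes received but not yet answered. */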
+static int issueDiagnose(
+		DMA_RecFifo_t      *f_ptr,
+		DMA_PacketHeader_t *packet_ptr,
+		dma_tcp_t * dma_tcp,
+    void  * request ,
+    int payload_bytes,
+    unsigned int src_key,
+    int Put_Offset
+    )
+  {
+	  unsigned int *payload=(unsigned int *)request ;
+	  TRACEN(k_t_request,"src_key=0x%08x Put_Offset=0x%08x payload_bytes=0x%02x [%08x %08x %08x %08x]",
+			  src_key,Put_Offset, payload_bytes,payload[0],payload[1],payload[2],payload[3]) ;
+	  return 0 ;
+  }
+
+static int issueDiagnoseActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           )
+  {
+    unsigned int SW_Arg=packet_ptr->SW_Arg  ;
+    int Put_Offset=packet_ptr->Put_Offset ;
+    enable_kernel_fp() ; // TODO: don't think this is needed nowadays
+
+    TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x",
+		    recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ;
+    return issueDiagnose(
+		    f_ptr,
+		    packet_ptr,
+        (dma_tcp_t *) recv_func_parm,
+        (void *) payload_ptr,
+        payload_bytes,
+        SW_Arg,
+        Put_Offset
+        ) ;
+  }
+static inline int inject_into_dma_diag_sync(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int my_injection_group, unsigned int desired_fifo, unsigned int SW_Arg ,
+		unsigned int proto_start )
+  {
+    dma_addr_t dataAddr ;
+    DMA_InjDescriptor_t desc;
+    int ret1, ret2 __attribute__((unused));
+    unsigned int firstpacketlength =  length ;
+    TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
+    dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+
+/*  First injection is 'start of frame/fragment' */
+    ret1 = DMA_TorusMemFifoDescriptor( &desc,
+                                     x, y, z,
+                                     dma_tcp_ReceptionFifoGroup(dma_tcp),          /*  recv fifo grp id */
+                                     0,          /*  hints */
+                                     k_VC_anyway,          /*  vc - adaptive */
+                                     SW_Arg,          /*  softw arg */
+                                     proto_start,     /*  function id */
+                                     dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+                                     k_injCounterId,  /*  inj counter id */
+                                     dataAddr,        /*  send address */
+                                     firstpacketlength          /*  msg len */
+                                     );
+
+#if defined(SIDEBAND_TIMESTAMP)
+    {
+	    unsigned long now_lo=get_tbl() ;
+	    DMA_DescriptorSetPutOffset(&desc,((-length) & 0x0000ffff ) | (now_lo & 0xffff0000)) ;
+
+    }
+#else
+    DMA_DescriptorSetPutOffset(&desc,-length) ;  /*  For 'memory FIFO packets', the put offset has no hardware use. Set it to indicate the message (fragment) length */
+#endif
+    ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+                                            dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+                                            &desc );
+    TRACEN(k_t_general , "(<)proto_start=%d firstpacketlength=%d ret1=%d ret2=%d",proto_start,firstpacketlength,ret1, ret2);
+    return 1 ;
+  }
+
+static void bgp_dma_diag_drive_sync_at(dma_tcp_t *dma_tcp, int x,int y,int z, int sendBytes)
+{
+  unsigned int desired_fifo= select_transmission_fifo(dma_tcp,x,y,z) ;
+  unsigned long flags ;
+  unsigned int current_injection_used=0xffffffff ;
+  unsigned int aligned_payload_address = (unsigned int)dma_tcp->diag_block_buffer ;
+  unsigned int aligned_payload_length = sendBytes  ;
+  unsigned int pad_head = 0 ;
+
+  int ret = 0;
+  int ring_ok ;
+
+  int my_injection_group ;
+  skb_group_t skb_group ;
+  TRACEN(k_t_general ,"(>) at (%02x,%02x,%02x)", x,y,z);
+  skb_group_init(&skb_group) ;
+
+  my_injection_group=injection_group_hash(dma_tcp,x,y,z) ;
+  spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
+   {
+     unsigned int src_key = (dma_tcp->src_key << 6) | (my_injection_group << 4) | pad_head ;
+     idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ;
+      /*  Set up the payload */
+     unsigned int bhx = buffer->buffer_head_index ;
+     unsigned int lastx = packet_mod(bhx) ;
+     unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ;
+     unsigned int  fifo_current_head =
+      (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
+     unsigned int  fifo_current_tail =
+      (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
+     unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+     unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+     unsigned int injection_count ;
+#if defined(TRACK_LIFETIME_IN_FIFO)
+     unsigned long long now=get_powerpc_tb() ;
+     *(unsigned long long*)(skb->cb) = now ;
+#endif
+     current_injection_used=packet_mod(tailx-headx) ;
+      /*  If the network is backing up, we may have to skip out here, */
+      /*  so that we don't overwrite unsent data. */
+     TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d",
+         desired_fifo,headx,tailx,bhx,current_injection_used) ;
+     if( current_injection_used > buffer->injection_high_watermark )
+       {
+         buffer->injection_high_watermark=current_injection_used ;  /*  Congestion statistic */
+       }
+       {
+      	  /*  Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */
+         if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1)
+           {
+              ring_ok = 1 ;
+              TRACEN(k_t_general,"Runway slot granted") ;
+           }
+         else
+           {
+              ring_ok = 0 ;
+              TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ;
+           }
+       }
+     TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x",
+         my_injection_group, desired_fifo, bhx, headx,tailx
+         ) ;
+     if ( ring_ok )
+       {
+          /*  We are going to send something. Display its protocol headers .. */
+
+          /*  Bump the injection counter. Actually only needs doing once per 4GB or so */
+         ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId,  0xffffffff );
+
+	    /*  and inject it */
+		   {
+
+			   injection_count = inject_into_dma_diag_sync(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo,
+					   src_key,
+					   dma_tcp->proto_issue_diag_sync
+					   ) ;
+
+
+
+		   }
+         {
+      	   unsigned int nhx=packet_mod(bhx+injection_count) ;
+		    /*  Record the skbuff so it can be freed later, after data is DMA'd out */
+		   dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = NULL  ;
+		    /*  Remember where we will be pushing the next injection in */
+	           buffer->buffer_head_index = nhx ;
+         }
+          /*  hang on to the skbs until they are sent ... */
+         if( current_injection_used != 0xffffffff)
+           {
+             unsigned int btx = buffer->buffer_tail_index ;  /*  This indexes the oldest skbuff that might still be pending send by the DMA unit */
+             int skql2 = packet_mod(bhx-btx) ;
+             int count_needing_freeing = skql2-current_injection_used ;
+             int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
+             TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free);
+             skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					        		       , my_injection_group, desired_fifo, now
+#endif
+             ) ;
+             btx = packet_mod(btx+count_to_free) ;
+             buffer->buffer_tail_index = btx ;
+             TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
+           }
+       }
+     else
+       {
+         TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x",
+             my_injection_group, desired_fifo, bhx, headx,tailx, lastx
+             ) ;
+       }
+   }
+   spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
+   skb_group_free(&skb_group) ;
+   if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;
+
+ TRACE("(<) desired_fifo=%d",desired_fifo);
+
+}
+static void init_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze)
+{
+	unsigned int x;
+	unsigned int y;
+	unsigned int z;
+	for( x=0; x<xe; x+=1)
+		{
+			for(y=0;y<ye;y+=1)
+				{
+					for( z=0;z<ze;z+=1)
+						{
+							*shuffle_vector = (x<<16)|(y<<8)|z ;
+							shuffle_vector += 1 ;
+						}
+				}
+		}
+}
+
+static inline int next_prbs(int seed)
+{
+	int ncmask = seed >> 31 ;  /*  0x00000000 or 0xffffffff */
+	return (seed << 1) ^ (0x04C11DB7 & ncmask) ;   /*  CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */
+}
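+/*  next_prbs steps a 32-bit Galois-style LFSR: 'ncmask' broadcasts the sign bit so the
+ *  polynomial is XORed in exactly when the bit shifted out was a 1; iterating from any
+ *  non-zero seed yields a long pseudo-random sequence (a zero seed stays at zero). */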
+
+static int scatter_prbs(int seed)
+{
+	int a ;
+	for(a=0;a<32;a+=1)
+		{
+			seed=next_prbs(seed) ;
+		}
+	return seed ;
+}
+static int shuffle_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze, int seed)
+{
+	unsigned int vsize = xe*ye*ze ;
+	unsigned int vmask = vsize-1 ;
+	unsigned int a ;
+
+	for( a=0; a<vsize;a+=1)
+		{
+			unsigned int b = (seed & vmask) ;
+			unsigned int va = shuffle_vector[a] ;
+			unsigned int vb = shuffle_vector[b] ;
+			shuffle_vector[a] = vb ;
+			shuffle_vector[b] = va ;
+			seed=next_prbs(seed) ;
+
+		}
+	return seed ;
+}
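+/*  A PRBS-driven swap pass (a cheap cousin of Fisher-Yates rather than an unbiased shuffle):
+ *  'seed & vmask' only reaches every entry when xe*ye*ze is a power of two, as is typical
+ *  for Blue Gene partition dimensions. */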
+#if 0
+void dma_tcp_transfer_activate(int sendBytes)
+{
+	dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	int a ;
+	int my_x=dma_tcp->location.coordinate[0] ;
+	int my_y=dma_tcp->location.coordinate[1] ;
+	int my_z=dma_tcp->location.coordinate[2] ;
+	int ext_x=dma_tcp->extent.coordinate[0] ;
+	int ext_y=dma_tcp->extent.coordinate[1] ;
+	int ext_z=dma_tcp->extent.coordinate[2] ;
+	int vsize=ext_x*ext_y*ext_z ;
+	 /*  Push the 'diagnostic block' through the DMA unit */
+	TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
+	dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ;
+	for(a=0;a<vsize;a+=1)
+		{
+			unsigned int tg=dma_tcp->shuffle_vector[a] ;
+			unsigned int tg_x=tg>>16 ;
+			unsigned int tg_y=(tg>>8) & 0xff ;
+			unsigned int tg_z=tg & 0xff ;
+			TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ;
+			if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
+				{
+					bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
+				}
+		}
+}
+
+void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg)
+{
+	dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	int my_x=dma_tcp->location.coordinate[0] ;
+	int my_y=dma_tcp->location.coordinate[1] ;
+	int my_z=dma_tcp->location.coordinate[2] ;
+	 /*  Push the 'diagnostic block' through the DMA unit */
+	TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x tg=0x%08x",sendBytes,tg) ;
+		{
+			unsigned int tg_x=tg>>16 ;
+			unsigned int tg_y=(tg>>8) & 0xff ;
+			unsigned int tg_z=tg & 0xff ;
+			if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
+				{
+					bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
+				}
+		}
+}
+#endif
+void dma_tcp_transfer_activate_sync(int sendBytes)
+{
+	dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	int a ;
+	int my_x=dma_tcp->location.coordinate[0] ;
+	int my_y=dma_tcp->location.coordinate[1] ;
+	int my_z=dma_tcp->location.coordinate[2] ;
+	int ext_x=dma_tcp->extent.coordinate[0] ;
+	int ext_y=dma_tcp->extent.coordinate[1] ;
+	int ext_z=dma_tcp->extent.coordinate[2] ;
+	int vsize=ext_x*ext_y*ext_z ;
+	 /*  Push the 'diagnostic block' through the DMA unit */
+	TRACEN(k_t_general,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
+	dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ;
+	for(a=0;a<vsize;a+=1)
+		{
+			unsigned int tg=dma_tcp->shuffle_vector[a] ;
+			unsigned int tg_x=tg>>16 ;
+			unsigned int tg_y=(tg>>8) & 0xff ;
+			unsigned int tg_z=tg & 0xff ;
+			TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ;
+			if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
+				{
+					bgp_dma_diag_drive_sync_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
+				}
+		}
+}
+
+/*  'across faces' transfer in x,y,z directions, as a 'towards peak performance' test */
+#if 0
+void dma_tcp_transfer_activate_minicube(int sendBytes)
+{
+	dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	int my_x=dma_tcp->location.coordinate[0] ;
+	int my_y=dma_tcp->location.coordinate[1] ;
+	int my_z=dma_tcp->location.coordinate[2] ;
+	 /*  Push the 'diagnostic block' through the DMA unit */
+	TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
+	bgp_dma_diag_drive_block_at(dma_tcp,my_x^1,my_y,my_z,sendBytes) ;
+	bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y^1,my_z,sendBytes) ;
+	bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y,my_z^1,sendBytes) ;
+}
+
+int dma_tcp_transfer_wait(int demandCount)
+{
+	int spincount = 0 ;
+	TRACEN(k_t_request,"(>) demandCount=%d",demandCount) ;
+	while(DiagEndCount < demandCount && spincount < 100 )
+		{
+			int rc ;
+			set_current_state(TASK_INTERRUPTIBLE);
+			rc=schedule_timeout(1) ;
+			if( 0 != rc) break ;
+			spincount += 1 ;
+		}
+	TRACEN(k_t_request,"(<) DiagEndCount=%d spincount=%d",DiagEndCount,spincount) ;
+	return DiagEndCount >= demandCount ;
+}
+#endif
+#if defined(BARRIER_WITH_IOCTL)
+static volatile int DiagSyncCount ;
+
+static int issueInlineFrameDiagSync(
+		DMA_RecFifo_t      *f_ptr,
+		DMA_PacketHeader_t *packet_ptr,
+		dma_tcp_t * dma_tcp,
+    void  * request ,
+    int payload_bytes,
+    unsigned int src_key,
+    int Put_Offset
+    )
+  {
+	  timing_histogram(dma_tcp) ;
+	  DiagSyncCount += 1 ;
+	  return 0 ;
+  }
+
+static int issueInlineFrameDiagSyncActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           )
+  {
+    unsigned int SW_Arg=packet_ptr->SW_Arg  ;
+    int Put_Offset=packet_ptr->Put_Offset ;
+
+    enable_kernel_fp() ; // TODO: don't think this is needed nowadays
+    TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x",
+		    recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ;
+    return issueInlineFrameDiagSync(
+		    f_ptr,
+		    packet_ptr,
+        (dma_tcp_t *) recv_func_parm,
+        (void *) payload_ptr,
+        payload_bytes,
+        SW_Arg,
+        Put_Offset
+        ) ;
+  }
+
+#endif
+
+int dma_tcp_transfer_wait_sync(int demandCount)
+{
+	int spincount = 0 ;
+	TRACEN(k_t_general,"(>) demandCount=%d",demandCount) ;
+	while(DiagSyncCount < demandCount && spincount < 100 )
+		{
+			int rc ;
+			set_current_state(TASK_INTERRUPTIBLE);
+			rc=schedule_timeout(1) ;
+			if( 0 != rc) break ;
+			spincount += 1 ;
+		}
+	TRACEN(k_t_general,"(<) DiagSyncCount=%d spincount=%d",DiagSyncCount,spincount) ;
+	return DiagSyncCount >= demandCount ;
+}
+
+void dma_tcp_transfer_clearcount(void)
+{
+	TRACEN(k_t_general,"count cleared") ;
+/* 	DiagEndCount = 0 ; */
+	DiagSyncCount = 0 ;
+}
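+/*  Intended calling sequence for the diagnostic barrier (a sketch of how the ioctl layer is
+ *  expected to drive it): dma_tcp_transfer_clearcount() ; dma_tcp_transfer_activate_sync(bytes) ;
+ *  dma_tcp_transfer_wait_sync(n) ; where n is normally node_count-1, one sync packet expected
+ *  from every other node. Note that DiagSyncCount is only declared under BARRIER_WITH_IOCTL,
+ *  so these entry points assume that configuration.
+ */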
+
+void dma_tcp_diagnose_init(dma_tcp_t *dma_tcp)
+  {
+#if defined(BARRIER_WITH_IOCTL)
+    dma_tcp->diag_block_buffer=allocate_diag_block_buffer() ;
+    dma_tcp->shuffle_vector=allocate_shuffle_vector(dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ;
+    dma_tcp->shuffle_seed = scatter_prbs(dma_tcp->SW_Arg + 1) ;
+    init_shuffle_vector(dma_tcp->shuffle_vector,dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ;
+    dma_tcp->proto_issue_diag_sync=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDiagSyncActor, dma_tcp, 0, 0);
+    memset(dma_tcp->timing_histogram_buckets,0,33*sizeof(int)) ;
+#endif
+    dma_tcp->proto_diagnose=DMA_RecFifoRegisterRecvFunction(issueDiagnoseActor, dma_tcp, 0, 0);
+
+  }
+
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_frames.c b/drivers/net/bgp_torus/bgp_dma_tcp_frames.c
new file mode 100644
index 0000000..5b526a7
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp_frames.c
@@ -0,0 +1,2741 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ *
+ * Intent: Carry torus packets as messages into memory FIFOs, and interpret them
+ *          as eth frames for TCP
+ *         Later on, add token-based flow control with a view to preventing
+ *          congestion collapse as the machine gets larger and the loading gets higher
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+
+/* #include "bglink.h" */
+#include <spi/linux_kernel_spi.h>
+
+#include <asm/time.h>
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+/* #define CRC_CHECK_FRAMES */
+#define VERIFY_TARGET
+/* #define SIDEBAND_TIMESTAMP */
+
+#include "bgp_dma_tcp.h"
+#include "bgp_bic_diagnosis.h"
+
+
+static inline void frames_receive_torus(dma_tcp_t *dma_tcp,struct sk_buff * skb)
+{
+#if defined(CONFIG_BGP_STATISTICS)
+	struct ethhdr *eth = (struct ethhdr *) (skb->data) ;
+	struct iphdr *iph=(struct iphdr *) (eth+1) ;
+	dma_tcp->bytes_received += iph->tot_len ;
+#endif
+    bgtornet_receive_torus(skb);
+}
+
+#if defined(TRACK_LIFETIME_IN_FIFO)
+unsigned long long max_lifetime_by_direction[k_injecting_directions] ;
+#endif
+
+static void diag_skb_structure(struct sk_buff *skb)
+{
+	int f=skb_shinfo(skb)->nr_frags ;
+	if(0 == f)
+		{
+			TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=0 [0x%04x]",skb->len, skb->data_len, skb_headlen(skb)) ;
+		}
+	else if(1 == f)
+		{
+			TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=1 [0x%04x 0x%04x]",skb->len, skb->data_len, skb_headlen(skb),
+					skb_shinfo(skb)->frags[0].size
+					) ;
+		}
+	else if(2 == f)
+		{
+			TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=2 [0x%04x 0x%04x 0x%04x]",skb->len, skb->data_len, skb_headlen(skb),
+					skb_shinfo(skb)->frags[0].size,
+					skb_shinfo(skb)->frags[1].size
+					) ;
+		}
+	else
+		{
+			TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=%d [0x%04x 0x%04x 0x%04x 0x%04x ..]",skb_shinfo(skb)->nr_frags,
+					skb->len, skb->data_len, skb_headlen(skb),
+					skb_shinfo(skb)->frags[0].size,
+					skb_shinfo(skb)->frags[1].size,
+					skb_shinfo(skb)->frags[2].size
+					) ;
+		}
+	if( TRACING(k_t_sgdiag_detail))
+		{
+			unsigned int dump_length = ( skb_headlen(skb) < 256 ) ? skb_headlen(skb) : 256 ;
+			dumpmem(skb->data, dump_length, "skb_head") ;
+		}
+}
+
+static inline int torus_frame_payload_memcpy(
+                torus_frame_payload * target,
+                torus_frame_payload * source
+    )
+{
+	*target = *source ;
+	return 0 ;
+}
+
+/*  This is as per the powerpc <asm/time.h> 'get_tb' */
+/*  Dup'd here because we have to compile with ppc also, which doesn't have it defined */
+static inline u64 get_powerpc_tb(void)
+{
+  unsigned int tbhi, tblo, tbhi2;
+
+  tbhi = get_tbu();
+  tblo = get_tbl();
+  tbhi2 = get_tbu();
+  /* tbhi2 might be different from tbhi, but that would indicate that there had been a 32-bit carry.
+   * In that case (tbhi2,0) would be a reasonable representation of the timestamp that we usually
+   * think of as being (tbhi,tblo)
+   */
+  if( tbhi == tbhi2)
+	  {
+		  return ((u64)tbhi << 32) | tblo;
+	  }
+  return ((u64)tbhi2 << 32) ;
+}
+static void display_skb_structure(struct sk_buff *skb) ;
+
+static torus_frame_payload dummy_payload __attribute__((aligned(16)));
+static inline void demux_vacate_slot(dma_tcp_t * dma_tcp, unsigned int slot)
+  {
+    set_rcv_payload(&dma_tcp->rcvdemux, slot, (char *)&dummy_payload);
+    set_rcv_payload_alert(&dma_tcp->rcvdemux, slot, (char *)&dummy_payload);
+    set_rcv_expect(&dma_tcp->rcvdemux, slot, 0xffffffff);
+    set_rcv_skb(&dma_tcp->rcvdemux, slot, NULL);
+    TRACEN(k_t_general,"Slot %d vacated", slot );
+  }
+
+static inline void demux_show_slot(dma_tcp_t * dma_tcp, unsigned int slot)
+  {
+    void *payload = get_rcv_payload(&dma_tcp->rcvdemux, slot);
+    void *alert = get_rcv_payload_alert(&dma_tcp->rcvdemux, slot);
+    unsigned int expect=get_rcv_expect(&dma_tcp->rcvdemux, slot);
+    struct sk_buff *skb=get_rcv_skb(&dma_tcp->rcvdemux, slot);
+    if( payload != &dummy_payload || expect != 0xffffffff || skb )
+      {
+        TRACEN(k_t_error,"(E) not-vacant slot=%08x (%d %d) payload=%p alert=%p expect=0x%08x skb=%p",
+            slot, slot>>2, slot&3, payload, alert, expect, skb
+            ) ;
+      }
+  }
+
+static void init_demux_table(dma_tcp_t * dma_tcp, unsigned int node_count ) ;
+
+static void init_demux_table(dma_tcp_t * dma_tcp, unsigned int node_count )
+  {
+  unsigned int x ;
+  for( x = 0 ; x < k_slots_per_node*node_count ; x += 1)
+    {
+      demux_vacate_slot(dma_tcp,x) ;
+#if defined(ENABLE_LATENCY_TRACKING)
+      rcv_statistic_clear(&(dma_tcp->rcvdemux.rcv_per_slot_vector[x].latency));
+/*       set_min_latency(&dma_tcp->rcvdemux, x, 0x7fffffff) ; */
+/*       set_max_latency(&dma_tcp->rcvdemux, x, 0x80000000) ; */
+#endif
+    }
+  }
+
+
+static void show_protocol_header_tx(char * frame) __attribute__ ((unused)) ;
+static void show_protocol_header_tx(char * frame)
+  {
+    int * f = (int *) frame ;
+    TRACEN(k_t_request,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x",
+        f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16]
+        );
+  }
+
+static void show_protocol_header_fault(char * frame) __attribute__ ((unused)) ;
+static void show_protocol_header_fault(char * frame)
+  {
+    int * f = (int *) frame ;
+    TRACEN(k_t_error,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x",
+        f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16]
+        );
+  }
+
+static void show_protocol_header_rx(char * frame) __attribute__ ((unused)) ;
+static void show_protocol_header_rx(char * frame)
+  {
+    int * f = (int *) frame ;
+    TRACEN(k_t_general,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x",
+        f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16]
+        );
+  }
+
+/*  Polynomial picked as CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */
+static int frametrace_rx(char * address, int length ) __attribute__ ((unused)) ;
+static int frametrace_rx(char * address, int length )
+  {
+    int * a = (int *) address ;
+    int x ;
+    int csum32 = a[0] ;
+    for(x=1;x<(length/sizeof(int));x+=1)
+      {
+        csum32 = (csum32 << 1 ) ^ a[x] ^ ( (csum32 & 0x80000000) ? 0x04C11DB7 : 0 ) ;
+      }
+    TRACEN(k_t_general,"address=%p length=%d csum32=0x%08x",address,length,csum32) ;
+    return csum32 ;
+  }
+
+static int frametrace_tx(char * address, int length ) __attribute__ ((unused)) ;
+static int frametrace_tx(char * address, int length )
+  {
+    int * a = (int *) address ;
+    int x ;
+    int csum32 = a[0] ;
+    for(x=1;x<(length/sizeof(int));x+=1)
+      {
+        csum32 = (csum32 << 1 ) ^ a[x] ^ ( (csum32 & 0x80000000) ? 0x04C11DB7 : 0 ) ;
+      }
+    TRACEN(k_t_general,"address=%p length=%d csum32=0x%08x",address,length,csum32) ;
+    return csum32 ;
+  }
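+/*  Note: despite the polynomial above, these word-at-a-time shift-and-XOR loops compute a
+ *  quick frame fingerprint for matching tx against rx in traces, not a true bitwise CRC. */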
+
+/*  For diagnosis, put the local clock into the packet. Drop 4 lsbs off the 64-bit clock. */
+static unsigned int latency_timestamp(void) __attribute__ ((unused)) ;
+static unsigned int latency_timestamp(void)
+  {
+    unsigned int tbu = get_tbu() ;
+    unsigned int tbl = get_tbl() ;
+    unsigned int tbu2 = get_tbu() ;
+    unsigned int tbl2 = (tbu==tbu2) ? tbl : 0 ;
+    return (tbu2 << 28) | (tbl2 >> 4) ;
+  }
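+/*  The packing keeps the low 4 bits of the upper timebase word and the top 28 bits of the
+ *  lower word: a 32-bit value that ticks once per 16 timebase cycles and wraps every 2^36
+ *  cycles, ample for round-trip latency estimates. */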
+
+
+
+static void spot_examine_tcp_timestamp(int tsval, int tsecr)
+{
+	    if( tsecr != 0 )
+		    {
+			    int rtt=jiffies-tsecr ;
+			    TRACEN(k_t_general,"rtt=%d",rtt) ;
+#if defined(CONFIG_BGP_STATISTICS)
+			    rtt_histogram[fls(rtt)] += 1 ;
+#endif
+		    }
+	    if( tsval != 0 )
+		    {
+			    int transit=jiffies-tsval ;
+			    TRACEN(k_t_general,"transit=%d",transit) ;
+#if defined(CONFIG_BGP_STATISTICS)
+			    if( transit >= 0)
+				    {
+					    transit_histogram[fls(transit)] += 1 ;
+				    }
+#endif
+		    }
+
+}
+
+static void spot_parse_aligned_timestamp(struct tcphdr *th)
+{
+	__be32 *ptr = (__be32 *)(th + 1);
+	int tsecr ;
+	int tsval ;
+	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+		++ptr;
+		tsval = ntohl(*ptr);
+		++ptr;
+		tsecr = ntohl(*ptr);
+#if defined(CONFIG_BGP_TORUS)
+		spot_examine_tcp_timestamp(tsval,tsecr) ;
+#endif
+	}
+}
+
+static void spot_fast_parse_options(struct sk_buff *skb, struct tcphdr *th)
+{
+	if (th->doff == sizeof(struct tcphdr) >> 2) {
+		return;
+	} else if (
+		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+		spot_parse_aligned_timestamp( th) ;
+	}
+}
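+/*  This mirrors the TCP stack's aligned-timestamp fast path: doff == 5 means a bare 20-byte
+ *  header with no options, doff == 8 means exactly the 12-byte NOP/NOP/TIMESTAMP layout
+ *  checked above; anything else would need the slow option parser, which this probe
+ *  deliberately skips. */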
+
+static inline void analyse_tcp_flags(dma_tcp_t * dma_tcp,struct sk_buff * skb)
+{
+#if defined(KEEP_TCP_FLAG_STATS)
+        struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+        struct iphdr *iph = (struct iphdr *)(eth+1) ;
+        unsigned int * iph_word = (unsigned int *) iph ;
+        struct tcphdr * tcph = (struct tcphdr *)(iph_word+(iph->ihl)) ;
+        unsigned int eth_proto = eth->h_proto ;
+        unsigned int ip_proto = iph->protocol ;
+        if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP )
+        	{
+                     unsigned int flag_fin = tcph->fin ;
+                     unsigned int flag_syn = tcph->syn ;
+                     unsigned int flag_rst = tcph->rst ;
+                     unsigned int flag_psh = tcph->psh ;
+                     unsigned int flag_ack = tcph->ack ;
+                     unsigned int flag_urg = tcph->urg ;
+                     unsigned int flag_ece = tcph->ece ;
+                     unsigned int flag_cwr = tcph->cwr ;
+                     dma_tcp->tcp_received_flag_count[7] += flag_fin ;
+                     dma_tcp->tcp_received_flag_count[6] += flag_syn ;
+                     dma_tcp->tcp_received_flag_count[5] += flag_rst ;
+                     dma_tcp->tcp_received_flag_count[4] += flag_psh ;
+                     dma_tcp->tcp_received_flag_count[3] += flag_ack ;
+                     dma_tcp->tcp_received_flag_count[2] += flag_urg ;
+                     dma_tcp->tcp_received_flag_count[1] += flag_ece ;
+                     dma_tcp->tcp_received_flag_count[0] += flag_cwr ;
+                     spot_fast_parse_options(skb,tcph) ;
+        	}
+
+#endif
+}
+
+static inline int deliver_eagerly(const dma_tcp_t * dma_tcp)
+{
+	return dma_tcp->tuning_deliver_eagerly ;
+}
+/*
+ * Frames from a source generally arrive in the order that they left the sender, but it is possible for some
+ * nondeterminism to be introduced because of adaptive routing and because 'short' frames get sent 'eagerly' rather than
+ * with DMA.
+ * It is desirable to deliver frames for a given TCP session in-order, otherwise the network layer may call for a
+ * 'fast' retransmit (thinking that a frame has been lost). This routine defers out-of-order frames until they can be
+ * presented in-order.
+ */
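+/*
+ * For illustration: conn_ids advance modulo k_concurrent_receives within each slot. If the
+ * slot next expects conn 5 and conn 5 arrives, slot_advancement is 0 and the skb is handed
+ * off at once, followed by any queued run of 6, 7, ...; if conn 7 arrives first,
+ * slot_advancement is 2 and the skb waits in the resequence array until 5 and 6 show up.
+ */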
+static void deliver_from_slot(dma_tcp_t * dma_tcp, unsigned int slot, unsigned int conn_id, struct sk_buff * skb)
+{
+	if( ! deliver_eagerly(dma_tcp))
+		{
+		unsigned int slot_conn=get_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot) ;
+		unsigned int slot_advancement= (conn_id-slot_conn) & (k_concurrent_receives-1) ;
+		TRACEN(k_t_general,"slot=0x%08x conn_id=0x%08x slot_conn=0x%08x skb=%p slot_advancement=%d",slot,conn_id,slot_conn,skb,slot_advancement) ;
+	#if defined(CONFIG_BGP_STATISTICS)
+		dma_tcp->resequence_histogram[slot_advancement] += 1;
+	#endif
+		if( 0 == slot_advancement)
+				{
+					 /*  'oldest' skb has arrived. Deliver it */
+					frames_receive_torus(dma_tcp,skb) ;
+					 /*  and check if any 'arrivals ahead' can be delivered now */
+					{
+						int x ;
+						struct sk_buff * slot_skb  ;
+						for(x=1; x<k_concurrent_receives-1 && (NULL != (slot_skb = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x))); x+=1)
+							{
+								TRACEN(k_t_general,"Delivering slot=0x%08x conn_id=0x%08x skb=%p",slot,slot_conn+x,slot_skb) ;
+								frames_receive_torus(dma_tcp,slot_skb) ;
+								set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x,NULL) ;
+							}
+						set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+x) ;
+					}
+				}
+		else
+			{
+				struct sk_buff * slot_skb_old = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id);
+				TRACEN(k_t_general,"Queuing slot=0x%08x conn_id=0x%08x skb=%p skb->len=%d slot_skb_old=%p",slot,conn_id,skb,skb->len,slot_skb_old) ;
+				if( slot_skb_old)
+					{
+						 /*  Wrapped around all the possible reorder slots. Something seems to have gone missing. */
+						TRACEN(k_t_error,"(E) resequence buffer wrapped, skb=%p conn_id=0x%08x. Delivering ",skb,conn_id) ;
+						 /*  and check if any 'arrivals ahead' can be delivered now */
+						{
+							int x ;
+							struct sk_buff * slot_skb  ;
+							for(x=0; x<k_concurrent_receives-1 && (NULL != (slot_skb = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x))); x+=1)
+								{
+									TRACEN(k_t_general,"Delivering slot=0x%08x conn_id=0x%08x skb=%p",slot,slot_conn+x,slot_skb) ;
+									frames_receive_torus(dma_tcp,slot_skb) ;
+									set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x,NULL) ;
+								}
+							set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+x) ;
+							slot_conn = slot_conn+x ;
+						}
+						if( 0 == ((slot_conn-conn_id) & (k_concurrent_receives-1)))
+								{
+									 /*  Everything is delivered ... */
+									frames_receive_torus(dma_tcp,skb) ;
+									set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+1) ;
+								}
+						else
+							{
+								 /*  There's another gap, save the skb for future delivery */
+								set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id,skb) ;
+							}
+
+
+					}
+				else
+					{
+						set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id,skb) ;
+					}
+
+			}
+		}
+	else
+		{
+			TRACEN(k_t_general,"slot=0x%08x conn_id=0x%08x skb=%p",slot,conn_id,skb) ;
+			if( TRACING(k_t_sgdiag_detail))
+				{
+					unsigned int dump_length = ( skb_headlen(skb) < 256 ) ? skb_headlen(skb) : 256 ;
+					dumpmem(skb->data, dump_length, "received skb") ;
+				}
+			frames_receive_torus(dma_tcp,skb) ;
+		}
+
+}
+
+static void display_pending_slot(dma_tcp_t * dma_tcp,unsigned int slot)
+{
+#if defined(RESEQUENCE_ARRIVALS)
+	unsigned int slot_conn=get_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot) ;
+	int x ;
+	int pending_count=0;
+	for(x=0; x<k_concurrent_receives; x+=1)
+		{
+			struct sk_buff * skb=get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x) ;
+			if(skb)
+				{
+					struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+					struct iphdr *iph = (struct iphdr *) (eth+1) ;
+					unsigned int saddr=iph->saddr ;
+					pending_count += 1;
+					TRACEN(k_t_request,
+							"(---) Pending slot=0x%08x slot_conn=0x%02x x=%d skb=%p skb->len=%d tot_len=0x%04x saddr=%d.%d.%d.%d\n",
+							slot,slot_conn & (k_concurrent_receives-1),x,skb,skb->len, iph->tot_len,
+							saddr>>24,
+							(saddr >> 16) & 0xff,
+							(saddr >> 8) & 0xff,
+							saddr & 0xff
+							) ;
+				}
+		}
+	if( pending_count >0 )
+		{
+			TRACEN(k_t_request,"slot=0x%08x pending_count=%d",slot,pending_count) ;
+		}
+
+#endif
+}
+
+void bgp_dma_tcp_display_pending_slots(dma_tcp_t * dma_tcp, unsigned int nodecount )
+{
+	unsigned int slot ;
+	for( slot=0; slot<nodecount; slot+=1 )
+		{
+			display_pending_slot(dma_tcp,slot) ;
+		}
+}
+
+
+static void issueInlineFrameDataSingle(dma_tcp_t * dma_tcp,
+    void  * request ,
+    unsigned int src_key ,
+    int payload_bytes)
+  {
+   unsigned int pad_head = src_key & 0x0f ;
+    TRACEN(k_t_detail | k_t_general,"(>)(%08x)", src_key);
+    if( k_dumpmem_diagnostic)
+	    {
+		    dumpmem(request,payload_bytes,"issueInlineFrameData") ;
+	    }
+      {
+/*  We have a packet which represents a complete frame; quite a small frame ... */
+        struct ethhdr *eth = (struct ethhdr *) (request+pad_head) ;
+        struct iphdr *iph = (struct iphdr *)(request+pad_head+sizeof(struct ethhdr)) ;
+        if( eth->h_proto == ETH_P_IP)
+          {
+             unsigned int totlen=iph->tot_len ;
+             int bytes_remaining = totlen+sizeof(struct ethhdr)+pad_head-payload_bytes ;
+             TRACEN(k_t_detail,"Frame total length=%d",totlen) ;
+             if( bytes_remaining <= 0)
+               {
+/*  Largest amount of data we might need is ... */
+/*                  k_injection_packet_size+k_torus_skb_alignment */
+                 struct sk_buff * skb = alloc_skb(k_injection_packet_size+k_torus_skb_alignment , GFP_ATOMIC);
+                 if(skb )
+                   {
+                     char * payload ;
+                     skb_reserve(skb, k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment);
+                     payload = skb->data ;
+/*  TODO: rewrite with 'memcpy' or a copy through integer regs, to avoid using FP now this is 'rare' */
+/*                      torus_frame_payload_load(request) ; */
+/*                      torus_frame_payload_store(payload) ; */
+                     torus_frame_payload_memcpy((torus_frame_payload *)payload,(torus_frame_payload *)request) ;
+                     TRACEN(k_t_detail,"(=)(%08x) skb=%p payload=%p bytes_remaining=%d", src_key,skb,skb->data,bytes_remaining);
+                     skb_reserve(skb,pad_head) ;
+                     skb_put(skb,totlen+sizeof(struct ethhdr)) ;
+                     analyse_tcp_flags(dma_tcp, skb) ;
+                     deliver_from_slot(dma_tcp,-1,-1,skb) ;
+                   }
+                 else
+                   {
+                     TRACEN(k_t_protocol,"(E) (%08x) skb was null", src_key);
+                     dma_tcp->device_stats->rx_dropped += 1;
+                     if( k_detail_stats)
+                	     {
+                		     dma_tcp->count_no_skbuff += 1 ;
+                	     }
+                   }
+               }
+             else
+               {
+                 TRACEN(k_t_protocol,"(E) frame does not fit packet, discarded");
+                 dma_tcp->device_stats->rx_frame_errors += 1;
+               }
+          }
+        else
+          {
+            TRACEN(k_t_protocol,"Packet not IP ethhdr=[%02x:%02x:%02x:%02x:%02x:%02x][%02x:%02x:%02x:%02x:%02x:%02x](%04x)",
+                eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5],
+                eth->h_source[0],eth->h_source[1],eth->h_source[2],eth->h_source[3],eth->h_source[4],eth->h_source[5],
+                eth->h_proto) ;
+            dma_tcp->device_stats->rx_frame_errors += 1;
+          }
+      }
+    TRACEN(k_t_detail,"(<)(%08x)", src_key);
+  }
+
+static int issueInlineFrameDataSingleActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           )
+  {
+    unsigned int SW_Arg=packet_ptr->SW_Arg ;
+/*     enable_kernel_fp() ; // TODO: don't think this is needed nowadays */
+
+    issueInlineFrameDataSingle(
+        (dma_tcp_t *) recv_func_parm,
+        (void *) payload_ptr,
+        SW_Arg,
+        payload_bytes
+        ) ;
+    return 0 ;
+  }
+
+#if defined(USE_ADAPTIVE_ROUTING)
+typedef struct
+{
+	unsigned int conn_id ;
+	unsigned int packet_count ;
+	unsigned int packets_to_go ;
+	int framestart_offset ;
+	int prev_offset ;  /*  For constructing 'reordering' statistics */
+} adaptive_skb_cb_t;
+
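+/*  Sketch (not wired in) of the Put_Offset layout assumed by the shifts
+ *  and masks below: bits 31:25 carry conn_id, bits 24:16 carry
+ *  packet_count, and bits 15:0 carry (-frame_length) rounded down to a
+ *  16-byte boundary; the receiver sign-extends the low half back into a
+ *  negative offset from the end of the frame.
+ */
+static inline int sketch_pack_adaptive_put_offset(unsigned int conn_id,
+		unsigned int packet_count, unsigned int length) __attribute__((unused)) ;
+static inline int sketch_pack_adaptive_put_offset(unsigned int conn_id,
+		unsigned int packet_count, unsigned int length)
+{
+	 /*  Mirrors the send-side expression in inject_into_dma_adaptive */
+	return (conn_id << 25) | (packet_count << 16) | ((-length) & 0xfff0) ;
+}
+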
+static void issueInlineFrameDataAdaptive(dma_tcp_t * dma_tcp,
+    void  * request ,
+    unsigned int src_key ,
+    int payload_bytes,
+    int Put_Offset
+    )
+  {
+	  unsigned int conn_id =  ((unsigned int) Put_Offset) >> 25  ;
+	  unsigned int packet_count  = (((unsigned int) Put_Offset) >> 16) & 0x1ff ;
+	  int offset_in_frame = (Put_Offset & 0xfff0) | 0xffff0000 ;
+	  unsigned int node_slot_mask=dma_tcp->node_slot_mask ;
+	  rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+	  unsigned int slot = (src_key >> 4) & node_slot_mask ;
+	  unsigned int pad_head = src_key & 0x0f ;
+	  struct sk_buff * candidate_skb=get_rcv_skb_for_conn(rcvdemux,slot,conn_id) ;
+	  TRACEN(k_t_detail,
+			"(>) request=%p slot=%08x pad_head=0x%08x payload_bytes=0x%02x Put_Offset=0x%08x\n",
+			request,slot,pad_head,payload_bytes,Put_Offset);
+	  if( candidate_skb)
+		  {
+			  adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ;
+			  if(askb->conn_id != conn_id || askb->packet_count != packet_count)
+				  {
+					  TRACEN(k_t_error,"(E) askb mismatch, slot=%08x askb->conn_id=%04x conn_id=%04x askb->packet_count=%04x packet_count=%04x askb->packets_to_go=%04x",
+							  slot,askb->conn_id,conn_id,askb->packet_count,packet_count,askb->packets_to_go) ;
+					  dev_kfree_skb(candidate_skb) ;
+					  candidate_skb = NULL ;
+				  }
+		  }
+	  if( NULL == candidate_skb)
+		  {
+			  instrument_flow(dma_tcp,k_receive_eager) ;
+			  candidate_skb=alloc_skb(packet_count*k_injection_packet_size+2*k_torus_skb_alignment+k_injection_packet_size,GFP_ATOMIC) ;  /*  TODO: refine the size */
+			  if( candidate_skb)
+			  {
+				  adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ;
+				  askb->conn_id = conn_id ;
+				  askb->packet_count = packet_count ;
+				  askb->packets_to_go = packet_count ;
+				  askb->framestart_offset = 0 ;
+				  askb->prev_offset = -65536 ;
+				  skb_reserve(candidate_skb, (k_torus_skb_alignment - ((unsigned int)(candidate_skb->data)) % k_torus_skb_alignment));
+				  skb_put(candidate_skb,packet_count*k_injection_packet_size) ;
+			  }
+			  else
+				  {
+					  TRACEN(k_t_error,"(E) skbuff allocation failed packet_count=%d slot=0x%08x conn_id=0x%08x",packet_count,slot,conn_id) ;
+				  }
+			  set_rcv_skb_for_conn(rcvdemux,slot,conn_id,candidate_skb) ;
+		  }
+	  if( candidate_skb)
+		  {
+			  unsigned char * end_of_frame=candidate_skb->tail ;
+			  unsigned char * target = end_of_frame+offset_in_frame ;
+			  int cand_start_offset = offset_in_frame + pad_head ;
+			  TRACEN(k_t_detail,"candidate_skb skb=%p head=%p data=%p tail=%p end=%p offset_in_frame=0x%08x target=%p cand_start_offset=0x%08x",
+					  candidate_skb,candidate_skb->head,candidate_skb->data,candidate_skb->tail,candidate_skb->end,offset_in_frame,target,cand_start_offset) ;
+			  if( target < candidate_skb->head)
+				  {
+					  TRACEN(k_t_error,"data offset outside skb, dropping packet") ;
+				  }
+			  else
+				  {
+					  adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ;
+					  int new_packets_to_go=askb->packets_to_go - 1 ;
+					  int prev_offset = askb->prev_offset ;
+#if defined(USE_ADAPTIVE_ROUTING)
+/*  Statistics, count how often a packet came out-of-order */
+					  if( offset_in_frame < prev_offset)
+						  {
+							  instrument_flow(dma_tcp,k_reordered) ;
+						  }
+					  askb->prev_offset = offset_in_frame ;
+#endif
+					  if( cand_start_offset < askb->framestart_offset )
+						  {
+							  askb->framestart_offset=cand_start_offset ;
+						  }
+
+					  TRACEN(k_t_detail,"memcpy(%p,%p,0x%08x) new_packets_to_go=%d",
+							  target,request,payload_bytes,new_packets_to_go) ;
+					  if( payload_bytes == k_injection_packet_size)
+						  {
+							   /*  doublehummer memcpy optimisation for 'full' packet */
+					                      /*  TODO: rewrite with 'memcpy' or a copy through integer regs, to avoid using FP now this is 'rare' */
+							  torus_frame_payload_memcpy((torus_frame_payload *)target,(torus_frame_payload *)request) ;
+						  }
+					  else
+						  {
+							  memcpy(target,request,payload_bytes) ;
+						  }
+					  if( new_packets_to_go <= 0)
+						  {
+					                     analyse_tcp_flags(dma_tcp, candidate_skb) ;
+					                     skb_reserve(candidate_skb,packet_count*k_injection_packet_size+askb->framestart_offset);
+					                     dumpframe(candidate_skb->data,candidate_skb->len,"Proposed frame") ;
+					                     deliver_from_slot(dma_tcp,slot,conn_id,candidate_skb) ;
+					                     set_rcv_skb_for_conn(rcvdemux,slot,conn_id,NULL) ;
+						  }
+					  else
+						  {
+							  askb->packets_to_go = new_packets_to_go ;
+						  }
+				  }
+		  }
+	  else
+		  {
+			  TRACEN(k_t_error,"(E) No memory for skb, dropping packet") ;
+		  }
+
+  }
+
+static int issueInlineFrameDataAdaptiveActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           )
+  {
+    unsigned int SW_Arg=packet_ptr->SW_Arg ;
+    int Put_Offset=packet_ptr->Put_Offset ;
+/*     enable_kernel_fp() ; // TODO: don't think this is needed nowadays */
+
+    issueInlineFrameDataAdaptive(
+        (dma_tcp_t *) recv_func_parm,
+        (void *) payload_ptr,
+        SW_Arg,
+        payload_bytes,
+        Put_Offset
+        ) ;
+    return 0 ;
+  }
+#endif
+
+#if defined(AUDIT_FRAME_HEADER)
+
+frame_header_t all_headers_in_counters[DMA_NUM_COUNTERS_PER_GROUP] ;
+#endif
+unsigned int tot_len_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ;
+
+static inline void create_dma_descriptor_propose_accept(dma_tcp_t *dma_tcp,
+                void * address,
+                unsigned int length,
+                unsigned int x, unsigned int y, unsigned int z,
+                unsigned int proto,
+                unsigned int SW_Arg,
+                unsigned int conn_id,
+                unsigned int tag,
+                DMA_InjDescriptor_t *desc,
+                unsigned int propose_length
+		)
+{
+	    dma_addr_t dataAddr ;
+	    int ret1 ;
+	    int PutOffset = (conn_id << 25) | (tag << 16) | ((-length) & 0xfff0) ;
+	    TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d proto=%d desc=%p",address,length,x,y,z,proto,desc);
+	    dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+	    ret1 = DMA_TorusMemFifoDescriptor( desc,
+                            x, y, z,
+                            dma_tcp_ReceptionFifoGroup(dma_tcp),          /*  recv fifo grp id */
+                            0,          /*  hints */
+                            virtual_channel(dma_tcp,k_VC_anyway),          /*  vc - adaptive */
+                            SW_Arg,          /*  softw arg */
+                            proto,     /*  function id */
+                            dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+                            k_injCounterId,  /*  inj counter id */
+                            dataAddr,        /*  send address */
+                            propose_length          /*  proposal length */
+                            );
+	    if(ret1 != 0 )
+		    {
+			    TRACEN(k_t_error,"(E) ret1=%d",ret1) ;
+		    }
+
+	    DMA_DescriptorSetPutOffset(desc,PutOffset) ;  /*  For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */
+
+	    TRACEN(k_t_general , "(<) ret1=%d",ret1);
+
+}
+
+static inline unsigned int ethhdr_src_x(struct ethhdr * eth)
+{
+	return eth->h_source[3] ;
+}
+static inline unsigned int ethhdr_src_y(struct ethhdr * eth)
+{
+	return eth->h_source[4] ;
+}
+static inline unsigned int ethhdr_src_z(struct ethhdr * eth)
+{
+	return eth->h_source[5] ;
+}
+
+static inline unsigned int ethhdr_dest_x(struct ethhdr * eth)
+{
+	return eth->h_dest[3] ;
+}
+static inline unsigned int ethhdr_dest_y(struct ethhdr * eth)
+{
+	return eth->h_dest[4] ;
+}
+static inline unsigned int ethhdr_dest_z(struct ethhdr * eth)
+{
+	return eth->h_dest[5] ;
+}
+
+#if defined(USE_SKB_TO_SKB)
+static int get_reception_counter(dma_tcp_t * dma_tcp)
+{
+	unsigned int counters_available = dma_tcp->qtyFreeRecCounters ;
+	if( counters_available > 0)
+		{
+			int cx ;
+			int scanRecCounter=dma_tcp->scanRecCounter ;
+			dma_tcp->qtyFreeRecCounters=counters_available-1 ;
+			for(cx=0;cx<DMA_NUM_COUNTERS_PER_GROUP;cx+=1)
+				{
+					int cxx=(scanRecCounter+cx) & (DMA_NUM_COUNTERS_PER_GROUP-1) ;
+					if(0 == dma_tcp->recCntrInUse[cxx])
+						{
+							dma_tcp->scanRecCounter=cxx+1 ;
+							dma_tcp->recCntrInUse[cxx] = 1 ;
+							return cxx ;
+						}
+				}
+			TRACEN(k_t_error,"(E) Should have been %d counters available",counters_available) ;
+		}
+	return -1 ;   /*  No reception counters available */
+}
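+
+/*  The matching release is done inline where a counter drains (see
+ *  handle_empty_recCounter_reload later in this file); factored out it
+ *  would look roughly like this unused sketch:
+ */
+static inline void sketch_put_reception_counter(dma_tcp_t * dma_tcp, unsigned int cxx) __attribute__((unused)) ;
+static inline void sketch_put_reception_counter(dma_tcp_t * dma_tcp, unsigned int cxx)
+{
+	dma_tcp->recCntrInUse[cxx] = 0 ;
+	dma_tcp->rcv_skbs[cxx] = NULL ;
+	dma_tcp->qtyFreeRecCounters += 1 ;
+	DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,cxx) ;
+}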
+
+enum {
+	k_PSKB_noRecCounter = 0x01 ,
+	k_PSKB_freedRecCounter = 0x02
+};
+typedef struct
+{
+	unsigned int src_key ;
+	unsigned int slot ;
+	unsigned int conn_id ;
+	unsigned short tot_len ;
+	unsigned char pad_head ;
+} propose_skb_cb ;
+
+/* Frame injection control, may live in skb->cb . */
+/* 'desc' describes the 'non-fragmented' initial part of the skb data; code where the ficb is used will */
+/* handle what has to happen to get the 'fragmented' part of the skb sent out */
+enum {
+	k_cattle_class,
+	k_first_class
+};
+
+static int bgp_dma_tcp_s_and_f_frames_prepared(
+    dma_tcp_t *dma_tcp,
+    struct sk_buff *skb,
+    unsigned int queue_at_head,
+    unsigned int transport_class
+    ) ;
+
+static int isProp(dma_tcp_t * dma_tcp,struct ethhdr *eth,struct iphdr *iph)
+{
+	int h_source_x=eth->h_source[3] ;
+	int h_source_y=eth->h_source[4] ;
+	int h_source_z=eth->h_source[5] ;
+	int my_x=dma_tcp->location.coordinate[0] ;
+	int my_y=dma_tcp->location.coordinate[1] ;
+	int my_z=dma_tcp->location.coordinate[2] ;
+
+	if( h_source_x == my_x && h_source_y == my_y && h_source_z == my_z )
+		{
+			TRACEN(k_t_general,"non-propose from (%d,%d,%d)",eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ;
+			return 0 ;
+		}
+	return 1 ;
+}
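+
+/*  A frame arriving with this node's own coordinates in its Ethernet
+ *  source can only be the echo of a header this node itself sent, so it
+ *  is handled as an 'accept' carried on the propose protocol (see the
+ *  tail of issueProp and issue_accept below).
+ */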
+
+struct accepthdr {
+	struct iphdr iph ;
+	unsigned int conn_id ;
+	int reception_counter ;
+};
+
+static inline void create_dma_descriptor_direct_put_offset(dma_tcp_t *dma_tcp,
+                unsigned int x, unsigned int y, unsigned int z,
+                int injection_counter,
+                int reception_counter,
+                dma_addr_t dataAddr,
+                int msglen,
+                DMA_InjDescriptor_t *desc,
+                unsigned int offset
+		) ;
+
+static void display_iphdr(struct iphdr *iph)
+{
+	TRACEN(k_t_request,"iphdr tot_len=0x%04x saddr=0x%08x daddr=0x%08x",iph->tot_len,iph->saddr,iph->daddr) ;
+}
+
+static unsigned int counted_length(struct sk_buff *skb)
+{
+	unsigned int rc=skb_headlen(skb) ;
+	int f ;
+	int nfrags = skb_shinfo(skb)->nr_frags ;
+	struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[0] ;
+	for(f=0; f<nfrags; f+=1)
+		{
+			rc += frag[f].size ;
+		}
+	return rc ;
+
+}
+
+static int audit_skb_at_accept(dma_tcp_t * dma_tcp,struct sk_buff *skb, unsigned int totlen_at_propose, struct iphdr *iph_at_rcv)
+{
+	unsigned int ctlen = counted_length(skb) ;
+	if( totlen_at_propose == 0 || totlen_at_propose > dma_tcp->mtu || totlen_at_propose != iph_at_rcv->tot_len || totlen_at_propose +sizeof(struct ethhdr) != ctlen)
+		{
+			TRACEN(k_t_error,"(E) skb=%p inconsistent, totlen_at_propose=0x%04x iph_at_rcv->tot_len=0x%04x skb->data_len=0x%04x counted_length(skb)=0x%04x",
+					skb, totlen_at_propose, iph_at_rcv->tot_len, skb->data_len, ctlen
+					) ;
+			  display_skb_structure(skb) ;
+			  display_iphdr(iph_at_rcv) ;
+			  instrument_flow(dma_tcp,k_accept_audit_fail) ;
+			  return 1 ;
+		}
+	return 0 ;
+}
+void issue_accept(dma_tcp_t * dma_tcp,struct accepthdr * accepth, unsigned int src_key )
+{
+	unsigned int conn_id=accepth->conn_id ;
+	int reception_counter=accepth->reception_counter ;
+	unsigned int node_slot_mask=dma_tcp->node_slot_mask ;
+	unsigned int slot = (src_key >> 4) & node_slot_mask ;
+	struct sk_buff *skb=get_tx_skb(&dma_tcp->tx_mux,slot,conn_id) ;
+	TRACEN(k_t_general,"src_key=0x%08x conn_id=0x%08x reception_counter=0x%08x",src_key,conn_id,reception_counter) ;
+	instrument_flow(dma_tcp,k_act_accept_rpc) ;
+	if( skb)
+		  {
+			  struct ethhdr* eth = (struct ethhdr*)(skb->data) ;
+			  unsigned int x=ethhdr_dest_x(eth) ;
+			  unsigned int y=ethhdr_dest_y(eth) ;
+			  unsigned int z=ethhdr_dest_z(eth) ;
+			  frame_injection_cb *ficb = (frame_injection_cb *) skb->cb ;
+			  unsigned int payload_length = skb_headlen(skb)  ;
+			  unsigned int payload_address = (unsigned int)(skb->data) ;
+			  unsigned int pad_head = payload_address & 0x0f ;
+			  unsigned int aligned_payload_length = payload_length + pad_head ;
+			  dma_addr_t dataAddr = dma_map_single(NULL, skb->data-pad_head, aligned_payload_length, DMA_TO_DEVICE);
+
+			  set_tx_skb(&dma_tcp->tx_mux,slot,conn_id,NULL) ;
+			  TRACEN(k_t_general,"Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb=%p x=%d y=%d z=%d msglen=0x%04x",
+					  slot,conn_id,reception_counter,skb, x,y,z,payload_length+pad_head) ;
+			  if(TRACING(k_t_sgdiag))
+				  {
+					  TRACEN(k_t_sgdiag,"Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb=%p x=%d y=%d z=%d msglen=0x%04x",
+							  slot,conn_id,reception_counter,skb, x,y,z,payload_length+pad_head) ;
+					  diag_skb_structure(skb) ;
+				  }
+#if defined(AUDIT_HEADLEN)
+			  {
+				  int rca = audit_skb_at_accept(dma_tcp,skb,ficb->tot_len,&accepth->iph) ;
+				  if( rca)
+					  {
+						  TRACEN(k_t_error,"(!!!) dropping skb, will cause (x=%d y=%d z=%d) counter 0x%02x to leak", x,y,z,reception_counter) ;
+						  dev_kfree_skb(skb) ;
+						  return ;
+					  }
+			  }
+#endif
+			  {
+				  int transfer_length = k_abbreviate_headlen ? (payload_length+pad_head-eth->h_source[0]) : (payload_length+pad_head) ;
+				  dma_addr_t transfer_address = k_abbreviate_headlen ? (dataAddr+eth->h_source[0]) : dataAddr ;
+				  unsigned int receive_offset = k_abbreviate_headlen ? eth->h_source[0] : 0 ;
+			  if( 0 != transfer_length)
+				  {
+					  create_dma_descriptor_direct_put_offset(
+							  dma_tcp,x, y, z,k_injCounterId,reception_counter,transfer_address,transfer_length,&ficb->desc,receive_offset
+							  ) ;
+				  }
+			  else
+				  {
+					  TRACEN(k_t_general,"(I) head length is zero") ;
+					   /*  Set up a descriptor for a non-zero length, then set its length to zero so that code later on can pick up the special case */
+					  create_dma_descriptor_direct_put_offset(
+							  dma_tcp,x, y, z,k_injCounterId,reception_counter,transfer_address,1,&ficb->desc,receive_offset
+							  ) ;
+					  ficb->desc.msg_length = 0 ;
+					   instrument_flow(dma_tcp,k_headlength_zero) ;
+				  }
+			  }
+			  ficb->free_when_done=1 ;
+			  bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp, skb, 0, k_first_class) ;
+
+		  }
+	else
+		  {
+			  TRACEN(k_t_error,"(E) Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb is null",
+					  slot,conn_id,reception_counter ) ;
+		  }
+}
+
+static int should_park(dma_tcp_t * dma_tcp,unsigned int proposals_active, unsigned int x0, unsigned int y0, unsigned int z0)
+{
+	unsigned int free_counters = dma_tcp->qtyFreeRecCounters ;
+	unsigned int tuning_counters_per_source = dma_tcp->tuning_counters_per_source ;
+/* 	unsigned int reported_transmission_fifo = report_transmission_fifo(dma_tcp,x0,y0,z0) ; */
+	return ( tuning_counters_per_source > 0 )
+		? (proposals_active > tuning_counters_per_source )
+		: ((proposals_active > 1) && (proposals_active * proposals_active > free_counters )) ;
+}
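+
+/*  Worked example of the default (tuning_counters_per_source == 0) rule:
+ *  with 64 reception counters free, a source already holding 9 active
+ *  proposals has its next propose parked (9*9=81 > 64) while one holding
+ *  8 does not (8*8=64), so each source is held to roughly the square
+ *  root of the free counters.
+ */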
+
+static void stamp_skb(struct sk_buff *skb, unsigned int size )
+{
+	if( skb->data + size <= skb->end)
+		{
+			memset(skb->data,0x11,size) ;
+		}
+	else
+		{
+			TRACEN(k_t_error,"(E) Stamp for 0x%08x bytes out of range, skb=%p head=%p data=%p tail=%p end=%p, skipped",
+					size,skb,skb->head,skb->data,skb->tail,skb->end) ;
+		}
+}
+
+static inline int defer_skb_for_counter(const dma_tcp_t * dma_tcp)
+{
+	return k_allow_defer_skb_for_counter ? dma_tcp->tuning_defer_skb_until_counter : 0 ;
+}
+static void receive_skb_using_counter(dma_tcp_t *dma_tcp,struct sk_buff *skb_next, unsigned int counter_index,
+		unsigned int pad_head, unsigned int slot, unsigned int conn_id,
+		unsigned int x, unsigned int y,unsigned int z,
+		unsigned int tot_len,
+		unsigned int src_key) ;
+static void pending_rcv_skb_queue(dma_tcp_t *dma_tcp, struct sk_buff * skb, unsigned int x0, unsigned int y0, unsigned int z0 )
+{
+/* 	if( 1 == dma_tcp->tuning_select_fifo_algorithm) */
+/* 		{ */
+/* 			skb_queue_tail(&dma_tcp->balancer.b[k_pending_rcv_skb_classes-1].pending_rcv_skbs,skb) ; */
+/* 		} */
+/* 	else */
+/* 		{ */
+			unsigned int reported_fifo=report_transmission_fifo(dma_tcp,x0,y0,z0) ;
+			TRACEN(k_t_general,"skb=%p would come from fifo=%d on node [%d,%d,%d]",skb,reported_fifo,x0,y0,z0) ;
+			if( reported_fifo < k_pending_rcv_skb_classes)
+				{
+					skb_queue_tail(&dma_tcp->balancer.b[reported_fifo].pending_rcv_skbs,skb) ;
+				}
+			else
+				{
+					TRACEN(k_t_error,"(!!!) skb=%p would come from fifo=%d on node [%d,%d,%d] (out of range)",skb,reported_fifo,x0,y0,z0) ;
+					skb_queue_tail(&dma_tcp->balancer.b[0].pending_rcv_skbs,skb) ;
+				}
+/* 		} */
+}
+
+static inline int over_quota(bgp_dma_balancer_direction *b)
+{
+	int ql = skb_queue_len(&b->pending_rcv_skbs) ;
+	return ql ? b->outstanding_counters : 0x7fffffff ;
+}
+static struct sk_buff* pending_rcv_skb_dequeue(dma_tcp_t *dma_tcp)
+{
+	unsigned int q=0 ;
+	int qq=over_quota(dma_tcp->balancer.b+0) ;
+	int x ;
+	for(x=1;x<k_pending_rcv_skb_classes;x+=1)
+		{
+			int qp=over_quota(dma_tcp->balancer.b+x) ;
+			if( qp < qq)
+				{
+					qq=qp ;
+					q=x ;
+				}
+		}
+	return skb_dequeue(&dma_tcp->balancer.b[q].pending_rcv_skbs) ;
+}
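+
+/*  i.e. dequeue from whichever per-fifo class has work queued and the
+ *  fewest reception counters already outstanding; an empty class reports
+ *  0x7fffffff from over_quota so it is never preferred while another
+ *  class has work.
+ */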
+
+static void issueProp(dma_tcp_t * dma_tcp,
+    void  * request ,
+    unsigned int src_key ,
+    int payload_bytes,
+    int Put_Offset
+    )
+  {
+	  unsigned int conn_id =  ((unsigned int) Put_Offset) >> 25  ;
+	  unsigned int node_slot_mask=dma_tcp->node_slot_mask ;
+	  unsigned int slot = (src_key >> 4) & node_slot_mask ;
+	  unsigned int pad_head = src_key & 0x0f ;
+
+	  struct ethhdr *eth = (struct ethhdr *)(request+pad_head) ;
+	  unsigned int eth_proto = eth->h_proto ;
+
+	  struct iphdr *iph = (struct iphdr *) (eth+1) ;
+	  unsigned int tot_len=iph->tot_len ;
+	  if( isProp(dma_tcp,eth,iph))
+		  {
+			  unsigned int x=ethhdr_src_x(eth) ;
+			  unsigned int y=ethhdr_src_y(eth) ;
+			  unsigned int z=ethhdr_src_z(eth) ;
+			  rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+			  unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ;
+				  instrument_flow(dma_tcp,k_act_propose_rpc) ;
+				  set_proposals_active(rcvdemux,slot,proposals_active+1) ;
+				   /*  If we're flow controlling by counters, we have a choice here. */
+				   /*  We can either get on with it, or park it for later when a previously-started frame completes */
+				  if( 0 == k_counter_flow_control || ! should_park(dma_tcp,proposals_active,x,y,z) )
+				  {
+					  int reception_counter=get_reception_counter(dma_tcp) ;
+					  TRACEN(k_t_general|k_t_sgdiag,"Prop from slot=0x%08x conn_id=0x%04x eth_proto=0x%04x pad_head=0x%02x tot_len=0x%04x x=0x%02x y=0x%02x z=0x%02x msglen=0x%04x payload_bytes=0x%02x", slot,conn_id,eth_proto,pad_head,tot_len, x, y, z,tot_len+pad_head, payload_bytes) ;
+
+					   /*  Now we need an 'skbuff' and a reception counter. Reception counters might be scarce */
+					  if( reception_counter != -1 )
+						  {
+							  unsigned int allocation_size=tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment ;  /*  TODO: refine the size */
+							  struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ;  /*  TODO: refine the size */
+							  if( skb)
+								  {
+									  if(k_scattergather_diagnostic) stamp_skb(skb,tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+									  skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+									   /*  Bring in the frame header for diagnosis later ... */
+									  memcpy(skb->data-pad_head,request,payload_bytes) ;
+									  skb_put(skb,tot_len+sizeof(struct ethhdr)) ;
+									  if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+									  {
+										  receive_skb_using_counter(dma_tcp,skb,reception_counter,pad_head,slot,conn_id,x,y,z,tot_len,src_key) ;
+									  }
+								  }
+							  else
+								  {
+									  TRACEN(k_t_error,"(E) No memory available for skbuff") ;
+								  }
+						  }
+					  else
+						  {
+							  unsigned int allocation_size = defer_skb_for_counter(dma_tcp) ?  (payload_bytes+2*k_torus_skb_alignment) : (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+							  unsigned int put_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes-pad_head) : (tot_len+sizeof(struct ethhdr)) ;
+							  /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */
+							  struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ;  /*  TODO: refine the size */
+							  TRACEN(k_t_general,"allocation_size=0x%04x put_size=0x%04x skb=%p",allocation_size,put_size,skb) ;
+							  instrument_flow(dma_tcp, k_no_reception_counter) ;
+							  if( skb)
+								  {
+									  if(k_scattergather_diagnostic) stamp_skb(skb,allocation_size) ;
+									  skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+									   /*  Bring in the frame header for diagnosis later ... */
+									  memcpy(skb->data-pad_head,request,payload_bytes) ;
+									  skb_put(skb,put_size) ;
+									  if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+									  {
+										  propose_skb_cb * pskbcb = (propose_skb_cb *)skb->cb ;
+										  pskbcb->src_key=src_key ;
+										  pskbcb->slot = slot ;
+										  pskbcb->conn_id = conn_id ;
+										  pskbcb->tot_len = tot_len ;
+										  pskbcb->pad_head = pad_head ;
+									  }
+									  instrument_flow(dma_tcp,k_defer_accept_rpc_counters) ;
+									  pending_rcv_skb_queue(dma_tcp,skb,x,y,z) ;
+									  TRACEN(k_t_flowcontrol|k_t_general,"No reception counters (%d,%d,%d) skb=%p src_key=0x%08x slot=0x%08x conn_id=0x%08x tot_len=0x%04x pad_head=0x%02x",x,y,z,skb,src_key,slot,conn_id,tot_len,pad_head) ;
+								  }
+							  else
+								  {
+									  TRACEN(k_t_error,"(E) No memory available for skbuff") ;
+								  }
+						  }
+				  }
+				  else
+				  {
+					 /*  Park the 'propose' until a previous frame from this node completes */
+
+					  unsigned int allocation_size = defer_skb_for_counter(dma_tcp) ?  (payload_bytes+2*k_torus_skb_alignment) : (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+					  unsigned int put_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes-pad_head) : (tot_len+sizeof(struct ethhdr)) ;
+					  /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */
+					  struct sk_buff *skb = alloc_skb(allocation_size, GFP_ATOMIC) ;  /*  TODO: refine the size */
+					  TRACEN(k_t_general,"allocation_size=0x%04x put_size=0x%04x skb=%p",allocation_size,put_size,skb) ;
+					  instrument_flow(dma_tcp, k_parked) ;
+					  if( skb)
+						  {
+							  if(k_scattergather_diagnostic) stamp_skb(skb,allocation_size) ;
+							  skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+							   /*  Bring in the frame header for diagnosis later ... */
+							  memcpy(skb->data-pad_head,request,payload_bytes) ;
+							  skb_put(skb,put_size) ;
+							  if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+							  {
+								  propose_skb_cb * pskbcb = (propose_skb_cb *)skb->cb ;
+								  pskbcb->src_key=src_key ;
+								  pskbcb->slot = slot ;
+								  pskbcb->conn_id = conn_id ;
+								  pskbcb->tot_len = tot_len ;
+								  pskbcb->pad_head = pad_head ;
+							  }
+							  instrument_flow(dma_tcp,k_defer_accept_rpc_nodeflow) ;
+							  enq_pending_flow(&dma_tcp->rcvdemux,slot,skb) ;
+							  TRACEN(k_t_general,"Flow control (%d,%d,%d) skb=%p src_key=0x%08x slot=0x%08x conn_id=0x%08x tot_len=0x%04x pad_head=0x%02x proposals_active=%d qtyFreeRecCounters=%d",x,y,z,skb,src_key,slot,conn_id,tot_len,pad_head,proposals_active,dma_tcp->qtyFreeRecCounters) ;
+						  }
+					  else
+						  {
+							  TRACEN(k_t_error,"(E) No memory available for skbuff") ;
+						  }
+				  }
+		  }
+	  else
+		  {
+			   /*  an 'accept' packet sent as a modified 'propose' ... */
+			struct accepthdr * accepth=(struct accepthdr *)(eth+1) ;
+			TRACEN(k_t_general,"'accept' src_key=0x%08x",src_key) ;
+			issue_accept(dma_tcp,accepth,src_key) ;
+		  }
+  }
+
+static int issuePropActor(DMA_RecFifo_t      *f_ptr,
+                           DMA_PacketHeader_t *packet_ptr,
+                           void               *recv_func_parm,
+                           char               *payload_ptr,
+                           int                 payload_bytes
+                           )
+  {
+    unsigned int SW_Arg=packet_ptr->SW_Arg ;
+    int Put_Offset=packet_ptr->Put_Offset ;
+
+    issueProp(
+        (dma_tcp_t *) recv_func_parm,
+        (void *) payload_ptr,
+        SW_Arg,
+        payload_bytes,
+        Put_Offset
+        ) ;
+    return 0 ;
+  }
+typedef struct
+{
+  unsigned int reception_counter ;
+  unsigned char x, y, z ;
+} accept_skb_cb ;
+
+static inline void create_dma_descriptor_direct_put_offset(dma_tcp_t *dma_tcp,
+                unsigned int x, unsigned int y, unsigned int z,
+                int injection_counter,
+                int reception_counter,
+                dma_addr_t dataAddr,
+                int msglen,
+                DMA_InjDescriptor_t *desc,
+                unsigned int offset
+		)
+{
+	    int ret1 __attribute((unused));
+	    TRACEN(k_t_general|k_t_sgdiag , "(>) injecting x=%d y=%d z=%d injection_counter=0x%02x reception_counter=0x%02x dataAddr=0x%08llx msglen=0x%08x desc=%p offset=0x%04x",
+			    x,y,z,injection_counter,reception_counter,dataAddr,msglen,desc,offset);
+	    ret1 = DMA_TorusDirectPutDescriptor( desc,
+	                                     x, y, z,
+	                                     0,          /*  hints */
+	                                     virtual_channel(dma_tcp,k_VC_anyway),          /*  vc - adaptive */
+	                                     dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+	                                     injection_counter,  /*  inj counter id */
+	                                     dataAddr,        /*  send offset */
+	                                     dma_tcp_ReceptionCounterGroup(dma_tcp),        /*  rec ctr grp */
+	                                     reception_counter,
+	                                     offset,        /*  reception offset */
+	                                     msglen          /*  message length */
+	                                     );
+	    TRACEN(k_t_general , "(<) ret1=%d",ret1);
+
+}
+
+#endif
+
+static dma_addr_t locate_dma_address(dma_tcp_t *dma_tcp,struct sk_buff *skb,unsigned int pad_head, unsigned int propose_len)
+  {
+    if( 0 == k_abbreviate_headlen || 0 == dma_tcp->tuning_enable_siw_placement || NULL == dma_tcp->siw_placement_callback)
+      {
+      return dma_map_single(NULL, skb->data-pad_head, skb->len+pad_head, DMA_FROM_DEVICE);
+      }
+    {
+      dma_addr_t rc0 = (*dma_tcp->siw_placement_callback)(skb) ;
+      dma_addr_t rc  =  rc0-pad_head+propose_len ;
+      instrument_flow(dma_tcp,rc0 ? k_siw_placement_hit : k_siw_placement_miss) ;
+      TRACEN(k_t_request,"siw_placement_callback returns 0x%016llx, rc=0x%016llx",rc0,rc) ;
+      return rc0 ? rc : dma_map_single(NULL, skb->data-pad_head, skb->len+pad_head, DMA_FROM_DEVICE);
+    }
+
+  }
+static void receive_skb_using_counter(dma_tcp_t *dma_tcp,struct sk_buff *skb_next, unsigned int counter_index,
+		unsigned int pad_head, unsigned int slot, unsigned int conn_id,
+		unsigned int x, unsigned int y,unsigned int z,
+		unsigned int tot_len,
+		unsigned int src_key)
+{
+	struct ethhdr* eth=(struct ethhdr *)(skb_next->data) ;
+  unsigned int propose_len = eth->h_source[0] ;
+  unsigned int dma_count = k_abbreviate_headlen ? (skb_next->len+pad_head-propose_len) : (skb_next->len+pad_head) ;
+	frame_injection_cb * ficb = (frame_injection_cb *) skb_next->cb ;
+  dma_addr_t dataAddr = locate_dma_address(dma_tcp, skb_next, pad_head, propose_len);
+	  unsigned int counter_base=dataAddr>>4 ;
+	  unsigned int counter_max=((dataAddr+tot_len+pad_head+sizeof(struct ethhdr)) >> 4)+1 ;
+
+#if defined(AUDIT_FRAME_HEADER)
+	  memcpy(all_headers_in_counters+counter_index,skb_next->data,sizeof(frame_header_t)) ;
+#endif
+
+	  dma_tcp->balancer.b[report_transmission_fifo(dma_tcp,x,y,z)].outstanding_counters += 1 ;
+
+	dma_tcp->slot_for_rcv[counter_index]=slot ;
+	dma_tcp->conn_for_rcv[counter_index]=conn_id | 0x80 ;  /*  Mark it up as having been delayed */
+	TRACEN(k_t_general|k_t_scattergather|k_t_sgdiag,"Reception counter 0x%02x [%08x %08x %08x] assigned to (%d,%d,%d) conn_id=0x%08x skb=%p propose_len=0x%02x",
+			counter_index,dma_count,counter_base,counter_max,x,y,z,conn_id,skb_next,propose_len) ;
+	  ficb->free_when_done = 0 ;
+
+	dma_tcp->rcv_skbs[counter_index] = skb_next ;
+	dma_tcp->rcv_timestamp[counter_index] = jiffies ;
+	{
+		unsigned int proposed_dma_length = tot_len+pad_head+sizeof(struct ethhdr) ;
+		unsigned int available_skb_length = skb_next->end - (skb_next->data-pad_head) ;
+		if( proposed_dma_length > available_skb_length )
+			{
+				TRACEN(k_t_error,"(!!!) skb=%p not big enough, dma=0x%08x bytes, pad_head=0x%02x, skb(head=%p data=%p tail=%p end=%p)",
+						skb_next,proposed_dma_length,pad_head,skb_next->head,skb_next->data,skb_next->tail,skb_next->end
+						) ;
+				show_stack(NULL,NULL) ;
+			}
+	}
+	  DMA_CounterSetValueBaseMaxHw(dma_tcp->recCounterGroup.counter[counter_index].counter_hw_ptr,dma_count,counter_base,counter_max) ;
+	  instrument_flow(dma_tcp,k_send_accept_rpc) ;
+		  {
+			   /*  Push out a 'reverse propose' frame, adjust it so it overlays the area beyond the initial frame which will be replaced by the response DMA */
+			  struct iphdr* iph = (struct iphdr*)(eth+1) ;
+			  struct ethhdr* accept_eth0 = (struct ethhdr *)(iph+1) ;
+			  struct ethhdr* accept_eth = (struct ethhdr *)(skb_next->data-pad_head+propose_len) ;
+			  struct accepthdr * accepth=(struct accepthdr *)(accept_eth+1) ;
+			  TRACEN(k_t_general,"accept_eth0=%p accepth=%p",accept_eth0,accept_eth) ;
+			  tot_len_for_rcv[counter_index] = iph->tot_len ; // For diagnostics if the torus hangs
+			  memcpy(accept_eth,eth,sizeof(struct ethhdr)) ;
+			  memcpy(&accepth->iph,iph,sizeof(struct iphdr)) ; // TODO: Diagnose the apparent 'scribble' at the sender, then take this away
+			  accepth->conn_id=conn_id ;
+			  accepth->reception_counter=counter_index ;
+			  if( (unsigned int)(accepth+1) > (unsigned int)(skb_next->end))
+				  {
+						TRACEN(k_t_error,"(!!!) skb=%p not big enough, (accepth+1)=%p, skb(head=%p data=%p tail=%p end=%p)",
+								skb_next,accepth+1,skb_next->head,skb_next->data,skb_next->tail,skb_next->end
+								) ;
+						show_stack(NULL,NULL) ;
+
+				  }
+			  TRACEN(k_t_general,"accept_eth=%p accepth=%p src_key=0x%08x conn_id=0x%08x counter_index=0x%08x",accept_eth,accepth,src_key,conn_id,counter_index) ;
+			  create_dma_descriptor_propose_accept(dma_tcp,
+					  (void *)(accept_eth),
+					  48,
+					  x,y, z,
+					  dma_tcp->proto_transfer_propose,
+					  (dma_tcp->src_key << 4),
+					  conn_id,
+					  0,
+					  &ficb->desc,
+					  48
+					) ;
+			  DMA_CounterSetEnableById(&dma_tcp->recCounterGroup,counter_index) ;
+			  bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,skb_next,0, k_first_class) ;
+		  }
+
+}
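+
+/*  The reception counter operates in 16-byte units: the base is
+ *  dataAddr>>4 and the max is one unit past the last byte of
+ *  ethhdr+tot_len+pad_head. For example, dataAddr=0x1000 with
+ *  ethhdr+tot_len+pad_head=0x5ea gives base 0x100 and max 0x15f.
+ */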
+
+static void handle_empty_recCounter_deliver(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+	  rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+	struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+	unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ;
+	  unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ;
+	  set_proposals_active(rcvdemux,slot,proposals_active-1) ;
+	TRACEN(k_t_general|k_t_sgdiag,"counter_index=0x%02x skb=%p",counter_index,skb) ;
+	if( skb)
+		{
+#if defined(AUDIT_FRAME_HEADER)
+			if(memcmp(skb->data,((char *)(all_headers_in_counters+counter_index)),32))
+				{
+					  TRACEN(k_t_request,"(!!!) header not as first seen") ;
+					  dumpmem(skb->data,sizeof(frame_header_t),"header-now") ;
+					  dumpmem(all_headers_in_counters+counter_index,sizeof(frame_header_t),"header-in-propose") ;
+
+				}
+#endif
+
+			{
+				struct ethhdr *eth=(struct ethhdr *)(skb->data) ;
+				unsigned int x=ethhdr_src_x(eth) ;
+				unsigned int y=ethhdr_src_y(eth) ;
+				unsigned int z=ethhdr_src_z(eth) ;
+				eth->h_source[0] = eth->h_dest[0] ; // Replug the item that got taken for DMA sideband
+				dma_tcp->balancer.b[report_transmission_fifo(dma_tcp,x,y,z)].outstanding_counters -= 1 ;
+			}
+			deliver_from_slot(dma_tcp,slot,dma_tcp->conn_for_rcv[counter_index],skb) ;
+		}
+	else
+		{
+			TRACEN(k_t_error,"(E) counter_index=0x%02x no skbuff, slot=0x%08x proposals_active=%d",counter_index,slot,proposals_active) ;
+		}
+
+}
+
+static void handle_empty_recCounter_flush(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+	  rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+	struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+	unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ;
+	  unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ;
+	  unsigned int counter_value = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+counter_index) ;
+	  set_proposals_active(rcvdemux,slot,proposals_active-1) ;
+	TRACEN(k_t_request,"(!!!) flushing counter_index=0x%02x skb=%p",counter_index,skb) ;
+	DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,counter_index) ;
+	dma_tcp_show_reception_one(dma_tcp,counter_index,counter_value) ;
+	if( skb)
+		{
+#if defined(AUDIT_FRAME_HEADER)
+			if(memcmp(skb->data,((char *)(all_headers_in_counters+counter_index)),32))
+				{
+					  TRACEN(k_t_request,"(!!!) header not as first seen") ;
+					  dumpmem(skb->data,sizeof(frame_header_t),"header-now") ;
+					  dumpmem(all_headers_in_counters+counter_index,sizeof(frame_header_t),"header-in-propose") ;
+
+				}
+#endif
+			dev_kfree_skb(skb) ;
+		}
+	else
+		{
+			TRACEN(k_t_error,"(E) counter_index=0x%02x no skbuff, slot=0x%08x proposals_active=%d",counter_index,slot,proposals_active) ;
+		}
+
+}
+
+static void handle_empty_recCounter_reload(dma_tcp_t *dma_tcp, unsigned int counter_index, unsigned int x0, unsigned int y0, unsigned int z0)
+{
+	  rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+		struct sk_buff * skb_next  ;
+	unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ;
+	  unsigned int proposals_active=get_proposals_active(rcvdemux,slot)+1 ;
+	if( k_counter_flow_control )
+		{
+			 /*  We're going to get a queued frame, but which queue we try first will depend on whether this source */
+			 /*  is over quota at the moment */
+			if (proposals_active > count_pending_flow(rcvdemux,slot)+1 && should_park(dma_tcp,proposals_active,x0,y0,z0))
+				{
+					 /*  If we have a 'queued' frame, take that */
+					skb_next = pending_rcv_skb_dequeue(dma_tcp) ;
+					TRACEN(k_t_general,"skb_next=%p",skb_next) ;
+					if( ! skb_next)
+						{
+							 /*  Try a 'parked' frame */
+							skb_next=deq_pending_flow(rcvdemux,slot) ;
+						}
+
+				}
+			else
+				{
+					 /*  If we have a 'parked' frame from the same source, get it moving now */
+					skb_next=deq_pending_flow(rcvdemux,slot) ;
+					TRACEN(k_t_general,"skb_next=%p",skb_next) ;
+					if( ! skb_next)
+						{
+							 /*  If nothing 'parked', try the general queue */
+							skb_next = pending_rcv_skb_dequeue(dma_tcp) ;
+						}
+
+				}
+		}
+	else
+		{
+			skb_next = pending_rcv_skb_dequeue(dma_tcp) ;
+		}
+	if( skb_next)
+		{
+			 /*  A request was waiting for a receive counter, which is now available */
+			propose_skb_cb * pskcb = (propose_skb_cb *)skb_next->cb ;
+			unsigned int src_key=pskcb->src_key ;
+			struct ethhdr* eth=(struct ethhdr *)(skb_next->data) ;
+			unsigned int x=ethhdr_src_x(eth) ;
+			unsigned int y=ethhdr_src_y(eth) ;
+			unsigned int z=ethhdr_src_z(eth) ;
+			unsigned int slot=pskcb->slot ;
+			unsigned int conn_id=pskcb->conn_id ;
+			unsigned int pad_head=pskcb->pad_head ;
+			unsigned int tot_len=pskcb->tot_len ;
+			if( defer_skb_for_counter(dma_tcp))
+				{
+					 /*  Need a new sk_buff; need to set up alignment */
+					 /*  TODO: shouldn't need alignment */
+					 /*  TODO: Copy in the data from the old skbuff, so that the DMA doesn't need to resend it */
+					  unsigned int allocation_size =  (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+					  /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */
+					  struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ;  /*  TODO: refine the size */
+					  TRACEN(k_t_general,"skb_next=%p skb=%p allocation_size=%d copying_length=%d src_key=0x%08x slot=0x%08x conn_id=0x%08x pad_head=0x%02x tot_len=0x%04x",skb_next,skb,allocation_size,skb_next->len,src_key,slot,conn_id,pad_head,tot_len) ;
+					  if( skb)
+						  {
+							  if(k_scattergather_diagnostic) stamp_skb(skb,tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+							  skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+							  memcpy(skb->data,skb_next->data,skb_next->len) ;
+							  skb_put(skb,tot_len+sizeof(struct ethhdr)) ;
+							   TRACEN(k_t_general,"skb->data=%p skb->len=0x%04x skb_next->data=%p skb_next->len=0x%04x",
+									  skb->data, skb->len, skb_next->data, skb_next->len) ;
+							  if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+						  }
+					  else
+						  {
+							  TRACEN(k_t_error,"(E) No memory available for skbuff, torus will jam") ;
+							   /*  TODO: Could handle this by deferring until memory is available, or by sending a 'negative COP' and having the sender back off */
+						  }
+					  dev_kfree_skb(skb_next) ;
+					  skb_next=skb ;
+					  if( skb_next)
+						  {
+							  eth=(struct ethhdr *)(skb_next->data) ;  // Fix up, 'accept' setup uses this; skb is NULL on the failed-allocation path above
+						  }
+
+				}
+			if( skb_next)
+				{
+					receive_skb_using_counter(dma_tcp,skb_next,counter_index,pad_head,slot,conn_id,x,y,z,tot_len,src_key) ;
+				}
+			else
+				{
+					  TRACEN(k_t_error,"(E) No memory available for skbuff, torus will jam") ;
+					   /*  TODO: Could handle this by deferring until memory is available, or by sending a 'negative COP' and having the sender back off */
+				}
+		}
+	else
+		{
+			TRACEN(k_t_general|k_t_scattergather,"Reception counter 0x%02x vacant",counter_index) ;
+			dma_tcp->recCntrInUse[counter_index] = 0 ;
+			dma_tcp->rcv_skbs[counter_index] = NULL ;
+			dma_tcp->qtyFreeRecCounters += 1 ;
+			DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,counter_index) ;
+		}
+
+}
+
+static void handle_empty_recCounter(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+	struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+	struct ethhdr *eth=(struct ethhdr *)(skb->data) ;
+	unsigned int x0 = ethhdr_src_x(eth) ;
+	unsigned int y0 = ethhdr_src_y(eth) ;
+	unsigned int z0 = ethhdr_src_z(eth) ;
+	handle_empty_recCounter_deliver(dma_tcp,counter_index) ;
+	handle_empty_recCounter_reload(dma_tcp,counter_index,x0,y0,z0) ;
+}
+
+static void handle_stuck_recCounter(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+	struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+	struct ethhdr *eth=(struct ethhdr *)(skb->data) ;
+	unsigned int x0 = ethhdr_src_x(eth) ;
+	unsigned int y0 = ethhdr_src_y(eth) ;
+	unsigned int z0 = ethhdr_src_z(eth) ;
+
+	instrument_flow(dma_tcp,k_receive_incomplete) ;
+	handle_empty_recCounter_flush(dma_tcp,counter_index) ;
+	handle_empty_recCounter_reload(dma_tcp,counter_index,x0,y0,z0) ;
+}
+
+static void check_stuck_recCounters(dma_tcp_t *dma_tcp)
+{
+	unsigned int x ;
+	int j = jiffies ;
+	for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
+		{
+			if(dma_tcp->rcv_skbs[x] && (j-dma_tcp->rcv_timestamp[x]) >= 3*HZ )
+				{
+					TRACEN(k_t_request,"(!!!) counter 0x%02x not completed after %d jiffies, freeing it",x,j-dma_tcp->rcv_timestamp[x]) ;
+					handle_stuck_recCounter(dma_tcp,x) ;
+				}
+		}
+}
+
+void bgp_dma_tcp_empty_fifo_callback(void)
+{
+	dma_tcp_t *dma_tcp = &dma_tcp_state ;
+	unsigned int word0 , word1 ;
+	DMA_CounterGetAllHitZero(&dma_tcp->recCounterGroup, &word0, &word1) ;
+	if( word0 != 0 )
+		{
+			DMA_CounterGroupClearHitZero(&dma_tcp->recCounterGroup, 0, word0) ;
+			TRACEN(k_t_general,"recCounterGroup word0=0x%08x",word0) ;
+			do {
+				unsigned int counter_index=32-fls(word0) ;  /*  Find the highest-order bit that is set */
+				word0 &= (0x7fffffff >> counter_index) ;   /*  Clear it */
+				handle_empty_recCounter(dma_tcp,counter_index) ;
+			} while ( word0 != 0) ;
+		}
+	if( word1 != 0)
+		{
+			DMA_CounterGroupClearHitZero(&dma_tcp->recCounterGroup, 1, word1) ;
+			TRACEN(k_t_general,"recCounterGroup word1=0x%08x",word1) ;
+			do {
+				unsigned int counter_index=32-fls(word1) ;  /*  Find the highest-order bit that is set */
+				word1 &= (0x7fffffff >> counter_index) ;   /*  Clear it */
+				handle_empty_recCounter(dma_tcp,32+counter_index) ;
+			} while ( word1 != 0) ;
+		}
+	 /*   'clear orphaned reception counters' only works correctly if we are doing eager delivery */
+	if( deliver_eagerly(dma_tcp))
+		{
+			int checked_time = dma_tcp->rcv_checked_time ;
+			int j = jiffies ;
+			int elapsed = j - checked_time ;
+			if( elapsed > HZ)
+				{
+					dma_tcp->rcv_checked_time = j ;
+					check_stuck_recCounters(dma_tcp) ;
+				}
+
+		}
+
+
+}
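+
+/*  The hit-zero scan numbers counters from the most-significant bit:
+ *  for word0=0x80000001, fls gives 32 so counter_index=0, and masking
+ *  with 0x7fffffff>>0 clears bit 31 leaving 0x00000001, which then
+ *  gives fls=1 and counter_index=31.
+ */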
+
+int bgp_dma_tcp_counter_copies[DMA_NUM_COUNTERS_PER_GROUP] ;
+
+
+static inline int inject_into_dma_taxi(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int my_injection_group, unsigned int desired_fifo, unsigned int proto, unsigned int SW_Arg )
+  {
+    dma_addr_t dataAddr ;
+    DMA_InjDescriptor_t desc;
+    int ret1, ret2 ;
+    TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
+/*     TRACEN(k_t_scattergather,"injecting, length=0x%04x my_injection_group=%d desired_fifo=%d",length,my_injection_group,desired_fifo) ; */
+    dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+    ret1 = DMA_TorusMemFifoDescriptor( &desc,
+                                     x, y, z,
+                                     dma_tcp_ReceptionFifoGroup(dma_tcp),          /*  recv fifo grp id */
+                                     0,          /*  hints */
+                                     virtual_channel(dma_tcp,k_VC_anyway),          /*  go whichever way it wants */
+                                     SW_Arg,          /*  softw arg */
+                                     proto,     /*  function id */
+                                     dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+                                     k_injCounterId,  /*  inj counter id */
+                                     dataAddr,        /*  send address */
+                                     length          /*  msg len */
+                                     );
+
+
+    DMA_DescriptorSetPutOffset(&desc,-length) ;  /*  For 'memory FIFO packets', the put offset has no hardware use. Set it to indicate the message (fragment) length */
+    ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+                                            dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+                                            &desc );
+    TRACEN(k_t_scattergather , "tgt=[%d %d %d] length=0x%04x injfifo[%d %02x]\n",
+		    x,y,z,length,
+		    my_injection_group,desired_fifo ) ;
+    TRACEN(k_t_general , "(<) ret1=%d ret2=%d",ret1, ret2);
+    return 1 ;
+  }
+
+
+
+/*  The injectors are currently set up so that each 'software FIFO' pushes to a single (different) 'hardware FIFO' */
+/*  This isn't needed for 'adaptive'; things could be rearranged for all 'software FIFOs' to have access to all 'hardware FIFOs' */
+enum {
+	k_my_vc_for_adaptive = k_VC_anyway
+/*  Diagnostically flip it to 'deterministic' ... */
+/* 	k_my_vc_for_adaptive = k_VC_ordering */
+};
+static inline int inject_into_dma_adaptive(dma_tcp_t *dma_tcp,
+		                           void * address,
+		                           unsigned int length,
+		                           unsigned int x, unsigned int y, unsigned int z,
+		                           unsigned int my_injection_group,
+		                           unsigned int desired_fifo,
+		                           unsigned int proto,
+		                           unsigned int SW_Arg,
+		                           unsigned int conn_id )
+  {
+    dma_addr_t dataAddr ;
+    DMA_InjDescriptor_t desc;
+    int ret1, ret2 __attribute((unused));
+    unsigned int firstpacketlength = ( length > k_injection_packet_size) ? k_injection_packet_size : length ;
+    unsigned int midpacketcount = (length-(k_injection_packet_size+1)) / k_injection_packet_size ;
+    unsigned int packetcount = (length > k_injection_packet_size) ? (midpacketcount+2) : 1 ;
+    int PutOffset = (conn_id << 25) | (packetcount << 16) | ((-length) & 0xfff0) ;
+    TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
+    dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+    if( length >= 10000)
+	    {
+		    TRACEN(k_t_request,"address=%p length=0x%08x dataAddr=0x%08llx",address,length,dataAddr) ;
+	    }
+
+/*  First injection is 'start of frame/fragment' */
+    ret1 = DMA_TorusMemFifoDescriptor( &desc,
+                                     x, y, z,
+                                     dma_tcp_ReceptionFifoGroup(dma_tcp),          /*  recv fifo grp id */
+                                     0,          /*  hints */
+                                     virtual_channel(dma_tcp,k_my_vc_for_adaptive),          /*  vc - adaptive */
+                                     SW_Arg,          /*  softw arg */
+                                     proto,     /*  function id */
+                                     dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+                                     k_injCounterId,  /*  inj counter id */
+                                     dataAddr,        /*  send address */
+                                     packetcount*firstpacketlength          /*  msg len */
+                                     );
+
+
+    DMA_DescriptorSetPutOffset(&desc,PutOffset) ;  /*  For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */
+    ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+                                            dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+                                            &desc );
+    TRACEN(k_t_scattergather ,"tgt=[%d %d %d] length=0x%04x injfifo[%d %02x] conn_id=0x%02x\n",
+		    x,y,z,length,
+		    my_injection_group,desired_fifo,conn_id ) ;
+    TRACEN(k_t_general , "proto=%d firstpacketlength=%d ret1=%d ret2=%d",proto,firstpacketlength,ret1, ret2);
+
+    return 1 ;
+
+  }
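+
+/*  packetcount works out to ceil(length/k_injection_packet_size):
+ *  assuming, for illustration, a 240-byte k_injection_packet_size, a
+ *  500-byte fragment gives midpacketcount=(500-241)/240=1 and so
+ *  packetcount=3, while anything of 240 bytes or less travels as a
+ *  single packet.
+ */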
+
+static inline void create_dma_descriptor_adaptive(dma_tcp_t *dma_tcp,
+		                           void * address,
+		                           unsigned int length,
+		                           unsigned int x, unsigned int y, unsigned int z,
+		                           unsigned int proto,
+		                           unsigned int SW_Arg,
+		                           unsigned int conn_id,
+		                           DMA_InjDescriptor_t *desc)
+  {
+    dma_addr_t dataAddr ;
+    int ret1 __attribute__((unused));
+    unsigned int firstpacketlength = ( length > k_injection_packet_size) ? k_injection_packet_size : length ;
+    unsigned int midpacketcount = (length-(k_injection_packet_size+1)) / k_injection_packet_size ;
+    unsigned int packetcount = (length > k_injection_packet_size) ? (midpacketcount+2) : 1 ;
+    int PutOffset = (conn_id << 25) | (packetcount << 16) | ((-length) & 0xfff0) ;
+    TRACEN(k_t_general , "(>) address=%p length=0x%08x x=%d y=%d z=%d proto=%d SW_Arg=0x%08x desc=%p",address,length,x,y,z,proto,SW_Arg,desc);
+    dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+    if( length >= 10000)
+	    {
+		    TRACEN(k_t_request,"address=%p length=0x%08x dataAddr=0x%08llx",address,length,dataAddr) ;
+	    }
+
+/*  First injection is 'start of frame/fragment' */
+    ret1 = DMA_TorusMemFifoDescriptor( desc,
+                                     x, y, z,
+                                     dma_tcp_ReceptionFifoGroup(dma_tcp),          /*  recv fifo grp id */
+                                     0,          /*  hints */
+                                     virtual_channel(dma_tcp,k_my_vc_for_adaptive),          /*  vc - adaptive */
+                                     SW_Arg,          /*  softw arg */
+                                     proto,     /*  function id */
+                                     dma_tcp_InjectionCounterGroup(dma_tcp),          /*  inj cntr group id */
+                                     k_injCounterId,  /*  inj counter id */
+                                     dataAddr,        /*  send address */
+                                     packetcount*firstpacketlength          /*  msg len */
+                                     );
+
+    DMA_DescriptorSetPutOffset(desc,PutOffset) ;  /*  For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */
+    TRACEN(k_t_general , "(<) firstpacketlength=%d ret1=%d",firstpacketlength,ret1);
+
+  }
+
+static inline int inject_dma_descriptor_adaptive(dma_tcp_t *dma_tcp,
+		                           unsigned int my_injection_group,
+		                           unsigned int desired_fifo,
+		                           DMA_InjDescriptor_t *desc)
+  {
+    int ret __attribute__((unused));
+    TRACEN(k_t_general|k_t_sgdiag , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p",my_injection_group,desired_fifo,desc);
+    TRACEN(k_t_sgdiag,"injecting 0x%04x bytes",desc->msg_length) ;
+    ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+                                            dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+                                            desc );
+
+    TRACEN(k_t_general , "(<) ret=%d",ret);
+    return 1 ;
+
+  }
+
+static inline int inject_dma_descriptors_adaptive(dma_tcp_t *dma_tcp,
+		                           unsigned int my_injection_group,
+		                           unsigned int desired_fifo,
+		                           DMA_InjDescriptor_t **desc,
+		                           unsigned int count )
+  {
+    int ret __attribute__((unused));
+    int r2 __attribute__((unused));
+    unsigned int fifo_index = my_injection_group*k_injecting_directions+desired_fifo ;
+    TRACEN(k_t_general|k_t_sgdiag , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p count=%d fifo_id=0x%02x",
+	    my_injection_group,desired_fifo,desc,count, dma_tcp->injFifoFramesIds[fifo_index]);
+    if( 0 == desc[0]->msg_length)
+	    {
+		    TRACEN(k_t_general,"(I) msg_length[0] zero, injection skipped") ;
+		    desc += 1 ;
+		    count -= 1 ;
+	    }
+    ret = DMA_InjFifoInjectDescriptorsById( &dma_tcp->injFifoGroupFrames,
+					    dma_tcp->injFifoFramesIds[fifo_index],
+					    count,
+					    desc );
+    r2=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId,  0xffffffff );
+    if( ret != count)
+	    {
+		    TRACEN(k_t_error,"(!!!) count=%d ret=%d",count,ret) ;
+	    }
+
+    TRACEN(k_t_general , "(<) count=%d fifo_id=0x%02x",
+		    count,dma_tcp->injFifoFramesIds[fifo_index]);
+
+    return count ;
+  }
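+
+/*  A zero msg_length in the first descriptor is the marker planted by
+ *  issue_accept for a zero-byte head; it is stepped over here rather
+ *  than injected.
+ */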
+
+/* Don't actually need this; the length is precise anyway, we just may waste some cells in the last packet */
+#if 0
+static inline int inject_dma_descriptor_adaptive_precise_length(dma_tcp_t *dma_tcp,
+		                           unsigned int my_injection_group,
+		                           unsigned int desired_fifo,
+		                           DMA_InjDescriptor_t *desc)
+  {
+	unsigned int size=desc->msg_length ;
+	unsigned int full_frame_count=size / k_torus_link_payload_size ;
+	unsigned int full_frame_size = full_frame_count * k_torus_link_payload_size ;
+	unsigned int trailing_frame_size = size - full_frame_size ;
+	unsigned int rc=0 ;
+	if(0 == trailing_frame_size || 0 == full_frame_count)  // These cases were already 'precise'
+		{
+		    int ret __attribute__((unused));
+		    TRACEN(k_t_general , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p",my_injection_group,desired_fifo,desc);
+		    ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+							    dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+							    desc );
+		    TRACEN(k_t_general , "(<) ret=%d",ret);
+		    return 1 ;
+		}
+	else
+		{
+			 /*  Need to split into 2 injections in order not to transmit extra cells */
+			int ret __attribute__((unused));
+			desc->msg_length=full_frame_size ;
+			ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+			 			               dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+							       desc );
+			desc->msg_length=trailing_frame_size ;
+			desc->base_offset += full_frame_size ;
+			desc->hwHdr.Chunks = DMA_PacketChunks(trailing_frame_size) - 1 ;
+			ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+			 			               dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+							       desc );
+			return 2 ;
+
+
+
+		}
+
+  }
+#endif
+
+
+static void analyse_skb(struct sk_buff *skb) __attribute__ ((unused)) ;
+static void analyse_skb(struct sk_buff *skb)
+  {
+    struct sock   *sk=skb->sk ;
+    struct inet_sock *inet = inet_sk(sk);
+    struct inet_connection_sock *icsk = inet_csk(sk);
+    unsigned int daddr=inet->daddr ;
+    unsigned int flags = TCP_SKB_CB(skb)->flags ;
+    if(icsk->icsk_retransmits > 0 )
+      {
+        TRACEN(k_t_congestion,"(I) sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d resending (BGP)",
+            sk, skb, skb->data, skb->len, flags,
+            daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+            icsk->icsk_retransmits, icsk->icsk_rto ) ;
+      }
+  }
+
+static inline int selfsend(const torusLocation_t * t, unsigned int x, unsigned int y, unsigned int z)
+{
+	unsigned int tx=t->coordinate[0] ;
+	unsigned int ty=t->coordinate[1] ;
+	unsigned int tz=t->coordinate[2] ;
+	return (tx == x && ty == y && tz == z) ;
+}
+
+static inline int offfabric(const torusLocation_t * t, unsigned int x, unsigned int y, unsigned int z)
+{
+	unsigned int tx=t->coordinate[0] ;
+	unsigned int ty=t->coordinate[1] ;
+	unsigned int tz=t->coordinate[2] ;
+	return (x >= tx || y >= ty || z >= tz) ;
+}
+static inline void clear_dir_in_use(unsigned char * direction_is_in_use)
+{
+	int x ;
+	for(x=0;x<k_skb_controlling_directions;x+=1)
+		{
+			direction_is_in_use[x] = 0 ;
+		}
+}
+
+static inline void record_dir_in_use(dma_tcp_t * dma_tcp,unsigned char * direction_is_in_use)
+{
+	int x ;
+	for(x=0;x<k_injecting_directions;x+=1)
+		{
+			dma_tcp->tx_in_use_count[x] += direction_is_in_use[x] ;
+		}
+	dma_tcp->tx_in_use_count[k_injecting_directions] += 1 ;
+}
+
+/*  Routine to free all the skbuffs that control data which has left the node */
+static void dma_tcp_frames_transmission_free_skb(unsigned long parm)
+  {
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    unsigned int core ;
+    unsigned int total_injection_used = 0 ;
+    unsigned char direction_is_in_use[k_skb_controlling_directions] ;
+    clear_dir_in_use(direction_is_in_use) ;
+#if defined(TRACK_LIFETIME_IN_FIFO)
+    unsigned long long now=get_powerpc_tb() ;
+#endif
+    for( core=0 ; core<k_injecting_cores; core += 1)
+	    {
+		    unsigned int desired_fifo ;
+			   for(desired_fifo=0; desired_fifo<k_skb_controlling_directions; desired_fifo += 1 )
+			   {
+			       spinlock_t * injectionLock = &dma_tcp->dirInjectionLock[core*k_injecting_directions+desired_fifo] ;
+			       idma_direction_t * buffer = dma_tcp->idma.idma_core[core].idma_direction+desired_fifo ;
+			       unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ;
+			       unsigned int bhx = buffer->buffer_head_index ;
+			       unsigned int btx = buffer->buffer_tail_index ;  /*  This indexes the oldest skbuff that might still be pending send by the DMA unit */
+			       unsigned int  fifo_current_head =
+			        (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+			       unsigned int  fifo_current_tail =
+			        (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+			       unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+			       unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
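+			        /*  FIFO head and tail are byte addresses; >>5 converts them to descriptor indices, each injection descriptor being 32 bytes */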
+			       unsigned int current_injection_used=packet_mod(tailx-headx) ;
+			       int skql2 = packet_mod(bhx-btx) ;
+			       if( 0 != current_injection_used ) direction_is_in_use[desired_fifo] = 1 ;
+			       if( skql2 != current_injection_used)
+				       {
+					       skb_group_t skb_group ;
+
+					       skb_group_init(&skb_group) ;
+					       if( spin_trylock(injectionLock))
+					       {
+						       unsigned int bhx = buffer->buffer_head_index ;
+						       unsigned int btx = buffer->buffer_tail_index ;  /*  This indexes the oldest skbuff that might still be pending send by the DMA unit */
+						       unsigned int  fifo_current_head =
+							(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+						       unsigned int  fifo_current_tail =
+							(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+						       unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+						       unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+						       unsigned int current_injection_used=packet_mod(tailx-headx) ;
+						       int skql2 = packet_mod(bhx-btx) ;
+					               int count_needing_freeing = skql2-current_injection_used ;
+					               int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
+					               TRACEN(k_t_detail,"current_injection_used=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,skql2,count_needing_freeing,count_to_free);
+					               skb_group_queue(&skb_group,dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					        		       , core, desired_fifo, now
+#endif
+					        		       ) ;
+					               btx = packet_mod(btx+count_to_free) ;
+					               buffer->buffer_tail_index = btx ;
+					               TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
+						       total_injection_used += current_injection_used ;
+
+						       spin_unlock(injectionLock) ;
+						       skb_group_free(&skb_group) ;
+					       }
+					       else
+						       {
+							       total_injection_used += current_injection_used ;
+						       }
+				       }
+			   }
+	    }
+    TRACEN(k_t_detail,"total_injection_used=%d",total_injection_used) ;
+    record_dir_in_use(dma_tcp,direction_is_in_use) ;
+    if( total_injection_used > 0 )
+	    {
+		     mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;
+	    }
+  }
+
+
+static void display_skb_structure(struct sk_buff *skb)
+{
+	int f ;
+	unsigned int headlen=skb_headlen(skb) ;
+	TRACEN(k_t_request, "sk_buff(head=%p data=%p tail=%p end=%p len=0x%08x data_len=0x%08x nr_frags=%d)",
+			skb->head, skb->data, skb->tail, skb->end, skb->len, skb->data_len, skb_shinfo(skb)->nr_frags) ;
+	dumpmem(skb->data,(headlen > 256) ? 256 : headlen,"skb head") ;
+	for(f=0; f<skb_shinfo(skb)->nr_frags; f+=1)
+		{
+			   struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+			   unsigned int page_offset=frag->page_offset ;
+			   unsigned int size = frag->size ;
+			   TRACEN(k_t_request, " frags[%d](page_offset=0x%08x size=0x%08x)",
+					f,page_offset,size) ;
+		}
+}
+
+static inline unsigned int imin2(unsigned int a, unsigned int b)
+{
+	return (a>b) ? b : a ;
+}
+#if defined(USE_SKB_TO_SKB)
+static void bgp_dma_tcp_s_and_f_frames_dma(
+    dma_tcp_t *dma_tcp,
+    struct sk_buff *skb
+    )
+{
+	  frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+	    struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+	    unsigned int x = eth->h_dest[3] ;
+	    unsigned int y = eth->h_dest[4] ;
+	    unsigned int z = eth->h_dest[5] ;
+	unsigned int payload_address = (unsigned int)(skb->data) ;
+	unsigned int aligned_payload_address = payload_address & (~ 0x0f) ;
+	unsigned int pad_head = payload_address & 0x0f ;
+	unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ;  /*  Everything to a given node will go on the same stream; no point in encoding the injection group */
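+	 /*  The low 4 bits of src_key carry pad_head, the quadword misalignment of the data, so the receive side can recover the true start of the payload within the 16-byte-aligned transfer */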
+	unsigned int headlen = skb_headlen(skb) ;
+	TRACEN(k_t_general ,"(>)skb=%p (%02x,%02x,%02x) data=%p length=%d data_len=%d headlen=%d", skb,x,y,z,skb->data, skb->len, skb->data_len,headlen);
+	dumpframe(skb->data, skb_headlen(skb), "skbuff to send") ;
+
+	TRACEN(k_t_general, "(=)(I) testdma: Sending to (%d,%d,%d)",
+		x, y, z );
+
+	 /*  Make sure we're not trying to send off the partition or to self */
+	if( k_verify_target)
+		    {
+			    if( offfabric(&(dma_tcp->extent),x,y,z))
+				    {
+						TRACEN(k_t_error, "(W) Target (%d,%d,%d) not in range",x,y,z) ;
+						WARN_ON(1) ;
+						dev_kfree_skb(skb) ;
+						return ;
+				    }
+			    if( selfsend(&(dma_tcp->location),x,y,z))
+				    {
+						TRACEN(k_t_error, "(W) Self-send not supported by hardware (%d %d %d)",x,y,z) ;
+						WARN_ON(1) ;
+						dev_kfree_skb(skb) ;
+						return ;
+				    }
+		    }
+
+	TRACEN(k_t_protocol,"(=)sending packet to (%02x,%02x,%02x) length=%d",
+		 x,y,z,skb->len) ;
+
+	 /*  copy descriptor into the inj fifo */
+	{
+		unsigned int dest_key =  x*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2]
+					      +y*dma_tcp->extent.coordinate[2]
+					      +z ;
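+		 /*  dest_key linearizes the (x,y,z) target in row-major order; e.g. in an 8x8x8 partition, (1,2,3) gives 1*64 + 2*8 + 3 = 83 */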
+		unsigned int conn_id = take_tx_conn_id(&dma_tcp->tx_mux,dest_key) ;
+		atomic_inc(&dma_tcp->framesProposed) ;
+	    TRACEN(k_t_general,"Saving skb=%p for dest_key=0x%08x conn_id=0x%08x",skb,dest_key,conn_id) ;
+	    set_tx_skb(&dma_tcp->tx_mux,dest_key,conn_id,skb) ;
+	    ficb->free_when_done = 0 ;
+
+#if defined(AUDIT_HEADLEN)
+	    {
+		    struct iphdr *iph = (struct iphdr *)(eth+1) ;
+		    ficb->tot_len = iph->tot_len ;
+	    }
+#endif
+	    {
+		     /*  If we have a 'scatter-gather' skb, try to put the head into the 'propose' packet */
+		    unsigned int nr_frags = skb_shinfo(skb)->nr_frags ;
+		    unsigned int propose_length = (nr_frags == 0 ) ? 48 : imin2(pad_head+headlen,k_torus_link_payload_size) ;
+		    eth->h_source[0] = propose_length ; // Use a byte on-the-side to say how much data was actually sent
+		    TRACEN(k_t_general,"nr_frags=%d propose_length=%d",nr_frags,propose_length) ;
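+		     /*  A linear skb proposes with a fixed 48 bytes; a scatter-gather skb carries as much of its head as fits in one torus packet payload */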
+		  create_dma_descriptor_propose_accept(dma_tcp,
+				  (void *)aligned_payload_address,
+				  propose_length,
+				  x,y, z,
+				  dma_tcp->proto_transfer_propose,
+				  src_key,
+				  conn_id,
+				  0,
+				  &ficb->desc,
+				  propose_length
+				) ;
+	    }
+	}
+	instrument_flow(dma_tcp,k_send_propose_rpc) ;
+	  bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp, skb, 0, k_cattle_class) ;
+}
+#endif
+
+static int inject_scattergather(
+		    dma_tcp_t *dma_tcp,
+		    struct sk_buff *skb,
+                    unsigned int my_injection_group,
+                    unsigned int desired_fifo
+)
+{
+	    frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+	  unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	    struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+	    unsigned int aligned_payload_length = ficb->desc.msg_length ;
+	    unsigned int x=ficb->desc.hwHdr.X ;
+	    unsigned int y=ficb->desc.hwHdr.Y ;
+	    unsigned int z=ficb->desc.hwHdr.Z ;
+	   unsigned int f ;
+	   unsigned int dest_offset=k_abbreviate_headlen ? (aligned_payload_length+eth->h_source[0]): aligned_payload_length ;
+	   unsigned int base_offset=ficb->desc.base_offset ;
+	   unsigned int rctr=ficb->desc.hwHdr.rDMA_Counter % DMA_NUM_COUNTERS_PER_GROUP ;
+	    struct iphdr *iph = (struct iphdr *)(eth+1) ;
+	    unsigned int daddr=iph->daddr ;
+
+	    DMA_InjDescriptor_t descVector[MAX_SKB_FRAGS] ;
+	    DMA_InjDescriptor_t * descPtr[1+MAX_SKB_FRAGS] ;
+	    unsigned int total_inj_length = ficb->desc.msg_length ;
+	   TRACEN(k_t_scattergather|k_t_sgdiag,"injecting, base_offset=0x%04x length=0x%04x my_injection_group=%d desired_fifo=%d dest_offset=0x%04x",
+			   base_offset,ficb->desc.msg_length,my_injection_group,desired_fifo, dest_offset) ;
+
+	   /* Prepare the initial not-fragment part */
+	   descPtr[0] = &ficb->desc ;
+	   /* scatter-gather fragments to be pushed out here */
+	   for(f=0;f<nr_frags;f+=1)
+		   {
+			   struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+			   struct page *page = frag->page ;
+			   unsigned int page_offset=frag->page_offset ;
+			   unsigned int size = frag->size ;
+			   dma_addr_t buffAddr = dma_map_page(NULL, page, page_offset, size, DMA_TO_DEVICE);
+			   TRACEN(k_t_scattergather|k_t_sgdiag,"f=%d page=%p page_offset=0x%04x size=0x%04x buffAddr=0x%08llx dest_offset=0x%04x",
+					   f,page,page_offset,size,buffAddr,dest_offset) ;
+			   total_inj_length += size ;
+			   if( 0 != size)
+				   {
+					   create_dma_descriptor_direct_put_offset(dma_tcp,x,y,z,k_injCounterId,rctr,buffAddr,size,descVector+f,dest_offset) ;
+				   }
+			   else
+				   {
+					   TRACEN(k_t_request,"(I) frag length zero") ;
+					   DMA_ZeroOutDescriptor(descVector+f) ;
+					   instrument_flow(dma_tcp,k_fraglength_zero) ;
+				   }
+			   descPtr[1+f]=descVector+f ;
+			   dest_offset += size ;
+
+		   }
+	   TRACEN(k_t_sgdiag,"Injecting tgt=[%d,%d,%d] length=0x%04x ctr=0x%02x",x,y,z,total_inj_length,rctr) ;
+
+
+	   TRACEN(k_t_scattergather ,"tgt=[%d %d %d] daddr=%d.%d.%d.%d tot_len=0x%04x, length=0x%04x headlen=0x%04x data_len=0x%04x dest_offset=0x%04x nr_frags=%d fragsizes[0x%04x 0x%04x 0x%04x] counter=0x%02x injfifo[%d %02x]\n",
+				    x,y,z,
+				    daddr>>24, (daddr >> 16) & 0xff,(daddr >> 8) & 0xff, daddr & 0xff,iph->tot_len,
+				    skb->len,skb_headlen(skb), skb->data_len, dest_offset,
+				    nr_frags,skb_shinfo(skb)->frags[0].size,skb_shinfo(skb)->frags[1].size,skb_shinfo(skb)->frags[2].size,rctr,my_injection_group,desired_fifo ) ;
+	    if( skb_headlen(skb) < sizeof(struct ethhdr)+sizeof(struct iphdr))
+		    {
+			    TRACEN(k_t_request,"(!!!) length=0x%04x data_len=0x%04x nr_frags=%d fragsizes[0x%04x 0x%04x 0x%04x]",skb->len, skb->data_len, nr_frags,skb_shinfo(skb)->frags[0].size,skb_shinfo(skb)->frags[1].size,skb_shinfo(skb)->frags[2].size) ;
+			    display_skb_structure(skb) ;
+		    }
+	    return inject_dma_descriptors_adaptive(dma_tcp,my_injection_group,desired_fifo,descPtr,1+nr_frags) ;
+
+}
+/*  Send-and-free a frame with an already-prepared injection descriptor (which might be DMA-put or FIFO-put) */
+static int bgp_dma_tcp_s_and_f_frames_prepared(
+    dma_tcp_t *dma_tcp,
+    struct sk_buff *skb,
+    unsigned int queue_at_head,
+    unsigned int transport_class
+    )
+  {
+	  unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	  unsigned int is_scattergather = (nr_frags > 0 ) ;
+	    unsigned int payload_length = (skb -> len) - (skb->data_len) ;
+	    unsigned int payload_address = (unsigned int)(skb->data) ;
+	    unsigned int aligned_payload_address = payload_address & (~ 0x0f) ;
+	    unsigned int pad_head = payload_address & 0x0f ;
+	    unsigned int aligned_payload_length = payload_length + pad_head ;
+	#if 1
+	    unsigned int use_taxi = 0 ;
+	#else
+	    unsigned int use_taxi = (aligned_payload_length<=k_injection_packet_size) && (0 == nr_frags);
+	#endif
+	    unsigned long flags ;
+	    unsigned int current_injection_used=0xffffffff ;
+
+	    int ret = 0;
+	    int ring_ok ;
+
+	    int my_injection_group ;
+	    skb_group_t skb_group ;
+	    frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+	    unsigned int x=ficb->desc.hwHdr.X ;
+	    unsigned int y=ficb->desc.hwHdr.Y ;
+	    unsigned int z=ficb->desc.hwHdr.Z ;
+	    unsigned int header_dma_length=ficb->desc.msg_length ; // If this is zero, we can free the skb as soon as its 'frags' are in the software injection FIFO
+    TRACEN(k_t_general ,"(>)skb=%p (%02x,%02x,%02x) data=%p length=%d data_len=%d nr_frags=%d", skb,x,y,z,skb->data, skb->len, skb->data_len, nr_frags);
+    if(is_scattergather ) instrument_flow(dma_tcp,k_scattergather) ;
+
+    skb_group_init(&skb_group) ;
+
+    TRACEN(k_t_general, "(=)(I) testdma: Sending to (%d,%d,%d)",
+            x, y, z );
+
+/*  Make sure we're not trying to send off the partition or to self */
+    if( k_verify_target)
+	    {
+		    if( offfabric(&(dma_tcp->extent),x,y,z))
+			    {
+					TRACEN(k_t_error, "(W) Target (%d,%d,%d) not in range",x,y,z) ;
+					WARN_ON(1) ;
+					dev_kfree_skb(skb) ;
+					return -EINVAL;
+			    }
+		    if( selfsend(&(dma_tcp->location),x,y,z))
+			    {
+					TRACEN(k_t_error, "(W) Self-send not supported by hardware (%d %d %d)",x,y,z) ;
+					WARN_ON(1) ;
+					dev_kfree_skb(skb) ;
+					return -EINVAL;
+			    }
+	    }
+    TRACEN(k_t_protocol,"(=)sending packet to (%02x,%02x,%02x) length=%d",
+             x,y,z,skb->len) ;
+
+     /*  copy descriptor into the inj fifo */
+    {
+    unsigned int desired_fifo=((transport_class != k_cattle_class) && (aligned_payload_length<=k_injection_packet_size) && (0 == nr_frags)) ? (k_skb_controlling_directions-1) : select_transmission_fifo(dma_tcp,x,y,z) ;
+    my_injection_group=injection_group_hash(dma_tcp,x,y,z) ;
+    spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
+     {
+       unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ;  /*  Everything to a given node will go on the same stream; no point in encoding the injection group */
+        /*  Work out which buffer we are going to use for the packet stream */
+       idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ;
+        /*  Set up the payload */
+       unsigned int bhx = buffer->buffer_head_index ;
+       unsigned int lastx = packet_mod(bhx) ;
+       unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ;
+       unsigned int  fifo_current_head =
+        (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
+       unsigned int  fifo_current_tail =
+        (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
+       unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+       unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+       unsigned int injection_count ;
+#if defined(TRACK_LIFETIME_IN_FIFO)
+       unsigned long long now=get_powerpc_tb() ;
+       *(unsigned long long*)(skb->cb) = now ;
+#endif
+       current_injection_used=packet_mod(tailx-headx) ;
+        /*  If the network is backing up, we may have to skip out here, */
+        /*  so that we don't overwrite unsent data. */
+       TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d",
+           desired_fifo,headx,tailx,bhx,current_injection_used) ;
+       if( current_injection_used > buffer->injection_high_watermark )
+         {
+           buffer->injection_high_watermark=current_injection_used ;  /*  Congestion statistic */
+         }
+         {
+        	  /*  Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */
+           if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1)
+             {
+                ring_ok = 1 ;
+                TRACEN(k_t_general,"Runway slot granted") ;
+             }
+           else
+             {
+                ring_ok = 0 ;
+                TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ;
+             }
+         }
+       TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x nr_frags=%d",
+           my_injection_group, desired_fifo, bhx, headx,tailx,nr_frags
+           ) ;
+       if ( ring_ok )
+         {
+            /*  We are going to send something. */
+
+            /*  Bump the injection counter. Actually only needs doing once per 4GB or so */
+           ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId,  0xffffffff );
+
+	    /*  and inject it */
+	   if(use_taxi)
+		   {
+		           injection_count = inject_into_dma_taxi(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo,
+		        		   dma_tcp->proto_issue_frames_single,src_key) ;
+		   }
+	   else
+		   {
+			   if( is_scattergather && 0 != ficb->free_when_done)
+				   {
+					   injection_count = inject_scattergather(
+							    dma_tcp,skb,my_injection_group,desired_fifo
+							   ) ;
+				   }
+			   else
+				   {
+					    /*  Prop, or accept, or unfragmented skbuff */
+					   injection_count = inject_dma_descriptor_adaptive(dma_tcp,my_injection_group,desired_fifo,
+					   &ficb->desc
+					   ) ;
+				   }
+
+		   }
+	   {
+		   unsigned int nhx=packet_mod(bhx+injection_count) ;
+		   /*  Remember where we will be pushing the next injection in */
+		   TRACEN(k_t_detail,"Next injection will be at nhx=0x%08x",nhx) ;
+		   buffer->buffer_head_index = nhx ;
+		    /*  Record the skbuff so it can be freed later, after data is DMA'd out */
+		   if( ficb->free_when_done && header_dma_length > 0 )
+			   {
+				   TRACEN(k_t_detail,"Saving skb=%p at [%p] for freeing later",skb,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array+nhx) ;
+				   dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = skb ;
+			   }
+	   }
+            /*  hang on to the skbs until they are sent ... */
+           if( current_injection_used != 0xffffffff)
+             {
+               unsigned int btx = buffer->buffer_tail_index ;  /*  This indexes the oldest skbuff that might still be pending send by the DMA unit */
+               int skql2 = packet_mod(bhx-btx) ;
+               int count_needing_freeing = skql2-current_injection_used ;
+               int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
+               TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free);
+               skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
+#if defined(TRACK_LIFETIME_IN_FIFO)
+					        		       , my_injection_group, desired_fifo, now
+#endif
+               ) ;
+               btx = packet_mod(btx+count_to_free) ;
+               buffer->buffer_tail_index = btx ;
+               TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
+             }
+         }
+       else
+         {
+           TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x",
+               my_injection_group, desired_fifo, bhx, headx,tailx, lastx
+               ) ;
+         }
+     }
+     spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
+     skb_group_free(&skb_group) ;
+     if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;
+   if( 0 == ring_ok )
+     {
+       TRACEN(k_t_congestion,"(=)Queuing skb=%p desired_fifo=%d (%u %u %u)", skb,desired_fifo,x,y,z) ;
+       if( queue_at_head)
+         {
+           skb_queue_head(dma_tcp->inj_queue+desired_fifo, skb) ;
+         }
+       else
+         {
+           skb_queue_tail(dma_tcp->inj_queue+desired_fifo, skb) ;
+         }
+     }
+   else
+	   {
+		   if( 0 == header_dma_length)
+			   {
+				   TRACEN(k_t_general,"Freeing skb=%p, its header has left the node",skb) ;
+				   dev_kfree_skb(skb) ;
+			   }
+	   }
+
+
+
+   TRACEN(k_t_general ,"(<) ring_ok=%d desired_fifo=%d",ring_ok,desired_fifo);
+
+   return  ring_ok ? desired_fifo : -1 ;
+    }
+
+  }
+
+/*  ... return 'direction' if we sent the packet, '-1' if we queued it */
+static int bgp_dma_tcp_s_and_f_frames(
+    dma_tcp_t *dma_tcp,
+    struct sk_buff *skb,
+    unsigned int queue_at_head
+    )
+{
+#if defined(USE_ADAPTIVE_ROUTING)
+  struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+  unsigned int x = eth->h_dest[3] ;
+  unsigned int y = eth->h_dest[4] ;
+  unsigned int z = eth->h_dest[5] ;
+  unsigned int payload_length = (skb -> len) - (skb->data_len) ;
+  unsigned int payload_address = (unsigned int)(skb->data) ;
+  unsigned int aligned_payload_address = payload_address & (~ 0x0f) ;
+  unsigned int pad_head = payload_address & 0x0f ;
+  unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ;  /*  Everything to a given node will go on the same stream; no point in encoding the injection group */
+  unsigned int aligned_payload_length = payload_length + pad_head ;
+  frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+
+   unsigned int dest_key =  x*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2]
+			      +y*dma_tcp->extent.coordinate[2]
+			      +z ;
+   unsigned int conn_id = take_tx_conn_id(&dma_tcp->tx_mux,dest_key) ;
+   instrument_flow(dma_tcp,k_send_eager) ;
+   ficb->free_when_done = 1 ;
+
+  if(TRACING(k_t_sgdiag))
+	  {
+		  diag_skb_structure(skb) ;
+	  }
+   create_dma_descriptor_adaptive(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,
+		   dma_tcp->proto_issue_frames_adaptive,src_key,conn_id, &ficb->desc
+   ) ;
+
+#endif
+   if( k_verify_ctlen)
+           {
+                   unsigned int ctlen = counted_length(skb) ;
+                   struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+                   struct iphdr *iph = (struct iphdr *)(eth+1) ;
+                   if( ctlen != iph->tot_len + sizeof(struct ethhdr))
+                           {
+                                  TRACEN(k_t_error,"(E) Counted length mismatch, skb=%p, counted_length=0x%04x, tot_len=0x%04x",skb,ctlen,iph->tot_len ) ;
+                                  display_skb_structure(skb) ;
+                                  display_iphdr(iph) ;
+                                  dev_kfree_skb(skb) ; // Trying to send it would cause trouble later, so drop it.
+                                  instrument_flow(dma_tcp,k_counted_length_mismatch) ;
+                                  return 0 ; // Not really 'direction 0', but this will not cause the caller a problem.
+                           }
+           }
+
+	return  bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,skb,queue_at_head, 0) ;
+}
+
+/*  Try to clear a pending skbuff queue into the mem-fifo */
+/*  return 0 if queue cleared */
+/*        -1 if the queue cannot be cleared because the FIFO gets full */
+static int bgp_dma_tcp_try_to_clear_queue(dma_tcp_t *dma_tcp, unsigned int direction) noinline ;
+static int bgp_dma_tcp_try_to_clear_queue(dma_tcp_t *dma_tcp, unsigned int direction)
+  {
+    struct sk_buff_head *skq = dma_tcp->inj_queue+direction ;
+    TRACEN(k_t_general,"(>) direction=%u",direction );
+     if( ! skb_queue_empty(skq))
+       {
+          /*  We sent something, and there is a pending list which we might be able to send as well */
+         for(;;)
+           {
+             struct sk_buff * askb = skb_dequeue(skq) ;
+             if( askb)
+               {
+                  TRACEN(k_t_congestion,"(=)Dequeuing dir=%d askb=%p length=%u", direction, askb,askb->len) ;
+                    {
+                      int arc= bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,askb,1,k_cattle_class) ;
+                      if( -1 == arc)
+                        {
+                          TRACEN(k_t_congestion,"still-congested dir=%d",direction );
+                          TRACEN(k_t_general,"(<) still-congested" );
+                          instrument_flow(dma_tcp,k_queue_filled_propose_fifo) ;
+                          return -1 ;  /*  Queue not cleared */
+                        }
+                    }
+               }
+             else
+               {
+                 TRACEN(k_t_congestion,"(=)Dequeuing askb=NULL") ;
+                 break ;
+               }
+
+           }
+
+       }
+
+     TRACEN(k_t_general,"(<) clear" );
+     return 0 ;  /*  Queue cleared */
+  }
+
+static void dma_tcp_frames_runway_check(unsigned long parm)
+  {
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    int direction ;
+    int anything_queued = 0 ;
+    TRACEN(k_t_congestion,"(>)");
+    for(direction=0;direction<k_injecting_directions;direction+=1)
+      {
+        anything_queued += bgp_dma_tcp_try_to_clear_queue(dma_tcp,direction) ;
+      }
+    if( anything_queued)
+      {
+        mod_timer(&dma_tcp->runway_check_timer,jiffies+1) ;  /*  Redrive on the next timer tick */
+      }
+    TRACEN(k_t_congestion,"(<) anything_queued=%d",anything_queued);
+  }
+
+/*  Take an skbuff bound for (x,y,z), and either put it in the software FIFO or queue it for when congestion abates */
+int bgp_dma_tcp_send_and_free_frames( struct sk_buff *skb  )
+{
+  TRACEN(k_t_general,"(>)skb=%p data=%p length=%d", skb,skb->data, skb->len) ;
+  {
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    dma_tcp->tx_by_core[smp_processor_id() & 3] += 1 ;  /*  Stats on which core(s) are busy */
+#if defined(CONFIG_BGP_STATISTICS)
+    {
+	struct ethhdr *eth = (struct ethhdr *) (skb->data) ;
+	struct iphdr *iph=(struct iphdr *) (eth+1) ;
+	dma_tcp->bytes_sent += iph->tot_len ;
+    }
+#endif
+
+    if( 0 == skb_headlen(skb))
+	    {
+		    TRACEN(k_t_request,"(I) head length zero") ;
+	    }
+
+#if defined(USE_SKB_TO_SKB)
+    if( skb->len > dma_tcp->eager_limit  || 0 != skb_shinfo(skb)->nr_frags )
+	    {
+		    bgp_dma_tcp_s_and_f_frames_dma(dma_tcp,skb) ;
+	    }
+    else
+#endif
+	    {
+		    int rc = bgp_dma_tcp_s_and_f_frames(dma_tcp,skb,
+		 /* 		    x,y,z, */
+				    0) ;
+		    if( rc == -1)
+		      {
+			mod_timer(&dma_tcp->runway_check_timer,jiffies+1) ;  /*  Redrive on the next timer tick */
+		      }
+	    }
+  }
+  TRACEN(k_t_general,"(<)");
+  return 0 ;
+}
+
+#if defined(ENABLE_LATENCY_TRACKING)
+
+static unsigned int isqrt(unsigned int x)
+  {
+    unsigned int rc=0 ;
+    unsigned int i ;
+    for( i=0;i<16;i+=1)
+      {
+        unsigned int c= rc | (0x8000 >> i) ;
+        if( c*c <= x ) rc = c ;
+      }
+    return rc ;
+  }
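+
+/*
+ * isqrt builds floor(sqrt(x)) bit-by-bit from the top: for example isqrt(1000) keeps a trial bit whenever c*c <= x,
+ * settling on 31 since 31*31 = 961 <= 1000 < 1024 = 32*32. Probing 16 bits makes it exact for any 32-bit input.
+ */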
+#endif
+
+#if defined(TRACK_SEQUENCE)
+static void dma_tcp_frames_show_sequence(dma_tcp_t *dma_tcp)
+  {
+    unsigned int x ;
+    unsigned int y ;
+    unsigned int z ;
+    unsigned int core ;
+    unsigned int xsize = dma_tcp->extent.coordinate[0] ;
+    unsigned int ysize = dma_tcp->extent.coordinate[1] ;
+    unsigned int zsize = dma_tcp->extent.coordinate[2] ;
+    unsigned int myx = dma_tcp->location.coordinate[0] ;
+    unsigned int myy = dma_tcp->location.coordinate[1] ;
+    unsigned int myz = dma_tcp->location.coordinate[2] ;
+    for(x=0;x<xsize; x+=1 )
+      {
+        for( y = 0; y<ysize; y+=1)
+          {
+            for( z = 0 ; z<zsize; z+=1 )
+              {
+                unsigned int slot_base = x*(ysize*zsize) + y*zsize + z ;
+                for( core=0; core<k_injecting_cores; core+=1)
+                  {
+                    unsigned int slot = (slot_base << 2) | core ;
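+                    /*  slot packs the linearized node index with the core number in the low 2 bits (4 cores per node) */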
+                    unsigned int txcount = send_sequences[slot] ;
+                    unsigned int rxcount = receive_sequences[slot] ;
+                    if( txcount || rxcount)
+                      {
+                        TRACEN(k_t_request,"( %d %d %d ) show_sequence( %d %d %d %d )=( %d %d )", myx, myy, myz, x,y,z,core, txcount,rxcount) ;
+                      }
+                  }
+              }
+          }
+      }
+  }
+#endif
+
+#if defined(ENABLE_PROGRESS_TRACKING)
+static void dma_tcp_frames_show_progress(dma_tcp_t *dma_tcp)
+  {
+    unsigned int x ;
+    unsigned int y ;
+    unsigned int z ;
+    unsigned int core ;
+    unsigned int xsize = dma_tcp->extent.coordinate[0] ;
+    unsigned int ysize = dma_tcp->extent.coordinate[1] ;
+    unsigned int zsize = dma_tcp->extent.coordinate[2] ;
+    unsigned int myx = dma_tcp->location.coordinate[0] ;
+    unsigned int myy = dma_tcp->location.coordinate[1] ;
+    unsigned int myz = dma_tcp->location.coordinate[2] ;
+    unsigned long long now=get_powerpc_tb() ;
+    TRACEN(k_t_entryexit,">") ;
+    for(x=0;x<xsize; x+=1 )
+      {
+        for( y = 0; y<ysize; y+=1)
+          {
+            for( z = 0 ; z<zsize; z+=1 )
+              {
+                unsigned int slot_base = x*(ysize*zsize) + y*zsize + z ;
+                for( core=0; core<k_injecting_cores; core+=1)
+                  {
+                    unsigned int slot = (slot_base << 2) | core ;
+                    if( get_rcv_skb(&dma_tcp->rcvdemux,slot))
+                      {
+                        unsigned long long timestamp=get_timestamp(&dma_tcp->rcvdemux,slot) ;
+                        unsigned long long age=now-timestamp ;
+                        TRACEN(k_t_request,"( %d %d %d ) age( %d %d %d %d )= 0x%08x%08x !!!", myx, myy, myz, x,y,z,core,(unsigned int)(age>>32),(unsigned int)age) ;
+                      }
+                  }
+              }
+          }
+      }
+    TRACEN(k_t_entryexit,"<") ;
+  }
+#endif
+
+static void balancer_init(bgp_dma_balancer *balancer)
+{
+	int x;
+	for(x=0;x<k_pending_rcv_skb_classes;x+=1)
+		{
+			TRACEN(k_t_general,"balancer init[%d]",x) ;
+			skb_queue_head_init(&balancer->b[x].pending_rcv_skbs) ;
+			balancer->b[x].outstanding_counters=0 ;
+		}
+}
+
+/*
+ * We set up 32 software injection FIFOs, arranged in 4 groups of 8; the group number is chosen as a function of the
+ * destination node. Within each group of 8, we use 6 FIFOs to control 'bulk data', nominally one for each outbound link
+ * (though adaptive routing may take a packet out on a different link when the time comes); 1 FIFO to control single-packet
+ * frames, which are sent high-priority because they may be 'ack' frames that will enable more data to flow from a far-end
+ * node; and 1 FIFO to control 'accept' packets, which are sent high-priority because a scarce local resource (a reception
+ * counter) has been allocated to the transfer and we would like it underway as soon as possible. The index arithmetic is
+ * sketched below.
+ */
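+
+/*
+ * A minimal sketch of that index arithmetic (illustrative only; the constants follow the layout coded in
+ * dma_tcp_frames_init below, with k_injecting_cores=4 groups of k_injecting_directions=8 FIFOs):
+ *
+ *   fifo = group*k_injecting_directions + direction ;   // directions 0-5: bulk data, 6: 'taxi', 7: propose/accept
+ *
+ * so, for example, group 2 direction 7 (the propose/accept channel) selects software FIFO 2*8+7 = 23.
+ */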
+
+void dma_tcp_frames_init(dma_tcp_t *dma_tcp)
+  {
+	  TRACEN(k_t_general,"sizeof(frame_injection_cb)=%d sizeof(DMA_PacketHeader_t)=%d sizeof(DMA_InjDescriptor_t)=%d",sizeof(frame_injection_cb),sizeof(DMA_PacketHeader_t),sizeof(DMA_InjDescriptor_t)) ;
+
+	  if( k_async_free ) setup_timer(&dma_tcp->transmission_free_skb_timer,dma_tcp_frames_transmission_free_skb,0) ;
+    setup_timer(&dma_tcp->runway_check_timer,dma_tcp_frames_runway_check,0) ;
+    dma_tcp->rcv_checked_time = jiffies ;
+    dma_tcp->packets_received_count = 0 ;
+    allocate_idma(&dma_tcp->idma) ;  /*  Buffering for packets-style injection DMA */
+    allocate_rcv(&dma_tcp->rcvdemux,dma_tcp->node_count) ;  /*  Demultiplexing for packets-style reception */
+#if defined(USE_ADAPTIVE_ROUTING)
+    allocate_tx(&dma_tcp->tx_mux,dma_tcp->node_count) ;  /*  Multiplexing for adaptive-routing transmit */
+#endif
+#if defined(TRACK_SEQUENCE)
+    track_sequence_init(dma_tcp->node_count) ;
+#endif
+    init_demux_table(dma_tcp, dma_tcp->node_count) ;
+     /*  Allocate injection FIFOs for 'packets' style access */
+    {
+      int core ;
+      int direction ;
+      for( core=0; core< k_injecting_cores; core += 1  )
+        {
+          for( direction=0; direction< k_injecting_directions; direction += 1  )
+            {
+              dma_tcp->injFifoFramesPri[ core*k_injecting_directions+direction ] = 0 ;
+              dma_tcp->injFifoFramesLoc[ core*k_injecting_directions+direction ] = 0 ;
+              dma_tcp->injFifoFramesIds[ core*k_injecting_directions+direction ] = core*k_injecting_directions+direction ;
+            }
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+0 ] = 0x80;  /*  Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+1 ] = 0x40;  /*  Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+2 ] = 0x20;  /*  Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+3 ] = 0x08;  /*  Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+4 ] = 0x04;  /*  Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+5 ] = 0x02;  /*  Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+6 ] = 0x11;  /*  Set 'high priority' FIFO for taxi channel */
+          dma_tcp->injFifoFramesPri[ core*k_injecting_directions+k_injecting_directions-1 ] = 1 ; // 'high priority' for taxi channel
+/*           dma_tcp->injFifoFramesMap[ core*k_injecting_directions+6 ] = 0xee; // Set any FIFO for taxi channel */
+#if defined(USE_SKB_TO_SKB)
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+7 ] = 0x11;  /*  Set 'high priority' FIFO for propose/accept channel */
+/*           dma_tcp->injFifoFramesMap[ core*k_injecting_directions+7 ] = 0xee; //  propose/accept channel can go in any fifo, but regular pri */
+          dma_tcp->injFifoFramesPri[ core*k_injecting_directions+7 ] = 1 ; // 'high priority' for propose/accept channel
+#endif
+        }
+    }
+     /*  register receive functions for the memfifo packets */
+    dma_tcp->proto_issue_frames_single=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDataSingleActor, dma_tcp, 0, 0);
+#if defined(USE_ADAPTIVE_ROUTING)
+    dma_tcp->proto_issue_frames_adaptive=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDataAdaptiveActor, dma_tcp, 0, 0);
+#endif
+
+#if defined(USE_SKB_TO_SKB)
+    dma_tcp->proto_transfer_propose=DMA_RecFifoRegisterRecvFunction(issuePropActor, dma_tcp, 0, 0);
+    /* If we want to start up with everything flowing through the reception FIFO , do this by setting the 'eager limit' longer than the largest IP frame */
+    dma_tcp->eager_limit = k_force_eager_flow ? 10000000 : 1024 ;  /*  Frames smaller than this get sent through the FIFO rather than the DMA (set it above 65536 to run everything through receive FIFO) */
+    balancer_init(&dma_tcp->balancer) ;
+#endif
+    dma_tcp_diagnose_init(dma_tcp) ;
+    TRACEN(k_t_general,"(=)DMA_RecFifoRegisterRecvFunction proto_issue_frames_single=%d",
+		    dma_tcp->proto_issue_frames_single);
+  }
+
+void dma_tcp_frames_ifup(dma_tcp_t *dma_tcp)
+  {
+      {
+        int ret = DMA_InjFifoGroupAllocate( dma_tcp_InjectionFifoGroup(dma_tcp),
+            k_injecting_cores*k_injecting_directions,   /*  num inj fifos */
+                                    dma_tcp->injFifoFramesIds,
+                                    dma_tcp->injFifoFramesPri,
+                                    dma_tcp->injFifoFramesLoc,
+                                    dma_tcp->injFifoFramesMap,
+                                    NULL,
+                                    NULL,
+                                    NULL,
+                                    NULL,
+                                    NULL,
+                                    & dma_tcp->injFifoGroupFrames );
+
+        TRACEN(k_t_init,"(=)DMA_InjFifoGroupAllocate rc=%d", ret );
+      }
+
+    {
+      int core ;
+      int direction ;
+      for( core=0; core< k_injecting_cores; core += 1  )
+        {
+          for( direction=0; direction< k_injecting_directions; direction += 1  )
+            {
+              int ret = DMA_InjFifoInitById( &dma_tcp->injFifoGroupFrames,
+                  dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction],
+                  dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo,
+                  dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo,   /*  head */
+                  dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo+1   /*  end */
+                                 );
+              dma_tcp->idma.idma_core[core].idma_direction[direction].fifo_initial_head =
+                (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ;
+              TRACEN(k_t_general,"(=)DMA_InjFifoInitById rc=%d initial_head=0x%08x", ret , dma_tcp->idma.idma_core[core].idma_direction[direction].fifo_initial_head);
+            }
+        }
+    }
+
+  }
+
+void dma_tcp_frames_ifdown(dma_tcp_t *dma_tcp)
+  {
+    int ret = DMA_InjFifoGroupFree( dma_tcp_InjectionFifoGroup(dma_tcp),
+        k_injecting_cores*k_injecting_directions,   /*  num inj fifos */
+                                dma_tcp->injFifoFramesIds,
+                                & dma_tcp->injFifoGroupFrames );
+
+    TRACEN(k_t_init,"(=) DMA_InjFifoGroupFree rc=%d", ret );
+
+  }
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_quads.h b/drivers/net/bgp_torus/bgp_dma_tcp_quads.h
new file mode 100644
index 0000000..279999d
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp_quads.h
@@ -0,0 +1,394 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Quadword ops for copying data, in particular torus-packet-sized
+ *              (240 byte) sequences. Not currently used, but provided for
+ *              reference.
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_DMA_TCP_QUADS_H__
+#define __BGP_DMA_TCP_QUADS_H__
+
+/*  TODO: take away the use of FP regs, now that software FIFO frames are 'rare', so we can avoid FP-in-kernel */
+/*  Drop 240 bytes of payload from regs into 'software FIFO' */
+static inline void torus_frame_payload_store(
+    void * payloadptr)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    torus_frame_payload *payload=payloadptr ;
+
+    TRACEN(k_t_detail, "torus_payload_store payload=%p",payload) ;
+           asm  (
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[payload]       \n\t"  /* F1=Q1 store to (%[payload]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "stfpdx  2,%[index1],%[payload]       \n\t"  /* F2=Q2 store */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "stfpdx  3,%[index2],%[payload]       \n\t"  /* F3=Q3 store */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "stfpdx  4,%[index1],%[payload]       \n\t"  /* F4=Q4 store */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "stfpdx  5,%[index2],%[payload]       \n\t"  /* F5=Q5 store */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "stfpdx  6,%[index1],%[payload]       \n\t"  /* F6=Q6 store */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "stfpdx  7,%[index2],%[payload]       \n\t"  /* F7=Q7 store */
+               "li      %[index2],128                   \n\t"  /* Indexing values */
+               "stfpdx  8,%[index1],%[payload]       \n\t"  /* F8=Q8 store */
+               "li      %[index1],144                   \n\t"  /* Indexing values */
+               "stfpdx  9,%[index2],%[payload]       \n\t"  /* F9=Q9 store */
+               "li      %[index2],160                   \n\t"  /* Indexing values */
+               "stfpdx  10,%[index1],%[payload]      \n\t"  /* F10=Q10 store */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "stfpdx  11,%[index2],%[payload]      \n\t"  /* F11=Q11 store */
+               "li      %[index2],192                   \n\t"  /* Indexing values */
+               "stfpdx  12,%[index1],%[payload]      \n\t"  /* F12=Q12 store */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "stfpdx  13,%[index2],%[payload]      \n\t"  /* F13=Q13 store */
+               "li      %[index2],224                   \n\t"  /* Indexing values */
+               "stfpdx  14,%[index1],%[payload]      \n\t"  /* F14=Q14 store */
+               "stfpdx  15,%[index2],%[payload]      \n\t"  /* F15=Q15 store */
+                     :          /* outputs */
+                       "=m" (*payload),
+                       [index1] "=&b" (index1),
+                       [index2] "=&b" (index2)
+                     :            /* Inputs */
+                       [payload] "b" (payload)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14", "fr15"
+                       );
+  }
+
+/*  Load 240 bytes of payload from memory into regs */
+static inline void torus_frame_payload_load(
+    void * payloadptr)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    torus_frame_payload *payload=payloadptr ;
+
+    TRACEN(k_t_detail, "torus_payload_load payload=%p",payload) ;
+           asm  (
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "lfpdx  1,0,%[payload]       \n\t"  /* F1=Q1 load from (%[payload]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "lfpdx  2,%[index1],%[payload]       \n\t"  /* F2=Q2 load */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "lfpdx  3,%[index2],%[payload]       \n\t"  /* F3=Q3 load */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "lfpdx  4,%[index1],%[payload]       \n\t"  /* F4=Q4 load */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "lfpdx  5,%[index2],%[payload]       \n\t"  /* F5=Q5 load */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "lfpdx  6,%[index1],%[payload]       \n\t"  /* F6=Q6 load */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "lfpdx  7,%[index2],%[payload]       \n\t"  /* F7=Q7 load */
+               "li      %[index2],128                   \n\t"  /* Indexing values */
+               "lfpdx  8,%[index1],%[payload]       \n\t"  /* F8=Q8 load */
+               "li      %[index1],144                   \n\t"  /* Indexing values */
+               "lfpdx  9,%[index2],%[payload]       \n\t"  /* F9=Q9 load */
+               "li      %[index2],160                   \n\t"  /* Indexing values */
+               "lfpdx  10,%[index1],%[payload]      \n\t"  /* F10=Q10 load */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "lfpdx  11,%[index2],%[payload]      \n\t"  /* F11=Q11 load */
+               "li      %[index2],192                   \n\t"  /* Indexing values */
+               "lfpdx  12,%[index1],%[payload]      \n\t"  /* F12=Q12 load */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "lfpdx  13,%[index2],%[payload]      \n\t"  /* F13=Q13 load */
+               "li      %[index2],224                   \n\t"  /* Indexing values */
+               "lfpdx  14,%[index1],%[payload]      \n\t"  /* F14=Q14 load */
+               "lfpdx  15,%[index2],%[payload]      \n\t"  /* F15=Q15 load */
+                     :          /* outputs */
+                       "=m" (*payload),
+                       [index1] "=&b" (index1),
+                       [index2] "=&b" (index2)
+                     :            /* Inputs */
+                       [payload] "b" (payload)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14", "fr15"
+                       );
+  }
+
+static inline int torus_frame_payload_memcpy_base(
+		torus_frame_payload * target,
+		torus_frame_payload * source
+    )
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+
+    TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ;
+           asm  (
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "lfpdx  1,0,%[source]       \n\t"  /* F1=Q1 load from (%[source]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "lfpdx  2,%[index1],%[source]       \n\t"  /* F2=Q2 load */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "lfpdx  3,%[index2],%[source]       \n\t"  /* F3=Q3 load */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "lfpdx  4,%[index1],%[source]       \n\t"  /* F4=Q4 load */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "lfpdx  5,%[index2],%[source]       \n\t"  /* F5=Q5 load */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "lfpdx  6,%[index1],%[source]       \n\t"  /* F6=Q6 load */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "lfpdx  7,%[index2],%[source]       \n\t"  /* F7=Q7 load */
+               "li      %[index2],128                   \n\t"  /* Indexing values */
+               "lfpdx  8,%[index1],%[source]       \n\t"  /* F8=Q8 load */
+               "li      %[index1],144                   \n\t"  /* Indexing values */
+               "lfpdx  9,%[index2],%[source]       \n\t"  /* F9=Q9 load */
+               "li      %[index2],160                   \n\t"  /* Indexing values */
+               "lfpdx  10,%[index1],%[source]      \n\t"  /* F10=Q10 load */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "lfpdx  11,%[index2],%[source]      \n\t"  /* F11=Q11 load */
+               "li      %[index2],192                   \n\t"  /* Indexing values */
+               "lfpdx  12,%[index1],%[source]      \n\t"  /* F12=Q12 load */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "lfpdx  13,%[index2],%[source]      \n\t"  /* F13=Q13 load */
+               "li      %[index2],224                   \n\t"  /* Indexing values */
+               "lfpdx  14,%[index1],%[source]      \n\t"  /* F14=Q14 load */
+               "lfpdx  15,%[index2],%[source]      \n\t"  /* F15=Q15 load */
+               "li      %[index1],16                    \n\t"  /* Indexing values */
+               "stfpdx  1,0,%[target]       \n\t"  /* F1=Q1 store to (%[target]) */
+               "li      %[index2],32                    \n\t"  /* Indexing values */
+               "stfpdx  2,%[index1],%[target]       \n\t"  /* F2=Q2 store */
+               "li      %[index1],48                    \n\t"  /* Indexing values */
+               "stfpdx  3,%[index2],%[target]       \n\t"  /* F3=Q3 store */
+               "li      %[index2],64                    \n\t"  /* Indexing values */
+               "stfpdx  4,%[index1],%[target]       \n\t"  /* F4=Q4 store */
+               "li      %[index1],80                    \n\t"  /* Indexing values */
+               "stfpdx  5,%[index2],%[target]       \n\t"  /* F5=Q5 store */
+               "li      %[index2],96                    \n\t"  /* Indexing values */
+               "stfpdx  6,%[index1],%[target]       \n\t"  /* F6=Q6 store */
+               "li      %[index1],112                   \n\t"  /* Indexing values */
+               "stfpdx  7,%[index2],%[target]       \n\t"  /* F7=Q7 store */
+               "li      %[index2],128                   \n\t"  /* Indexing values */
+               "stfpdx  8,%[index1],%[target]       \n\t"  /* F8=Q8 store */
+               "li      %[index1],144                   \n\t"  /* Indexing values */
+               "stfpdx  9,%[index2],%[target]       \n\t"  /* F9=Q9 store */
+               "li      %[index2],160                   \n\t"  /* Indexing values */
+               "stfpdx  10,%[index1],%[target]      \n\t"  /* F10=Q10 store */
+               "li      %[index1],176                   \n\t"  /* Indexing values */
+               "stfpdx  11,%[index2],%[target]      \n\t"  /* F11=Q11 store */
+               "li      %[index2],192                   \n\t"  /* Indexing values */
+               "stfpdx  12,%[index1],%[target]      \n\t"  /* F12=Q12 store */
+               "li      %[index1],208                   \n\t"  /* Indexing values */
+               "stfpdx  13,%[index2],%[target]      \n\t"  /* F13=Q13 store */
+               "li      %[index2],224                   \n\t"  /* Indexing values */
+               "stfpdx  14,%[index1],%[target]      \n\t"  /* F14=Q14 store */
+               "stfpdx  15,%[index2],%[target]      \n\t"  /* F15=Q15 store */
+                     :          /* outputs */
+                       "=m" (*target),
+                       [index1] "=&b" (index1),
+                       [index2] "=&b" (index2)
+                     :            /* Inputs */
+                       [source] "b" (source),         /* inputs */
+                       [target] "b" (target)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14", "fr15"
+                       );
+           return 0 ;
+  }
+#define loadreg(Reg,Name,Offset) \
+	"li %[index]," #Offset " \n\t" \
+	"lfpdx " #Reg ",%[index],%[" #Name "] \n\t"
+
+#define savereg(Reg,Name,Offset) \
+	"li %[index]," #Offset " \n\t" \
+	"stfpdx " #Reg ",%[index],%[" #Name "] \n\t"
+
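+/*
+ * For reference, loadreg(3,source,0x30) expands to the instruction pair
+ *     "li %[index],0x30 \n\t" "lfpdx 3,%[index],%[source] \n\t"
+ * i.e. load FP register pair 3 with the quadword at source+0x30; savereg is the stfpdx (store) counterpart.
+ */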
+
+static inline int torus_frame_payload_memcpy(
+		torus_frame_payload * target,
+		torus_frame_payload * source
+    )
+  {
+    unsigned int index ;
+
+    TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ;
+           asm  (
+			loadreg(0,source,0x00)
+			loadreg(1,source,0x10)
+			loadreg(2,source,0x20)
+			loadreg(3,source,0x30)
+			loadreg(4,source,0x40)
+			loadreg(5,source,0x50)
+			loadreg(6,source,0x60)
+			loadreg(7,source,0x70)
+			loadreg(8,source,0x80)
+			loadreg(9,source,0x90)
+			loadreg(10,source,0xa0)
+			loadreg(11,source,0xb0)
+			loadreg(12,source,0xc0)
+			loadreg(13,source,0xd0)
+			loadreg(14,source,0xe0)
+			savereg(0,target,0x00)
+			savereg(1,target,0x10)
+			savereg(2,target,0x20)
+			savereg(3,target,0x30)
+			savereg(4,target,0x40)
+			savereg(5,target,0x50)
+			savereg(6,target,0x60)
+			savereg(7,target,0x70)
+			savereg(8,target,0x80)
+			savereg(9,target,0x90)
+			savereg(10,target,0xa0)
+			savereg(11,target,0xb0)
+			savereg(12,target,0xc0)
+			loadreg(0,source,0xf0)             /*  Speculate that we will need this soon */
+			savereg(13,target,0xd0)
+			loadreg(1,source,0x110)            /*  Speculate that we will need this soon */
+			savereg(14,target,0xe0)
+			loadreg(2,source,0x130)            /*  Speculate that we will need this soon */
+
+                     :          /* outputs */
+                       "=m" (*target),
+                       [index] "=&b" (index)
+                     :            /* Inputs */
+                       [source] "b" (source),         /* inputs */
+                       [target] "b" (target)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14"
+                       );
+           return 0 ;
+  }
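+
+/*
+ * Usage sketch (hypothetical caller, for illustration only): the payload of
+ * one 240-byte torus frame is exactly 15 double-hummer registers, so a
+ * single call copies a whole frame; the tail interleaves stores with
+ * speculative loads of the next frame's data to hide FPU load latency.
+ *
+ *     torus_frame_payload_memcpy(
+ *         (torus_frame_payload *) skb_put(skb, 240),
+ *         (torus_frame_payload *) fifo_image);
+ */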
+
+static inline int torus_frame_payload_memcpy_try1(
+		torus_frame_payload * target,
+		torus_frame_payload * source
+    )
+  {
+    unsigned int index ;
+
+    TRACEN(k_t_detail, "torus_frame_payload_memcpy_try1 target=%p source=%p",target,source) ;
+           asm  (
+				loadreg(0,source,0x00)
+				loadreg(2,source,0x20)
+				loadreg(4,source,0x40)
+				loadreg(1,source,0x10)
+				savereg(0,target,0x00)
+				loadreg(6,source,0x60)
+				savereg(2,target,0x20)
+				loadreg(3,source,0x30)
+				savereg(4,target,0x40)
+				loadreg(8,source,0x80)
+				savereg(1,target,0x10)
+				loadreg(5,source,0x50)
+				savereg(6,target,0x60)
+				loadreg(10,source,0xa0)
+				savereg(3,target,0x30)
+				loadreg(7,source,0x70)
+				savereg(8,target,0x80)
+				loadreg(12,source,0xc0)
+				savereg(5,target,0x50)
+				loadreg(9,source,0x90)
+				savereg(10,target,0xa0)
+				loadreg(14,source,0xe0)
+				savereg(7,target,0x70)
+				loadreg(11,source,0xb0)
+				savereg(12,target,0xc0)
+				loadreg(13,source,0xd0)
+				savereg(9,target,0x90)
+				savereg(14,target,0xe0)
+				savereg(11,target,0xb0)
+				savereg(13,target,0xd0)
+
+                     :          /* outputs */
+                       "=m" (*target),
+                       [index] "=&b" (index)
+                     :            /* Inputs */
+                       [source] "b" (source),         /* inputs */
+                       [target] "b" (target)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14"
+                       );
+           return 0 ;
+  }
+
+static inline int torus_frame_payload_memcpy_try2(
+		torus_frame_payload * target,
+		torus_frame_payload * source
+    )
+  {
+    unsigned int index ;
+
+    TRACEN(k_t_detail, "torus_frame_payload_memcpy_try2 target=%p source=%p",target,source) ;
+           asm  (
+				loadreg(0,source,0x00)
+				loadreg(1,source,0x10)
+				loadreg(2,source,0x20)
+				loadreg(4,source,0x40)
+				savereg(0,target,0x00)
+				loadreg(6,source,0x60)
+				savereg(2,target,0x20)
+				loadreg(3,source,0x30)
+				savereg(4,target,0x40)
+				loadreg(8,source,0x80)
+				savereg(1,target,0x10)
+				loadreg(5,source,0x50)
+				savereg(6,target,0x60)
+				loadreg(10,source,0xa0)
+				savereg(3,target,0x30)
+				loadreg(7,source,0x70)
+				savereg(8,target,0x80)
+				loadreg(12,source,0xc0)
+				savereg(5,target,0x50)
+				loadreg(9,source,0x90)
+				savereg(10,target,0xa0)
+				loadreg(14,source,0xe0)
+				savereg(7,target,0x70)
+				loadreg(11,source,0xb0)
+				savereg(12,target,0xc0)
+				loadreg(13,source,0xd0)
+				savereg(9,target,0x90)
+				savereg(14,target,0xe0)
+				savereg(11,target,0xb0)
+				savereg(13,target,0xd0)
+
+                     :          /* outputs */
+                       "=m" (*target),
+                       [index] "=&b" (index)
+                     :            /* Inputs */
+                       [source] "b" (source),         /* inputs */
+                       [target] "b" (target)         /* inputs */
+                     : "fr0", "fr1", "fr2", /* Clobbers */
+                       "fr3", "fr4", "fr5",
+                       "fr6", "fr7", "fr8",
+                       "fr9", "fr10", "fr11",
+                       "fr12","fr13", "fr14"
+                       );
+           return 0 ;
+  }
+#endif
diff --git a/drivers/net/bgp_torus/bgtor.h b/drivers/net/bgp_torus/bgtor.h
new file mode 100644
index 0000000..49bceff
--- /dev/null
+++ b/drivers/net/bgp_torus/bgtor.h
@@ -0,0 +1,310 @@
+/*********************************************************************
+ *
+ * Description:   Torus definitions
+ *
+ * Copyright (c) 2007, 2008 International Business Machines
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ ********************************************************************/
+#ifndef __DRIVERS__BLUEGENE__TORUS_H__
+#define __DRIVERS__BLUEGENE__TORUS_H__
+
+/* #include "bglink.h" */
+#include <linux/ioctl.h>
+
+#define TORUS_MAX_MTU	(39 * 240)
+
+#define BGP_TORUS_MAX_IRQS	96
+
+#define BGP_TORUS_GROUPS	4
+#define BGP_TORUS_DMA_SIZE	(sizeof(struct torus_dma) * BGP_TORUS_GROUPS)
+
+#define BGP_TORUS_INJ_FIFOS	32
+#define BGP_TORUS_RCV_FIFOS	8
+#define BGP_TORUS_COUNTERS	64
+#define BGP_TORUS_DMA_REGIONS	8
+
+#define BGP_TORUS_TX_ENTRIES	256
+#define BGP_TORUS_RX_ENTRIES	512
+
+#define BGP_TORUS_USER_GROUP	1
+
+/* IOCTLs for UL DMA */
+#define TORUS_IOCTL	'T'
+#define TORUS_ALLOC_TX_COUNTER		_IO(TORUS_IOCTL, 1)
+#define TORUS_ALLOC_RX_COUNTER		_IO(TORUS_IOCTL, 2)
+#define TORUS_ALLOC_TX_FIFO		_IO(TORUS_IOCTL, 3)
+#define TORUS_ALLOC_RX_FIFO		_IO(TORUS_IOCTL, 4)
+#define TORUS_FREE_TX_COUNTER		_IO(TORUS_IOCTL, 5)
+#define TORUS_FREE_RX_COUNTER		_IO(TORUS_IOCTL, 6)
+#define TORUS_FREE_TX_FIFO		_IO(TORUS_IOCTL, 7)
+#define TORUS_FREE_RX_FIFO		_IO(TORUS_IOCTL, 8)
+#define TORUS_REGISTER_TX_MEM		_IO(TORUS_IOCTL, 9)
+#define TORUS_REGISTER_RX_MEM		_IO(TORUS_IOCTL, 10)
+#define TORUS_DMA_RANGECHECK		_IO(TORUS_IOCTL, 11)
+
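+/*
+ * User-space sketch (illustrative; assumes a character device node bound to
+ * this driver, e.g. one of the bgptorus_g* nodes registered in torus.c, and
+ * an ioctl handler that implements these commands):
+ *
+ *     int fd = open("/dev/bgptorus_g1", O_RDWR);
+ *     int counter = ioctl(fd, TORUS_ALLOC_TX_COUNTER, 0);
+ *     ...
+ *     ioctl(fd, TORUS_FREE_TX_COUNTER, counter);
+ *     close(fd);
+ */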
+
+struct torus_fifo {
+    u32 start;
+    u32 end;
+    volatile u32 head;
+    volatile u32 tail;
+};
+
+struct torus_dma {
+    struct {
+	struct torus_fifo fifo[BGP_TORUS_INJ_FIFOS];	 /*  0 - 1ff */
+	u32 empty;			 /*  200 */
+	u32 __unused0;			 /*  204 */
+	u32 avail;			 /*  208 */
+	u32 __unused1;			 /*  20c */
+	u32 threshold;			 /*  210 */
+	u32 __unused2;			 /*  214 */
+	u32 clear_threshold;		 /*  218 */
+	u32 __unused3;			 /*  21c */
+	u32 dma_active;			 /*  220 */
+	u32 dma_activate;		 /*  224 */
+	u32 dma_deactivate;		 /*  228 */
+	u8 __unused4[0x100-0x2c];	 /*  22c - 2ff */
+
+	u32 counter_enabled[2];		 /*  300 */
+	u32 counter_enable[2];		 /*  308 */
+	u32 counter_disable[2];		 /*  310 */
+	u32 __unused5[2];		 /*  318 */
+	u32 counter_hit_zero[2];	 /*  320 */
+	u32 counter_clear_hit_zero[2];	 /*  328 */
+	u32 counter_group_status;	 /*  330 */
+	u8 __unused6[0x400-0x334];	 /*  334 - 3ff */
+
+	struct {
+	    u32 counter;
+	    u32 increment;
+	    u32 base;
+	    u32 __unused;
+	} counter[BGP_TORUS_COUNTERS]; /*  400 - 7ff */
+    } __attribute__((packed)) inj;
+
+    struct {
+	struct torus_fifo fifo[BGP_TORUS_RCV_FIFOS];	 /*  800 - 87f */
+	struct torus_fifo hdrfifo;	 /*  880 - 88f */
+	u8 __unused0[0x900-0x890];	 /*  890 - 8ff */
+
+	u32 glob_ints[16];		 /*  900 - 93f */
+	u8 __unused1[0xa00-0x940];	 /*  940 - 9ff */
+
+	u32 empty[2];			 /*  a00 */
+	u32 available[2];		 /*  a08 */
+	u32 threshold[2];		 /*  a10 */
+	u32 clear_threshold[2];		 /*  a18 */
+	u8 __unused2[0xb00 - 0xa20];	 /*  a20 - aff */
+
+	u32 counter_enabled[2];		 /*  b00 */
+	u32 counter_enable[2];		 /*  b08 */
+	u32 counter_disable[2];		 /*  b10 */
+	u32 __unused3[2];		 /*  b18 */
+	u32 counter_hit_zero[2];	 /*  b20 */
+	u32 counter_clear_hit_zero[2];	 /*  b28 */
+	u32 counter_group_status;	 /*  b30 */
+	u8 __unused4[0xc00 - 0xb34];	 /*  b34 - bff */
+
+	struct {
+	    u32 counter;
+	    u32 increment;
+	    u32 base;
+	    u32 limit;
+	} counter[BGP_TORUS_COUNTERS]; /*  c00 - fff */
+    } __attribute__((packed)) rcv;
+};
+
+enum {
+    torus_dir_xplus = 0x20,
+    torus_dir_xminus = 0x10,
+    torus_dir_yplus = 0x08,
+    torus_dir_yminus = 0x04,
+    torus_dir_zplus = 0x02,
+    torus_dir_zminus = 0x01
+};
+
+union torus_fifo_hw_header {
+    struct {
+	u32 csum_skip		: 7;	 /*  number of shorts to skip in chksum */
+	u32 sk			: 1;	 /*  0= use csum_skip, 1 skip pkt */
+	u32 dirhint		: 6;	 /*  x-,x+,y-,y+,z-,z+ */
+	u32 deposit		: 1;	 /*  multicast deposit */
+	u32 pid0		: 1;	 /*  destination fifo group MSb */
+	u32 size		: 3;	 /*  size: (size + 1) * 32bytes */
+	u32 pid1		: 1;	 /*  destination fifo group LSb */
+	u32 dma			: 1;	 /*  1=DMA mode, 0=Fifo mode */
+	u32 dyn_routing		: 1;	 /*  1=dynamic routing, */
+					 /*  0=deterministic routing */
+	u32 virt_channel	: 2;	 /*  channel (0=Dynamic CH0, */
+					 /*  1=Dynamic CH1, 2=Bubble, 3=Prio) */
+	u32 dest_x		: 8;
+	u32 dest_y		: 8;
+	u32 dest_z		: 8;
+	u32 reserved		: 16;
+    };
+    u8 raw8[8];
+    u32 raw32[2];
+} __attribute__((packed));
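+
+/*
+ * Worked example of the size encoding above: a full torus packet is sent
+ * with size = 7, i.e. (7 + 1) * 32 = 256 bytes on the wire -- 16 bytes of
+ * headers plus the 240-byte payload (cf. union torus_rcv_desc below).
+ */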
+
+union torus_dma_hw_header {
+    struct {
+	u32			: 30;
+	u32 prefetch		: 1;
+	u32 local_copy		: 1;
+	u32			: 24;
+	u32 counter		: 8;
+	u32 base;
+	u32 length;
+    };
+    u32 raw32[2];
+} __attribute__((packed));
+
+union torus_dma_sw_header {
+    struct {
+	u32 offset;
+	u8 counter_id;
+	u8 bytes;
+	u8 unused		: 6;
+	u8 pacing		: 1;
+	u8 remote_get		: 1;
+    };
+    u32 raw32[2];
+} __attribute__((packed));
+
+union torus_inj_desc {
+    u32 raw32[8];
+    struct {
+	union torus_dma_hw_header dma_hw;
+	union torus_fifo_hw_header fifo;
+	union torus_dma_sw_header dma_sw;
+    };
+} __attribute__((packed));
+
+struct torus_tx_ring {
+    union torus_inj_desc *desc;
+    struct sk_buff **skbs;
+    u32 start;
+    unsigned int tail_idx, pending_idx;
+    unsigned counter;
+    phys_addr_t paddr;
+    spinlock_t lock;
+};
+
+union torus_source_id {
+    u32 raw;
+    atomic_t raw_atomic;
+    struct {
+	u32 conn_id	: 8;
+	u32 src_key	: 24;
+    };
+};
+
+#define TORUS_SOURCE_ID_NULL (~0ul)   /*  anything that can't be a legitimate id */
+
+union torus_rcv_desc {
+    u32 raw32[256 / sizeof(u32)];
+    u8 raw8[256];
+    struct {
+	union torus_fifo_hw_header fifo;
+	u32 counter;
+	union torus_source_id src_id;
+	u32 data[];
+    };
+} __attribute__((packed));
+
+struct torus_skb_cb {
+    union torus_source_id src_id;
+    u32 received_len;
+    u32 total_len;
+};
+
+struct torus_rx_ring {
+    union torus_rcv_desc *desc;
+    struct sk_buff_head skb_list;
+    u32 start;
+    unsigned int head_idx;
+    phys_addr_t paddr;
+    spinlock_t lock;
+
+     /*  bookkeeping for packet currently being reconstructed */
+    union torus_source_id src_id;
+    u32 received_len;
+    u32 total_len;
+    struct sk_buff *skb;
+
+     /*  statistics */
+    u32 dropped;
+    u32 delivered;
+};
+
+struct bg_torus {
+    u8 coordinates[3];
+    u8 dimension[3];
+    union torus_source_id source_id;
+
+    spinlock_t lock;
+    struct torus_dma *dma;
+
+    struct torus_tx_ring tx[BGP_TORUS_INJ_FIFOS * BGP_TORUS_GROUPS];
+    struct torus_rx_ring rx[BGP_TORUS_RCV_FIFOS * BGP_TORUS_GROUPS];
+
+    /* mapping from counter to tx ring index */
+    int inj_counter_to_txidx[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS];
+
+    /* counters used */
+    unsigned long inj_counter_map[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS /
+						sizeof(unsigned long) / 8];
+    unsigned long rcv_counter_map[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS /
+						sizeof(unsigned long) / 8];
+
+    /* fifos used */
+    unsigned long inj_fifo_map[BGP_TORUS_INJ_FIFOS * BGP_TORUS_GROUPS /
+						sizeof(unsigned long) / 8 + 1];
+    unsigned long rcv_fifo_map[BGP_TORUS_RCV_FIFOS * BGP_TORUS_GROUPS /
+						sizeof(unsigned long) / 8 + 1];
+
+    /* dma regions used */
+    unsigned long inj_dma_region_map;
+    unsigned long rcv_dma_region_map;
+
+    unsigned int dcr_base, dcr_size;
+    struct resource pdma, pfifo0, pfifo1;
+    int virq[BGP_TORUS_MAX_IRQS];
+
+    struct of_device *ofdev;
+    struct ctl_table_header *sysctl_header;
+};
+
+
+extern inline void bgtorus_init_inj_desc(struct bg_torus *torus,
+					 union torus_inj_desc *desc,
+					 int len, u8 x, u8 y, u8 z)
+{
+    memset(desc, 0, sizeof(*desc));
+
+    desc->fifo.sk = 1;		 /*  skip checksum */
+    desc->fifo.size = 7;	 /*  always full 240 bytes packets */
+    desc->fifo.dyn_routing = 1;
+    desc->fifo.dest_x = x;
+    desc->fifo.dest_y = y;
+    desc->fifo.dest_z = z;
+
+    desc->dma_hw.length = len;
+
+    /* atomic { desc->dma_sw.raw32[1] = ++torus->source_id.conn_id; } */
+    desc->dma_sw.raw32[1] =
+	atomic_add_return(1U << 24, &torus->source_id.raw_atomic);
+}
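+
+/*
+ * Note on the atomic above: conn_id occupies the top 8 bits of the raw
+ * source id, so atomic_add_return(1U << 24, ...) bumps conn_id without a
+ * lock; any carry out of bit 31 is discarded, giving the desired modulo-256
+ * wrap, and src_key in the low 24 bits is never disturbed.
+ */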
+
+int bgtorus_xmit(struct bg_torus *torus, union torus_inj_desc *desc,
+		 struct sk_buff *skb);
+
+
+#endif /* !__DRIVERS__BLUEGENE__TORUS_H__ */
diff --git a/drivers/net/bgp_torus/bgtornic.c b/drivers/net/bgp_torus/bgtornic.c
new file mode 100644
index 0000000..3773e35
--- /dev/null
+++ b/drivers/net/bgp_torus/bgtornic.c
@@ -0,0 +1,637 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ *	    Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description: Blue Gene driver exposing tree and torus as a NIC
+ *
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+#include <linux/KernelFxLog.h>
+
+
+#include "bgtornic.h"
+
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev);
+
+/* #define TRUST_TORUS_CRC */
+
+#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI)
+/*  Select operation with linux 'dev->poll' */
+#define TORNIC_DEV_POLL
+#endif
+
+/* #define TORNIC_TASKLET_BGNET */
+
+/* #define TORNIC_TRANSMIT_TREE_TASKLET */
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+#define ENABLE_TRACE
+
+/* #define REQUIRE_TRACE */
+
+#if defined(ENABLE_TRACE)
+extern int bgp_dma_tcp_tracemask ;
+/* extern int bgtorus_debug_tracemask ; */
+#define bgtornic_debug_tracemask bgp_dma_tcp_tracemask
+/* static int bgtornic_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+#endif
+
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...)    KernelFxLog(1,x)
+#define TRACE1(x...)   KernelFxLog(1,x)
+#define TRACE2(x...)   KernelFxLog(1,x)
+#define TRACEN(i,x...) KernelFxLog(1,x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#elif defined(ENABLE_TRACE)
+#define TRACE(x...)    KernelFxLog(bgtornic_debug_tracemask & k_t_general,x)
+#define TRACE1(x...)   KernelFxLog(bgtornic_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...)   KernelFxLog(bgtornic_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgtornic_debug_tracemask & (i),x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+/* #define TORNIC_FORCE_BROADCAST 1 */
+/**********************************************************************
+ *                           defines
+ **********************************************************************/
+
+static const char version[] = "Bgtornet: Version 1.0, (c) 2008,2010 IBM Corporation, GPL";
+
+/**********************************************************************
+ *                         Linux module
+ **********************************************************************/
+
+MODULE_DESCRIPTION("BlueGene Torus Ethernet driver");
+MODULE_LICENSE("GPL");
+
+
+int bgtornic_driverparm = 0 ;
+int bgnet_receive_torus(struct sk_buff * skb) ;
+void dma_tcp_set_port(unsigned int port) ;
+void dma_tcp_open(void) ;
+void dma_tcp_close(void) ;
+void dma_tcp_poll_handler(void) ;
+void dma_tcp_rx_enable(void) ;
+
+/*  Diagnostic options */
+enum {
+	k_inhibit_scattergather = 0 , /*  Whether to tell Linux we cannot do scatter-gather DMA. TODO: verify scatter-gather actually works, e.g. under NFS */
+	k_inhibit_gso = 1 /*  Whether to tell Linux not to attempt Generic Segmentation Offload; not useful until scatter-gather works with multiple frags per skb */
+};
+
+
+static void dumpmem(const void *address, unsigned int length, const char * label) __attribute__((unused)) ;
+static void dumpmem(const void *address, unsigned int length, const char * label)
+  {
+    int x ;
+    TRACEN(k_t_fifocontents|k_t_scattergather,"Memory dump, length=0x%08x: %s",length,label) ;
+    if( length > 20*32 ) {
+      length = 20*32 ;
+    }
+    for (x=0;x<length;x+=32)
+      {
+        int *v = (int *)(address+x) ;
+        TRACEN(k_t_fifocontents|k_t_scattergather,"%p: %08x %08x %08x %08x %08x %08x %08x %08x",
+            v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
+            ) ;
+      }
+  }
+
+
+
+static BGP_Personality_t personality;
+static struct net_device *static_dev ;
+
+
+/* int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev) ; */
+int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev
+/* 		,unsigned int x, unsigned int y, unsigned int z */
+		) ;
+
+/**********************************************************************
+ *                   Linux' packet and skb management
+ **********************************************************************/
+
+static int bgtornet_change_mtu(struct net_device *dev, int new_mtu)
+{
+/*   struct bgtornet_dev *bgtornet = netdev_priv(dev); */
+    if (new_mtu < 60 || new_mtu > BGTORNET_MAX_MTU )
+	return -EINVAL;
+    dev->mtu = new_mtu;
+/*     bgtree_set_mtu(bgtornet->tree, new_mtu) ; */
+    return 0;
+}
+
+
+/*  Form a 16-bit frame verifier by sampling one 32-bit word from every 16 bytes of payload and folding the sum */
+static unsigned int asf_frame_verifier(const char * data, unsigned int length)
+{
+	const unsigned int * data_int = (const unsigned int *) data ;
+	unsigned int result = 0 ;
+	unsigned int index ;
+	for(index=0; index<length/sizeof(unsigned int);index += 4)
+		{
+			result += data_int[index] ;
+		}
+	return result & 0xffff ;
+}
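+
+/*
+ * Worked example (illustrative): if the sampled words of a payload sum to
+ * 0x0001fffe, the verifier is 0x0001fffe & 0xffff = 0xfffe.  The sender
+ * stores this value in tcph->check (see inject_verifier below) and the
+ * receiver recomputes and compares it in frame_passes_verification.
+ */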
+
+static int bgtornet_receive(struct sk_buff *skb, struct bglink_hdr *lnkhdr,
+			 struct bglink_proto* proto)
+{
+    struct net_device *dev = (struct net_device*)proto->private;
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+
+    TRACE("bgtornet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d",
+	  skb->data, skb->len, lnkhdr->opt_eth.pad_head,
+	   lnkhdr->opt_eth.pad_tail, skb->len - lnkhdr->opt_eth.pad_head - lnkhdr->opt_eth.pad_tail);
+
+
+    /* skb_pull and trim check for over/underruns. For 0 size the
+     * add/subtract is the same as a test */
+    __skb_pull(skb, lnkhdr->opt_eth.pad_head);
+    __skb_trim(skb, skb->len - lnkhdr->opt_eth.pad_tail);
+
+
+
+/*     dumpmem(skb->data,skb->len,"Frame delivered via torus") ; */
+
+    skb->dev = dev;
+    skb->protocol = eth_type_trans(skb, dev);
+
+    TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ;
+    netif_rx(skb);
+
+
+    dev->last_rx = jiffies;
+    bgtornet->stats.rx_packets++;
+    bgtornet->stats.rx_bytes += skb->len;
+
+    return 0;
+}
+
+void bgtornet_rx_schedule(void)
+  {
+    TRACEN(k_t_general,"(>) bgtornet_rx_schedule") ;
+    {
+    struct net_device *dev = static_dev;
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+    TRACEN(k_t_napi,"netif_rx_schedule(dev=%p,napi=%p)",dev,&bgtornet->napi) ;
+    napi_schedule(&bgtornet->napi) ;
+    }
+    TRACEN(k_t_general,"(<) bgtornet_rx_schedule") ;
+  }
+
+struct net_device_stats *bgtornet_stats(void)
+  {
+    struct net_device *dev = static_dev;
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+    return   &bgtornet->stats ;
+  }
+
+static int frame_passes_verification(struct sk_buff *skb)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+        unsigned int eth_proto = eth->h_proto ;
+        struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+        unsigned int iphlen = 4*iph->ihl ;
+        struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) );
+        unsigned int ip_proto = iph->protocol ;
+        if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP )
+        	{
+			unsigned int tcphlen = 4*tcph->doff ;
+			char * payload = ((char *)(tcph)) + (tcphlen) ;
+			unsigned int payload_len=iph->tot_len-iphlen-tcphlen ;
+			unsigned int framecheck = asf_frame_verifier(payload,payload_len) ;
+			unsigned int rcvcheck = tcph->check ;
+			TRACEN(k_t_general, "framecheck=0x%08x rcvcheck=0x%08x",
+					framecheck, rcvcheck
+					) ;
+			if( framecheck != rcvcheck)
+				{
+					TRACEN(k_t_request,"(!!!) frame verify fails, framecheck=0x%08x rcvcheck=0x%08x payload_len=%d",
+								framecheck,
+								rcvcheck,
+								payload_len) ;
+					return 0 ;
+				}
+        	}
+        return 1 ;
+}
+
+static inline void deliver_frame(struct sk_buff *skb)
+{
+        struct net_device *dev = static_dev;
+        struct bgtornet_dev *bgtornet = netdev_priv(dev);
+
+
+/*         dumpmem(skb->data,skb->len,"Frame delivered via torus") ; */
+
+        skb->dev = dev;
+        skb->protocol = eth_type_trans(skb, dev);
+/*         skb->pkt_type = PACKET_HOST ; */
+        if( k_trust_torus_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
+
+#if defined(TORNIC_DEV_POLL)
+        TRACEN(k_t_napi,"netif_receive_skb(skb=%p)",skb) ;
+        netif_receive_skb(skb) ;
+#else
+        TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ;
+        netif_rx(skb);
+#endif
+
+        dev->last_rx = jiffies;
+        bgtornet->stats.rx_packets++;
+        bgtornet->stats.rx_bytes += skb->len;
+}
+
+int bgtornet_receive_torus(struct sk_buff *skb)
+{
+
+    TRACE("bgtornet rcvd pkt: data=%p, len=%d",
+          skb->data, skb->len);
+
+    if( k_asf_frame_verifier )
+	    {
+		    if (frame_passes_verification(skb))
+			    {
+				    deliver_frame(skb) ;
+			    }
+		    else
+			    {
+					dev_kfree_skb(skb) ;
+			    }
+	    }
+    else
+	    {
+		    deliver_frame(skb) ;
+	    }
+
+    TRACE("(<)");
+    return 0;
+}
+
+
+static void inject_verifier(struct sk_buff *skb)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+	unsigned int eth_proto = eth->h_proto ;
+        struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+	unsigned int iphlen = 4*iph->ihl ;
+	struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) );
+	unsigned int ip_proto = iph->protocol ;
+	if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP )
+		{
+			unsigned int tcphlen = 4*tcph->doff ;
+			char * payload = ((char *)(tcph)) + (tcphlen) ;
+			unsigned int payload_len=iph->tot_len-iphlen-tcphlen ;
+			unsigned int framecheck = asf_frame_verifier(payload,payload_len) ;
+			tcph->check = framecheck ;
+			TRACEN(k_t_general,"framecheck set to 0x%08x",framecheck) ;
+		}
+
+}
+
+static int bgtornet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+#if defined(CONFIG_BLUEGENE_TCP)
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+        struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+        struct bgtornet_dev *bgtornet = netdev_priv(dev);
+        unsigned int h_proto =  eth->h_proto ;
+        unsigned int daddr = iph->daddr ;
+	TRACEN(k_t_general,"(>) skb=%p skb->sk=%p h_dest[%02x:%02x:%02x:%02x:%02x:%02x] daddr=0x%08x", skb, skb->sk,
+			eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5], daddr) ;
+	if( eth->h_dest[0] == 0x00 && eth->h_dest[1] == 0x80 && eth->h_dest[2] == 0x47)
+		{
+
+			if( h_proto == ETH_P_IP && (daddr >> 24) == 12)
+				{
+					eth->h_dest[3]=(daddr >> 16) & 0xff ;
+					eth->h_dest[4]=(daddr >> 8) & 0xff ;
+					eth->h_dest[5]=(daddr& 0xff) - 1 ;
+				}
+
+			      if( eth->h_dest[3] == personality.Network_Config.Xcoord
+			          && eth->h_dest[4] == personality.Network_Config.Ycoord
+			          && eth->h_dest[5] == personality.Network_Config.Zcoord
+			          )
+				      {
+					          netif_rx(skb) ;  /*  Try to feed the skb to the local networking layer */
+				      }
+			      else
+				{
+				        if( k_asf_frame_verifier ) inject_verifier(skb) ;
+					bgtorus_start_xmit(skb, dev
+/* 							, eth->h_dest[3],eth->h_dest[4],eth->h_dest[5] */
+							                                            ) ;
+				}
+			bgtornet->stats.tx_packets += 1 ;
+			bgtornet->stats.tx_bytes += skb->len ;
+		}
+	else
+		{
+			 /*  Request to send a frame over the torus, but not to a torus MAC address. Trace and discard. */
+			TRACEN(k_t_protocol,"skb=%p skb->sk=%p h_dest[%02x:%02x:%02x:%02x:%02x:%02x] not torus-mac", skb, skb->sk,
+					eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ;
+/* 			bgtornet->stats.tx_errors += 1; */
+/* 			bgtornet->stats.tx_aborted_errors += 1; */
+			dev_kfree_skb(skb) ;
+
+		}
+	TRACEN(k_t_general,"(<)") ;
+#else
+  col_start_xmit(skb, dev) ;
+#endif
+  return 0 ;
+}
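+
+/*
+ * Example of the address rewrite above (assuming the conventional 12.x.y.z
+ * torus subnet that the (daddr >> 24) == 12 test implies): a frame for
+ * 12.3.1.5 gets its destination MAC forced to 00:80:47:03:01:04, i.e.
+ * X = 3, Y = 1, Z = 5 - 1 = 4.  If those coordinates match this node's
+ * personality the skb is looped back via netif_rx(); otherwise it goes out
+ * through bgtorus_start_xmit().
+ */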
+
+static int bgtornet_poll(struct napi_struct * napi, int budget)
+  {
+    struct net_device *dev = napi->dev ;
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+    TRACEN(k_t_general,"(>) bgtornet_poll napi=%p dev=%p budget=%d", napi, dev, budget) ;
+    TRACEN(k_t_napi,"napi polling starts") ;
+    dma_tcp_poll_handler() ;
+    TRACEN(k_t_napi,"netif_rx_complete(dev=%p,napi=%p)",dev,&bgtornet->napi) ;
+    napi_complete(&bgtornet->napi);
+    dma_tcp_rx_enable() ;
+    TRACEN(k_t_general,"(<) bgtornet_poll dev=%p", dev) ;
+    return 0 ;
+  }
+
+static void bgtornet_uninit(struct net_device *dev)
+{
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+    BUG_ON(bgtornet->lnk.private != dev);
+
+}
+
+static struct net_device_stats *bgtornet_get_stats(struct net_device *dev)
+{
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+    return &bgtornet->stats;
+}
+
+
+static int bgtornet_init (struct net_device *dev)
+{
+    struct bgtornet_dev *bgtornet = netdev_priv(dev);
+
+     /*  register with tree */
+    bgtornet->lnk.lnk_proto = bgtornet->tor_protocol;
+    bgtornet->lnk.rcv = bgtornet_receive;
+    bgtornet->lnk.private = dev;
+
+    return 0;
+}
+
+static int bgtornet_set_config(struct net_device* netDev,
+          struct ifmap* map)
+{
+    dma_tcp_set_port(map->port) ;
+
+    return 0;
+}
+
+static int bgtornet_open(struct net_device* dev)
+{
+//     struct bgtornet_dev* bgtornet = (struct bgtornet_dev*) netdev_priv(dev);
+     dma_tcp_open() ; /* Indicate that we want to operate as ethernet */
+
+
+    TRACEN(k_t_napi,"netif_start_queue(dev=%p)",dev) ;
+    netif_start_queue(dev);
+
+    return 0;
+}
+
+static int bgtornet_stop(struct net_device* dev)
+{
+//    struct bgtornet_dev* bgtornet = (struct bgtornet_dev*) netdev_priv(dev);
+    dma_tcp_close() ; /* Indicate that we want to stop operating as ethernet */
+
+    TRACEN(k_t_napi,"netif_stop_queue(dev=%p)",dev) ;
+    netif_stop_queue(dev);
+
+    return 0;
+}
+
+void bgtornet_set_arp_table_entry(unsigned int x, unsigned int y, unsigned int z, unsigned int ip_address)
+	{
+	struct net_device *dev = static_dev ;
+	__be32 ip = ip_address ;
+	struct neighbour * neigh = neigh_create(&arp_tbl, &ip, dev);
+	if (neigh) {
+		u8 lladdr[6] ;
+		lladdr[0] = 0x00 ;
+		lladdr[1] = 0x80 ;
+		lladdr[2] = 0x47 ;
+		lladdr[3] = x ;
+		lladdr[4] = y ;
+		lladdr[5] = z ;
+		neigh_update(neigh,  lladdr, NUD_PERMANENT, NEIGH_UPDATE_F_OVERRIDE);
+		neigh_release(neigh);
+	}
+	}
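+
+/*
+ * Example: bgtornet_set_arp_table_entry(3, 1, 4, ip) installs a permanent
+ * neighbour entry mapping ip to MAC 00:80:47:03:01:04 -- the fixed
+ * 00:80:47 torus prefix followed by the node's X,Y,Z coordinates, matching
+ * the dev_addr layout set up in bgtornet_module_init().
+ */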
+
+#if defined(HAVE_NET_DEVICE_OPS)
+static const struct net_device_ops netdev_ops = {
+    .ndo_change_mtu = bgtornet_change_mtu ,
+    .ndo_get_stats = bgtornet_get_stats ,
+    .ndo_start_xmit = bgtornet_start_xmit ,
+    .ndo_init = bgtornet_init ,
+    .ndo_uninit = bgtornet_uninit ,
+    .ndo_open = bgtornet_open ,
+    .ndo_stop = bgtornet_stop ,
+    .ndo_set_config = bgtornet_set_config ,
+//    .ndo_set_mac_address = bgtornet_set_mac_addr,
+};
+#endif
+
+static unsigned int dummy_features ;
+
+static struct ctl_table bgp_tornic_table[] = {
+	        {
+	                .ctl_name       = CTL_UNNUMBERED,
+	                .procname       = "features",
+	                .data           = &dummy_features,
+	                .maxlen         = sizeof(int),
+	                .mode           = 0644,
+	                .proc_handler   = &proc_dointvec
+	        },
+	        { 0 },
+} ;
+static struct ctl_path tornic_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "torusdev", .ctl_name = 0, },
+	{ },
+};
+
+
+int __init
+bgtornet_module_init (void)
+{
+
+     struct bgtornet_dev *bgtornet;
+     struct net_device *dev;
+    printk (KERN_INFO "%s\n", version);
+
+    bluegene_getPersonality( &personality, sizeof(personality) );
+
+    dev = alloc_etherdev(sizeof(struct bgtornet_dev));
+    if (!dev)
+	return -ENOMEM;
+
+    static_dev = dev ;
+
+
+    bgtornet = netdev_priv(dev);
+    memset(bgtornet, 0, sizeof(*bgtornet));
+     /*  The following probably need to be configurable */
+
+    bgtornet->phandle_torus = 0;
+    bgtornet->eth_mask = 0;
+    dev->dev_addr[0] = 0x00;
+    dev->dev_addr[1] = 0x80;
+    dev->dev_addr[2] = 0x47;
+    dev->dev_addr[3] = personality.Network_Config.Xcoord ;
+    dev->dev_addr[4] = personality.Network_Config.Ycoord ;
+    dev->dev_addr[5] = personality.Network_Config.Zcoord ;
+
+    bgtornet->eth_local = bgtornet->eth_mask & *(unsigned int *)&dev->dev_addr[0];
+
+#if defined(HAVE_NET_DEVICE_OPS)
+    dev->netdev_ops = &netdev_ops ;
+#else
+    dev->init			= bgtornet_init;
+    dev->uninit			= bgtornet_uninit;
+    dev->get_stats	        = bgtornet_get_stats;
+    dev->hard_start_xmit        = bgtornet_start_xmit;
+    dev->change_mtu		= bgtornet_change_mtu;
+    dev->open = bgtornet_open ;
+    dev->stop = bgtornet_stop ;
+    dev->set_config = bgtornet_set_config ;
+#endif
+    dev->mtu      = BGTORNET_DEFAULT_MTU;
+
+
+    TRACEN(k_t_napi,"netif_napi_add(dev=%p,napi=%p,poll=bgtornet_poll,weight=16)",dev,&bgtornet->napi) ;
+    netif_napi_add(dev,&bgtornet->napi,bgtornet_poll,16) ;
+    TRACEN(k_t_napi,"napi poll_list=(%p,%p) state=%lu weight=%d poll=%p dev=%p dev_list=(%p,%p)",
+            bgtornet->napi.poll_list.next,bgtornet->napi.poll_list.prev,
+            bgtornet->napi.state,bgtornet->napi.weight,bgtornet->napi.poll,
+            bgtornet->napi.dev,
+            bgtornet->napi.dev_list.next,bgtornet->napi.dev_list.prev ) ;
+    TRACEN(k_t_napi,"napi_enable(napi=%p)",&bgtornet->napi) ;
+    napi_enable(&bgtornet->napi) ;
+    TRACEN(k_t_napi,"napi poll_list=(%p,%p) state=%lu weight=%d poll=%p dev=%p dev_list=(%p,%p)",
+            bgtornet->napi.poll_list.next,bgtornet->napi.poll_list.prev,
+            bgtornet->napi.state,bgtornet->napi.weight,bgtornet->napi.poll,
+            bgtornet->napi.dev,
+            bgtornet->napi.dev_list.next,bgtornet->napi.dev_list.prev ) ;
+
+
+/*  If we're trusting the torus hardware, there is no point forming an IP checksum on the send side */
+    dev->features = NETIF_F_HIGHDMA
+                  | (k_trust_torus_crc ? (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : 0 )
+		  | (k_inhibit_scattergather ? 0 : NETIF_F_SG) ;
+
+    skb_queue_head_init(&(bgtornet->xmit_list)) ;
+
+
+    if (register_netdev(dev) != 0)
+	goto err;
+    if( k_inhibit_gso )
+	    {
+		    dev->features &= ~(NETIF_F_GSO) ; /* scatter-gather sometimes gets it wrong; unclear if GSO or the driver is at fault */
+						       /*  TODO: isolate whether GSO or the torus driver is broken */
+	    }
+
+    bgp_tornic_table[0].data = &(dev->features) ;
+
+	register_sysctl_paths(tornic_ctl_path,bgp_tornic_table) ;
+
+    printk(KERN_INFO
+	   "%s: BGNET %s, MAC %02x:%02x:%02x:%02x:%02x:%02x\n" "BGTORNET mask 0x%08x local 0x%08x\n",
+	   dev->name, "np->full_name",
+	   dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2],
+	   dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5],
+           bgtornet->eth_mask, bgtornet->eth_local
+	   );
+
+    return 0;
+
+ err:
+    free_netdev(dev);
+    return -1;
+}
+
+void __exit bgtornet_module_exit (void)
+{
+}
+
+/* module_init(bgtornet_module_init); */
+/* module_exit(bgtornet_module_exit); */
diff --git a/drivers/net/bgp_torus/bgtornic.h b/drivers/net/bgp_torus/bgtornic.h
new file mode 100644
index 0000000..2139081
--- /dev/null
+++ b/drivers/net/bgp_torus/bgtornic.h
@@ -0,0 +1,126 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Volkmar Uhlig
+ *          Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description:   definitions for BG networks
+ *
+ *
+ ********************************************************************/
+
+#ifndef __DRIVERS__NET__BLUEGENE__BGNIC_H__
+#define __DRIVERS__NET__BLUEGENE__BGNIC_H__
+
+/* #define BG_IRQ(group, irq)	((group) << 5 | (irq)) */
+
+
+/**********************************************************************
+ * link layer
+ **********************************************************************/
+
+/* #define BGNET_P_ETH0		1 */
+/* #define BGNET_P_ETH1            2 */
+/* #define BGNET_P_ETH2            3 */
+/* #define BGNET_P_ETH3            4 */
+/* #define BGNET_P_ETH4            5 */
+/* #define BGNET_P_ETH5            6 */
+/* #define BGNET_P_ETH6            7 */
+/* #define BGNET_P_ETH7            8 */
+/* #define BGNET_P_ETH8            9 */
+/* #define BGNET_P_LAST_ETH        BGNET_P_ETH8 */
+/*  */
+/* #define BGNET_P_CONSOLE		20 */
+
+/* #define BGNET_FRAG_MTU		240 */
+/*  When running 'dma_tcp_frames', we can have an MTU as large as we like. IP limits to 64k, though. */
+enum {
+	BGTORNET_DEFAULT_MTU = ETH_DATA_LEN ,
+	BGTORNET_MAX_MTU = 65536
+};
+#define BGNET_MAX_MTU		65536
+/* #define BGNET_MAX_MTU		(BGNET_FRAG_MTU * 128) */
+/* #define BGNET_DEFAULT_MTU	(BGNET_FRAG_MTU * 30 - 12) */
+/* #define BGNET_DEFAULT_MTU	ETH_DATA_LEN */
+
+enum {
+	k_trust_torus_crc =
+#if defined(BGP_TORUS_IP_CHECKSUM)
+		0
+#else
+		1
+#endif
+		,
+/* #if defined(CONFIG_BGP_TORUS_ADAPTIVE_ROUTING) */
+//	k_trust_torus_crc = 1 ,  /*  Whether the IP layer should trust the BGP hardware CRC on the torus network */
+/* #else */
+//	k_trust_torus_crc = 1 ,  /*  Whether the IP layer should trust the BGP hardware CRC on the torus network */
+/* #endif */
+	k_asf_frame_verifier = 0  /*  Whether to try a frame verifier in the bgtornic layer */
+};
+
+
+struct bglink_hdr
+{
+    unsigned int dst_key;
+    unsigned int src_key;
+    unsigned short conn_id;
+    unsigned char this_pkt;
+    unsigned char total_pkt;
+    unsigned short lnk_proto;   /*  1 eth, 2 con, 3... */
+    union {
+        unsigned short optional;  /*  for encapsulated protocol use */
+        struct {
+            u8 pad_head;
+            u8 pad_tail;
+        } opt_eth;
+    };
+} __attribute__((packed));
+
+
+struct bglink_proto
+{
+    unsigned short lnk_proto;
+    int (*rcv)(struct sk_buff*, struct bglink_hdr*, struct bglink_proto*);
+    void *private;
+    struct list_head list;
+};
+
+struct bgtornet_dev
+{
+    unsigned short tor_protocol;
+    unsigned int eth_mask;
+    unsigned int eth_local;
+    struct bglink_proto lnk;
+    struct net_device_stats stats;
+    u32 phandle_torus;
+    struct napi_struct napi ; /* 2.6.27-ism for NAPI poll */
+    struct sk_buff_head xmit_list ;   /* List of skb's to be sent */
+};
+
+extern inline unsigned int eth_to_key(char *addr)
+{
+    unsigned int key;
+    if (is_broadcast_ether_addr(addr))
+        key = ~0U;
+    else
+        key = (addr[3] << 16) | (addr[4] << 8) | (addr[5] << 0);
+    return key;
+}
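+
+/*
+ * For a torus MAC 00:80:47:x:y:z this yields key = (x << 16) | (y << 8) | z,
+ * i.e. the packed coordinates; a broadcast address maps to the reserved
+ * all-ones key.
+ */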
+
+
+#endif /* !__DRIVERS__NET__BLUEGENE__BGNIC_H__ */
diff --git a/drivers/net/bgp_torus/torus.c b/drivers/net/bgp_torus/torus.c
new file mode 100644
index 0000000..2849e3d
--- /dev/null
+++ b/drivers/net/bgp_torus/torus.c
@@ -0,0 +1,551 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ *          Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for torus
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/KernelFxLog.h>
+
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <linux/irq.h>
+#ifdef CONFIG_PPC_MERGE
+#include <asm/prom.h>
+#include <asm/of_platform.h>
+#endif
+
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+
+
+/* #include "bgnic.h" */
+/* #include "bgcol.h" */
+
+#define TORUS_DEV_NAME "bgtorus"
+#include "../bgp_network/bgp_net_traceflags.h"
+
+int __init
+bgtornet_module_init(void) ;
+void __exit
+bgtornet_module_exit(void) ;
+int __init
+dma_tcp_module_init(void) ;
+int __exit
+dma_tcp_module_cleanup(void) ;
+
+typedef struct {
+  struct sk_buff_head skb_list_xmit ;   /* List of skb's being passed to the tasklet for sending */
+} bg_tcptorus ;
+
+static bg_tcptorus static_torus ;
+
+typedef struct {
+  unsigned char x ;
+  unsigned char y ;
+  unsigned char z ;
+} torusTarget_t ;
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+/* int bgtorus_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+/* int bgtorus_debug_tracemask=k_t_protocol; */
+int bgtorus_debug_tracemask=k_t_init ;
+#endif
+
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...)    KernelFxLog(1,x)
+#define TRACE1(x...)   KernelFxLog(1,x)
+#define TRACE2(x...)   KernelFxLog(1,x)
+#define TRACEN(i,x...) KernelFxLog(1,x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#elif  defined(CONFIG_BLUEGENE_TORUS_TRACE)
+#define TRACE(x...)    KernelFxLog(bgtorus_debug_tracemask & k_t_general,x)
+#define TRACE1(x...)   KernelFxLog(bgtorus_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...)   KernelFxLog(bgtorus_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgtorus_debug_tracemask & (i),x)
+#define TRACED(x...)   KernelFxLog(1,x)
+#define TRACES(x...)   KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+/* #define HAS_HOSTS */
+/* #define HAS_NICPARM */
+/* #define HAS_DRIVERPARM */
+#define HAS_TORUSDIAG
+
+/*  If you need settable parameters for the tree or the NIC (for debugging), enable them here */
+#if defined(HAS_DRIVERPARM)
+static int bgtorus_driverparm ;
+#endif
+
+#if defined(HAS_NICPARM)
+extern int bgnic_driverparm ;
+#endif
+
+/* void torus_learn_host(const char *cp) ; */
+
+int bgp_dma_ethem ;  /*  Set externally if we want to try 'eth-em' on torus */
+
+/* #define SENDS_WITH_TASKLET */
+
+#define BGP_COL_MAJOR_NUM  120
+#define BGP_TORUS_MAJOR_NUM 121
+#define BGP_GI_MAJOR_NUM    122
+#define BGP_COL_MINOR_NUMS  2
+#define BGP_TORUS_MINOR_NUMS 2
+#define BGP_GI_MINOR_NUMS   4
+#define _BGP_UA_COL0  (0x6)
+#define _BGP_PA_COL0  (0x10000000)
+#define _BGP_UA_COL1  (0x6)
+#define _BGP_PA_COL1  (0x11000000)
+#define _BGP_UA_TORUS0 (0x6)
+#define _BGP_PA_TORUS0 (0x01140000)
+#define _BGP_UA_TORUS1 (0x6)
+#define _BGP_PA_TORUS1 (0x01150000)
+
+/*
+ * device management
+ */
+struct bgpnet_dev
+{
+  int                  major,minor;        /* device major, minor */
+  unsigned long long   physaddr;           /* physical address */
+  struct task_struct* current;            /* process holding device */
+  int                  signum;             /* signal to send holding process */
+  wait_queue_head_t    read_wq;
+  int                  read_complete;
+  void                 *regs;              /* mapped regs (only used with col) */
+  struct semaphore     sem;                /* interruptible semaphore */
+  struct cdev          cdev;               /* container device? */
+};
+
+
+#define BGP_MAX_DEVICES 8
+static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES];
+static unsigned int bgpnet_num_devices = 0;
+
+
+static int bgtorus_mappable_module_init(void) ;
+
+static int bgpnet_add_device(int major, int minor, const char* name,
+                             unsigned long long base, int irq,
+                             irqreturn_t (*irq_handler)(int, void*));
+static int bgpnet_device_open(struct inode *inode, struct file *filp);
+static int bgpnet_device_mmap(struct file *filp,  struct vm_area_struct *);
+static int bgpnet_device_release(struct inode *inode, struct file * filp);
+static int bgpnet_device_ioctl(struct inode *inode, struct file * filp,
+                               unsigned int  cmd,   unsigned long arg);
+
+
+static struct file_operations bgpnet_device_fops =
+{
+  .owner=   THIS_MODULE,
+  .open=    bgpnet_device_open,
+  .read=    NULL,
+  .write=   NULL,
+  .poll=    NULL,
+  .ioctl=   bgpnet_device_ioctl,
+  .release= bgpnet_device_release,
+  .mmap=    bgpnet_device_mmap,
+};
+
+
+
+#if defined(HAS_TORUSDIAG)
+void torus_diag(int param) ;  /*  So we can drive a function in the torus layer to poke at things */
+#endif
+
+void bgp_dma_tcp_send_and_free( struct sk_buff *skb ) ;
+
+void bgp_dma_tcp_poll(void) ;
+
+
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev) ;
+/*  We have a frame which should be routable via the torus. */
+/*  For code path checkout, try it via the tree ... */
+int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev
+/* 		, unsigned int x, unsigned int y, unsigned int z */
+		)
+{
+/*   int ethem = bgp_dma_ethem ; */
+/*   TRACEN(k_t_general,"(>) %s:%d", __func__, __LINE__) ; */
+/*   if( 0 == ethem ) */
+/*     { */
+/*       col_start_xmit(skb, dev) ; */
+/*     } */
+/*   else */
+/*     { */
+/*       struct inet_connection_sock *icskp = inet_csk(skb->sk) ; */
+/*       if( ethem & 4) */
+/*         { */
+/*           // Feature for duplicating the frame over the tree, so we can take the torus 'through the motions' */
+/*           // as we bring up various drivers */
+/*           struct sk_buff *cloneskb = skb_clone(skb, GFP_ATOMIC) ; */
+/*           if( cloneskb) */
+/*             { */
+/*                col_start_xmit(cloneskb, dev) ; */
+/*             } */
+/*         } */
+/*     #if defined(CONFIG_BLUEGENE_TCP) */
+/*         if( 1 ) */
+/*         { */
+              bgp_dma_tcp_send_and_free(skb
+/*         		      ,x,y,z */
+        		      ) ;
+/*  */
+/*           } */
+/*       else */
+/*         { */
+/*           col_start_xmit(skb, dev) ; */
+/*         } */
+/*     #else */
+/*       col_start_xmit(skb, dev) ; */
+/*     #endif */
+/*     } */
+  TRACEN(k_t_general,"(<) %s:%d", __func__, __LINE__) ;
+  return 0 ;
+}
+
+static int bgtorus_proc_read (char *page, char **start, off_t off,
+          int count, int *eof, void *data)
+{
+    int remaining = count;
+    *eof = 1;
+
+    return count-remaining ;
+}
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE) || defined(HAS_DRIVERPARM) || defined(HAS_NICPARM) || defined(HAS_TORUSDIAG)
+static unsigned char xtable[256] =
+    {
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    };
+
+static int bgtorus_atoix(const unsigned char *cp)
+  {
+    int result = 0 ;
+    unsigned char ecp = xtable[*cp] ;
+    while (ecp < 0x10)
+      {
+        result = (result << 4 ) | ecp ;
+        cp += 1 ;
+        ecp = xtable[*cp] ;
+      }
+    return result ;
+  }
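+
+/*
+ * e.g. bgtorus_atoix("1f\n") returns 0x1f: xtable maps '0'-'9' and
+ * 'a'-'f'/'A'-'F' to their nibble values and everything else to 0xff,
+ * which ends the loop -- including at the trailing newline that a shell
+ * 'echo' appends.
+ */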
+#endif
+
+static int bgtorus_proc_write(struct file *filp, const char __user *buff, unsigned long len, void *data)
+  {
+    char proc_write_buffer[256] ;
+    unsigned long actual_len=(len<255) ? len : 255 ;
+    int rc = copy_from_user( proc_write_buffer, buff, actual_len ) ;
+    if( rc != 0 ) return -EFAULT ;
+    proc_write_buffer[actual_len] = 0 ;
+#if defined(HAS_DRIVERPARM)
+    if( 0 == strncmp(proc_write_buffer,"driverparm=",11))
+      {
+        bgtorus_driverparm=bgtorus_atoix(proc_write_buffer+11) ;
+      }
+#endif
+#if defined(HAS_NICPARM)
+    if( 0 == strncmp(proc_write_buffer,"nicparm=",8))
+      {
+        bgnic_driverparm=bgtorus_atoix(proc_write_buffer+8) ;
+      }
+#endif
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+    if ( 0 == strncmp(proc_write_buffer,"tracemask=",10) )
+      {
+        bgtorus_debug_tracemask = bgtorus_atoix(proc_write_buffer+10) ;
+      }
+#endif
+#if defined(HAS_TORUSDIAG)
+    if ( 0 == strncmp(proc_write_buffer,"torusdiag=",10) )
+      {
+        int diag_opcode = bgtorus_atoix(proc_write_buffer+10) ;
+        torus_diag(diag_opcode) ;
+      }
+#endif
+
+    return actual_len ;
+  }
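+
+/*
+ * Usage sketch from user space (illustrative; values are hex, parsed by
+ * bgtorus_atoix).  Note the entry is created S_IRUGO in torus_module_init,
+ * so the mode may need relaxing before non-root writers can use this:
+ *
+ *     int fd = open("/proc/driver/bgtorus", O_WRONLY);
+ *     write(fd, "tracemask=10", sizeof("tracemask=10") - 1);
+ *     close(fd);
+ */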
+
+#if defined(TCP_TORUS_AVAILABLE)
+extern BGP_Personality_t tcp_bgp_personality;
+#endif
+
+
+static int __init
+torus_module_init (void)
+{
+  struct proc_dir_entry *ent;
+  TRACEN(k_t_init,"torus_module_init") ;
+  /* ----------------------------------------------------- */
+  /*        create /proc entry                             */
+  /* ----------------------------------------------------- */
+  printk(KERN_INFO "%s:%d create proc ent \n", __func__, __LINE__);
+  ent = create_proc_entry("driver/" TORUS_DEV_NAME, S_IRUGO, NULL);
+  if (ent)
+  {
+      ent->nlink = 1;
+      ent->read_proc = (void *)bgtorus_proc_read;
+      ent->write_proc = (void *)bgtorus_proc_write;
+  }
+#if defined(TCP_TORUS_AVAILABLE)
+  bluegene_getPersonality( &tcp_bgp_personality, sizeof(tcp_bgp_personality) );
+  printk(KERN_NOTICE "Network_Config.Rank=%08x Network_Config.IOnodeRank=%08x\n",
+      tcp_bgp_personality.Network_Config.Rank,
+      tcp_bgp_personality.Network_Config.IOnodeRank
+      ) ;
+#endif
+  skb_queue_head_init(&static_torus.skb_list_xmit) ;
+   /*  Bring up the memory-mappable version */
+  bgtorus_mappable_module_init() ;
+   /*  NIC and IP driver initialisation */
+  bgtornet_module_init() ;
+  dma_tcp_module_init() ;
+  return 0 ;
+}
+
+static void __exit
+torus_module_exit (void)
+{
+  TRACEN(k_t_init,"torus_module_exit") ;
+  bgtornet_module_exit() ;
+/*   dma_tcp_module_cleanup() ; */
+}
+/*  Code grabbed from Rch's driver so that we can map the torus for user-space access */
+
+
+static int bgpnet_add_device(int major,
+                             int minor,
+                             const char* devname,
+                             unsigned long long physaddr,
+                             int irq,
+                             irqreturn_t (*irq_handler)(int, void *))
+{
+  int ret;
+  dev_t devno;
+  struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices];
+  TRACEN(k_t_init,"devname=%s major=%d minor=%d",devname,major,minor) ;
+  /* initialize struct */
+  init_MUTEX (&dev->sem);
+  dev->major  = major;
+  dev->minor  = minor;
+  dev->physaddr = physaddr;
+  init_waitqueue_head(&dev->read_wq);
+  dev->read_complete = 0;
+  if (physaddr) {
+          dev->regs = ioremap(physaddr, 4096);
+  }
+  devno=MKDEV(major,minor);
+
+  /* register i.e., /proc/devices */
+  ret=register_chrdev_region(devno,1,(char *)devname);
+
+  if (ret)
+    {
+      printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) register_chrdev_region err=%d\n",
+              major,minor,ret);
+      return ret;
+    }
+
+  /* add cdev */
+  cdev_init(&dev->cdev,&bgpnet_device_fops);
+  dev->cdev.owner=THIS_MODULE;
+  dev->cdev.ops=&bgpnet_device_fops;
+  ret=cdev_add(&dev->cdev,devno,1);
+  if (ret)
+    {
+      printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) cdev_add err=%d\n",
+             major,minor,ret);
+      return ret;
+    }
+
+  /* signal to pass to owning process, should be altered using ioctl */
+  dev->signum=-1;
+
+  bgpnet_num_devices++;
+
+  return 0;
+}
+
+
+static int bgpnet_device_open (struct inode *inode, struct file *filp)
+{
+  struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev);
+
+  if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+  up(&dev->sem);
+
+  dev->current=current;
+  filp->private_data = (void*) dev;
+
+  TRACE("bgpnet: device (%d,%d) opened by process \"%s\" pid %i",
+        MAJOR(inode->i_rdev), MINOR(inode->i_rdev), current->comm, current->pid);
+
+  return 0;
+}
+
+
+
+
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+  unsigned long vsize = vma->vm_end - vma->vm_start;
+  struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data;
+  int ret = -1;
+
+  /* ------------------------------------------------------- */
+  /* set up page protection.                                 */
+  /* ------------------------------------------------------- */
+
+  vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+  vma->vm_flags     |= VM_IO;
+  vma->vm_flags     |= VM_RESERVED;
+
+  /* ------------------------------------------------------- */
+  /*                  do the mapping                         */
+  /* ------------------------------------------------------- */
+
+  if (device->physaddr != 0)
+    ret = remap_pfn_range(vma,
+                          vma->vm_start,
+                          device->physaddr >> PAGE_SHIFT,
+                          vsize,
+                          vma->vm_page_prot);
+
+  if (ret) {
+      printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n",
+                   device->major, device->minor);
+  } else {
+      TRACE("bgpnet: mapped (%d,%d) to vm=%lx",
+             device->major, device->minor, vma->vm_start);
+  }
+  return ret ? -EAGAIN : 0;
+}
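+
+/*
+ * User-space sketch (device node name illustrative): map one 4KB page of
+ * torus group registers for direct, non-cached access:
+ *
+ *     int fd = open("/dev/bgptorus_g0", O_RDWR);
+ *     volatile void *regs = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ *                                MAP_SHARED, fd, 0);
+ */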
+
+/* ************************************************************************* */
+/*                  BG/P network: release device                             */
+/* ************************************************************************* */
+
+static int bgpnet_device_release (struct inode *inode, struct file * filp)
+{
+  struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data;
+
+  /*Ensure exclusive access*/
+  if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+
+  dev->current = NULL;
+  up(&dev->sem);
+
+  TRACE("bgpnet: device (%d,%d) successfully released",
+         MAJOR(inode->i_rdev), MINOR(inode->i_rdev));
+  return 0;
+}
+
+
+static int bgpnet_device_ioctl (struct inode *inode,
+                                struct file * filp,
+                                unsigned int cmd,
+                                unsigned long arg)
+{
+  return 0;
+}
+
+static int bgtorus_mappable_module_init(void)
+{
+/*     unsigned long long tr0, tr1; */
+    unsigned long long ts0, ts1;
+
+    TRACEN(k_t_init,"bgtorus_mappable_module_init") ;
+
+/*     tr0=((unsigned long long)_BGP_UA_COL0<<32)  + _BGP_PA_COL0; */
+/*     tr1=((unsigned long long)_BGP_UA_COL1<<32)  + _BGP_PA_COL1; */
+    ts0=((unsigned long long)_BGP_UA_TORUS0<<32) + _BGP_PA_TORUS0;
+    ts1=((unsigned long long)_BGP_UA_TORUS1<<32) + _BGP_PA_TORUS1;
+
+//  bgptorus_g0, and bgptorus_g1 are added in 'bluegene_networks.c'; duplicating here gets warning messages
+//    bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0, -1, NULL);
+//    bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, "bgptorus_g1", ts1, -1, NULL);
+
+    mb();
+
+    return 0;
+
+}
+
+
+/* module_init(bgtorus_mappable_module_init); */
+
+module_init(torus_module_init);
+module_exit(torus_module_exit);
+
+
diff --git a/fs/Kconfig b/fs/Kconfig
index 93945dd..47927a4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -170,7 +170,7 @@
 
 config HUGETLBFS
 	bool "HugeTLB file system support"
-	depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
+	depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BLUEGENE || \
 		   (S390 && 64BIT) || BROKEN
 	help
 	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 33b7235..c0ab8ed 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -42,6 +42,39 @@
 #include <asm/param.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+
+static DEFINE_MUTEX(zepto_task_mutex);
+
+int bgWriteConsoleBlockDirect(const char* fmt,...);
+#define  BIGMEM_FAIL_MSG(msg)     bgWriteConsoleBlockDirect("bigmem process failed to exec: %s binfmt_elf.c(%d)\n",(msg),__LINE__)
+
+void print_mmaps(const char* label, int lineno)
+{
+    if(zepto_debug_level>1 ) { 
+	if( IS_ZEPTO_TASK(current) ) {
+	    struct mm_struct *mm = current->mm;
+	    struct vm_area_struct *vma;
+	    int cnt=0;
+
+	    down_read(&mm->mmap_sem);
+	    vma = mm->mmap;
+	    while(vma) {
+		zepto_debug(2,"%d.%08lx:%08lx  ",
+		       cnt++,vma->vm_start, vma->vm_end );
+		vma = vma->vm_next;
+	    }
+	    zepto_debug(2,"print_mmaps %s(%d)\n",label, lineno);
+	    up_read(&mm->mmap_sem);
+	}
+    }
+}
+
+#define  Z  if(IS_ZEPTO_TASK(current)) {zepto_debug(2,"%s(%d)\n",__FILE__,__LINE__)}
+#endif
+
+
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
@@ -86,14 +119,23 @@
 {
 	start = ELF_PAGEALIGN(start);
 	end = ELF_PAGEALIGN(end);
-	if (end > start) {
+#ifdef CONFIG_ZEPTO_MEMORY
+	if (enable_bigmem && IS_ZEPTO_TASK(current)) {
+	    zepto_debug(2,"skip do_brk(%08lx,%08lx)  binfmt_elf.c(%d)\n",
+		   start, end - start, __LINE__);
+	} else {
+#endif
+	   if (end > start) {
 		unsigned long addr;
 		down_write(&current->mm->mmap_sem);
 		addr = do_brk(start, end - start);
 		up_write(&current->mm->mmap_sem);
 		if (BAD_ADDR(addr))
 			return addr;
+	   }
+#ifdef CONFIG_ZEPTO_MEMORY
 	}
+#endif
 	current->mm->start_brk = current->mm->brk = end;
 	return 0;
 }
@@ -161,7 +203,42 @@
 	int ei_index = 0;
 	const struct cred *cred = current_cred();
 	struct vm_area_struct *vma;
+#ifdef CONFIG_ZEPTO_MEMORY
+	unsigned long orig_p = p;
+	unsigned long stack_vma_end = 0;
+	unsigned long bigmem_stack_adj = 0;
 
+	/* find stack vma and re-calculate sp for bigmem */
+	if( IS_ZEPTO_TASK(current) ) {
+	    struct mm_struct *mm = current->mm;
+	    struct vm_area_struct *vma;
+	    unsigned stackused;
+
+	    down_read(&mm->mmap_sem);
+	    vma = mm->mmap;
+	    while(vma) {
+		if( vma->vm_end >  stack_vma_end ) 
+		    stack_vma_end = vma->vm_end;
+		vma = vma->vm_next;
+	    }
+	    up_read(&mm->mmap_sem);
+
+
+	    stackused = stack_vma_end - p;
+	    p = get_bigmem_region_end() - stackused;
+
+	    /* copy argv, envp str content from stack vma to bigmem */
+	    if( copy_from_user((void*)p, (void*)orig_p, stackused ) ) {
+		BIGMEM_FAIL_MSG("copy_from_user");
+		return -EFAULT;
+	    }
+
+	    bigmem_stack_adj = stack_vma_end - get_bigmem_region_end();
+
+	    zepto_debug(2,"stack_vma_end=%08lx  bigmem_end=%08x\n", 
+			stack_vma_end,  get_bigmem_region_end() );
+	}
+#endif
 	/*
 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
 	 * evictions by the processes running on the same package. One
@@ -273,15 +350,23 @@
 	sp = (elf_addr_t __user *)bprm->p;
 #endif
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"sp=%p bprm->p=%p  %s@binfmt_elf.c(%d)\n",
+			(void*)sp, (void*)bprm->p, __func__,__LINE__);
+	    print_mmaps("stack vma", __LINE__);
+	}
+#endif
 
 	/*
 	 * Grow the stack manually; some architectures have a limit on how
 	 * far ahead a user-space access may be in order to grow the stack.
 	 */
-	vma = find_extend_vma(current->mm, bprm->p);
+  	vma = find_extend_vma(current->mm, bprm->p);
 	if (!vma)
 		return -EFAULT;
 
+
 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
 	if (__put_user(argc, sp++))
 		return -EFAULT;
@@ -290,8 +375,19 @@
 
 	/* Populate argv and envp */
 	p = current->mm->arg_end = current->mm->arg_start;
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    /* arg and env strings already copied into bigmem region */
+	    p -= bigmem_stack_adj;
+	}
+#endif
 	while (argc-- > 0) {
 		size_t len;
+#ifdef CONFIG_ZEPTO_MEMORY
+		if (IS_ZEPTO_TASK(current)) {
+			zepto_debug(2, "argc=%d p=%08lx *p=%02x\n", argc, p, *((char *)p));
+		}
+#endif
 		if (__put_user((elf_addr_t)p, argv++))
 			return -EFAULT;
 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
@@ -302,6 +398,12 @@
 	if (__put_user(0, argv))
 		return -EFAULT;
 	current->mm->arg_end = current->mm->env_start = p;
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"envp p=%p  %s@binfmt_elf.c(%d)\n",
+			(void*)p, __func__,__LINE__);
+	}
+#endif
 	while (envc-- > 0) {
 		size_t len;
 		if (__put_user((elf_addr_t)p, envp++))
@@ -317,8 +419,14 @@
 
 	/* Put the elf_info on the stack in the right place.  */
 	sp = (elf_addr_t __user *)envp + 1;
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"auxv sp=%p  %s@binfmt_elf.c(%d)\n",
+			sp, __func__,__LINE__);
+	}
+#endif
 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
-		return -EFAULT;
+	    return -EFAULT;
 	return 0;
 }
 
@@ -339,6 +447,20 @@
 	if (!size)
 		return addr;
 
+
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    if ( off ) {
+		zepto_debug(2,"elf_map()  calls do_mmap() addr=%08lx  len=%08lx off=%08lx  fs/binfmt_elf.c(%d)\n",
+		       ELF_PAGESTART(addr),
+			    size, off,__LINE__ );
+	    } else {
+		zepto_debug(2,"map_addr=%08lx  fs/binfmt_elf.c(%d)\n",
+		       ELF_PAGESTART(addr), __LINE__ );
+	    }
+	}
+#endif
+
 	down_write(&current->mm->mmap_sem);
 	/*
 	* total_size is the size of the ELF (interpreter) image.
@@ -524,6 +646,12 @@
 
 	/* Map the last of the bss segment */
 	if (last_bss > elf_bss) {
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"do_brk()  start=%08lx len=%08lx  fs/binfmt_elf.c(%d)\n",
+		   elf_bss, last_bss - elf_bss, __LINE__);
+	}
+#endif
 		down_write(&current->mm->mmap_sem);
 		error = do_brk(elf_bss, last_bss - elf_bss);
 		up_write(&current->mm->mmap_sem);
@@ -567,6 +695,79 @@
 #endif
 }
 
+
+#ifdef CONFIG_ZEPTO_MEMORY
+static int zepto_task_init_bigmem(unsigned bigmem_start)
+{
+	struct vm_area_struct *mpnt;
+	struct mm_struct *mm = current->mm;
+
+	init_bigmem_tlb( bigmem_start );
+	install_bigmem_tlb(); 
+	fill_zero_bigmem();
+
+	/*
+	 * Create a vma to cover the bigmem region.  The bigmem region is
+	 * backed by semi-statically installed TLB entries.
+	 */
+	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	if (!mpnt) {
+	    BIGMEM_FAIL_MSG("kmem_cache_alloc");
+		return -EFAULT;
+	}
+	memset(mpnt, 0, sizeof(*mpnt));
+
+	down_write(&mm->mmap_sem);
+
+	mpnt->vm_mm = mm;
+	mpnt->vm_start = get_bigmem_region_start() ;
+	mpnt->vm_end   = get_bigmem_region_end() ;
+
+	mpnt->vm_flags = MAP_FIXED|MAP_PRIVATE|VM_IO|VM_DONTEXPAND|VM_RESERVED|VM_PFNMAP;
+	mpnt->vm_page_prot = PROT_WRITE|PROT_READ|PROT_EXEC;
+
+	if(insert_vm_struct(mm, mpnt)) {
+	    BIGMEM_FAIL_MSG("insert_vm_struct");
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, mpnt);
+		return -EFAULT;
+	}
+
+	mm->stack_vm = mm->total_vm = vma_pages(mpnt);  /* NOTE: stack_vm is used basically for accounting */
+
+	up_write(&mm->mmap_sem);
+
+	zepto_debug(2,"vma[0x%08x,0x%08x) is inserted  binfmt_elf.c(%d)\n",
+			get_bigmem_region_start(), get_bigmem_region_end(),__LINE__);
+
+	return 0;
+}
+
+
+
+static void zepto_task_init_sched_affinity(int coreid) 
+{
+	cpumask_t mask;
+	int prev_cid = smp_processor_id();
+	zepto_debug(1,"bigmem_process_new() => %d\n", coreid);
+	
+	SET_ZEPTO_TASK(current, 1);
+	//set_tsk_thread_flag(current, TIF_USER_NOINT);
+
+	cpus_clear(mask);
+	cpu_set(coreid, mask);
+	sched_setaffinity(current->pid, &mask);  
+	yield();
+
+	zepto_debug(1,"A zepto task(pid=%d) is created cid=%d prev_cid=%d\n", current->pid, 
+		    smp_processor_id(), prev_cid);
+}
+
+
+
+#endif /* CONFIG_ZEPTO_MEMORY */
+
+
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
@@ -764,6 +965,20 @@
 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* Check see if this is Zepto task or not. */
+	if(loc->elf_ex.e_flags & ZEPTO_ELF_HDR_FLAG ) {
+	    if( bigmem_process_all_active() ) {
+		BIGMEM_FAIL_MSG("bigmem_process_all_active");
+		goto out_free_dentry;
+	    } else {
+			int coreid = bigmem_process_new();
+			zepto_debug(2, "bigmem cid=%d\n",coreid);
+			zepto_task_init_sched_affinity(coreid);
+	    }
+	}
+#endif
+
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
 	arch_pick_mmap_layout(current->mm);
@@ -772,15 +987,40 @@
 	   change some of these later */
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
+
+#ifdef CONFIG_ZEPTO_MEMORY
+	print_mmaps("Prepare stack   binfmt_elf.c",__LINE__);
+
+	if(enable_bigmem && IS_ZEPTO_TASK(current) ) {
+
+		if (zepto_task_init_bigmem(loc->elf_ex.e_entry)) {
+		    BIGMEM_FAIL_MSG("zepto_task_init_bigmem");
+
+			send_sig(SIGKILL, current, 0);
+			goto out_free_dentry;
+		}
+	} 
+#endif /* CONFIG_ZEPTO_MEMORY */
+
+	/* NOTE: On bigmem, the contents of the arg pages is copied to the
+	 * bigmem region in create_elf_tables() */
+
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
 	if (retval < 0) {
-		send_sig(SIGKILL, current, 0);
-		goto out_free_dentry;
+	    send_sig(SIGKILL, current, 0);
+	    goto out_free_dentry;
 	}
-	
+
 	current->mm->start_stack = bprm->p;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"current->mm start_stack=0x%08lx  fs/binfmt_elf.c(%d)\n",
+		   current->mm->start_stack, __LINE__ );
+	}
+#endif
+
 	/* Now we do a little grungy work by mmaping the ELF image into
 	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
@@ -797,6 +1037,16 @@
 			/* There was a PT_LOAD segment with p_memsz > p_filesz
 			   before this one. Map anonymous pages, if needed,
 			   and clear the area.  */
+#ifdef CONFIG_ZEPTO_MEMORY
+			if( IS_ZEPTO_TASK(current) ) {
+			    BIGMEM_FAIL_MSG("unlikely_brk");
+			    //printk(KERN_ERR "[Z] unlikely_brk()  start=%08lx end=%08lx  fs/binfmt_elf.c(%d)\n",
+				   // elf_bss + load_bias, elf_brk + load_bias, __LINE__);
+			    retval = -ENOEXEC;
+			    send_sig(SIGKILL, current, 0);
+			    goto  out_free_dentry;
+			}
+#endif
 			retval = set_brk (elf_bss + load_bias,
 					  elf_brk + load_bias);
 			if (retval) {
@@ -843,14 +1093,79 @@
 #endif
 		}
 
-		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
+#ifdef CONFIG_ZEPTO_MEMORY
+		if( enable_bigmem && IS_ZEPTO_TASK(current) ) {
+		    unsigned pageoffset = ELF_PAGEOFFSET(elf_ppnt->p_vaddr);
+		    unsigned s_off_file = elf_ppnt->p_offset - pageoffset;
+		    unsigned vaddr = elf_ppnt->p_vaddr;
+		    int i;
+		    int retry;
+		    /*
+		      load text and data section into bigmem
+		    */
+		    
+		    zepto_debug(2,"Elf section copying: pageoffset=%08x s_off_file=%08x  va=%08x size=%08x fs/binfmt_elf.c(%d)\n",
+			   pageoffset, s_off_file,  elf_ppnt->p_vaddr,  elf_ppnt->p_filesz, __LINE__ );
+
+		    print_mmaps("Reading bigmem text",__LINE__);
+
+		    mutex_lock(&zepto_task_mutex);
+		    for(i=0; i<elf_ppnt->p_filesz/PAGE_SIZE; i++ ) {
+			// zepto_debug(2,"kernel_read() off=%08x vadd=%08x size=%08lx\n", s_off_file, vaddr, PAGE_SIZE );
+			for(retry=0;retry<10;retry++) {
+			    retval = kernel_read(bprm->file, s_off_file, (void*)vaddr, PAGE_SIZE );
+			    if(retval ==  PAGE_SIZE ) break;
+			    schedule();
+			}
+
+			if(retval !=  PAGE_SIZE ) {
+			    mutex_unlock(&zepto_task_mutex);
+			    BIGMEM_FAIL_MSG("kernel_read");
+			    //printk(KERN_ERR "[Z] kernel_read() failed. retval=%d %s\n", retval,
+			    //(retval==-EBADF)?"BADF":(retval==-EINVAL)?"INVAL":(retval==-EFAULT)?"FAULT":"unknown");
+
+			    if (retval >= 0)
+				retval = -EIO;
+			    goto out_free_ph;
+			}
+			s_off_file += PAGE_SIZE;
+			vaddr += PAGE_SIZE;
+		    }
+
+		    //  zepto_debug(2,"kernel_read() off=%08x vadd=%08x size=%08lx\n", s_off_file, vaddr, (elf_ppnt->p_filesz % PAGE_SIZE) );
+		    for(retry=0;retry<10;retry++) {
+			retval = kernel_read(bprm->file, s_off_file, (void*)vaddr, (elf_ppnt->p_filesz % PAGE_SIZE) );
+			if( retval == (elf_ppnt->p_filesz % PAGE_SIZE) ) break;
+			schedule();
+		    }
+
+		    if(retval !=  (elf_ppnt->p_filesz % PAGE_SIZE) ) {
+			mutex_unlock(&zepto_task_mutex);
+			BIGMEM_FAIL_MSG("kernel_read");
+			//printk(KERN_ERR "[Z] kernel_read() failed. retval=%d size=%lu %s\n", retval,
+			//(elf_ppnt->p_filesz % PAGE_SIZE) ,
+			//(retval==-EBADF)?"BADF":(retval==-EINVAL)?"INVAL":(retval==-EFAULT)?"FAULT":"unknown");
+
+			if (retval >= 0)
+			    retval = -EIO;
+			goto out_free_ph;
+		    }
+		    mutex_unlock(&zepto_task_mutex);
+
+		    error = 0;
+		} else {
+#endif
+		   error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 				elf_prot, elf_flags, 0);
-		if (BAD_ADDR(error)) {
+		   if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			retval = IS_ERR((void *)error) ?
 				PTR_ERR((void*)error) : -EINVAL;
 			goto out_free_dentry;
-		}
+		   }
+#ifdef CONFIG_ZEPTO_MEMORY
+		} /* IS_ZEPTO_TASK(current)  */
+#endif
 
 		if (!load_addr_set) {
 			load_addr_set = 1;
@@ -908,6 +1223,24 @@
 	 * mapping in the interpreter, to make sure it doesn't wind
 	 * up getting placed where the bss needs to go.
 	 */
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* XXX: fix the hard coded number and the bigmem layout!!! */
+	if(enable_bigmem && IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2, "set_brk() elf_bss=0x%08lx  elf_brk=0x%08lx load_bias=%08lx fs/binfmt_elf.c(%d)\n",
+		   elf_bss, elf_brk, load_bias, __LINE__);
+
+	    if( bigmem_mmap_init((elf_brk+0x04000000)&0xffff0000,
+			   (get_bigmem_region_end()-0x01000000)  )!=BIGMEM_MMAP_SUCCESS ) {
+		BIGMEM_FAIL_MSG("bigmem_mmap_init");
+		send_sig(SIGKILL, current, 0);
+		goto out_free_dentry;
+	    }
+	    zepto_debug(2, "bigmem_mmap_start=0x%08x bigmem_mmap_end=0x%08x\n",
+			get_bigmem_mmap_start() , get_bigmem_mmap_end()
+		);
+	}
+#endif
+
 	retval = set_brk(elf_bss, elf_brk);
 	if (retval) {
 		send_sig(SIGKILL, current, 0);
@@ -953,6 +1286,9 @@
 			goto out_free_dentry;
 		}
 	}
+#ifdef CONFIG_ZEPTO_MEMORY
+	print_mmaps("binfmt_elf.c",__LINE__);
+#endif
 
 	kfree(elf_phdata);
 
@@ -976,6 +1312,14 @@
 		send_sig(SIGKILL, current, 0);
 		goto out;
 	}
+
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"start_stack=%p after create_elf_tables()\n", (void*)bprm->p);
+	}
+#endif
+
+
 	/* N.B. passed_fileno might not be initialized? */
 	current->mm->end_code = end_code;
 	current->mm->start_code = start_code;
@@ -994,6 +1338,14 @@
 		   and some applications "depend" upon this behavior.
 		   Since we do not have the power to recompile these, we
 		   emulate the SVr4 behavior. Sigh. */
+
+#ifdef CONFIG_ZEPTO_MEMORY
+	    if( IS_ZEPTO_TASK(current) ) {
+		zepto_debug(2,"do_mmap() addr=0  len=%08lx off=0  fs/binfmt_elf.c(%d)\n",
+		       PAGE_SIZE, __LINE__ );
+	    }
+#endif
+
 		down_write(&current->mm->mmap_sem);
 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE, 0);
@@ -1045,6 +1397,10 @@
 	int retval, error, i, j;
 	struct elfhdr elf_ex;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	print_mmaps("binfmt_elf.c",__LINE__);
+#endif
+
 	error = -ENOEXEC;
 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
 	if (retval != sizeof(elf_ex))
@@ -1083,6 +1439,16 @@
 	while (eppnt->p_type != PT_LOAD)
 		eppnt++;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	if( IS_ZEPTO_TASK(current) ) {
+	    zepto_debug(2,"do_mmap() addr=%08lx  len=%08lx off=%08lx  fs/binfmt_elf.c(%d)\n",
+		   ELF_PAGESTART(eppnt->p_vaddr),
+		   (eppnt->p_filesz +  ELF_PAGEOFFSET(eppnt->p_vaddr)),
+		   (eppnt->p_offset -  ELF_PAGEOFFSET(eppnt->p_vaddr)),
+		   __LINE__);
+	}
+#endif
+
 	/* Now use mmap to map the library into memory. */
 	down_write(&current->mm->mmap_sem);
 	error = do_mmap(file,
@@ -1094,6 +1460,11 @@
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
 	up_write(&current->mm->mmap_sem);
+
+#ifdef CONFIG_ZEPTO_MEMORY
+	print_mmaps("binfmt_elf.c",__LINE__);
+#endif
+
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
 		goto out_free_ph;
 
@@ -1113,6 +1484,10 @@
 	}
 	error = 0;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	print_mmaps("binfmt_elf.c",__LINE__);
+#endif
+
 out_free_ph:
 	kfree(elf_phdata);
 out:
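
The stack relocation performed in create_elf_tables() above is plain pointer
algebra: the argv/envp strings are copied so they end at
get_bigmem_region_end() instead of at the top of the temporary stack vma, and
every pointer into them shifts down by one constant.  A self-contained sketch
with made-up addresses:

    #include <assert.h>

    int main(void)
    {
        unsigned long stack_vma_end = 0xb0000000;  /* top of temporary stack vma */
        unsigned long bigmem_end    = 0x40000000;  /* get_bigmem_region_end(), assumed */
        unsigned long p             = 0xaffff800;  /* current stack pointer */

        unsigned long stackused = stack_vma_end - p;          /* bytes in use */
        unsigned long new_p     = bigmem_end - stackused;     /* relocated sp */
        unsigned long adj       = stack_vma_end - bigmem_end; /* bigmem_stack_adj */

        /* any address x in the old stack maps to x - adj in bigmem */
        assert(new_p == p - adj);
        return 0;
    }
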
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 672368f..5ee8961 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -230,8 +230,11 @@
 	if (desc->timestamp_valid) {
 		desc->entry->fattr->time_start = desc->timestamp;
 		desc->entry->fattr->gencount = desc->gencount;
-	} else
+	}
+#if !defined(CONFIG_BGP_NFS_FIX)
+	else
 		desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
+#endif
 	return 0;
 }
 
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 43d2394..a747ec7 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -15,6 +15,10 @@
 #include <asm/pgtable.h>
 #include "internal.h"
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_bigmem.h>
+#endif
+
 void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 {
 }
@@ -53,6 +57,9 @@
 	 * Tagged format, for easy grepping and expansion.
 	 */
 	seq_printf(m,
+#ifdef CONFIG_ZEPTO_MEMORY
+                "BigMem:         %8lu kB\n"
+#endif
 		"MemTotal:       %8lu kB\n"
 		"MemFree:        %8lu kB\n"
 		"Buffers:        %8lu kB\n"
@@ -98,6 +105,9 @@
 		"VmallocTotal:   %8lu kB\n"
 		"VmallocUsed:    %8lu kB\n"
 		"VmallocChunk:   %8lu kB\n",
+#ifdef CONFIG_ZEPTO_MEMORY
+                (unsigned long)(get_bigmem_size())/1024,
+#endif
 		K(i.totalram),
 		K(i.freeram),
 		K(i.bufferram),
diff --git a/include/linux/KernelFxLog.h b/include/linux/KernelFxLog.h
new file mode 100644
index 0000000..3766013
--- /dev/null
+++ b/include/linux/KernelFxLog.h
@@ -0,0 +1,35 @@
+#ifndef __KernelFxLogger_h__
+#define __KernelFxLogger_h__
+
+static const char * FindShortPathName(const char *PN, unsigned int length)  __attribute__ ((unused)) ;
+static const char * FindShortPathName(const char *PN, unsigned int length)
+  {
+  int slashcount = 0;
+  int i;
+  for( i = length-1; i >= 0 ; i-- )
+    {
+    if( PN[i] == '/' )
+      {
+      slashcount++;
+      if( slashcount == 3 )
+        break;
+      }
+    }
+  return (i >= 0) ? PN + i : PN ;  /* i is -1 when there are fewer than 3 slashes */
+  }
+
+
+#define KernelFxLog(dbgcat, fmt, args...)     \
+  do {                \
+    if(dbgcat)        \
+    {                 \
+      static const char filename[] = __FILE__ ;  \
+      printk(KERN_INFO " %5d %1X ..%20s %4d %30s() " fmt "\n",   \
+          current->pid,     \
+          current_thread_info()->cpu, \
+          FindShortPathName(filename,sizeof(filename)), __LINE__, __FUNCTION__, ## args);    \
+    }             \
+  } while (0)
+
+
+#endif
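
A hypothetical call site for the macro above; my_debug_enabled is an
illustrative per-category flag, not part of this patch:

    #include <linux/sched.h>
    #include <linux/KernelFxLog.h>

    static int my_debug_enabled = 1;        /* illustrative category flag */

    static void example(int rc)
    {
        /* emits pid, cpu, shortened path, line, and function before the text */
        KernelFxLog(my_debug_enabled, "dma submit rc=%d", rc);
    }
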
diff --git a/include/linux/alignment_histograms.h b/include/linux/alignment_histograms.h
new file mode 100644
index 0000000..484d1d6
--- /dev/null
+++ b/include/linux/alignment_histograms.h
@@ -0,0 +1,38 @@
+#ifndef _LINUX_ALIGNMENT_HISTOGRAM_H
+#define _LINUX_ALIGNMENT_HISTOGRAM_H
+
+#include <linux/autoconf.h>
+
+#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM)
+
+enum {
+	k_histogram_size=16
+};
+struct alignment_histogram {
+	int src_alignment_histogram_crc[k_histogram_size] ;
+	int dst_alignment_histogram_crc[k_histogram_size] ;
+	int rel_alignment_histogram_crc[k_histogram_size] ;
+	int src_alignment_histogram_copy[k_histogram_size] ;
+	int dst_alignment_histogram_copy[k_histogram_size] ;
+	int rel_alignment_histogram_copy[k_histogram_size] ;
+	int tagged[k_histogram_size] ;
+	long long int qcopybytes ;
+	long long int copybytes ;
+	long long int copybytesshort ;
+	long long int copybytesmisalign ;
+	long long int copybytesbroke ;
+	long long int crcbytes ;
+	long long int csumpartialbytes ;
+	int min_size_of_interest ;
+};
+extern struct alignment_histogram al_histogram ;
+
+#define INC_AL_HISTOGRAM(Name,Address,Size) \
+	{ if((Size) >= al_histogram.min_size_of_interest) { al_histogram.Name[(Address)&(k_histogram_size-1)] += 1 ; } }
+#define AL_HISTOGRAM(Name,Index) (al_histogram.Name[(Index)&(k_histogram_size-1)])
+#else
+#define INC_AL_HISTOGRAM(Name,Address,Size)
+#define AL_HISTOGRAM(Name,Index) 0
+#endif
+
+#endif
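
As a usage sketch (the function is hypothetical), a copy routine would feed the
histograms as below; with CONFIG_DEBUG_ALIGNMENT_HISTOGRAM off,
INC_AL_HISTOGRAM compiles away entirely:

    #include <linux/string.h>
    #include <linux/alignment_histograms.h>

    static void instrumented_copy(void *dst, const void *src, unsigned long size)
    {
        INC_AL_HISTOGRAM(src_alignment_histogram_copy, (unsigned long)src, size);
        INC_AL_HISTOGRAM(dst_alignment_histogram_copy, (unsigned long)dst, size);
        /* relative alignment: src/dst offset difference, over 16 buckets */
        INC_AL_HISTOGRAM(rel_alignment_histogram_copy,
                         (unsigned long)src - (unsigned long)dst, size);
        memcpy(dst, src, size);
    }
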
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9f31538..a72bcae 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -646,7 +646,7 @@
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
  * @mask: the first cpumask pointer
- * @and: the second cpumask pointer
+ * @andmask: the second cpumask pointer
  *
  * This saves a temporary CPU mask in many places.  It is equivalent to:
  *	struct cpumask tmp;
@@ -656,9 +656,9 @@
  *
  * After the loop, cpu is >= nr_cpu_ids.
  */
-#define for_each_cpu_and(cpu, mask, and)				\
+#define for_each_cpu_and(cpu, mask, andmask)				\
 	for ((cpu) = -1;						\
-		(cpu) = cpumask_next_and((cpu), (mask), (and)),		\
+		(cpu) = cpumask_next_and((cpu), (mask), (andmask)),	\
 		(cpu) < nr_cpu_ids;)
 #endif /* SMP */
 
diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h
index e576b84..eeb3fb4 100644
--- a/include/linux/kmalloc_sizes.h
+++ b/include/linux/kmalloc_sizes.h
@@ -19,27 +19,34 @@
 	CACHE(32768)
 	CACHE(65536)
 	CACHE(131072)
-#if KMALLOC_MAX_SIZE >= 262144
+#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU)
 	CACHE(262144)
 #endif
-#if KMALLOC_MAX_SIZE >= 524288
+#ifdef CONFIG_BGP
+/*  Intended for 'large' allocations of DMA buffers at boot time, because I cannot get bootmem_alloc to work */
+	CACHE(262144)
 	CACHE(524288)
-#endif
-#if KMALLOC_MAX_SIZE >= 1048576
 	CACHE(1048576)
-#endif
-#if KMALLOC_MAX_SIZE >= 2097152
 	CACHE(2097152)
-#endif
-#if KMALLOC_MAX_SIZE >= 4194304
 	CACHE(4194304)
-#endif
-#if KMALLOC_MAX_SIZE >= 8388608
+#if defined(CONFIG_HUGE_KMALLOC)
 	CACHE(8388608)
-#endif
-#if KMALLOC_MAX_SIZE >= 16777216
 	CACHE(16777216)
-#endif
-#if KMALLOC_MAX_SIZE >= 33554432
 	CACHE(33554432)
+	CACHE(67108864)
+	CACHE(134217728)
+	CACHE(268435456)
+	CACHE(536870912)
 #endif
+#endif
+#ifndef CONFIG_MMU
+	CACHE(524288)
+	CACHE(1048576)
+#ifdef CONFIG_LARGE_ALLOCS
+	CACHE(2097152)
+	CACHE(4194304)
+	CACHE(8388608)
+	CACHE(16777216)
+	CACHE(33554432)
+#endif /* CONFIG_LARGE_ALLOCS */
+#endif /* CONFIG_MMU */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1aca6ce..83e31f4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -21,7 +21,11 @@
 
 /* Free memory management - zoned buddy allocator.  */
 #ifndef CONFIG_FORCE_MAX_ZONEORDER
+#ifdef CONFIG_HUGETLB_PAGE
+#define MAX_ORDER 13
+#else
 #define MAX_ORDER 11
+#endif
 #else
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif
diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e6..8e9e437 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -62,7 +62,11 @@
  * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
  * and other sensitive information are never written to disk.
  */
+#if defined(CONFIG_INFINIBAND)
+#define MLOCK_LIMIT (10*1024*PAGE_SIZE)
+#else
 #define MLOCK_LIMIT	((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024)
+#endif
 
 /*
  * Due to binary compatibility, the actual resource numbers
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9dcf956..69e7b70 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -124,8 +124,13 @@
 
 struct sk_buff;
 
+#if defined(CONFIG_BGP)
+/* Set 'high' to give scope for ZRL 'soft Iwarp' over the BlueGene torus */
+#define MAX_SKB_FRAGS 18
+#else
 /* To allow 64K frame to be packed as single skb without frag_list */
 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
+#endif
 
 typedef struct skb_frag_struct skb_frag_t;
 
@@ -1706,7 +1711,12 @@
 						    const int offset, void *to,
 						    const unsigned int len)
 {
+#if defined(CONFIG_BGP_TORUS)
+    /* This version of 'copy' will use the BlueGene floating point unit when appropriate */
+    __copy_tofrom_user(to, skb->data + offset, len) ;
+#else
 	memcpy(to, skb->data + offset, len);
+#endif
 }
 
 static inline void skb_copy_to_linear_data(struct sk_buff *skb,
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 6a40c76..18b8dcf 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -12,10 +12,13 @@
 
 #ifdef __KERNEL__
 
+/* C++ has 'false' and 'true' as keywords, so the enum doesn't work there */
+#if !defined(__cplusplus)
 enum {
 	false	= 0,
 	true	= 1
 };
+#endif
 
 #undef offsetof
 #ifdef __compiler_offsetof
diff --git a/include/linux/zepto_bigmem.h b/include/linux/zepto_bigmem.h
new file mode 100644
index 0000000..29d9b54
--- /dev/null
+++ b/include/linux/zepto_bigmem.h
@@ -0,0 +1,83 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZEPTO_BIGMEM_H_DEFINED__
+#define __ZEPTO_BIGMEM_H_DEFINED__
+
+/* arch/powerpc/mm/zepto_bigmem.c */
+
+extern int  init_bigmem_tlb(unsigned entry);
+extern void fill_zero_bigmem(void);
+extern void free_bigmem_tlb(void);
+extern int  install_bigmem_tlb(void);
+extern int  in_bigmem(unsigned address);
+extern void init_bigmem_pa(void);
+
+/* this is for DMA region */
+extern unsigned long long get_entire_bigmem_pa_start(void);
+extern unsigned long long get_entire_bigmem_pa_end(void); 
+
+/* function for per proc */
+extern unsigned get_bigmem_region_start(void); 
+extern unsigned get_bigmem_region_end(void);
+extern unsigned get_bigmem_size(void);
+extern unsigned get_bigmem_pa_start(void);
+extern unsigned get_bigmem_pa_end(void);
+extern unsigned bigmem_virt2phy(unsigned long va);
+extern unsigned bigmem_virt2phy_cid(unsigned long va,int cid);
+
+
+
+extern void bigmem_process_reset(void);
+extern int  bigmem_process_new(void);
+extern int  bigmem_process_release(void);
+extern int  bigmem_process_active_count(void);
+extern int  bigmem_process_all_active(void);
+
+
+
+/* arch/powerpc/syslib/bgdd/zepto_task.c   */
+extern int  enable_bigmem;           
+
+/* arch/powerpc/syslib/bgdd/zepto_setup_treeroute.c */
+extern int  bigmem_nprocs_per_node;  
+
+static inline int bigmem_process_cid(void) {
+    if(bigmem_nprocs_per_node==4) 	
+	return smp_processor_id();
+    else if(bigmem_nprocs_per_node==2) 	
+	return smp_processor_id()&0x2; /* core 0 and 2 will be used */
+    else
+	return 0;
+}
+
+/* defined in arch/powerpc/mm/init_32.c */
+extern unsigned long __bigmem_size ;   /* total physical memory for bigmem */
+
+extern int bgp4GB; /* =1 if BGP has 4GB of memory, otherwise we assume BGP memory size is 2GB */
+
+#endif
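
A small sketch of how the per-process helpers compose; this is the same bounds
check the mm/memory.c, mm/mmap.c, and kernel/futex.c hunks later in this patch
apply inline (the function itself is illustrative):

    #include <linux/zepto_bigmem.h>

    /* does a user address fall inside the caller's bigmem window? */
    static int addr_in_my_bigmem(unsigned long addr)
    {
        return addr >= get_bigmem_region_start() &&
               addr <  get_bigmem_region_end();
    }
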
diff --git a/include/linux/zepto_bigmem_mmap.h b/include/linux/zepto_bigmem_mmap.h
new file mode 100644
index 0000000..6249c58
--- /dev/null
+++ b/include/linux/zepto_bigmem_mmap.h
@@ -0,0 +1,57 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+
+#ifndef __ZEPTO_BIGMEM_MMAP_H_DEFINED__
+#define __ZEPTO_BIGMEM_MMAP_H_DEFINED__
+
+typedef enum {
+    BIGMEM_MMAP_SUCCESS,
+    BIGMEM_MMAP_FAILURE,
+} BIGMEM_MMAP_status;
+
+#define  BIGMEM_MMAP_ALLOCATION_FAILURE   ((unsigned)-1)
+
+unsigned    allocate_bigmem_mmap_section(unsigned len);
+BIGMEM_MMAP_status remove_bigmem_mmap_section(unsigned addr);
+
+unsigned    get_bigmem_mmap_start(void);
+unsigned    get_bigmem_mmap_end(void);
+
+BIGMEM_MMAP_status update_bigmem_mmap_start(unsigned addr);
+BIGMEM_MMAP_status update_bigmem_mmap_end(unsigned addr);
+
+BIGMEM_MMAP_status bigmem_mmap_init(unsigned start, unsigned end);
+BIGMEM_MMAP_status bigmem_mmap_finalize(void);
+
+struct bigmem_mmap_section_struct            /* bigmem mapping area */
+{ 
+    struct rb_node  rb_node;
+    unsigned start,end;       /* section [start,end) */
+};
+
+#endif
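
The intended allocator contract, as a hedged sketch (the 1 MB size is
illustrative): sections are carved out of the
[get_bigmem_mmap_start(), get_bigmem_mmap_end()) window, tracked in the rb-tree
nodes above, and released by their start address:

    #include <linux/zepto_bigmem_mmap.h>

    static unsigned alloc_example(void)
    {
        unsigned addr = allocate_bigmem_mmap_section(1 << 20);
        if (addr == BIGMEM_MMAP_ALLOCATION_FAILURE)
            return 0;
        /* ... use [addr, addr + (1 << 20)) ... */
        remove_bigmem_mmap_section(addr);
        return addr;
    }
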
diff --git a/include/linux/zepto_debug.h b/include/linux/zepto_debug.h
new file mode 100644
index 0000000..1a61e29
--- /dev/null
+++ b/include/linux/zepto_debug.h
@@ -0,0 +1,43 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZEPTO_DEBUG__
+#define __ZEPTO_DEBUG__
+
+#ifdef CONFIG_ZEPTO_DEBUG 
+
+extern int zepto_debug_level; /* defined in arch/powerpc/kernel/setup_32.c */
+
+#define zepto_debug(level,format,...)  do { if ((level) <= zepto_debug_level) printk("Z: " format, ##__VA_ARGS__); } while (0)
+
+#else
+
+#define zepto_debug(a,...)
+
+#endif
+
+#endif
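
Usage is level-gated: a message prints only when its level is at or below the
global zepto_debug_level.  Illustrative fragments (the surrounding function is
hypothetical):

    #include <linux/sched.h>
    #include <linux/zepto_debug.h>

    static void debug_example(unsigned long va)
    {
        zepto_debug(1, "coarse event: pid=%d\n", current->pid); /* usually on */
        zepto_debug(3, "per-page detail: va=%08lx\n", va);      /* verbose runs */
    }
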
diff --git a/include/linux/zepto_task.h b/include/linux/zepto_task.h
new file mode 100644
index 0000000..5659a91
--- /dev/null
+++ b/include/linux/zepto_task.h
@@ -0,0 +1,63 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZEPTO_TASK_H_DEFINED__
+#define __ZEPTO_TASK_H_DEFINED__
+
+#include <linux/zepto_debug.h>
+
+/*  This value may be used in the future version of the official Linux kernel.
+    See <linux/personality.h>
+*/
+#define PER_ZEPTO_TASK 0x0020000
+
+#define SET_ZEPTO_TASK(task, val)           \
+           do {     \
+                if (val)                                      \
+                       task->personality |= PER_ZEPTO_TASK;    \
+               else                                           \
+                       task->personality &= ~PER_ZEPTO_TASK;   \
+           } while(0)
+
+#define IS_ZEPTO_TASK(task) ((task->personality&PER_ZEPTO_TASK)!=0)
+
+#define  ZEPTO_ELF_HDR_FLAG   0x00000080    /* e_flags */
+
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_bigmem.h>
+#include <linux/zepto_bigmem_mmap.h>
+#endif
+
+extern int zepto_task_error(const char* fmt,...);  
+/* 
+defined in arch/powerpc/syslib/bgdd/zepto_task.c 
+
+This function is used to print a critical error from a zepto task.
+*/
+
+#endif
+
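
Because the flag lives in task->personality it is inherited across fork() and
lasts until an exec resets the personality.  A minimal sketch of the macro pair
(the function is illustrative):

    #include <linux/sched.h>
    #include <linux/zepto_task.h>

    static void mark_and_test(struct task_struct *tsk)
    {
        SET_ZEPTO_TASK(tsk, 1);
        if (IS_ZEPTO_TASK(tsk))
            zepto_debug(1, "pid %d is a zepto task\n", tsk->pid);
    }
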
diff --git a/include/net/tcp_hiatus.h b/include/net/tcp_hiatus.h
new file mode 100644
index 0000000..7b61940
--- /dev/null
+++ b/include/net/tcp_hiatus.h
@@ -0,0 +1,31 @@
+#ifndef _NET_TCP_HIATUS_H
+#define _NET_TCP_HIATUS_H
+
+/*
+ * Attempt to streamline TCP. Gather statistics on tx sleeps
+ */
+enum {
+	k_tcp_launched,  /*  Number of frames launched */
+	k_tcp_wait_for_sndbuf,
+	k_tcp_wait_for_memory,
+	k_tcp_defer_mtu_probe,
+	k_tcp_defer_cwnd_quota,
+	k_tcp_defer_snd_wnd,
+	k_tcp_defer_nagle,
+	k_tcp_defer_should,
+	k_tcp_defer_fragment,
+	k_tcp_launch_failed,
+	k_tcp_hiatus_reasons
+};
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+extern int tcp_hiatus_counts[k_tcp_hiatus_reasons] ;
+#endif
+
+static inline void increment_tcp_hiatus_count(int X)
+{
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+	tcp_hiatus_counts[X] += 1 ;
+#endif
+}
+
+#endif
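
Transmit paths tally why a send had to pause; with CONFIG_TCP_HIATUS_COUNTS off
the helper compiles to nothing, so the probes cost nothing.  A hypothetical
probe site:

    #include <net/tcp_hiatus.h>

    static void account_launch(int frame_sent)
    {
        increment_tcp_hiatus_count(frame_sent ? k_tcp_launched
                                              : k_tcp_launch_failed);
    }
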
diff --git a/include/zepto/VirtualMap.h b/include/zepto/VirtualMap.h
new file mode 100644
index 0000000..d0e45d5
--- /dev/null
+++ b/include/zepto/VirtualMap.h
@@ -0,0 +1,50 @@
+#ifndef __VIRTUALMAP_H_DEFINED__
+#define __VIRTUALMAP_H_DEFINED__
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+
+#define _BGP_VA_SCRATCH    0xFE000000
+
+#define _BGP_VA_BLIND      0xFFF90000
+#define _BGP_VA_BLIND_TRANS  0xFFFA0000
+
+#define _BGP_VA_TORUS0      0xFFFB0000
+#define _BGP_VA_TORUS1      0xFFFC0000
+
+#define _BGP_VA_DMA          0xFFFD0000
+#define _BGP_VA_DMA0         0xFFFD0000
+#define _BGP_VA_DMA1         0xFFFD1000
+#define _BGP_VA_DMA2         0xFFFD2000
+#define _BGP_VA_DMA3         0xFFFD3000
+
+#define _BGP_VA_TREE0       0xFFFDC000
+#define _BGP_VA_TREE1       0xFFFDD000
+
+#define _BGP_VA_SRAM            0xFFFF8000
+#define _BGP_VA_SRAM0           0xFFFF8000
+#define _BGP_VA_SRAM1           0xFFFFC000
+#define _BGP_VA_SRAMECC         0xFFFE0000
+#define _BGP_VA_SRAM_UNCORRECTED   0xFFFE0000
+#define _BGP_VA_SRAM_ECC_ACCESS       0xFFFE8000
+#define _BGP_VA_SRAMERR             0xFFFDFC00
+
+#define _BGP_VA_LOCKBOX     0xFFFF0000
+#define _BGP_VA_LOCKBOX_SUP   0xFFFF0000
+#define _BGP_VA_LOCKBOX_USR  0xFFFF4000
+
+#define _BGP_VA_UPC        0xFFFDA000
+#define _BGP_VA_UPC_CTL     0xFFFDB000
+
+#define _BGP_VA_TOMAL      0xFFFD4000
+#define _BGP_VA_XEMAC      0xFFFD8000
+
+#define _BGP_VA_DEVBUS      0xFFFD9000
+#define _BGP_VA_BIC        0xFFFDE000
+
+
+__END_DECLS
+
+#endif  /* #ifndef __VIRTUALMAP_H_DEFINED__ */
diff --git a/include/zepto/bgp_NodeState.h b/include/zepto/bgp_NodeState.h
new file mode 100644
index 0000000..342c66f
--- /dev/null
+++ b/include/zepto/bgp_NodeState.h
@@ -0,0 +1,20 @@
+#ifndef	__BGP_NODESTATE_H_DEFINE__
+#define	__BGP_NODESTATE_H_DEFINE__
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+extern inline int _bgp_GetInitCore(void)
+{
+    return 0;
+}
+extern inline int _bgp_GetRunningCores(void)
+{
+    return 4;
+}
+
+
+__END_DECLS
+
+#endif 
diff --git a/include/zepto/zcl_spi.h b/include/zepto/zcl_spi.h
new file mode 100644
index 0000000..fbc14f3
--- /dev/null
+++ b/include/zepto/zcl_spi.h
@@ -0,0 +1,146 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZCL_SPI_H_DEFINED__
+#define __ZCL_SPI_H_DEFINED__
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#ifdef __ZCL_KERNEL__
+#error "This should not be included from Linux kernel!"
+#endif
+
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <bpcore/ppc450_inlines.h>
+
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+extern uint32_t zcl_PhysicalProcessorID(void);
+extern uint32_t zcl_ProcessCount(void);
+extern int zcl_ProcessorCount(void);
+
+extern unsigned  zcl_get_bigmemory_va_start(void);
+extern unsigned  zcl_get_bigmemory_pa_start(void);
+extern unsigned  zcl_get_bigmemory_len(void);
+extern int       zcl_virt2phy(unsigned long va, unsigned long *pa);
+
+extern int zcl_AllocateLockBox(uint32_t lockid, uint32_t numlocks,
+				  uint32_t** ptr, uint32_t flags);
+
+
+extern uint32_t  zcl_spi_CounterGroupQueryFree(uint32_t type, uint32_t group,
+					       uint32_t* n_subgroups,
+					       uint32_t* subgroups);
+
+extern uint32_t zcl_spi_CounterGroupAllocate(uint32_t type,
+                                      uint32_t group,
+                                      uint32_t num_subgroups,
+                                      uint32_t* subgroups,
+                                      uint32_t target,
+                                      uint32_t handler,
+                                      uint32_t* handler_parm,
+                                      uint32_t interruptGroup,
+					     uint32_t* _cg_ptr);
+
+
+extern uint32_t zcl_spi_InjFifoGroupQueryFree(
+    uint32_t group, 
+    uint32_t* num_fifos, 
+    uint32_t* fifo_ids);
+
+extern uint32_t zcl_spi_InjFifoGroupAllocate(   uint32_t group,
+						uint32_t num_fifos,
+						uint32_t* fifo_ids,
+						uint16_t* priorities,
+						uint16_t* locals,
+						uint8_t* ts_inj_maps,
+						uint32_t* fg_ptr );
+
+
+
+extern uint32_t zcl_spi_InjFifoInitById(uint32_t* fg_ptr,
+					int  fifo_id,
+					uint32_t* va_start,
+					uint32_t* va_head,
+					uint32_t* va_end);
+
+
+extern  uint32_t zcl_spi_RecFifoSetMap(uint32_t* rec_map);
+
+extern uint32_t zcl_spi_RecFifoGetFifoGroup(
+    uint32_t*                         fifogroup,
+    int                               group,
+    int                               target);
+
+extern uint32_t zcl_spi_RecFifoInitByID(
+    uint32_t*          fg_ptr,
+    int                fifo_id,
+    void               *va_start,
+    void               *va_head,
+    void               *va_end  );
+
+
+extern uint32_t zcl_spi_ChgCounterInterruptEnables(uint32_t enable);
+
+extern uint32_t zcl_spi_globalBarrier(unsigned msec);
+
+extern uint32_t zcl_spi_debug_tag(unsigned long tag);
+
+extern uint32_t zcl_spi_donothing(void);
+
+extern int zcl_getpersonality(char* personality, size_t size);
+
+extern int zcl_Coord2Rank(uint32_t xcoord, uint32_t ycoord, uint32_t zcoord, uint32_t tcoord, uint32_t* rank, uint32_t* numnodes);
+extern int zcl_Rank2Coord(uint32_t rank, uint32_t* xcoord, uint32_t* ycoord, uint32_t* zcoord, uint32_t* tcoord);
+
+#ifndef kernel_coords_t_defined
+typedef struct _Kernel_Coordinates {
+    unsigned char x;
+    unsigned char y;
+    unsigned char z;
+    unsigned char t;
+} kernel_coords_t;
+#define kernel_coords_t_defined
+#endif
+
+extern int zcl_Rank2Coords(kernel_coords_t* coordinates, uint32_t len);
+
+extern int zcl_rank(void);
+extern int zcl_size(void);
+
+
+__END_DECLS
+
+
+#endif /* #ifndef __ZCL_SPI_H_DEFINED__ */
diff --git a/include/zepto/zcl_trace.h b/include/zepto/zcl_trace.h
new file mode 100644
index 0000000..7f01fbd
--- /dev/null
+++ b/include/zepto/zcl_trace.h
@@ -0,0 +1,63 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZCL_TRACE_H_DEFINED__
+#define __ZCL_TRACE_H_DEFINED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    /* debugging functions and macros */
+
+    typedef void  (*ZCL_TRACE_FUNC)(char* msg,...);
+
+    void   zcl_register_trace(ZCL_TRACE_FUNC func);
+    void   zcl_trace_start(unsigned level);
+    void   zcl_trace_stop(void);
+    char*  zcl_format(const char* fmt,...);
+    void   zcl_trace(unsigned level, char* fmt,...);
+    const char* zcl_basename(const char* p);
+    void   zcl_error(char* fmt,...);
+
+#define ZCL__FILE__  zcl_basename(__FILE__)
+
+#define ZCL_DEBUG  1
+#ifdef ZCL_DEBUG
+#define ZCL_TRACE(LEVEL,STRING)   zcl_trace((LEVEL),"%s@%s(%d): %s\n",__func__,ZCL__FILE__,__LINE__,(STRING))
+#else
+#define ZCL_TRACE(LEVEL,STRING)  do {} while(0)
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/include/zepto/zepto_syscall.h b/include/zepto/zepto_syscall.h
new file mode 100644
index 0000000..b65bf31
--- /dev/null
+++ b/include/zepto/zepto_syscall.h
@@ -0,0 +1,154 @@
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+#ifndef __ZEPTO_SYSCALL_H_DEFINED__
+#define __ZEPTO_SYSCALL_H_DEFINED__
+
+/*
+  Systemcall related files:
+
+  include/asm-powerpc/unistd.h
+  include/zepto/zepto-syscall.h
+  arch/powerpc/kernel/systbl.S
+  arch/ppc/mm/zepto_bigmem.c
+  arch/ppc/syslib/bgdd/zepto_bluegene_dma.c
+  arch/ppc/syslib/bgdd/zepto_bluegene_lockbox.c
+*/
+
+#define ZEPTO_GENERIC_SYSCALL_NO     (1048)
+#define ZEPTO_BIGMEM_SYSCALL_NO      (1049)
+#define ZEPTO_LOCKBOX_SYSCALL_NO     (1050)
+#define ZEPTO_DMA_SYSCALL_NO         (1051)          
+
+
+/* sys_zepto_generic() in arch/ppc/mm/zepto_bigmem.c */
+enum { 
+    ZEPTOSC_NULL = 100,        /* return 0. do nothing */
+    ZEPTOSC_FLIP,              /* return (~val). this is used to check the zepto kernel */
+    ZEPTOSC_COREID,            /* return coreid. val is unused */
+    ZEPTOSC_ZEPTO_TASK,        /* return 1 if the current task is a zepto task */
+    ZEPTOSC_GETDEC,            /* return decrementer value */
+};
+
+/* sys_zepto_bigmem() in arch/ppc/mm/zepto_bigmem.c */
+enum {
+    ZEPTOSC_BIGMEM_N_SEGS = 200,     /* return n_segs(# of bigmem segments) (currently 1). val is unused */
+    ZEPTOSC_BIGMEM_VA_START,         /* val is the seg number.  [coreid*n_segs, coreid*(n_segs+1) ) */
+    ZEPTOSC_BIGMEM_PA_START,
+    ZEPTOSC_BIGMEM_LEN,
+    ZEPTOSC_SCRATCHPAD_VA_START = 300,
+    ZEPTOSC_SCRATCHPAD_PA_START,
+    ZEPTOSC_SCRATCHPAD_LEN,
+};
+
+/* sys_zepto_lockbox() in arch/ppc/syslib/bgdd/zepto_bluegene_lockbox.c */
+enum {
+    ZEPTOSC_LOCKBOX_ALLOCATE = 300,
+    ZEPTOSC_LOCKBOX_RESET,
+};
+
+/* sys_zepto_dma() in arch/ppc/syslib/bgdd/zepto_bluegene_dma.c */
+enum {
+    ZEPTOSC_DMA_COUNTERGROUPQUERYFREE = 400,
+    ZEPTOSC_DMA_COUNTERGROUPALLOCATE,
+    ZEPTOSC_DMA_INJFIFOGROUPQUERYFREE,
+    ZEPTOSC_DMA_INJFIFOGROUPALLOCATE,   
+    ZEPTOSC_DMA_INJFIFOINITBYID,
+    ZEPTOSC_DMA_RECFIFOSETMAP,
+    ZEPTOSC_DMA_RECFIFOGETFIFOGROUP,
+    ZEPTOSC_DMA_RECFIFOINITBYID,
+    ZEPTOSC_DMA_CHGCOUNTERINTERRUPTENABLES,
+};
+
+
+/* AllocateLockBox_struct is used in arch/ppc/syslib/bgdd/zepto_bluegene_lockbox.c */
+
+#define ALLOCATELOCKBOX_MAX_LOCK (32)  /* lockbox barrier requires 5 locks. */
+struct AllocateLockBox_struct
+{
+    unsigned   locknum;
+    unsigned   numlocks; 
+    unsigned   lockbox_va[ALLOCATELOCKBOX_MAX_LOCK]; 
+    unsigned   flags;
+};
+
+
+/* DMA related struct used in arch/ppc/syslib/bgdd/zepto_bluegene_dma.c */
+
+struct CounterGroupQueryFree_struct {
+    unsigned  type;
+    unsigned  group;
+    unsigned  n_subgroups;   /* is filled by kernel code */
+    unsigned  *subgroups;    /* pointer to a user buffer */
+};
+
+struct CounterGroupAllocate_struct {
+    unsigned  type;
+    unsigned  group;
+    unsigned  num_subgroups;
+    unsigned* subgroups;     /* points to a user buffer, read-only */
+    unsigned* cg_ptr;        /* points to a special buffer */
+};
+
+struct InjFifoGroupQueryFree_struct {
+    unsigned   group;
+    unsigned   num_fifos;     /* altered by kernel */
+    unsigned*  fifo_ids;      /* points to a user buffer, filled by kernel */
+};
+
+struct InjFifoGroupAllocate_struct {
+    unsigned  group;
+    unsigned  num_fifos;
+    unsigned* fifo_ids;     /* points to a user buffer */
+    unsigned short* priorities;   /* points to a user buffer */
+    unsigned short* locals;       /* points to a user buffer */
+    unsigned char*  ts_inj_maps;  /* points to a user buffer */
+    unsigned* fg_ptr;       /* points to a special buffer */
+};
+
+struct InjFifoInitByID_struct {
+    unsigned* fg_ptr;
+    int       fifo_id;
+    unsigned* va_start;    
+    unsigned* va_head;     
+    unsigned* va_end;      
+};
+
+struct RecFifoGetFifoGroup_struct {
+    unsigned*  fg_ptr;
+    int   group;
+    int   target;
+};
+
+struct RecFifoInitByID_struct {
+    unsigned*     fg_ptr;
+    int           fifo_id;
+    void          *va_start;
+    void          *va_head;
+    void          *va_end;
+};
+
+#endif
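
From user space the generic entry point can be probed with ZEPTOSC_FLIP, which
returns the bitwise complement of its argument (a stock kernel returns -ENOSYS
instead).  The (cmd, val) argument order is an assumption in this sketch:

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <zepto/zepto_syscall.h>

    static int running_on_zepto(void)
    {
        long r = syscall(ZEPTO_GENERIC_SYSCALL_NO, ZEPTOSC_FLIP, 0x5a5a);
        return r == (long)~0x5a5a;  /* complement comes back only on zepto */
    }
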
diff --git a/kernel/exit.c b/kernel/exit.c
index efd30cc..793ded3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,11 @@
 #include <linux/init_task.h>
 #include <trace/sched.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_debug.h>
+#include <linux/zepto_task.h>
+#endif
+
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
@@ -1069,6 +1074,31 @@
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
+#ifdef CONFIG_ZEPTO_MEMORY
+        if( IS_ZEPTO_TASK(tsk) ) {
+		zepto_debug(2, "exit() is called from a compute task. pid=%d tpid=%d empty=%d is_leader=%d enable_bigmem=%d\n",
+			    tsk->pid, tsk->tgid, thread_group_empty(current), tsk == tsk->group_leader, enable_bigmem);
+
+		/* the thread leader is exiting, but the thread group is not empty yet */
+		while (delay_group_leader(tsk))
+			yield();
+
+		if( thread_group_empty( tsk ) ) {
+			/* at this point, only the thread leader is still alive */
+			zepto_debug(1, "cleaning up flatmem: pid=%d, tpid=%d, empty=%d, is_leader=%d\n", 
+				    tsk->pid, tsk->tgid, thread_group_empty(tsk), tsk == tsk->group_leader);
+			/* computetask exit procedure must be called when the last thread exits */
+			bigmem_process_release();
+			if ( enable_bigmem ) {
+				if ( bigmem_mmap_finalize() != BIGMEM_MMAP_SUCCESS) {
+					printk(KERN_ALERT  "[Z] bigmem_mmap_finalize() failed.\n");
+				}
+				free_bigmem_tlb();
+			}
+			zepto_debug(1, "A zepto task exited.\n");
+		}
+	}
+#endif
 	exit_mm(tsk);
 
 	if (group_dead)
diff --git a/kernel/futex.c b/kernel/futex.c
index 438701a..2ea8a93 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -56,6 +56,10 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+#endif
+
 #include <asm/futex.h>
 
 #include "rtmutex_common.h"
@@ -219,6 +223,17 @@
 		return -EINVAL;
 	address -= key->both.offset;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* XXX: not sure this is enough */
+	if ( enable_bigmem && IS_ZEPTO_TASK(current) && 
+	     ( address >= get_bigmem_region_start() &&
+	       address < get_bigmem_region_end() ) ) 
+	{
+		key->private.mm = mm;
+		key->private.address = address;
+	}   
+#endif
+
 	/*
 	 * PROCESS_PRIVATE futexes are fast.
 	 * As the mm cannot disappear under us and the 'key' only needs
diff --git a/kernel/printk.c b/kernel/printk.c
index e3602d0..7191a6e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -34,6 +34,10 @@
 #include <linux/syscalls.h>
 
 #include <asm/uaccess.h>
+#if defined(CONFIG_BLUEGENE)
+#include <asm/time.h>
+unsigned long long printk_clock_aligner ;
+#endif
 
 /*
  * Architectures can override it:
@@ -49,7 +53,10 @@
 
 /* We show everything that is MORE important than this.. */
 #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
-#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
+/*  Noisy kernel 7 */
+/* #define DEFAULT_CONSOLE_LOGLEVEL 7 */ /* anything MORE serious than KERN_DEBUG */
+/*  Quiet kernel 3 */
+#define DEFAULT_CONSOLE_LOGLEVEL 3 /* KERN_ERR */
 
 DECLARE_WAIT_QUEUE_HEAD(log_wait);
 
@@ -695,12 +702,20 @@
 				unsigned tlen;
 				unsigned long long t;
 				unsigned long nanosec_rem;
+				unsigned long tick_rem;
 
+#if defined(CONFIG_BLUEGENE)
+				t = get_tb() - printk_clock_aligner;
+				tick_rem = do_div(t, 850000000);
+				nanosec_rem = (tick_rem / 17) * 20; /* 850 MHz timebase: 1 tick = 20/17 ns */
+#else
 				t = cpu_clock(printk_cpu);
 				nanosec_rem = do_div(t, 1000000000);
-				tlen = sprintf(tbuf, "[%5lu.%06lu] ",
+#endif
+				tlen = sprintf(tbuf, "[%5lu.%06lu]:%x ",
 						(unsigned long) t,
-						nanosec_rem / 1000);
+						nanosec_rem / 1000,
+						printk_cpu);
 
 				for (tp = tbuf; tp < tbuf + tlen; tp++)
 					emit_log_char(*tp);
@@ -713,7 +728,10 @@
 
 		emit_log_char(*p);
 		if (*p == '\n')
+			{
 			new_text_line = 1;
+				if( p[1] == '\n' ) p++ ; /* Don't double-line-space */
+			}
 	}
 
 	/*
@@ -1300,3 +1318,6 @@
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
 #endif
+#if defined(CONFIG_BLUEGENE)
+EXPORT_SYMBOL(printk_clock_aligner);
+#endif
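
The timestamp change above converts an 850 MHz timebase delta into seconds and
microseconds: do_div() leaves the whole seconds in t and returns the tick
remainder, and (tick_rem/17)*20 rescales ticks to nanoseconds because one tick
is 20/17 ns.  An equivalent user-space sketch with a made-up tick count:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long t = 1234567890123ULL;        /* timebase ticks */
        unsigned long long secs = t / 850000000ULL;     /* whole seconds */
        unsigned long tick_rem = t % 850000000ULL;      /* ticks into second */
        unsigned long nsec = (tick_rem / 17) * 20;      /* 1 tick = 20/17 ns */

        printf("[%5llu.%06lu]\n", secs, nsec / 1000);
        return 0;
    }
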
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1bcf9cd..7e08397 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -95,7 +95,8 @@
 
 config DEBUG_SECTION_MISMATCH
 	bool "Enable full Section mismatch analysis"
-	depends on UNDEFINED
+	depends on UNDEFINED || BLUEGENE
+	default y if BLUEGENE
 	# This option is on purpose disabled for now.
 	# It will be enabled when we are down to a resonable number
 	# of section mismatch warnings (< 10 for an allyesconfig build)
diff --git a/make.sh b/make.sh
new file mode 100644
index 0000000..0042cad
--- /dev/null
+++ b/make.sh
@@ -0,0 +1,199 @@
+#!/bin/sh
+#***************************************************************************
+# ZEPTOOS:zepto-info
+#      This file is part of ZeptoOS: The Small Linux for Big Computers.
+#      See www.mcs.anl.gov/zeptoos for more information.
+# ZEPTOOS:zepto-info
+#
+# ZEPTOOS:zepto-fillin
+#      $Id:  $
+#      ZeptoOS_Version: 2.0
+#      ZeptoOS_Heredity: FOSS_ORIG
+#      ZeptoOS_License: GPL
+# ZEPTOOS:zepto-fillin
+#
+# ZEPTOOS:zepto-gpl
+#       Copyright: Argonne National Laboratory, Department of Energy,
+#                  and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+#       ZeptoOS License: GPL
+#  
+#       This software is free.  See the file ZeptoOS/misc/license.GPL
+#       for complete details on your rights to copy, modify, and use this
+#       software.
+# ZEPTOOS:zepto-gpl
+#***************************************************************************
+#
+#
+# $ ID: $
+
+_SRCDIR=`dirname $0`
+SRCDIR=`cd $_SRCDIR ; pwd`
+
+cd $SRCDIR
+
+#
+# default value
+#
+# compute node is default for 2.6.29 kernel
+#
+CONFIG=./arch/powerpc/configs/44x/bgpzepto_defconfig
+
+CROSS_COMPILE=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-
+BUILD_DIR_PREFIX=build-
+
+usage() {
+cat - <<EOF
+
+Usage: $0 [options]
+
+options:
+ --help             Show the help message
+ --builddirpre=STR  build directory prefix
+ --cross=STR        cross compiler prefix
+ --config=STR       kernel config file
+ --ramdisk=STR      path of the ramdisk (typically for compute nodes)
+ --spi              path of the SPI headers (required to compile zepto bigmem)
+EOF
+}
+
+_getval() {  echo "$1" | sed -e 's/^[^=]*=//' ; }
+
+
+while test $# -gt 0 ; do
+   case $1 in
+    --help | -h )
+      usage; exit 0 ;;
+    --config=* )
+      CONFIG=`_getval "$1"`
+      shift
+      ;;
+    --cross=* )
+      CROSS_COMPILE=`_getval "$1"`
+      shift
+      ;;
+    --builddirpre=* )
+      BUILD_DIR_PREFIX=`_getval "$1"`
+      shift
+      ;;
+    --ramdisk=* )
+      ZEPTO_CN_RAMDISK=`_getval "$1"`
+      shift
+      ;;
+    --spi=* )
+      ZEPTO_SPI=`_getval "$1"`
+      shift
+      ;;
+   *) 
+     break
+     ;;
+   esac
+done
+
+
+if [ ! -z "$ZEPTO_CN_RAMDISK" ] ; then
+    if [ ! -f "$ZEPTO_CN_RAMDISK" ] ; then
+	echo "$ZEPTO_CN_RAMDISK not found"
+	usage 
+	exit 1
+    fi
+fi
+
+
+
+# basic sanity check
+
+if [ ! -f $CONFIG ] ; then
+    echo "$CONFIG does not exist"
+    exit 1
+fi
+
+
+if [ ! -x ${CROSS_COMPILE}gcc ] ; then
+    echo "${CROSS_COMPILE} is not valid"
+fi 
+
+CONFIGNAME=`basename $CONFIG`
+_BUILDDIR=$SRCDIR/../${BUILD_DIR_PREFIX}$CONFIGNAME
+[ -d $_BUILDDIR ] || mkdir $_BUILDDIR
+if [ ! -d $_BUILDDIR ] ; then
+    echo "Faild to mkdir $_BUILDDIR"
+    exit 1
+fi 
+BUILDDIR=`cd $_BUILDDIR ; pwd`
+
+
+MAKE="make O=$BUILDDIR ARCH=powerpc CROSS_COMPILE=$CROSS_COMPILE"
+NCPUS=`if [ -x /usr/bin/getconf ] ; then getconf _NPROCESSORS_ONLN ; else echo 1; fi`
+
+
+
+echo ""
+echo "KERNEL_BUILDIR=$BUILDDIR"
+echo "KERNEL_CONFIG=$CONFIG"
+echo "CROSS_COMPILE=$CROSS_COMPILE"
+echo "MAKE=$MAKE"
+echo "NCPUS=$NCPUS"
+echo "ZEPTO_CN_RAMDISK=$ZEPTO_CN_RAMDISK"
+echo "ZEPTO_SPI=$ZEPTO_SPI"
+echo ""
+echo ""
+
+
+if [ ! -z $ZEPTO_SPI ] ; then
+    SPI_DIR=`(cd $ZEPTO_SPI;pwd)`
+    # check see if dir exists
+    for i in spi common cnk bpcore zepto ; do
+	if [ ! -d $SPI_DIR/$i ] ; then
+	    echo $SPI_DIR/$i not found
+	    exit 1
+	fi
+    done
+
+    ZSPI=arch/powerpc/include/zspi
+    [ -d $ZSPI ] || mkdir -p $ZSPI
+
+    for i in spi common cnk bpcore zepto ; do
+	if [ ! -L $ZSPI/$i ] ; then 
+	    echo Creating a link to $SPI_DIR/$i with $ZSPI/$i
+	    ( cd $ZSPI ; ln -s $SPI_DIR/$i )
+	fi
+    done
+fi
+
+
+if [ ! -z $ZEPTO_CN_RAMDISK ] ; then
+    mkdir -p $BUILDDIR/arch/powerpc/boot/images
+    cp $ZEPTO_CN_RAMDISK  $BUILDDIR/arch/powerpc/boot/ramdisk.image.gz
+fi
+
+if [ ! -f $BUILDDIR/.config -o $BUILDDIR/.config -ot $CONFIG ] ; then
+#   make mrproper ARCH=powerpc
+    ${MAKE} mrproper
+    cp $CONFIG $BUILDDIR/.config
+    ${MAKE} oldconfig
+fi
+
+if [ ! -z $ZEPTO_CN_RAMDISK ] ; then 
+    if [ z"$@" = z"" ] ; then
+       ${MAKE} -j${NCPUS} zImage.initrd $@
+       if [ $? -ne 0 ] ; then 
+	  exit 1
+       fi
+    fi
+else
+    ${MAKE} -j${NCPUS} $@
+    if [ $? -ne 0 ] ; then 
+        exit 1
+    fi
+fi
+
+
+echo ""
+if [ ! -z $ZEPTO_CN_RAMDISK ] ; then
+    echo "Kernel image: $BUILDDIR/arch/powerpc/boot/dtbImage.initrd.bgp"
+else
+    echo "Kernel image: $BUILDDIR/arch/powerpc/boot/dtbImage.bgp"
+fi
+echo ""
+
+exit 0
diff --git a/mm/Makefile b/mm/Makefile
index 72255be..5606f3a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,3 +33,4 @@
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_ZEPTO_MEMORY) += zepto_bigmem_mmap.o
\ No newline at end of file
diff --git a/mm/memory.c b/mm/memory.c
index d7df5ba..a2f05b6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -62,6 +62,10 @@
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+#endif
+
 #include "internal.h"
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -1361,6 +1365,15 @@
 {
 	int flags = 0;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* bigmem is not managed as pages, so pinning a bigmem range simply returns an error */
+	if (enable_bigmem && IS_ZEPTO_TASK(tsk)) {
+		if (get_bigmem_region_start() <= start && 
+		    start+len < get_bigmem_region_end()) 
+			return -EFAULT;
+	}
+#endif
+
 	if (write)
 		flags |= GUP_FLAGS_WRITE;
 	if (force)
@@ -3080,6 +3093,37 @@
 	if (!mm)
 		return 0;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	if (enable_bigmem && IS_ZEPTO_TASK(tsk)) {
+		if (get_bigmem_region_start() <= addr &&
+		    addr+len < get_bigmem_region_end()) {
+			if (tsk == current) {
+				if (write)
+					memcpy((void*)addr, buf, len);
+				else
+					memcpy(buf, (void*)addr, len);
+			} else {
+				unsigned addr_pa = (addr - get_bigmem_region_start())+get_bigmem_pa_start();
+				void __iomem *bigmem_addr;
+				
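+				/* the target task's bigmem is not mapped in
+				   this context, so temporarily map its
+				   physical range into kernel space */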
+				bigmem_addr = __ioremap((phys_addr_t)addr_pa, len, 0);
+				if (!bigmem_addr) {
+					printk(KERN_ERR "[Z] access_process_vm(): ioremap() failed. addr_pa=%p\n", (void*)addr_pa);
+					return 0;
+				}
+				if (write)
+					memcpy( bigmem_addr, buf, len);
+				else
+					memcpy( buf, bigmem_addr, len);
+				iounmap(bigmem_addr);
+			}
+
+			return len;
+		}
+	}
+#endif
+
+
 	down_read(&mm->mmap_sem);
 	/* ignore errors, just check how much was successfully transferred */
 	while (len) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 00ced3e..d7de8bf 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -33,6 +33,10 @@
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+#endif
+
 #include "internal.h"
 
 #ifndef arch_mmap_check
@@ -252,6 +256,26 @@
 	struct mm_struct *mm = current->mm;
 	unsigned long min_brk;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* NOTE: running_pid is 0 for bigmem explicit mmap */
+	if (enable_bigmem && IS_ZEPTO_TASK(current) && bigmem_process_active_count() > 0) {
+
+		zepto_debug(3, "brk=%08lx\n", brk);
+
+		down_write(&mm->mmap_sem);
+#ifdef CONFIG_COMPAT_BRK
+		if (brk < mm->end_code) 
+			retval = mm->brk;
+#else
+		if (brk < mm->start_brk)
+			retval = mm->brk;
+#endif
+		else 
+			retval = mm->brk = brk;
+		up_write(&mm->mmap_sem);
+		return retval;
+	}
+#endif
 	down_write(&mm->mmap_sem);
 
 #ifdef CONFIG_COMPAT_BRK
@@ -920,6 +944,35 @@
 	int error;
 	unsigned long reqprot = prot;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* NOTE: running_pid is 0 for bigmem explicit mmap */
+	if (IS_ZEPTO_TASK(current) && bigmem_process_active_count() > 0) {
+
+		zepto_debug(3, "do_mmap_pgoff addr=%08lx len=%08lx pgoff=%08lx %s %s %s %s%s%s\n",
+			    addr, len, pgoff, 
+			    (flags&MAP_SHARED)?"Shared":"Private",
+			    (flags&MAP_FIXED)?"Fixed":"",
+		            (flags&MAP_ANONYMOUS)?"Anon":"",
+			    (prot&PROT_READ)?"R":"-",
+			    (prot&PROT_WRITE)?"W":"-",
+			    (prot&PROT_EXEC)?"X":"-");
+		
+		if (enable_bigmem) {
+			if (flags & MAP_ANONYMOUS) {
+				unsigned ret;
+				ret = allocate_bigmem_mmap_section(len);
+				if (ret == BIGMEM_MMAP_ALLOCATION_FAILURE) {
+					printk(KERN_ERR "[Z] allocate_bigmem_mmap_section() failed\n");
+					return -ENOMEM;
+				}
+				zepto_debug(3, "do_mmap_pgoff()=>%08x\n", ret);
+				return ret;
+			}
+		}
+	}
+#endif
+
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
@@ -1877,6 +1930,18 @@
 	unsigned long end;
 	struct vm_area_struct *vma, *prev, *last;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	if (enable_bigmem && IS_ZEPTO_TASK(current)) {
+		if (get_bigmem_region_start() <= start && 
+		    start+len < get_bigmem_region_end()) {
+			if(remove_bigmem_mmap_section(start) != BIGMEM_MMAP_SUCCESS) {
+				printk(KERN_ERR "[Z] remove_bigmem_mmap_section(0x%08lx) failed\n", start);
+				return -EINVAL;
+			}
+			zepto_debug(3, "A bigmem_mmap region %08lx+%08x was removed.\n", start, len);
+		}
+	}
+#endif
 	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
 		return -EINVAL;
 
@@ -1982,6 +2047,14 @@
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* NOTE: running_pid is 0 for bigmem explicit mmap */
+	if (enable_bigmem && IS_ZEPTO_TASK(current) && bigmem_process_active_count() > 0) {
+		printk(KERN_WARNING "[Z] bigmem vma was already created\n");
+		return addr;
+	}
+#endif
+
 	len = PAGE_ALIGN(len);
 	if (!len)
 		return addr;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 258197b..116cc40 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -28,6 +28,10 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_ZEPTO_MEMORY
+#include <linux/zepto_task.h>
+#endif
+
 #ifndef pgprot_modify
 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 {
@@ -229,6 +233,16 @@
 	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
 		return -EINVAL;
 
+#ifdef CONFIG_ZEPTO_MEMORY
+	/* if mprotect target address is within bigmem range, just return */
+	if (IS_ZEPTO_TASK(current) && enable_bigmem) {
+		if (get_bigmem_region_start() <= start &&
+		    start+len < get_bigmem_region_end() ) {
+			return 0;
+		}
+	}
+#endif
+
 	if (start & ~PAGE_MASK)
 		return -EINVAL;
 	if (!len)
diff --git a/mm/zepto_bigmem_mmap.c b/mm/zepto_bigmem_mmap.c
new file mode 100644
index 0000000..a9f409b
--- /dev/null
+++ b/mm/zepto_bigmem_mmap.c
@@ -0,0 +1,936 @@
+/****************************************************************************/
+/* ZEPTOOS:zepto-info */
+/*     This file is part of ZeptoOS: The Small Linux for Big Computers.
+ *     See www.mcs.anl.gov/zeptoos for more information.
+ */
+/* ZEPTOOS:zepto-info */
+/* */
+/* ZEPTOOS:zepto-fillin */
+/*     $Id:  $
+ *     ZeptoOS_Version: 2.0
+ *     ZeptoOS_Heredity: FOSS_ORIG
+ *     ZeptoOS_License: GPL
+ */
+/* ZEPTOOS:zepto-fillin */
+/* */
+/* ZEPTOOS:zepto-gpl */
+/*      Copyright: Argonne National Laboratory, Department of Energy,
+ *                 and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+ *      ZeptoOS License: GPL
+ * 
+ *      This software is free.  See the file ZeptoOS/misc/license.GPL
+ *      for complete details on your rights to copy, modify, and use this
+ *      software.
+ */
+/* ZEPTOOS:zepto-gpl */
+/****************************************************************************/
+
+
+#ifdef __KERNEL__
+
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/semaphore.h>
+
+#include <linux/zepto_task.h>
+
+#define BIGMEM_MMAP_ALIGNMENT   (PAGE_SIZE)
+
+#else
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h> 
+#include "rbtree.h"
+#include "zepto_bigmem_mmap.h"
+
+#define BIGMEM_MMAP_ALIGNMENT   (64*1024)
+
+#endif
+
+#define MAX_BIGMEM_CORE (4)
+
+
+/*
+  Round addr up to the next BIGMEM_MMAP_ALIGNMENT boundary.
+  Used by _find_free_bigmem_mmap_region_no_lock().
+*/
+static unsigned next_bigmem_mmap_alignment(unsigned addr)
+{
+    unsigned mask = BIGMEM_MMAP_ALIGNMENT-1;
+    return ( addr & ~mask ) + ((addr&mask)!=0)*BIGMEM_MMAP_ALIGNMENT;
+}
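+/*
+  Worked example, assuming BIGMEM_MMAP_ALIGNMENT == 0x1000 for illustration:
+    next_bigmem_mmap_alignment(0x12345000) == 0x12345000   (already aligned)
+    next_bigmem_mmap_alignment(0x12345001) == 0x12346000   (rounded up)
+*/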
+
+
+#undef BIGMEM_MMAP_DEBUG  
+//#define BIGMEM_MMAP_DEBUG  
+
+static struct rb_root bigmem_mmap_rb_root[MAX_BIGMEM_CORE] = { RB_ROOT, RB_ROOT,RB_ROOT,RB_ROOT };
+static int            bigmem_mmap_initialized[MAX_BIGMEM_CORE] = { 0,0,0,0 };
+
+static unsigned bigmem_mmap_start[MAX_BIGMEM_CORE];
+static unsigned bigmem_mmap_end[MAX_BIGMEM_CORE];
+static unsigned bigmem_mmap_section_allocated_bytes[MAX_BIGMEM_CORE];
+static unsigned bigmem_mmap_n_sections_allocated[MAX_BIGMEM_CORE];
+
+
+#ifdef __KERNEL__
+
+
+static void* __allocatememory(unsigned size) {  return kmalloc(size, GFP_KERNEL); }
+static void  __freememory(void* ptr) { kfree(ptr); }
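+/* these one-line wrappers let the same allocator code build both in the
+   kernel and in the userspace test harness below (see the #else branch) */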
+
+#ifdef BIGMEM_MMAP_DEBUG 
+static void  _bigmem_mmap_debug_print(char* fmt,...) 
+{
+    va_list ap;
+    if( 2 <= zepto_debug_level ) {
+	va_start(ap, fmt);
+	vprintk(fmt, ap);
+	va_end(ap);
+    }
+}
+#else
+#define _bigmem_mmap_debug_print(fmt,...)
+#endif
+
+
+static void  _bigmem_mmap_error_print(char* fmt,...) 
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vprintk(fmt,ap);
+    va_end(ap);
+}
+
+struct semaphore  _bigmem_mmap_sem;
+
+
+void _bigmem_mmap_init_lock(int cid)
+{
+	sema_init(&_bigmem_mmap_sem,1);
+}
+
+void _bigmem_mmap_finalize_lock(int cid)
+{
+}
+
+void  _bigmem_mmap_lock(int cid) 
+{
+	down(&_bigmem_mmap_sem);
+}
+
+void  _bigmem_mmap_unlock(int cid)
+{
+	up(&_bigmem_mmap_sem);
+}
+
+
+#else
+/* for userspace test */
+
+static int bigmem_process_cid(void) {return 0;}
+
+
+static void* __allocatememory(unsigned size) {  return malloc(size); }
+static void  __freememory(void* ptr) { free(ptr); }
+
+#ifdef BIGMEM_MMAP_DEBUG 
+static void  _bigmem_mmap_debug_print(char* fmt,...) 
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vprintf(fmt, ap);
+    va_end(ap);
+}
+#else
+#define _bigmem_mmap_debug_print(fmt,...)
+#endif
+
+static void  _bigmem_mmap_error_print(char* fmt,...) 
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stdout,fmt,ap);
+    va_end(ap);
+}
+
+static int _bigmem_mmap_lock_depth[MAX_BIGMEM_CORE];
+
+void _bigmem_mmap_init_lock(int cid)
+{
+    _bigmem_mmap_lock_depth[cid] = 0;
+}
+
+void _bigmem_mmap_finalize_lock(int cid)
+{
+    _bigmem_mmap_lock_depth[cid] = 0;
+}
+
+
+#define  _bigmem_mmap_lock(cid)   \
+  do { \
+    if(_bigmem_mmap_lock_depth[cid]<0) {  \
+	printf("lock is already taken %s(%d)\n",__FILE__,__LINE__); exit(1); \
+    } else { \
+        _bigmem_mmap_lock_depth[cid]--; \
+    } \
+  } while(0)
+
+#define  _bigmem_mmap_unlock(cid) \
+  do { \
+    if(_bigmem_mmap_lock_depth[cid]==0 ) {  \
+	printf("lock is not held yet %s(%d)\n",__FILE__,__LINE__); exit(1); \
+    } else { \
+        _bigmem_mmap_lock_depth[cid]++; \
+    } \
+  } while(0)
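+/* note: the depth variable is really a flag here: 0 = unlocked, -1 = locked
+   (lock decrements, unlock increments); nested locking is trapped as an error */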
+
+#endif
+
+
+static int bigmem_mmap_section_alloc_count[MAX_BIGMEM_CORE];
+
+static struct bigmem_mmap_section_struct* _alloc_bigmem_mmap_section(int cid, unsigned start, unsigned end )
+{
+    struct bigmem_mmap_section_struct* new_section;
+
+    new_section = __allocatememory(sizeof(struct bigmem_mmap_section_struct));
+    if( !new_section ) return NULL;
+    bigmem_mmap_section_alloc_count[cid] ++;
+
+    memset( new_section, 0, sizeof(struct bigmem_mmap_section_struct) );
+    new_section->start = start;
+    new_section->end   = end;
+
+    return new_section;
+}
+static void _free_bigmem_mmap_elem(int cid, struct bigmem_mmap_section_struct*  ptr ) 
+{
+    if( ptr ) {
+	__freememory(ptr);
+	bigmem_mmap_section_alloc_count[cid]--;
+	// _bigmem_mmap_debug_print("* bigmem_mmap is free'ed  0x%08x\n",(unsigned)ptr );
+    }
+}
+
+
+/* ============================================================
+   functions that operate on rb nodes; they expect the
+   bigmem_mmap lock to be held.  internal use only
+   ============================================================ */
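+/* invariant: the tree is keyed by section start address and callers keep
+   sections non-overlapping, so an in-order walk visits sections in
+   ascending address order. */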
+static void _insert_bigmem_mmap_section_no_lock(int cid, struct rb_root *root, struct bigmem_mmap_section_struct *new)
+{
+    struct rb_node **link = &root->rb_node;
+    struct rb_node *parent = NULL;
+    int value = new->start;
+
+    while(*link) {
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp;
+	parent = *link;
+	bigmem_mmap_tmp = rb_entry(parent, struct bigmem_mmap_section_struct, rb_node);
+
+	if(bigmem_mmap_tmp->start > value) 
+	    link = &(*link)->rb_left;
+	else
+	    link = &(*link)->rb_right;
+    }
+    rb_link_node(&new->rb_node, parent, link);
+    rb_insert_color(&new->rb_node, root);
+}
+
+static void _remove_all_bigmem_mmap_sections_no_lock(int cid, struct rb_root *root)
+{
+    struct rb_node *node = rb_last(root);
+
+    while( node ) {
+	struct rb_node *node_tmp = node;
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node );
+	// _bigmem_mmap_debug_print("free %p / node_tmp=%p / start=0x%08x\n", bigmem_mmap_tmp, node_tmp, bigmem_mmap_tmp->start);
+	/* advance to the predecessor before erasing; node_tmp is invalid after rb_erase() */
+	node = rb_prev(node);
+	rb_erase(node_tmp, root);
+	_free_bigmem_mmap_elem(cid, bigmem_mmap_tmp);
+    }
+}
+
+
+static struct bigmem_mmap_section_struct*  _find_bigmem_mmap_section_no_lock(int cid, struct rb_root *root, unsigned addr)
+{
+    struct rb_node *node = root->rb_node;  /* top of the tree */
+    struct bigmem_mmap_section_struct *ret = 0;
+
+    if(!node) {
+	/* _bigmem_mmap_debug_print("_find_bigmem_mmap_section_no_lock() addr=0x%08x  empty rbtree!\n",addr); */
+	return 0;
+    }
+
+    while(node)   {
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node);
+	/*
+	_bigmem_mmap_debug_print("_find_bigmem_mmap_section_no_lock() addr=0x%08x  [0x%08x,0x%08x)\n",
+	       addr, bigmem_mmap_tmp->start,  bigmem_mmap_tmp->end );
+	*/
+	if( bigmem_mmap_tmp->end > addr ) {
+	    ret = bigmem_mmap_tmp;
+	    if( bigmem_mmap_tmp->start == addr )  {
+		break;
+	    }
+	    node = node->rb_left;
+	} else {
+	    node = node->rb_right;
+	}
+    }
+
+    if( !node ) {
+	/* no section starts exactly at addr; keep the candidate only if it actually contains addr */
+	if( !(ret && ret->start <= addr && addr < ret->end) ) {
+	    _bigmem_mmap_debug_print("_find_bigmem_mmap_section_no_lock() didn't find 0x%08x\n", addr);
+	    ret = 0;
+	}
+    }
+
+    return ret;
+}
+
+/*
+  find any region whose start > addr
+*/
+struct bigmem_mmap_section_struct*  _find_bigmem_mmap_section_any_bigger_no_lock(int cid, struct rb_root *root, unsigned addr)
+{
+    struct rb_node *node = root->rb_node;  /* top of the tree */
+    struct bigmem_mmap_section_struct *ret = 0;
+
+    if(!node) {
+	return 0;
+    }
+
+    while(node)   {
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node);
+	if( bigmem_mmap_tmp->start > addr ) {
+	    ret = bigmem_mmap_tmp;
+	    break;
+	} else {
+	    node = node->rb_right;
+	}
+    }
+    if( !node ) {
+	_bigmem_mmap_error_print("_find_bigmem_mmap_section_no_lock() didn't find 0x%08x\n", addr);
+	ret = 0;
+    }
+    return ret;
+}
+
+
+
+
+static unsigned _bigmem_mmap_allocatedmemory_no_lock(int cid)
+{
+    struct rb_node *node = rb_first(&bigmem_mmap_rb_root[cid]);
+    unsigned total=0;
+
+    while( node ) {
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node );
+	total += (bigmem_mmap_tmp->end - bigmem_mmap_tmp->start);
+	node = rb_next(node);
+    }
+    return total;
+}
+
+
+
+
+static unsigned _bigmem_mmap_freememory_no_lock(int cid)
+{
+    return (bigmem_mmap_end[cid] - bigmem_mmap_start[cid]) - bigmem_mmap_section_allocated_bytes[cid];
+}
+
+
+/* The start address of a free region is aligned by BIGMEM_MMAP_ALIGNMENT.
+   We assume that this function is only called to allocate a region on bigmem_mmap
+   via anonymous mmap().
+*/
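+/*
+  Search sketch: walk the sections in address order, tracking the (aligned)
+  end of the previous section, and return the first gap large enough to
+  hold len bytes; fall back to the gap before bigmem_mmap_end.
+*/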
+unsigned _find_free_bigmem_mmap_region_no_lock(int cid,unsigned len )
+{
+    struct rb_node *node = rb_first(&bigmem_mmap_rb_root[cid]);
+    unsigned prev_end = next_bigmem_mmap_alignment(bigmem_mmap_start[cid]);
+    struct bigmem_mmap_section_struct  region_found;
+    int     found=0;
+    int idx =0;
+
+    memset(&region_found, 0, sizeof(region_found));
+
+    if( !node ) {
+	if( len < (next_bigmem_mmap_alignment(bigmem_mmap_end[cid])-bigmem_mmap_start[cid]) ) {
+	    return bigmem_mmap_start[cid];
+	} else {
+	    _bigmem_mmap_error_print( "Not enough memory. len=0x%08x [0x%08x,0x%08x) free=0x%08x allocated=0x%08x\n", 
+			       len, bigmem_mmap_start[cid], bigmem_mmap_end[cid], _bigmem_mmap_freememory_no_lock(cid), _bigmem_mmap_allocatedmemory_no_lock(cid) );
+	    return BIGMEM_MMAP_ALLOCATION_FAILURE;
+	}
+    }
+    
+    _bigmem_mmap_debug_print("finding a region that can hold 0x%08x bytes\n", len);
+
+    while( node ) {
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node );
+
+	if( prev_end != bigmem_mmap_tmp->start ) {
+	    if( len < bigmem_mmap_tmp->start - prev_end ) {
+		region_found.start = prev_end;
+		region_found.end   = bigmem_mmap_tmp->start;
+		found++;
+		break;	/* first fit: return the lowest gap, as the seq test assumes */
+	    }
+	}
+	prev_end = next_bigmem_mmap_alignment(bigmem_mmap_tmp->end);
+	    
+	node = rb_next(node);
+	idx++;
+    }
+    if( found==0 && prev_end != bigmem_mmap_end[cid] ) {
+	if( len < bigmem_mmap_end[cid]-prev_end ) {
+	    region_found.start = prev_end;
+	    region_found.end   = bigmem_mmap_end[cid];
+	    found ++;
+	}
+    }
+
+    if( found == 0 ) {
+	_bigmem_mmap_error_print( "Not enough memory. len=0x%08x [0x%08x,0x%08x) free=0x%08x allocated=0x%08x\n", 
+			   len, bigmem_mmap_start[cid], bigmem_mmap_end[cid], _bigmem_mmap_freememory_no_lock(cid), _bigmem_mmap_allocatedmemory_no_lock(cid) );
+	return BIGMEM_MMAP_ALLOCATION_FAILURE;
+    }
+
+    _bigmem_mmap_debug_print("found a region[0x%08x,%08x) that can hold 0x%08x bytes\n", 
+		      region_found.start, region_found.end,     len);
+
+    return region_found.start;
+}
+
+
+/* ============================================================
+   The following function are called from other code.
+   ============================================================ */
+
+/* this function is called when brk is updated */
+BIGMEM_MMAP_status update_bigmem_mmap_start(unsigned addr)
+{
+    BIGMEM_MMAP_status ret = BIGMEM_MMAP_FAILURE;
+    struct bigmem_mmap_section_struct* bigmem_mmap_tmp;
+    int cid = bigmem_process_cid();
+
+    _bigmem_mmap_lock(cid);
+    bigmem_mmap_tmp = _find_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], addr);
+    if( bigmem_mmap_tmp ) {
+	_bigmem_mmap_error_print("update_bigmem_mmap_start(0x%08x) failed.  request address conflicts with region[0x%08x,0x%08x)\n",
+			  addr, bigmem_mmap_tmp->start, bigmem_mmap_tmp->end);
+	goto out;
+    }
+    bigmem_mmap_start[cid] = addr;
+    ret = BIGMEM_MMAP_SUCCESS;
+ out:
+    _bigmem_mmap_unlock(cid);
+    return ret;
+}
+
+/* this function is called when the end address is updated */
+BIGMEM_MMAP_status update_bigmem_mmap_end(unsigned addr)
+{
+    BIGMEM_MMAP_status ret = BIGMEM_MMAP_FAILURE;
+    struct bigmem_mmap_section_struct* bigmem_mmap_tmp;
+    int cid = bigmem_process_cid();
+
+    _bigmem_mmap_lock(cid);
+    bigmem_mmap_tmp = _find_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], addr);
+    if( bigmem_mmap_tmp ) {
+	_bigmem_mmap_error_print("update_bigmem_mmap_end(0x%08x) failed.  request address conflicts with region[0x%08x,0x%08x)\n",
+			  addr, bigmem_mmap_tmp->start, bigmem_mmap_tmp->end);
+	goto out;
+    }
+    bigmem_mmap_end[cid] = addr;
+    ret = BIGMEM_MMAP_SUCCESS;
+ out:
+    _bigmem_mmap_unlock(cid);
+    return ret;
+}
+
+unsigned get_bigmem_mmap_start(void) 
+{ 
+    unsigned ret=0;
+    int cid = bigmem_process_cid();
+    _bigmem_mmap_lock(cid);
+    ret = bigmem_mmap_start[cid];
+    _bigmem_mmap_unlock(cid);
+    return ret;
+}
+
+unsigned get_bigmem_mmap_end(void) 
+{ 
+    unsigned ret=0;
+    int cid = bigmem_process_cid();
+    _bigmem_mmap_lock(cid);
+    ret = bigmem_mmap_end[cid];
+    _bigmem_mmap_unlock(cid);
+    return ret;
+}
+
+
+unsigned create_bigmem_mmap_section( int cid, unsigned addr, unsigned len )
+{
+    struct bigmem_mmap_section_struct* bigmem_mmap_tmp;
+    unsigned ret=BIGMEM_MMAP_ALLOCATION_FAILURE;
+    unsigned addr_end = addr+len-4;
+    struct bigmem_mmap_section_struct*  new_bigmem_mmap;
+
+    _bigmem_mmap_lock(cid);
+    bigmem_mmap_tmp = _find_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], addr);
+    if( bigmem_mmap_tmp ) {
+	_bigmem_mmap_debug_print("addr(%08x) is within [%08x,%08x)\n",
+			 addr,
+			 bigmem_mmap_tmp->start, bigmem_mmap_tmp->end );
+	goto out;
+    }
+    bigmem_mmap_tmp = _find_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], addr_end);
+    if( bigmem_mmap_tmp ) {
+	_bigmem_mmap_debug_print("addr_end(%08x) is within [%08x,%08x)\n",
+			 addr_end,
+			 bigmem_mmap_tmp->start, bigmem_mmap_tmp->end);
+	goto out;
+    }
+
+    new_bigmem_mmap = _alloc_bigmem_mmap_section(cid, addr, addr+len);
+    if( !new_bigmem_mmap ) {
+	goto out;
+    }
+
+    /* XXX: add extra error check, or prove rb insertion never fails? */
+    _insert_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], new_bigmem_mmap);
+    ret = addr;
+ out:
+    _bigmem_mmap_unlock(cid);
+    return ret;
+}
+
+
+unsigned allocate_bigmem_mmap_section(unsigned len ) 
+{
+    unsigned ret = BIGMEM_MMAP_ALLOCATION_FAILURE;
+    int cid = bigmem_process_cid();
+
+    /* XXX: do we need to allow zero byte allocation? */
+    if( len==0 ) {
+	_bigmem_mmap_error_print("zero byte allocation is not supported.\n");
+	return BIGMEM_MMAP_ALLOCATION_FAILURE;
+    }
+
+    _bigmem_mmap_lock(cid);
+
+    ret = _find_free_bigmem_mmap_region_no_lock(cid, len);
+    if(ret == BIGMEM_MMAP_ALLOCATION_FAILURE ) {
+	_bigmem_mmap_error_print("allocate_bigmem_mmap_section(0x%08x) No free region found.\n",
+			  len);
+	ret = BIGMEM_MMAP_ALLOCATION_FAILURE;
+	goto out;
+    }
+
+    /* XXX: _alloc_bigmem_mmap_section() returns NULL on allocation failure;
+       don't hand a NULL node to the rb-tree */
+    {
+	struct bigmem_mmap_section_struct* new_section = _alloc_bigmem_mmap_section(cid, ret, ret+len);
+	if( !new_section ) {
+	    ret = BIGMEM_MMAP_ALLOCATION_FAILURE;
+	    goto out;
+	}
+	_insert_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], new_section);
+    }
+
+    bigmem_mmap_section_allocated_bytes[cid] +=  len;
+    bigmem_mmap_n_sections_allocated[cid]++;
+ out:
+    _bigmem_mmap_unlock(cid);
+
+    return ret;
+}
+
+
+
+
+BIGMEM_MMAP_status remove_bigmem_mmap_section(unsigned addr )
+{
+    struct bigmem_mmap_section_struct* bigmem_mmap_tmp;
+    BIGMEM_MMAP_status ret = BIGMEM_MMAP_FAILURE ;
+    int cid = bigmem_process_cid();
+
+    _bigmem_mmap_lock(cid);
+    bigmem_mmap_tmp = _find_bigmem_mmap_section_no_lock(cid, &bigmem_mmap_rb_root[cid], addr);
+    if( !bigmem_mmap_tmp ) {
+	_bigmem_mmap_error_print("remove_bigmem_mmap_section(): there is no region contains %08x\n", addr);
+	ret = BIGMEM_MMAP_FAILURE ;
+	goto out;
+    }
+    /* unlikely */
+    if( bigmem_mmap_tmp->start != addr ) {
+	_bigmem_mmap_error_print("Error: addr(%08x) does not match with region start [%08x,%08x)  bigmem_mmap_n_sections_allocated=%d\n",
+			  addr, bigmem_mmap_tmp->start, bigmem_mmap_tmp->end,bigmem_mmap_n_sections_allocated[cid] );
+	ret = BIGMEM_MMAP_FAILURE ;
+        goto out;
+    }
+
+    /* update accounting before freeing the node, to avoid a use-after-free */
+    bigmem_mmap_section_allocated_bytes[cid] -= (bigmem_mmap_tmp->end - bigmem_mmap_tmp->start );
+    bigmem_mmap_n_sections_allocated[cid]--;
+
+    /* XXX: rb_erase() and _free_bigmem_mmap_elem never fail? */
+    rb_erase(&bigmem_mmap_tmp->rb_node, &bigmem_mmap_rb_root[cid]);
+    _free_bigmem_mmap_elem(cid, bigmem_mmap_tmp);
+
+    ret = BIGMEM_MMAP_SUCCESS;
+ out:
+    _bigmem_mmap_unlock(cid);
+    return ret;
+}
+
+#ifdef __KERNEL__
+
+static int proc_bigmem_mmap_show(struct seq_file *m, void *v)
+{
+    struct rb_node *node;
+    int idx = 0;
+    unsigned  bigmem_virt2phy_cid(unsigned long va, int cid);  /* defined in arch/powerpc/mm/44x_mmu.c */
+    int cid=0;
+
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    for(cid=0; cid<bigmem_process_active_count(); cid++) {
+#endif
+	_bigmem_mmap_lock(cid);
+	node = rb_first(&bigmem_mmap_rb_root[cid]);
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+	seq_printf(m,"\n");
+	seq_printf(m,"[BigMem Process ID=%d]\n",cid);
+#endif
+	seq_printf(m,"bigmem_mmap          [%08x,%08x)\n", bigmem_mmap_start[cid], bigmem_mmap_end[cid]);
+	seq_printf(m,"No. of chunks  %d\n",  bigmem_mmap_n_sections_allocated[cid] );
+	seq_printf(m,"allocated      %d kB\n", bigmem_mmap_section_allocated_bytes[cid]/1024) ;
+	seq_printf(m,"free           %d kB\n", _bigmem_mmap_freememory_no_lock(cid)/1024 );
+	seq_printf(m,"\n");
+
+	while( node ) {
+	    struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node );
+
+	    seq_printf(m,"%3d  va:[%08x %08x)  pa:[%08x,%08x)\n", idx, 
+		       bigmem_mmap_tmp->start, bigmem_mmap_tmp->end,
+		       bigmem_virt2phy_cid(bigmem_mmap_tmp->start,cid),
+		       bigmem_virt2phy_cid(bigmem_mmap_tmp->end,cid)  );
+	    node = rb_next(node);
+	    idx++;
+	}
+	_bigmem_mmap_unlock(cid);
+#ifdef CONFIG_ZEPTO_COMPUTENODE
+    }
+#endif
+
+    return 0;
+}
+
+static int proc_bigmem_mmap_open(struct inode *inode, struct file *file)
+{
+    return single_open(file, proc_bigmem_mmap_show, NULL);
+}
+
+static struct file_operations proc_bigmem_mmap_operations = {
+    .open		= proc_bigmem_mmap_open,
+    .read		= seq_read,
+    .llseek		= seq_lseek,
+    .release	= single_release,
+};
+
+static struct proc_dir_entry *bigmem_proc_entry = NULL;
+
+static void register_bigmem_mmap_proc(void)
+{
+    if(bigmem_proc_entry) return;
+
+    bigmem_proc_entry = create_proc_entry("bigmem_mmap", 0, NULL);
+    if(bigmem_proc_entry) {
+	bigmem_proc_entry->proc_fops = &proc_bigmem_mmap_operations;
+	zepto_debug(2,"/proc/bigmem_mmap is registered\n");
+    } else {
+	printk(KERN_ERR "[Z] Failed to register /proc/bigmem_mmap\n");
+    }
+}
+
+#if 0
+/* we don't free /proc/bigmem_mmap once it is registered */
+static void unregister_bigmem_mmap_proc(void)
+{
+    remove_proc_entry("bigmem_mmap",bigmem_proc_entry);
+    bigmem_proc_entry=NULL;
+}
+#endif
+
+#endif
+
+
+
+BIGMEM_MMAP_status bigmem_mmap_init(unsigned start, unsigned end)
+{
+    BIGMEM_MMAP_status ret = BIGMEM_MMAP_FAILURE;
+    int cid = bigmem_process_cid();
+
+    _bigmem_mmap_init_lock(cid);
+
+    _bigmem_mmap_lock(cid);
+    if( bigmem_mmap_initialized[cid] ) {
+	_bigmem_mmap_error_print("bigmem_mmap already intialized!\n");
+	ret = BIGMEM_MMAP_FAILURE;
+	goto out;
+    }
+
+    bigmem_mmap_start[cid] = start;
+    bigmem_mmap_end[cid]   = end;
+
+    bigmem_mmap_section_allocated_bytes[cid] = 0;
+    bigmem_mmap_n_sections_allocated[cid] = 0;
+
+    _bigmem_mmap_debug_print("bigmem_mmap_init start=%08x end=%08x\n", bigmem_mmap_start[cid], bigmem_mmap_end[cid]);
+
+    ret = BIGMEM_MMAP_SUCCESS;
+    bigmem_mmap_initialized[cid] = 1;
+ out:
+    _bigmem_mmap_unlock(cid);
+
+#ifdef __KERNEL__
+    if(cid==0)  register_bigmem_mmap_proc();
+#endif
+
+    return ret; 
+}
+
+BIGMEM_MMAP_status bigmem_mmap_finalize(void)
+{
+    BIGMEM_MMAP_status ret = BIGMEM_MMAP_FAILURE;
+    int cid = bigmem_process_cid();
+
+    _bigmem_mmap_lock(cid);   /* take the lock first so the error paths can unlock at "out:" */
+    if( !bigmem_mmap_initialized[cid] ) {
+	_bigmem_mmap_error_print("bigmem_mmap not initialized yet?\n");
+	ret = BIGMEM_MMAP_FAILURE;
+	goto out;
+    }
+
+    _remove_all_bigmem_mmap_sections_no_lock(cid, &bigmem_mmap_rb_root[cid]);
+
+    if( bigmem_mmap_section_alloc_count[cid] != 0 ) {
+	_bigmem_mmap_error_print("memory corruption: bigmem_mmap_section_alloc_count=%d\n",
+			  bigmem_mmap_section_alloc_count[cid]);
+	ret = BIGMEM_MMAP_FAILURE;
+	goto out;   /* was a bare return, which leaked the lock */
+    }
+
+    bigmem_mmap_section_allocated_bytes[cid] = 0;
+    bigmem_mmap_n_sections_allocated[cid] = 0;
+
+    ret = BIGMEM_MMAP_SUCCESS;
+    bigmem_mmap_initialized[cid] = 0;
+ out:
+    _bigmem_mmap_unlock(cid);
+
+    _bigmem_mmap_finalize_lock(cid);
+
+    return ret;
+}
+
+
+void bigmem_mmap_traverse_print(int (*print_func)(const char*,...) )
+{
+    struct rb_node *node = rb_first(&bigmem_mmap_rb_root[bigmem_process_cid()]);
+    int idx = 0;
+
+    while( node ) {
+	struct bigmem_mmap_section_struct *bigmem_mmap_tmp = rb_entry(node, struct bigmem_mmap_section_struct, rb_node );
+	print_func("%2d: [%08x %08x)\n", idx, bigmem_mmap_tmp->start, bigmem_mmap_tmp->end );
+	node = rb_next(node);
+	idx++;
+    }
+    print_func("total %d entries\n", idx);
+}
+
+
+
+
+#ifndef __KERNEL__
+
+unsigned do_alloc(unsigned len)
+{
+    unsigned addr;
+
+    _bigmem_mmap_debug_print("@ allocate_bigmem_mmap_section(0x%08x) request.\n",len);
+
+    addr = allocate_bigmem_mmap_section(len);
+    if( addr == BIGMEM_MMAP_ALLOCATION_FAILURE ) {
+	_bigmem_mmap_error_print( "allocate_bigmem_mmap_section() failed\n");
+	return addr;
+    }
+    _bigmem_mmap_debug_print("@ allocate_bigmem_mmap_section(0x%08x) succeeded. addr=0x%08x\n", len, addr);
+    return addr;
+}
+
+void do_remove(unsigned addr)
+{
+    _bigmem_mmap_debug_print("@ remove_bigmem_mmap_section(0x%08x) request.\n", addr);
+
+    if( remove_bigmem_mmap_section(addr) != BIGMEM_MMAP_SUCCESS ) {
+	_bigmem_mmap_error_print( "remove_bigmem_mmap_section(%08x) failed\n",  addr);
+	bigmem_mmap_traverse_print(printf);
+
+    }
+    _bigmem_mmap_debug_print("@ remove_bigmem_mmap_section(0x%08x) succeeded.\n", addr);
+
+}
+
+void bigmem_mmap_test(void)
+{
+    unsigned region_start = 0x10000000;
+    unsigned region_end   = 0x50000000;
+    unsigned addr;
+
+
+    if( bigmem_mmap_init(region_start, region_end)!=BIGMEM_MMAP_SUCCESS ) {
+	_bigmem_mmap_error_print( "bigmem_mmap_init() failed.\n");
+	exit(1);
+    }
+    printf("bigmem_mmap region [%08x,%08x)\n",  get_bigmem_mmap_start(), get_bigmem_mmap_end() );
+    
+    addr = do_alloc(0x00002000);
+    addr = do_alloc(0x0000F000);
+    addr = do_alloc(0x00200000);
+    bigmem_mmap_traverse_print(printf);
+
+    do_remove(addr);
+    addr = do_alloc(0x21002000);
+    addr = do_alloc(0x00102000);
+    bigmem_mmap_traverse_print(printf);
+
+    if( bigmem_mmap_finalize() !=BIGMEM_MMAP_SUCCESS ) {
+	_bigmem_mmap_error_print( "bigmem_mmap_finalize() failed.\n");
+	exit(1);
+    }
+    bigmem_mmap_traverse_print(printf);
+}
+
+
+
+
+void bigmem_mmap_test_random()
+{
+    unsigned region_start, region_end;
+    unsigned addr_keep[10];
+    unsigned n_addr_keep=0;
+    int cid = bigmem_process_cid();
+    int i,j;
+
+    for(i=0; i<10; i++ ) {
+	region_start = 0x30000000 + ((rand()%3)-1)*0x10000000;
+	region_end   = 0x70000000 + ((rand()%3)-1)*0x10000000;
+
+	printf("\n\n==== test try %d ==============\n", i);
+
+	if( bigmem_mmap_init(region_start, region_end)!=BIGMEM_MMAP_SUCCESS ) {
+	    _bigmem_mmap_error_print( "bigmem_mmap_init() failed.\n");
+	    exit(1);
+	}
+	printf("bigmem_mmap region [%08x,%08x)\n",  get_bigmem_mmap_start(), get_bigmem_mmap_end() );
+
+	n_addr_keep = 0;
+	
+	for( j=0; j< 300; j++ ) {
+	    int size;
+	    unsigned addr;
+	    if( (rand()%133)==0 ) {
+		size = ( (rand()+1)%(region_end-region_start+0x10000000))&0xffff0000;
+	    } else {
+		size = ( (rand()+1)%((region_end-region_start)/0xfff))&0xffff0000;
+	    }
+	    addr = do_alloc(size);
+
+	    if( (j%100)==0 ) {
+		if( _bigmem_mmap_allocatedmemory_no_lock(cid) != bigmem_mmap_section_allocated_bytes[cid] ) {
+		    printf("Found inconsitency: free=%08x bigmem_mmap_section_allocated_bytes=0x%08x  bigmem_mmap_n_sections_allocated=%d iter=%d\n",
+			   _bigmem_mmap_freememory_no_lock(cid), bigmem_mmap_section_allocated_bytes[cid], bigmem_mmap_n_sections_allocated[cid],j );
+		    exit(1);
+		}
+
+		printf("free=%08x bigmem_mmap_section_allocated_bytes=0x%08x  bigmem_mmap_n_sections_allocated=%d iter=%d\n",
+		       _bigmem_mmap_freememory_no_lock(cid),      bigmem_mmap_section_allocated_bytes[cid], bigmem_mmap_n_sections_allocated[cid],j );
+
+	    }
+
+
+	    if( addr != BIGMEM_MMAP_ALLOCATION_FAILURE ) {
+		addr_keep[n_addr_keep++] = addr;
+		if(n_addr_keep>=10) {
+		    int k;
+		    if( (rand()%2)==0 ) {
+			for(k=0;k<(rand()%10);k++ ) {
+			    do_remove(addr_keep[k]);
+			}
+		    }
+		    n_addr_keep=0;
+		}
+	    }
+	}
+
+	printf("_bigmem_mmap_allocatedmemory_no_lock()=0x%08x bigmem_mmap_section_allocated_bytes=0x%08x  bigmem_mmap_n_sections_allocated=%d\n",
+	       _bigmem_mmap_allocatedmemory_no_lock(cid), bigmem_mmap_section_allocated_bytes[cid], bigmem_mmap_n_sections_allocated[cid] );
+
+	printf("@ traverse print\n");
+	bigmem_mmap_traverse_print(printf);
+
+	if( bigmem_mmap_finalize() !=BIGMEM_MMAP_SUCCESS ) {
+	    _bigmem_mmap_error_print( "bigmem_mmap_finalize() failed.\n");
+	    exit(1);
+	}
+
+    }
+    printf("done.\n");
+}
+
+void  bigmem_mmap_test_seq()
+{
+    unsigned region_start, region_end;
+    int i,j;
+
+    for(i=0; i<1; i++ ) {
+	region_start = 0x30000000 + ((rand()%3)-1)*0x10000000;
+	region_end   = 0x70000000 + ((rand()%3)-1)*0x10000000;
+
+	printf("\n==== test try %d ==============\n", i);
+
+	if( bigmem_mmap_init(region_start, region_end)!=BIGMEM_MMAP_SUCCESS ) {
+	    _bigmem_mmap_error_print( "bigmem_mmap_init() failed.\n");
+	    exit(1);
+	}
+	printf("bigmem_mmap region [%08x,%08x)\n",  get_bigmem_mmap_start(), get_bigmem_mmap_end() );
+	for( j=region_start; j<region_end-0x100000;j+=0x100000 ) {
+	    do_alloc(0x100000);
+	}
+	/* assume that alloc function returns the first found memory region */
+	for( j=region_start; j<region_end-0x100000;j+=0x100000 ) {
+	    do_remove(j);
+	}
+	
+	printf("@ traverse print\n");
+	bigmem_mmap_traverse_print(printf);
+
+	if( bigmem_mmap_finalize() !=BIGMEM_MMAP_SUCCESS ) {
+	    _bigmem_mmap_error_print( "bigmem_mmap_finalize() failed.\n");
+	    exit(1);
+	}
+
+    }
+}
+
+
+int main(int argc,char* argv[])
+{
+
+    printf("[random test]\n");
+    bigmem_mmap_test_random();
+
+    printf("[seq test]\n");
+    bigmem_mmap_test_seq();
+    printf("done.\n");
+
+    return 0;
+}
+
+#endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 76b148b..8eaa92e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
 #include <net/ip.h>
 #include <net/netdma.h>
 #include <net/sock.h>
+#include <net/tcp_hiatus.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -292,6 +293,17 @@
 EXPORT_SYMBOL(tcp_memory_allocated);
 
 /*
+ * Statistics about the number of waits in TCP for various reasons
+ */
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+int tcp_hiatus_counts[k_tcp_hiatus_reasons] ;
+EXPORT_SYMBOL(tcp_hiatus_counts) ;
+#endif
+#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS)
+int tcp_scattergather_frag_limit  ;
+EXPORT_SYMBOL(tcp_scattergather_frag_limit) ;
+#endif
+/*
  * Current number of TCP sockets.
  */
 struct percpu_counter tcp_sockets_allocated;
@@ -306,6 +318,7 @@
 	unsigned int flags;
 };
 
+
 /*
  * Pressure flag: try to collapse.
  * Technical note: it is used by multiple contexts non atomically.
@@ -640,8 +653,13 @@
 {
 	struct sk_buff *skb;
 
+#if defined(CONFIG_BLUEGENE)
+	/* Desire to have the TCP header quadword-aligned.  */
+	size = ALIGN(size, 16);
+#else
 	/* The TCP header must be at least 32-bit aligned.  */
 	size = ALIGN(size, 4);
+#endif
 
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
 	if (skb) {
@@ -710,10 +728,18 @@
 
 		i = skb_shinfo(skb)->nr_frags;
 		can_coalesce = skb_can_coalesce(skb, i, page, offset);
+/* #if defined(CONFIG_BGP_TORUS_DIAGNOSTICS) */
+/* 		// Scatter-gather in torus driver not handling well if we have more than one frag */
+/* 		if (!can_coalesce && ((i > tcp_scattergather_frag_limit) || (i >= MAX_SKB_FRAGS))) { */
+/* 			tcp_mark_push(tp, skb); */
+/* 			goto new_segment; */
+/* 		} */
+/* #else */
 		if (!can_coalesce && i >= MAX_SKB_FRAGS) {
 			tcp_mark_push(tp, skb);
 			goto new_segment;
 		}
+/* #endif */
 		if (!sk_wmem_schedule(sk, copy))
 			goto wait_for_memory;
 
@@ -753,8 +779,12 @@
 		continue;
 
 wait_for_sndbuf:
+
+		increment_tcp_hiatus_count(k_tcp_wait_for_sndbuf) ;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+
+		increment_tcp_hiatus_count(k_tcp_wait_for_memory) ;
 		if (copied)
 			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
@@ -999,8 +1029,10 @@
 			continue;
 
 wait_for_sndbuf:
+			increment_tcp_hiatus_count(k_tcp_wait_for_sndbuf) ;
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
+			increment_tcp_hiatus_count(k_tcp_wait_for_memory) ;
 			if (copied)
 				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8..69e77d9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -35,6 +35,7 @@
  */
 
 #include <net/tcp.h>
+#include <net/tcp_hiatus.h>
 
 #include <linux/compiler.h>
 #include <linux/module.h>
@@ -59,6 +60,15 @@
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+int sysctl_tcp_force_nodelay ;
+int sysctl_tcp_permit_cwnd ;
+int sysctl_tcp_max_cwnd = 1000 ;
+EXPORT_SYMBOL(sysctl_tcp_force_nodelay) ;
+EXPORT_SYMBOL(sysctl_tcp_permit_cwnd) ;
+EXPORT_SYMBOL(sysctl_tcp_max_cwnd) ;
+#endif
+
 static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1145,6 +1155,11 @@
 
 	in_flight = tcp_packets_in_flight(tp);
 	cwnd = tp->snd_cwnd;
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
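+	/* clamp: raise cwnd to at least sysctl_tcp_permit_cwnd, cap at sysctl_tcp_max_cwnd */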
+	cwnd =   (cwnd < sysctl_tcp_permit_cwnd)
+	       ? sysctl_tcp_permit_cwnd
+	       : ( ( cwnd > sysctl_tcp_max_cwnd) ? sysctl_tcp_max_cwnd : cwnd ) ;
+#endif
 	if (in_flight < cwnd)
 		return (cwnd - in_flight);
 
@@ -1213,6 +1228,11 @@
 	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
 		return 1;
 
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+	if (sysctl_tcp_force_nodelay)
+		return 1 ;
+#endif
+
 	return 0;
 }
 
@@ -1508,6 +1528,7 @@
 	return -1;
 }
 
+
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
@@ -1534,6 +1555,7 @@
 		/* Do MTU probing. */
 		result = tcp_mtu_probe(sk);
 		if (!result) {
+			increment_tcp_hiatus_count(k_tcp_defer_mtu_probe) ;
 			return 0;
 		} else if (result > 0) {
 			sent_pkts = 1;
@@ -1548,20 +1570,32 @@
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota)
+			{
+				increment_tcp_hiatus_count(k_tcp_defer_cwnd_quota) ;
 			break;
+			}
 
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+			{
+				increment_tcp_hiatus_count(k_tcp_defer_snd_wnd) ;
 			break;
+			}
 
 		if (tso_segs == 1) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
 						     (tcp_skb_is_last(sk, skb) ?
 						      nonagle : TCP_NAGLE_PUSH))))
+				{
+					increment_tcp_hiatus_count(k_tcp_defer_nagle) ;
 				break;
+				}
 		} else {
 			if (!push_one && tcp_tso_should_defer(sk, skb))
+				{
+					increment_tcp_hiatus_count(k_tcp_defer_should) ;
 				break;
 		}
+		}
 
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
@@ -1570,13 +1604,20 @@
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
+			{
+				increment_tcp_hiatus_count(k_tcp_defer_fragment) ;
 			break;
+			}
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
+			{
+				increment_tcp_hiatus_count(k_tcp_launch_failed) ;  /*  e.g. no memory when building TCP header */
 			break;
+			}
 
+		increment_tcp_hiatus_count(k_tcp_launched) ;  /*  in the end this skb was launched rather than deferred */
 		/* Advance the send_head.  This one is sent out.
 		 * This call will increment packets_out.
 		 */
diff --git a/net/socket.c b/net/socket.c
index 35dd737..2ed4918 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1384,7 +1384,9 @@
  *	necessary for a listen, and if that works, we mark the socket as
  *	ready for listening.
  */
-
+#if defined(CONFIG_BGP_TORUS)
+extern int sysctl_bgp_torus_backlog_floor ;
+#endif
 SYSCALL_DEFINE2(listen, int, fd, int, backlog)
 {
 	struct socket *sock;
@@ -1396,6 +1398,10 @@
 		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
 		if ((unsigned)backlog > somaxconn)
 			backlog = somaxconn;
+#if defined(CONFIG_BGP_TORUS)
+/*  Apps (particularly mpich2) sometimes set 'backlog' far too small for cloud computing */
+		if(backlog < sysctl_bgp_torus_backlog_floor ) backlog = sysctl_bgp_torus_backlog_floor ;
+#endif
 
 		err = security_socket_listen(sock, backlog);
 		if (!err)
diff --git a/testbuild.sh b/testbuild.sh
new file mode 100644
index 0000000..1bf7086
--- /dev/null
+++ b/testbuild.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+SPIDIR=../zepto-arch-runtime
+ZEPTODIR=../BGP
+
+RAMDISK=$ZEPTODIR/ramdisk/CN/bgp-cn-ramdisk.cpio.gz
+
+if [ ! -d $ZEPTODIR ] ; then 
+    echo "Please checkout the ZeptoOS svn repo, configure and make to create a ramdisk"
+    echo ""
+    echo "For example:"
+    echo '$ cd ../'
+    echo '$ svn co https://svn.mcs.anl.gov/repos/ZeptoOS/trunk/BGP'
+    echo '$ cd BGP'
+    echo '$ ./configure ; make'
+    echo '$ cd ../linux-2.6.29.1-BGP # back to this dir'
+    echo ""
+    exit 1
+fi
+
+if [ ! -d $SPIDIR/ ] ; then 
+    echo "Please checkout the Zepto arch runtime git repo, configure and make to create a ramdisk"
+    echo ""
+    echo "For example:"
+    echo '$ cd ../'
+    echo '$ git clone http://git.anl-external.org/bg-linux.repos/zepto-arch-runtime.git'
+    echo '$ cd ../linux-2.6.29.1-BGP # back to this dir'
+    echo ""
+    exit 1
+fi    
+
+
+if [ -f ../build-bgpzepto_defconfig/include/linux/compile.h ] ; then
+    rm ../build-bgpzepto_defconfig/include/linux/compile.h  # to always update timestamp in version
+fi
+
+sh make.sh --ramdisk=$RAMDISK --spi=$SPIDIR/arch/include  
+if [ $? -ne 0 ] ; then
+    exit 1
+fi
+
+
+./zkparam2.py ../build-bgpzepto_defconfig/arch/powerpc/boot/dtbImage.initrd.bgp  zepto_debug=3 zepto_console_output=1 bigmemsize=1024M
+
+# print the kernel parameters back so they can be verified
+./zkparam2.py ../build-bgpzepto_defconfig/arch/powerpc/boot/dtbImage.initrd.bgp 
+
+echo ""
+echo "======================================================================"
+echo ""
+echo "Zepto kernel image(w/ ramdisk): ../build-bgpzepto_defconfig/arch/powerpc/boot/dtbImage.initrd.bgp"
+if [ -d /bgsys/argonne-utils/profiles/ ] ; then 
+    echo ""
+    echo "Please configure your kernel profile manually"
+    echo "For example:"
+    echo "$ cp ../build-bgpzepto_defconfig/arch/powerpc/boot/dtbImage.initrd.bgp /bgsys/argonne-utils/profiles/$USER/CNK"
+fi
+echo ""
+echo "======================================================================"
+
+
+echo ""
+echo "done"
+echo ""
+
+
diff --git a/zkparam2.py b/zkparam2.py
new file mode 100644
index 0000000..8b80114
--- /dev/null
+++ b/zkparam2.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+#***************************************************************************
+# ZEPTOOS:zepto-info
+#      This file is part of ZeptoOS: The Small Linux for Big Computers.
+#      See www.mcs.anl.gov/zeptoos for more information.
+# ZEPTOOS:zepto-info
+#
+# ZEPTOOS:zepto-fillin
+#      $Id:  $
+#      ZeptoOS_Version: 2.0
+#      ZeptoOS_Heredity: FOSS_ORIG
+#      ZeptoOS_License: GPL
+# ZEPTOOS:zepto-fillin
+#
+# ZEPTOOS:zepto-gpl
+#       Copyright: Argonne National Laboratory, Department of Energy,
+#                  and UChicago Argonne, LLC.  2004, 2005, 2006, 2007, 2008
+#       ZeptoOS License: GPL
+#  
+#       This software is free.  See the file ZeptoOS/misc/license.GPL
+#       for complete details on your rights to copy, modify, and use this
+#       software.
+# ZEPTOOS:zepto-gpl
+#***************************************************************************
+
+
+import sys, os, re
+
+debug = 0
+
+def main():
+    if len(sys.argv) < 2 :
+        print ""
+        print "Usage: zkparam.py KernelImage [param ...]"
+        print ""
+        print "This tool overwrites additional kernel command line parameters to "
+        print "BG/P 2.6.29 based kernel."
+        print ""
+        print "Avaiable Kernel Parameters: zepto_debug, bigmemsizeMB and zepto_console_output"
+        print "See http://wiki.mcs.anl.gov/zeptoos/index.php/Kernel"
+        print ""
+        sys.exit(1)
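+    # Example invocations (testbuild.sh uses these):
+    #   ./zkparam2.py dtbImage.initrd.bgp zepto_debug=3 bigmemsize=1024M   # set params
+    #   ./zkparam2.py dtbImage.initrd.bgp                                  # print current params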
+
+    bootargs="console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608 rdinit=/sbin/init "
+
+    print "[zkparam2]"
+    print ""
+    print "dt bootagrs:", bootargs, "(arch/powerpc/boot/dts/bgp.dts)"
+    print ""
+
+    kimage = sys.argv[1]
+
+    printonly = 0
+    kparam = ""
+
+    if len(sys.argv) < 3 :
+        printonly = 1
+    else:
+        printonly = 0
+        if len(sys.argv[2]) > 0 :
+            for s in sys.argv[2:]:
+                kparam = kparam + " " + s
+            kparam = kparam + '\0'
+        else:
+            kparam = '\0'
+
+    kparam = bootargs + kparam
+
+    # Find the virtual address and file offset of the __builtin_cmdline section.
+    # Here is a sample line from readelf -S:
+    # [ 3] __builtin_cmdline PROGBITS        0080ac7c 02ac7c 000200 00  WA  0   0  4
+  
+    cmd = "readelf -S " + kimage
+    try:
+        fp = os.popen(cmd)
+    except:
+        print 'failed to popen(' , cmd , ')'
+        sys.exit(1)
+  
+    re_section = re.compile( r".*__builtin_cmdline\s+PROGBITS\s+(\S+)\s+(\S+)" )
+    data_file_offset = 0
+    data_v_addr = 0
+    for line in fp.readlines():
+        m = re_section.match(line) 
+        if m :
+            data_v_addr  = int( m.group(1), 16 )  # convert the string, parsing it as hex
+            data_file_offset = int( m.group(2), 16 )
+            break
+
+    if data_file_offset > 0 :
+        if debug > 0:
+            print "data_v_addr=%08x" % data_v_addr
+            print "data_file_offset=%08x" % data_file_offset
+    else:
+        print "Error: could not find ELF section!"
+        sys.exit(1)
+
+    fp.close();
+
+    builtin_cmdline_file_offset =  data_file_offset
+    if debug > 0 :
+        print "builtin_cmdline_file_offset=%08x" % builtin_cmdline_file_offset
+
+    if printonly < 1 :
+        #
+        # overwrite builtin_cmdline with the new parameters
+        #
+        fd = os.open( kimage, os.O_RDWR )
+        if fd < 0 :
+            print "Error: failed to open ", kimage
+            sys.exit(1)
+        os.lseek( fd, builtin_cmdline_file_offset, 0)
+        buf = os.write( fd, kparam )
+        os.close(fd)
+    else:
+        #
+        # read back and print the current builtin_cmdline
+        #
+        fd = os.open( kimage, os.O_RDONLY )
+        if fd < 0 :
+            print "Error: failed to open ", kimage
+            sys.exit(1)
+        os.lseek( fd, builtin_cmdline_file_offset, 0)
+        buf = os.read( fd, 160 )
+        os.close(fd)
+        nullpos=0
+        for i in range(0,160):
+            if buf[i] == '\0' :
+                nullpos = i
+                break
+        str = buf[:nullpos]
+
+        if len(str)==0 :
+            print "No additional kernel command line found!"
+        else:
+            print "Current kernel command line:"
+            print str
+	print "\n"
+
+if __name__ == '__main__':
+    main()
+    sys.exit(0)
+