Merge tag 'pm-config-3.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull CONFIG_PM_RUNTIME elimination from Rafael Wysocki:
 "This removes the last few uses of CONFIG_PM_RUNTIME introduced
  recently and makes that config option finally go away.

  CONFIG_PM will be available directly from the menu now, and it will
  also be selected automatically if CONFIG_SUSPEND or CONFIG_HIBERNATION
  is set"

* tag 'pm-config-3.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: Eliminate CONFIG_PM_RUNTIME
  tty: 8250_omap: Replace CONFIG_PM_RUNTIME with CONFIG_PM
  sound: sst-haswell-pcm: Replace CONFIG_PM_RUNTIME with CONFIG_PM
  spi: Replace CONFIG_PM_RUNTIME with CONFIG_PM
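
An illustrative sketch, not part of the pull: each commit above applies
the same guard change, since code that used to be built only for runtime
PM is now built whenever CONFIG_PM is set. The driver name and callback
below are hypothetical.

	#include <linux/device.h>

	#ifdef CONFIG_PM		/* was: #ifdef CONFIG_PM_RUNTIME */
	/* Hypothetical runtime-PM callback kept under the new guard. */
	static int foo_runtime_suspend(struct device *dev)
	{
		return 0;
	}
	#endif
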
diff --git a/.gitignore b/.gitignore
index e213b27..ce57b79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,3 +96,6 @@
 
 # Kconfig presets
 all.config
+
+# Kdevelop4
+*.kdev4
diff --git a/CREDITS b/CREDITS
index c56d8aa..96935df 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1734,14 +1734,14 @@
 S: USA
 
 N: Dave Jones
-E: davej@redhat.com
+E: davej@codemonkey.org.uk
 W: http://www.codemonkey.org.uk
 D: Assorted VIA x86 support.
 D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Fedora kernel maintenance.
+D: Fedora kernel maintenance (2003-2014).
+D: 'Trinity' and similar fuzz testing work.
 D: Misc/Other.
-S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josefsson
 E: gandalf@wlug.westbo.se
diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
new file mode 100644
index 0000000..12bc614
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
@@ -0,0 +1,37 @@
+Device-tree bindings for I2C OPAL driver
+----------------------------------------
+
+Most of the device-node and property layout is specific to the firmware and
+used by the firmware itself to configure the port. From the Linux
+perspective, the properties of use are "ibm,port-name" and "ibm,opal-id".
+
+Required properties:
+
+- reg: Port-id within a given master
+- compatible: must be "ibm,opal-i2c"
+- ibm,opal-id: Refers to a specific bus and is used to identify it when
+	       calling the relevant OPAL functions.
+- bus-frequency: Operating frequency of the i2c bus (in Hz). Informational
+		 for Linux, though it is used by the firmware.
+
+Optional properties:
+- ibm,port-name: Firmware-provided name that uniquely identifies the i2c
+		 port.
+
+The node contains a number of other properties that are used by the FW itself
+and depend on the specific hardware implementation. The example below depicts
+a P8 on-chip bus.
+
+Example:
+
+i2c-bus@0 {
+	reg = <0x0>;
+	bus-frequency = <0x61a80>;
+	compatible = "ibm,power8-i2c-port", "ibm,opal-i2c";
+	ibm,opal-id = <0x1>;
+	ibm,port-name = "p8_00000000_e1p0";
+	#address-cells = <0x1>;
+	phandle = <0x10000006>;
+	#size-cells = <0x0>;
+	linux,phandle = <0x10000006>;
+};
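
A minimal sketch of consuming this binding from a driver (the helper is
hypothetical and not part of the patch; only the property names above are
taken from the binding):

	#include <linux/of.h>

	static int i2c_opal_parse_node(struct device_node *np)
	{
		const char *port_name;
		u32 opal_id, freq;

		if (of_property_read_u32(np, "ibm,opal-id", &opal_id))
			return -EINVAL;		/* required property */
		/* Informational for Linux; the firmware uses it. */
		of_property_read_u32(np, "bus-frequency", &freq);
		if (of_property_read_string(np, "ibm,port-name", &port_name))
			port_name = NULL;	/* optional property */
		return 0;
	}
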
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
index 4472ed2..818518a 100644
--- a/Documentation/x86/intel_mpx.txt
+++ b/Documentation/x86/intel_mpx.txt
@@ -7,11 +7,15 @@
 references, for those references whose compile-time normal intentions are
 usurped at runtime due to buffer overflow or underflow.
 
+You can tell if your CPU supports MPX by looking in /proc/cpuinfo:
+
+	cat /proc/cpuinfo | grep ' mpx '
+
 For more information, please refer to Intel(R) Architecture Instruction
 Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection
 Extensions.
 
-Note: Currently no hardware with MPX ISA is available but it is always
+Note: As of December 2014, no hardware with MPX is available, but it is
 possible to use SDE (Intel(R) Software Development Emulator) instead, which
 can be downloaded from
 http://software.intel.com/en-us/articles/intel-software-development-emulator
@@ -30,9 +34,15 @@
    instrumentation as well as some setup code called early after the app
    starts. New instruction prefixes are noops for old CPUs.
 2) That setup code allocates (virtual) space for the "bounds directory",
-   points the "bndcfgu" register to the directory and notifies the kernel
-   (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) that the app will be using
-   MPX.
+   points the "bndcfgu" register to the directory (must also set the valid
+   bit) and notifies the kernel (via the new prctl(PR_MPX_ENABLE_MANAGEMENT))
+   that the app will be using MPX.  The app must be careful not to access
+   the bounds tables between the time when it populates "bndcfgu" and
+   when it calls the prctl().  This might be hard to guarantee if the app
+   is compiled with MPX.  You can add "__attribute__((bnd_legacy))" to
+   the function to disable MPX instrumentation to help guarantee this.
+   Also be careful not to call out to any other code which might be
+   MPX-instrumented.
 3) The kernel detects that the CPU has MPX, allows the new prctl() to
    succeed, and notes the location of the bounds directory. Userspace is
   expected to keep the bounds directory at that location. We note it
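
A minimal userspace sketch of step 2 above, assuming the runtime has
already allocated the directory and loaded "bndcfgu" (the helper name is
hypothetical; PR_MPX_ENABLE_MANAGEMENT is the prctl the text describes):

	#include <sys/prctl.h>

	/* bnd_legacy keeps this function free of MPX instrumentation, so
	   it cannot touch the bounds tables before the kernel is told. */
	static __attribute__((bnd_legacy)) int mpx_enable_management(void)
	{
		return prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0);
	}
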
diff --git a/Makefile b/Makefile
index fd80c6e..fa9604d 100644
--- a/Makefile
+++ b/Makefile
@@ -481,9 +481,10 @@
 # of make so .config is not included in this case either (for *config).
 
 version_h := include/generated/uapi/linux/version.h
+old_version_h := include/linux/version.h
 
 no-dot-config-targets := clean mrproper distclean \
-			 cscope gtags TAGS tags help %docs check% coccicheck \
+			 cscope gtags TAGS tags help% %docs check% coccicheck \
 			 $(version_h) headers_% archheaders archscripts \
 			 kernelversion %src-pkg
 
@@ -1005,6 +1006,7 @@
 
 $(version_h): $(srctree)/Makefile FORCE
 	$(call filechk,version.h)
+	$(Q)rm -f $(old_version_h)
 
 include/generated/utsrelease.h: include/config/kernel.release FORCE
 	$(call filechk,utsrelease.h)
@@ -1036,8 +1038,6 @@
 #Default location for installed headers
 export INSTALL_HDR_PATH = $(objtree)/usr
 
-hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
-
 # If we do an all arch process set dst to asm-$(hdr-arch)
 hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm)
 
@@ -1175,7 +1175,7 @@
 		  Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
 		  signing_key.priv signing_key.x509 x509.genkey		\
 		  extra_certificates signing_key.x509.keyid		\
-		  signing_key.x509.signer include/linux/version.h
+		  signing_key.x509.signer
 
 # clean - Delete most, but leave enough to build external modules
 #
@@ -1235,7 +1235,7 @@
 # ---------------------------------------------------------------------------
 
 boards := $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*_defconfig)
-boards := $(notdir $(boards))
+boards := $(sort $(notdir $(boards)))
 board-dirs := $(dir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*/*_defconfig))
 board-dirs := $(sort $(notdir $(board-dirs:/=)))
 
@@ -1326,7 +1326,7 @@
 
 help-boards: $(help-board-dirs)
 
-boards-per-dir = $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig))
+boards-per-dir = $(sort $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig)))
 
 $(help-board-dirs): help-%:
 	@echo  'Architecture specific targets ($(SRCARCH) $*):'
@@ -1581,11 +1581,6 @@
   include $(cmd_files)
 endif
 
-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir
-# Usage:
-# $(Q)$(MAKE) $(clean)=dir
-clean := -f $(srctree)/scripts/Makefile.clean obj
-
 endif	# skip-makefile
 
 PHONY += FORCE
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index b0a608d..b964c66 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -30,8 +30,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -187,13 +186,14 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
 
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -369,11 +369,10 @@
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -521,3 +520,4 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);
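
A minimal sketch of what the EXPORT_SYMBOL() additions enable
(hypothetical module code, not from the patch): the generic uaccess
wrappers expand to these helpers on CRIS, so modules need the exports
to link.

	#include <linux/kernel.h>
	#include <linux/uaccess.h>

	/* copy_to_user() expands to __copy_user() on CRIS v10, so this
	   only links as a module once __copy_user is exported. */
	static ssize_t demo_read(char __user *buf, size_t len)
	{
		static const char msg[] = "hello\n";
		size_t n = min(len, sizeof(msg));

		return copy_to_user(buf, msg, n) ? -EFAULT : n;
	}
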
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 15a9ed1..4fc16b4 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -108,6 +108,7 @@
 	select MTD_JEDECPROBE
 	select MTD_BLOCK
 	select MTD_COMPLEX_MAPPINGS
+	select MTD_MTDRAM
 	help
 	  This option enables MTD mapping of flash devices.  Needed to use
 	  flash memories.  If unsure, say Y.
@@ -358,13 +359,6 @@
 	default MMC
 	select SPI
 	select MMC_SPI
-	select ETRAX_SPI_MMC_BOARD
-
-# For the parts that can't be a module (due to restrictions in
-# framework elsewhere).
-config ETRAX_SPI_MMC_BOARD
-       boolean
-       default n
 
 # While the board info is MMC_SPI only, the drivers are written to be
 # independent of MMC_SPI, so we'll keep SPI non-dependent on the
diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile
index 39aa3c1..15fbfef 100644
--- a/arch/cris/arch-v32/drivers/Makefile
+++ b/arch/cris/arch-v32/drivers/Makefile
@@ -10,4 +10,3 @@
 obj-$(CONFIG_ETRAX_I2C)			+= i2c.o
 obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
 obj-$(CONFIG_PCI)			+= pci/
-obj-$(CONFIG_ETRAX_SPI_MMC_BOARD)	+= board_mmcspi.o
diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h
index c073cf4..d9cc856 100644
--- a/arch/cris/arch-v32/drivers/i2c.h
+++ b/arch/cris/arch-v32/drivers/i2c.h
@@ -2,7 +2,6 @@
 #include <linux/init.h>
 
 /* High level I2C actions */
-int __init i2c_init(void);
 int i2c_write(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_read(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue);
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 5a14913..08a313f 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -1,8 +1,7 @@
 /*
- * Simple synchronous serial port driver for ETRAX FS and Artpec-3.
+ * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3.
  *
- * Copyright (c) 2005 Axis Communications AB
- *
+ * Copyright (c) 2005, 2008 Axis Communications AB
  * Author: Mikael Starvik
  *
  */
@@ -16,16 +15,17 @@
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
 #include <linux/wait.h>
 
 #include <asm/io.h>
-#include <dma.h>
+#include <mach/dma.h>
 #include <pinmux.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/sser_defs.h>
+#include <hwregs/timer_defs.h>
 #include <hwregs/dma_defs.h>
 #include <hwregs/dma.h>
 #include <hwregs/intr_vect_defs.h>
@@ -59,22 +59,23 @@
 /* the rest of the data pointed out by Descr1 and set readp to the start */
 /* of Descr2                                                             */
 
-#define SYNC_SERIAL_MAJOR 125
-
 /* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */
 /* words can be handled */
-#define IN_BUFFER_SIZE 12288
-#define IN_DESCR_SIZE 256
-#define NBR_IN_DESCR (IN_BUFFER_SIZE/IN_DESCR_SIZE)
+#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE
+#define NBR_IN_DESCR (8*6)
+#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR)
 
-#define OUT_BUFFER_SIZE 1024*8
 #define NBR_OUT_DESCR 8
+#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR)
 
 #define DEFAULT_FRAME_RATE 0
 #define DEFAULT_WORD_RATE 7
 
+/* To be removed when we move to pure udev. */
+#define SYNC_SERIAL_MAJOR 125
+
 /* NOTE: Enabling some debug will likely cause overrun or underrun,
- * especially if manual mode is use.
+ * especially if manual mode is used.
  */
 #define DEBUG(x)
 #define DEBUGREAD(x)
@@ -85,11 +86,28 @@
 #define DEBUGTRDMA(x)
 #define DEBUGOUTBUF(x)
 
-typedef struct sync_port
-{
-	reg_scope_instances regi_sser;
-	reg_scope_instances regi_dmain;
-	reg_scope_instances regi_dmaout;
+enum syncser_irq_setup {
+	no_irq_setup = 0,
+	dma_irq_setup = 1,
+	manual_irq_setup = 2,
+};
+
+struct sync_port {
+	unsigned long regi_sser;
+	unsigned long regi_dmain;
+	unsigned long regi_dmaout;
+
+	/* Interrupt vectors. */
+	unsigned long dma_in_intr_vect; /* Used for DMA in. */
+	unsigned long dma_out_intr_vect; /* Used for DMA out. */
+	unsigned long syncser_intr_vect; /* Used when no DMA. */
+
+	/* DMA number for in and out. */
+	unsigned int dma_in_nbr;
+	unsigned int dma_out_nbr;
+
+	/* DMA owner. */
+	enum dma_owner req_dma;
 
 	char started; /* 1 if port has been started */
 	char port_nbr; /* Port 0 or 1 */
@@ -99,22 +117,29 @@
 	char use_dma;  /* 1 if port uses dma */
 	char tr_running;
 
-	char init_irqs;
+	enum syncser_irq_setup init_irqs;
 	int output;
 	int input;
 
 	/* Next byte to be read by application */
-	volatile unsigned char *volatile readp;
+	unsigned char *readp;
 	/* Next byte to be written by etrax */
-	volatile unsigned char *volatile writep;
+	unsigned char *writep;
 
 	unsigned int in_buffer_size;
+	unsigned int in_buffer_len;
 	unsigned int inbufchunk;
-	unsigned char out_buffer[OUT_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	unsigned char in_buffer[IN_BUFFER_SIZE]__attribute__ ((aligned(32)));
-	unsigned char flip[IN_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	struct dma_descr_data* next_rx_desc;
-	struct dma_descr_data* prev_rx_desc;
+	/* Data buffers for in and output. */
+	unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32);
+	unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32);
+	unsigned char flip[IN_BUFFER_SIZE] __aligned(32);
+	struct timespec timestamp[NBR_IN_DESCR];
+	struct dma_descr_data *next_rx_desc;
+	struct dma_descr_data *prev_rx_desc;
+
+	struct timeval last_timestamp;
+	int read_ts_idx;
+	int write_ts_idx;
 
 	/* Pointer to the first available descriptor in the ring,
 	 * unless active_tr_descr == catch_tr_descr and a dma
@@ -135,114 +160,138 @@
 	/* Number of bytes currently locked for being read by DMA */
 	int out_buf_count;
 
-	dma_descr_data in_descr[NBR_IN_DESCR] __attribute__ ((__aligned__(16)));
-	dma_descr_context in_context __attribute__ ((__aligned__(32)));
-	dma_descr_data out_descr[NBR_OUT_DESCR]
-		__attribute__ ((__aligned__(16)));
-	dma_descr_context out_context __attribute__ ((__aligned__(32)));
+	dma_descr_context in_context __aligned(32);
+	dma_descr_context out_context __aligned(32);
+	dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16);
+	dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16);
+
 	wait_queue_head_t out_wait_q;
 	wait_queue_head_t in_wait_q;
 
 	spinlock_t lock;
-} sync_port;
+};
 
 static DEFINE_MUTEX(sync_serial_mutex);
 static int etrax_sync_serial_init(void);
 static void initialize_port(int portnbr);
 static inline int sync_data_avail(struct sync_port *port);
 
-static int sync_serial_open(struct inode *, struct file*);
-static int sync_serial_release(struct inode*, struct file*);
+static int sync_serial_open(struct inode *, struct file *);
+static int sync_serial_release(struct inode *, struct file *);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static int sync_serial_ioctl(struct file *,
-			     unsigned int cmd, unsigned long arg);
-static ssize_t sync_serial_write(struct file * file, const char * buf,
+static long sync_serial_ioctl(struct file *file,
+			      unsigned int cmd, unsigned long arg);
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg);
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos);
-static ssize_t sync_serial_read(struct file *file, char *buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos);
 
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
+#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
+	(defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)))
 #define SYNC_SER_DMA
+#else
+#define SYNC_SER_MANUAL
 #endif
 
-static void send_word(sync_port* port);
-static void start_dma_out(struct sync_port *port, const char *data, int count);
-static void start_dma_in(sync_port* port);
 #ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count);
+static void start_dma_in(struct sync_port *port);
 static irqreturn_t tr_interrupt(int irq, void *dev_id);
 static irqreturn_t rx_interrupt(int irq, void *dev_id);
 #endif
-
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
-#define SYNC_SER_MANUAL
-#endif
 #ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port);
 static irqreturn_t manual_interrupt(int irq, void *dev_id);
 #endif
 
-#ifdef CONFIG_ETRAXFS	/* ETRAX FS */
-#define OUT_DMA_NBR 4
-#define IN_DMA_NBR 5
-#define PINMUX_SSER pinmux_sser0
-#define SYNCSER_INST regi_sser0
-#define SYNCSER_INTR_VECT SSER0_INTR_VECT
-#define OUT_DMA_INST regi_dma4
-#define IN_DMA_INST regi_dma5
-#define DMA_OUT_INTR_VECT DMA4_INTR_VECT
-#define DMA_IN_INTR_VECT DMA5_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser0
-#else			/* Artpec-3 */
-#define OUT_DMA_NBR 6
-#define IN_DMA_NBR 7
-#define PINMUX_SSER pinmux_sser
-#define SYNCSER_INST regi_sser
-#define SYNCSER_INTR_VECT SSER_INTR_VECT
-#define OUT_DMA_INST regi_dma6
-#define IN_DMA_INST regi_dma7
-#define DMA_OUT_INTR_VECT DMA6_INTR_VECT
-#define DMA_IN_INTR_VECT DMA7_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser
+#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed
+#define artpec_request_dma crisv32_request_dma
+#define artpec_free_dma crisv32_free_dma
+
+#ifdef CONFIG_ETRAXFS
+/* ETRAX FS */
+#define DMA_OUT_NBR0		SYNC_SER0_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER0_RX_DMA_NBR
+#define DMA_OUT_NBR1		SYNC_SER1_TX_DMA_NBR
+#define DMA_IN_NBR1		SYNC_SER1_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser0
+#define PINMUX_SSER1		pinmux_sser1
+#define SYNCSER_INST0		regi_sser0
+#define SYNCSER_INST1		regi_sser1
+#define SYNCSER_INTR_VECT0	SSER0_INTR_VECT
+#define SYNCSER_INTR_VECT1	SSER1_INTR_VECT
+#define OUT_DMA_INST0		regi_dma4
+#define IN_DMA_INST0		regi_dma5
+#define DMA_OUT_INTR_VECT0	DMA4_INTR_VECT
+#define DMA_OUT_INTR_VECT1	DMA7_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA5_INTR_VECT
+#define DMA_IN_INTR_VECT1	DMA6_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser0
+#define REQ_DMA_SYNCSER1	dma_sser1
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
+#define PORT1_DMA 1
+#else
+#define PORT1_DMA 0
+#endif
+#elif defined(CONFIG_CRIS_MACH_ARTPEC3)
+/* ARTPEC-3 */
+#define DMA_OUT_NBR0		SYNC_SER_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser
+#define SYNCSER_INST0		regi_sser
+#define SYNCSER_INTR_VECT0	SSER_INTR_VECT
+#define OUT_DMA_INST0		regi_dma6
+#define IN_DMA_INST0		regi_dma7
+#define DMA_OUT_INTR_VECT0	DMA6_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA7_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser
+#define REQ_DMA_SYNCSER1	dma_sser
+#endif
+
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
+#define PORT0_DMA 1
+#else
+#define PORT0_DMA 0
 #endif
 
 /* The ports */
-static struct sync_port ports[]=
-{
+static struct sync_port ports[] = {
 	{
-		.regi_sser             = SYNCSER_INST,
-		.regi_dmaout           = OUT_DMA_INST,
-		.regi_dmain            = IN_DMA_INST,
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
-                .use_dma               = 1,
-#else
-                .use_dma               = 0,
-#endif
-	}
+		.regi_sser		= SYNCSER_INST0,
+		.regi_dmaout		= OUT_DMA_INST0,
+		.regi_dmain		= IN_DMA_INST0,
+		.use_dma		= PORT0_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT0,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT0,
+		.dma_in_nbr		= DMA_IN_NBR0,
+		.dma_out_nbr		= DMA_OUT_NBR0,
+		.req_dma		= REQ_DMA_SYNCSER0,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT0,
+	},
 #ifdef CONFIG_ETRAXFS
-	,
-
 	{
-		.regi_sser             = regi_sser1,
-		.regi_dmaout           = regi_dma6,
-		.regi_dmain            = regi_dma7,
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
-                .use_dma               = 1,
-#else
-                .use_dma               = 0,
-#endif
-	}
+		.regi_sser		= SYNCSER_INST1,
+		.regi_dmaout		= regi_dma6,
+		.regi_dmain		= regi_dma7,
+		.use_dma		= PORT1_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT1,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT1,
+		.dma_in_nbr		= DMA_IN_NBR1,
+		.dma_out_nbr		= DMA_OUT_NBR1,
+		.req_dma		= REQ_DMA_SYNCSER1,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT1,
+	},
 #endif
 };
 
 #define NBR_PORTS ARRAY_SIZE(ports)
 
-static const struct file_operations sync_serial_fops = {
+static const struct file_operations syncser_fops = {
 	.owner		= THIS_MODULE,
 	.write		= sync_serial_write,
 	.read		= sync_serial_read,
@@ -253,61 +302,40 @@
 	.llseek		= noop_llseek,
 };
 
-static int __init etrax_sync_serial_init(void)
+static dev_t syncser_first;
+static int minor_count = NBR_PORTS;
+#define SYNCSER_NAME "syncser"
+static struct cdev *syncser_cdev;
+static struct class *syncser_class;
+
+static void sync_serial_start_port(struct sync_port *port)
 {
-	ports[0].enabled = 0;
-#ifdef CONFIG_ETRAXFS
-	ports[1].enabled = 0;
-#endif
-	if (register_chrdev(SYNC_SERIAL_MAJOR, "sync serial",
-			&sync_serial_fops) < 0) {
-		printk(KERN_WARNING
-			"Unable to get major for synchronous serial port\n");
-		return -EBUSY;
-	}
-
-	/* Initialize Ports */
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
-	if (crisv32_pinmux_alloc_fixed(PINMUX_SSER)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[0].enabled = 1;
-	initialize_port(0);
-#endif
-
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
-	if (crisv32_pinmux_alloc_fixed(pinmux_sser1)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[1].enabled = 1;
-	initialize_port(1);
-#endif
-
-#ifdef CONFIG_ETRAXFS
-	printk(KERN_INFO "ETRAX FS synchronous serial port driver\n");
-#else
-	printk(KERN_INFO "Artpec-3 synchronous serial port driver\n");
-#endif
-	return 0;
+	reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+	reg_sser_rw_tr_cfg tr_cfg =
+		REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	reg_sser_rw_rec_cfg rec_cfg =
+		REG_RD(sser, port->regi_sser, rw_rec_cfg);
+	cfg.en = regk_sser_yes;
+	tr_cfg.tr_en = regk_sser_yes;
+	rec_cfg.rec_en = regk_sser_yes;
+	REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+	port->started = 1;
 }
 
 static void __init initialize_port(int portnbr)
 {
-	int __attribute__((unused)) i;
 	struct sync_port *port = &ports[portnbr];
-	reg_sser_rw_cfg cfg = {0};
-	reg_sser_rw_frm_cfg frm_cfg = {0};
-	reg_sser_rw_tr_cfg tr_cfg = {0};
-	reg_sser_rw_rec_cfg rec_cfg = {0};
+	reg_sser_rw_cfg cfg = { 0 };
+	reg_sser_rw_frm_cfg frm_cfg = { 0 };
+	reg_sser_rw_tr_cfg tr_cfg = { 0 };
+	reg_sser_rw_rec_cfg rec_cfg = { 0 };
 
-	DEBUG(printk(KERN_DEBUG "Init sync serial port %d\n", portnbr));
+	DEBUG(pr_info("Init sync serial port %d\n", portnbr));
 
 	port->port_nbr = portnbr;
-	port->init_irqs = 1;
+	port->init_irqs = no_irq_setup;
 
 	port->out_rd_ptr = port->out_buffer;
 	port->out_buf_count = 0;
@@ -318,10 +346,11 @@
 	port->readp = port->flip;
 	port->writep = port->flip;
 	port->in_buffer_size = IN_BUFFER_SIZE;
+	port->in_buffer_len = 0;
 	port->inbufchunk = IN_DESCR_SIZE;
-	port->next_rx_desc = &port->in_descr[0];
-	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR-1];
-	port->prev_rx_desc->eol = 1;
+
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
 
 	init_waitqueue_head(&port->out_wait_q);
 	init_waitqueue_head(&port->in_wait_q);
@@ -368,14 +397,18 @@
 	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
 
 #ifdef SYNC_SER_DMA
-	/* Setup the descriptor ring for dma out/transmit. */
-	for (i = 0; i < NBR_OUT_DESCR; i++) {
-		port->out_descr[i].wait = 0;
-		port->out_descr[i].intr = 1;
-		port->out_descr[i].eol = 0;
-		port->out_descr[i].out_eop = 0;
-		port->out_descr[i].next =
-			(dma_descr_data *)virt_to_phys(&port->out_descr[i+1]);
+	{
+		int i;
+		/* Setup the descriptor ring for dma out/transmit. */
+		for (i = 0; i < NBR_OUT_DESCR; i++) {
+			dma_descr_data *descr = &port->out_descr[i];
+			descr->wait = 0;
+			descr->intr = 1;
+			descr->eol = 0;
+			descr->out_eop = 0;
+			descr->next =
+				(dma_descr_data *)virt_to_phys(&descr[i+1]);
+		}
 	}
 
 	/* Create a ring from the list. */
@@ -391,201 +424,116 @@
 
 static inline int sync_data_avail(struct sync_port *port)
 {
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----      -    -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->in_buffer_size - (start - end);
-	return avail;
-}
-
-static inline int sync_data_avail_to_end(struct sync_port *port)
-{
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----           -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-	return avail;
+	return port->in_buffer_len;
 }
 
 static int sync_serial_open(struct inode *inode, struct file *file)
 {
+	int ret = 0;
 	int dev = iminor(inode);
-	int ret = -EBUSY;
-	sync_port *port;
-	reg_dma_rw_cfg cfg = {.en = regk_dma_yes};
-	reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes};
+	struct sync_port *port;
+#ifdef SYNC_SER_DMA
+	reg_dma_rw_cfg cfg = { .en = regk_dma_yes };
+	reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes };
+#endif
 
-	mutex_lock(&sync_serial_mutex);
-	DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev));
+	DEBUG(pr_debug("Open sync serial port %d\n", dev));
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev));
-		ret = -ENODEV;
-		goto out;
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
 	}
 	port = &ports[dev];
 	/* Allow open this device twice (assuming one reader and one writer) */
-	if (port->busy == 2)
-	{
-		DEBUG(printk(KERN_DEBUG "Device is busy.. \n"));
-		goto out;
+	if (port->busy == 2) {
+		DEBUG(pr_info("syncser%d is busy\n", dev));
+		return -EBUSY;
 	}
 
+	mutex_lock(&sync_serial_mutex);
 
-	if (port->init_irqs) {
-		if (port->use_dma) {
-			if (port == &ports[0]) {
-#ifdef SYNC_SER_DMA
-				if (request_irq(DMA_OUT_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 0 dma tr",
-						&ports[0])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (request_irq(DMA_IN_INTR_VECT,
-						rx_interrupt,
-						0,
-						"synchronous serial 1 dma rx",
-						&ports[0])) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(OUT_DMA_NBR,
-						"synchronous serial 0 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(IN_DMA_NBR,
-						"synchronous serial 0 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					crisv32_free_dma(OUT_DMA_NBR);
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
-#ifdef SYNC_SER_DMA
-				if (request_irq(DMA6_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 1 dma tr",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ");
-					goto out;
-				} else if (request_irq(DMA7_INTR_VECT,
-						       rx_interrupt,
-						       0,
-						       "synchronous serial 1 dma rx",
-						       &ports[1])) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_TX_DMA_NBR,
-						"synchronous serial 1 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_RX_DMA_NBR,
-						"synchronous serial 3 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					crisv32_free_dma(SYNC_SER1_TX_DMA_NBR);
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
-#endif
-                        /* Enable DMAs */
-			REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
-			REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
-			/* Enable DMA IRQs */
-			REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
-			REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
-			/* Set up wordsize = 1 for DMAs. */
-			DMA_WR_CMD (port->regi_dmain, regk_dma_set_w_size1);
-			DMA_WR_CMD (port->regi_dmaout, regk_dma_set_w_size1);
+	/* Clear any stale data left in the flip buffer */
+	port->readp = port->writep = port->flip;
+	port->in_buffer_len = 0;
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
 
-			start_dma_in(port);
-			port->init_irqs = 0;
-		} else { /* !port->use_dma */
-#ifdef SYNC_SER_MANUAL
-			if (port == &ports[0]) {
-				if (request_irq(SYNCSER_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[0])) {
-					printk("Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
-				if (request_irq(SSER1_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#endif
-			port->init_irqs = 0;
-#else
-			panic("sync_serial: Manual mode not supported.\n");
-#endif /* SYNC_SER_MANUAL */
+	if (port->init_irqs != no_irq_setup) {
+		/* Init only on first call. */
+		port->busy++;
+		mutex_unlock(&sync_serial_mutex);
+		return 0;
+	}
+	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
+		const char *tmp;
+		DEBUG(pr_info("Using DMA for syncser%d\n", dev));
+
+		tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx";
+		if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0,
+				tmp, port)) {
+			pr_err("Can't alloc syncser%d TX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
 		}
+		if (artpec_request_dma(port->dma_out_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d TX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		tmp = dev == 0 ? "syncser0 rx" : "syncser1 rx";
+		if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0,
+				tmp, port)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_in_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		/* Enable DMAs */
+		REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
+		REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
+		/* Enable DMA IRQs */
+		REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
+		REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
+		/* Set up wordsize = 1 for DMAs. */
+		DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1);
+		DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1);
 
-	} /* port->init_irqs */
-
+		start_dma_in(port);
+		port->init_irqs = dma_irq_setup;
+#endif
+	} else { /* !port->use_dma */
+#ifdef SYNC_SER_MANUAL
+		const char *tmp = dev == 0 ? "syncser0 manual irq" :
+					     "syncser1 manual irq";
+		if (request_irq(port->syncser_intr_vect, manual_interrupt,
+				0, tmp, port)) {
+			pr_err("Can't alloc syncser%d manual irq",
+				dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		port->init_irqs = manual_irq_setup;
+#else
+		panic("sync_serial: Manual mode not supported\n");
+#endif /* SYNC_SER_MANUAL */
+	}
 	port->busy++;
 	ret = 0;
-out:
+
+unlock_and_exit:
 	mutex_unlock(&sync_serial_mutex);
 	return ret;
 }
@@ -593,18 +541,17 @@
 static int sync_serial_release(struct inode *inode, struct file *file)
 {
 	int dev = iminor(inode);
-	sync_port *port;
+	struct sync_port *port;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
 	if (port->busy)
 		port->busy--;
 	if (!port->busy)
-          /* XXX */ ;
+		/* XXX */;
 	return 0;
 }
 
@@ -612,21 +559,15 @@
 {
 	int dev = iminor(file_inode(file));
 	unsigned int mask = 0;
-	sync_port *port;
-	DEBUGPOLL( static unsigned int prev_mask = 0; );
+	struct sync_port *port;
+	DEBUGPOLL(
+	static unsigned int prev_mask;
+	);
 
 	port = &ports[dev];
 
-	if (!port->started) {
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg =
-			REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		rec_cfg.rec_en = port->input;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
+	if (!port->started)
+		sync_serial_start_port(port);
 
 	poll_wait(file, &port->out_wait_q, wait);
 	poll_wait(file, &port->in_wait_q, wait);
@@ -645,33 +586,175 @@
 	if (port->input && sync_data_avail(port) >= port->inbufchunk)
 		mask |= POLLIN | POLLRDNORM;
 
-	DEBUGPOLL(if (mask != prev_mask)
-	      printk("sync_serial_poll: mask 0x%08X %s %s\n", mask,
-		     mask&POLLOUT?"POLLOUT":"", mask&POLLIN?"POLLIN":"");
-	      prev_mask = mask;
-	      );
+	DEBUGPOLL(
+	if (mask != prev_mask)
+		pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
+			mask,
+			mask & POLLOUT ? "POLLOUT" : "",
+			mask & POLLIN ? "POLLIN" : "");
+	prev_mask = mask;
+	);
 	return mask;
 }
 
-static int sync_serial_ioctl(struct file *file,
-		  unsigned int cmd, unsigned long arg)
+static ssize_t __sync_serial_read(struct file *file,
+				  char __user *buf,
+				  size_t count,
+				  loff_t *ppos,
+				  struct timespec *ts)
+{
+	unsigned long flags;
+	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int avail;
+	struct sync_port *port;
+	unsigned char *start;
+	unsigned char *end;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+
+	if (!port->started)
+		sync_serial_start_port(port);
+
+	/* Calculate number of available bytes */
+	/* Save pointers to avoid that they are modified by interrupt */
+	spin_lock_irqsave(&port->lock, flags);
+	start = port->readp;
+	end = port->writep;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while ((start == end) && !port->in_buffer_len) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		wait_event_interruptible(port->in_wait_q,
+					 !(start == end && !port->full));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+		spin_lock_irqsave(&port->lock, flags);
+		start = port->readp;
+		end = port->writep;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n",
+			  dev, count,
+			  start - port->flip, end - port->flip,
+			  port->in_buffer_size));
+
+	/* Lazy read, never return wrapped data. */
+	if (end > start)
+		avail = end - start;
+	else
+		avail = port->flip + port->in_buffer_size - start;
+
+	count = count > avail ? avail : count;
+	if (copy_to_user(buf, start, count))
+		return -EFAULT;
+
+	/* If timestamp requested, find timestamp of first returned byte
+	 * and copy it.
+	 * N.B: Applications that request timestamps MUST read data in
+	 * chunks that are multiples of IN_DESCR_SIZE.
+	 * Otherwise the timestamps will not be aligned to the data read.
+	 */
+	if (ts != NULL) {
+		int idx = port->read_ts_idx;
+		memcpy(ts, &port->timestamp[idx], sizeof(struct timespec));
+		port->read_ts_idx += count / IN_DESCR_SIZE;
+		if (port->read_ts_idx >= NBR_IN_DESCR)
+			port->read_ts_idx = 0;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->readp += count;
+	/* Check for wrap */
+	if (port->readp >= port->flip + port->in_buffer_size)
+		port->readp = port->flip;
+	port->in_buffer_len -= count;
+	port->full = 0;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	DEBUGREAD(pr_info("r %d\n", count));
+
+	return count;
+}
+
+static ssize_t sync_serial_input(struct file *file, unsigned long arg)
+{
+	struct ssp_request req;
+	int count;
+	int ret;
+
+	/* Copy the request structure from user-mode. */
+	ret = copy_from_user(&req, (struct ssp_request __user *)arg,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("sync_serial_input copy from user failed\n"));
+		return -EFAULT;
+	}
+
+	/* To get the timestamps aligned, make sure that 'len'
+	 * is a multiple of IN_DESCR_SIZE.
+	 */
+	if ((req.len % IN_DESCR_SIZE) != 0) {
+		DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n",
+			      req.len, IN_DESCR_SIZE));
+		return -EFAULT;
+	}
+
+	/* Do the actual read. */
+	/* Note that req.buf is actually a pointer to user space. */
+	count = __sync_serial_read(file, req.buf, req.len,
+				   NULL, &req.ts);
+
+	if (count < 0) {
+		DEBUG(pr_info("sync_serial_input read failed\n"));
+		return count;
+	}
+
+	/* Copy the request back to user-mode. */
+	ret = copy_to_user((struct ssp_request __user *)arg, &req,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("syncser input copy2user failed\n"));
+		return -EFAULT;
+	}
+
+	/* Return the number of bytes read. */
+	return count;
+}
+
+
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg)
 {
 	int return_val = 0;
 	int dma_w_size = regk_dma_set_w_size1;
 	int dev = iminor(file_inode(file));
-	sync_port *port;
+	struct sync_port *port;
 	reg_sser_rw_tr_cfg tr_cfg;
 	reg_sser_rw_rec_cfg rec_cfg;
 	reg_sser_rw_frm_cfg frm_cfg;
 	reg_sser_rw_cfg gen_cfg;
 	reg_sser_rw_intr_mask intr_mask;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -1;
 	}
-        port = &ports[dev];
+
+	if (cmd == SSP_INPUT)
+		return sync_serial_input(file, arg);
+
+	port = &ports[dev];
 	spin_lock_irq(&port->lock);
 
 	tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
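
A minimal userspace sketch of the timestamped-read path above (the
struct ssp_request field names match their use in sync_serial_input();
SSP_INPUT_CHUNK_SIZE is the chunk size IN_DESCR_SIZE aliases, and the
file-descriptor setup is assumed):

	#include <sys/ioctl.h>
	#include <err.h>

	static ssize_t read_with_ts(int fd, char *buf, struct timespec *ts)
	{
		struct ssp_request req = {
			.buf = buf,
			/* Must be a multiple of the chunk size, or the
			   timestamps lose alignment with the data. */
			.len = 4 * SSP_INPUT_CHUNK_SIZE,
		};
		int n = ioctl(fd, SSP_INPUT, &req);	/* bytes read */

		if (n < 0)
			err(1, "SSP_INPUT");
		*ts = req.ts;	/* timestamp of the first returned chunk */
		return n;
	}
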
@@ -680,11 +763,9 @@
 	gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg);
 	intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 
-	switch(cmd)
-	{
+	switch (cmd) {
 	case SSP_SPEED:
-		if (GET_SPEED(arg) == CODEC)
-		{
+		if (GET_SPEED(arg) == CODEC) {
 			unsigned int freq;
 
 			gen_cfg.base_freq = regk_sser_f32;
@@ -701,15 +782,25 @@
 			case FREQ_256kHz:
 				gen_cfg.clk_div = 125 *
 					(1 << (freq - FREQ_256kHz)) - 1;
-			break;
+				break;
 			case FREQ_512kHz:
 				gen_cfg.clk_div = 62;
-			break;
+				break;
 			case FREQ_1MHz:
 			case FREQ_2MHz:
 			case FREQ_4MHz:
 				gen_cfg.clk_div = 8 * (1 << freq) - 1;
-			break;
+				break;
+			}
+		} else if (GET_SPEED(arg) == CODEC_f32768) {
+			gen_cfg.base_freq = regk_sser_f32_768;
+			switch (GET_FREQ(arg)) {
+			case FREQ_4096kHz:
+				gen_cfg.clk_div = 7;
+				break;
+			default:
+				spin_unlock_irq(&port->lock);
+				return -EINVAL;
 			}
 		} else {
 			gen_cfg.base_freq = regk_sser_f29_493;
@@ -767,62 +858,64 @@
 
 		break;
 	case SSP_MODE:
-		switch(arg)
-		{
-			case MASTER_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.out_on = regk_sser_tr;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			default:
-				spin_unlock_irq(&port->lock);
-				return -EINVAL;
+		switch (arg) {
+		case MASTER_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.out_on = regk_sser_tr;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		default:
+			spin_unlock_irq(&port->lock);
+			return -EINVAL;
 		}
-		if (!port->use_dma || (arg == MASTER_OUTPUT || arg == SLAVE_OUTPUT))
+		if (!port->use_dma || arg == MASTER_OUTPUT ||
+				arg == SLAVE_OUTPUT)
 			intr_mask.rdav = regk_sser_yes;
 		break;
 	case SSP_FRAME_SYNC:
 		if (arg & NORMAL_SYNC) {
 			frm_cfg.rec_delay = 1;
 			frm_cfg.tr_delay = 1;
-		}
-		else if (arg & EARLY_SYNC)
+		} else if (arg & EARLY_SYNC)
 			frm_cfg.rec_delay = frm_cfg.tr_delay = 0;
-		else if (arg & SECOND_WORD_SYNC) {
+		else if (arg & LATE_SYNC) {
+			frm_cfg.tr_delay = 2;
+			frm_cfg.rec_delay = 2;
+		} else if (arg & SECOND_WORD_SYNC) {
 			frm_cfg.rec_delay = 7;
 			frm_cfg.tr_delay = 1;
 		}
@@ -914,15 +1007,12 @@
 		frm_cfg.type = regk_sser_level;
 		frm_cfg.tr_delay = 1;
 		frm_cfg.level = regk_sser_neg_lo;
-		if (arg & SPI_SLAVE)
-		{
+		if (arg & SPI_SLAVE) {
 			rec_cfg.clk_pol = regk_sser_neg;
 			gen_cfg.clk_dir = regk_sser_in;
 			port->input = 1;
 			port->output = 0;
-		}
-		else
-		{
+		} else {
 			gen_cfg.out_clk_pol = regk_sser_pos;
 			port->input = 0;
 			port->output = 1;
@@ -965,19 +1055,19 @@
 }
 
 static long sync_serial_ioctl(struct file *file,
-                             unsigned int cmd, unsigned long arg)
+		unsigned int cmd, unsigned long arg)
 {
-       long ret;
+	long ret;
 
-       mutex_lock(&sync_serial_mutex);
-       ret = sync_serial_ioctl_unlocked(file, cmd, arg);
-       mutex_unlock(&sync_serial_mutex);
+	mutex_lock(&sync_serial_mutex);
+	ret = sync_serial_ioctl_unlocked(file, cmd, arg);
+	mutex_unlock(&sync_serial_mutex);
 
-       return ret;
+	return ret;
 }
 
 /* NOTE: sync_serial_write does not support concurrency */
-static ssize_t sync_serial_write(struct file *file, const char *buf,
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	int dev = iminor(file_inode(file));
@@ -993,7 +1083,7 @@
 	unsigned char *buf_stop_ptr; /* Last byte + 1 */
 
 	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
-		DEBUG(printk("Invalid minor %d\n", dev));
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
@@ -1006,9 +1096,9 @@
 	 * |_________|___________________|________________________|
 	 *           ^ rd_ptr            ^ wr_ptr
 	 */
-	DEBUGWRITE(printk(KERN_DEBUG "W d%d c %lu a: %p c: %p\n",
-			  port->port_nbr, count, port->active_tr_descr,
-			  port->catch_tr_descr));
+	DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n",
+			   port->port_nbr, count, port->active_tr_descr,
+			   port->catch_tr_descr));
 
 	/* Read variables that may be updated by interrupts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -1020,7 +1110,7 @@
 	if (port->tr_running &&
 	    ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) ||
 	     out_buf_count >= OUT_BUFFER_SIZE)) {
-		DEBUGWRITE(printk(KERN_DEBUG "sser%d full\n", dev));
+		DEBUGWRITE(pr_info("sser%d full\n", dev));
 		return -EAGAIN;
 	}
 
@@ -1043,15 +1133,16 @@
 	if (copy_from_user(wr_ptr, buf, trunc_count))
 		return -EFAULT;
 
-	DEBUGOUTBUF(printk(KERN_DEBUG "%-4d + %-4d = %-4d     %p %p %p\n",
-			   out_buf_count, trunc_count,
-			   port->out_buf_count, port->out_buffer,
-			   wr_ptr, buf_stop_ptr));
+	DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d     %p %p %p\n",
+			    out_buf_count, trunc_count,
+			    port->out_buf_count, port->out_buffer,
+			    wr_ptr, buf_stop_ptr));
 
 	/* Make sure transmitter/receiver is running */
 	if (!port->started) {
 		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
+		reg_sser_rw_rec_cfg rec_cfg =
+			REG_RD(sser, port->regi_sser, rw_rec_cfg);
 		cfg.en = regk_sser_yes;
 		rec_cfg.rec_en = port->input;
 		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
@@ -1068,8 +1159,11 @@
 	spin_lock_irqsave(&port->lock, flags);
 	port->out_buf_count += trunc_count;
 	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
 		start_dma_out(port, wr_ptr, trunc_count);
+#endif
 	} else if (!port->tr_running) {
+#ifdef SYNC_SER_MANUAL
 		reg_sser_rw_intr_mask intr_mask;
 		intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 		/* Start sender by writing data */
@@ -1077,14 +1171,15 @@
 		/* and enable transmitter ready IRQ */
 		intr_mask.trdy = 1;
 		REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
+#endif
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	/* Exit if non blocking */
 	if (file->f_flags & O_NONBLOCK) {
-		DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu  %08x\n",
-				  port->port_nbr, trunc_count,
-				  REG_RD_INT(dma, port->regi_dmaout, r_intr)));
+		DEBUGWRITE(pr_info("w d%d c %u  %08x\n",
+				   port->port_nbr, trunc_count,
+				   REG_RD_INT(dma, port->regi_dmaout, r_intr)));
 		return trunc_count;
 	}
 
@@ -1094,105 +1189,32 @@
 	if (signal_pending(current))
 		return -EINTR;
 
-	DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu\n",
-			  port->port_nbr, trunc_count));
+	DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count));
 	return trunc_count;
 }
 
-static ssize_t sync_serial_read(struct file * file, char * buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	int dev = iminor(file_inode(file));
-	int avail;
-	sync_port *port;
-	unsigned char* start;
-	unsigned char* end;
-	unsigned long flags;
-
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
-		return -ENODEV;
-	}
-	port = &ports[dev];
-
-	DEBUGREAD(printk("R%d c %d ri %lu wi %lu /%lu\n", dev, count, port->readp - port->flip, port->writep - port->flip, port->in_buffer_size));
-
-	if (!port->started)
-	{
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		tr_cfg.tr_en = regk_sser_yes;
-		rec_cfg.rec_en = regk_sser_yes;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
-
-	/* Calculate number of available bytes */
-	/* Save pointers to avoid that they are modified by interrupt */
-	spin_lock_irqsave(&port->lock, flags);
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	spin_unlock_irqrestore(&port->lock, flags);
-	while ((start == end) && !port->full) /* No data */
-	{
-		DEBUGREAD(printk(KERN_DEBUG "&"));
-		if (file->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-
-		wait_event_interruptible(port->in_wait_q,
-					 !(start == end && !port->full));
-		if (signal_pending(current))
-			return -EINTR;
-
-		spin_lock_irqsave(&port->lock, flags);
-		start = (unsigned char*)port->readp; /* cast away volatile */
-		end = (unsigned char*)port->writep;  /* cast away volatile */
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-
-	/* Lazy read, never return wrapped data. */
-	if (port->full)
-		avail = port->in_buffer_size;
-	else if (end > start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-
-	count = count > avail ? avail : count;
-	if (copy_to_user(buf, start, count))
-		return -EFAULT;
-	/* Disable interrupts while updating readp */
-	spin_lock_irqsave(&port->lock, flags);
-	port->readp += count;
-	if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
-		port->readp = port->flip;
-	port->full = 0;
-	spin_unlock_irqrestore(&port->lock, flags);
-	DEBUGREAD(printk("r %d\n", count));
-	return count;
+	return __sync_serial_read(file, buf, count, ppos, NULL);
 }
 
-static void send_word(sync_port* port)
+#ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port)
 {
 	reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
 	reg_sser_rw_tr_data tr_data =  {0};
 
-	switch(tr_cfg.sample_size)
+	switch (tr_cfg.sample_size) {
+	case 8:
+		port->out_buf_count--;
+		tr_data.data = *port->out_rd_ptr++;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 12:
 	{
-	 case 8:
-		 port->out_buf_count--;
-		 tr_data.data = *port->out_rd_ptr++;
-		 REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
-		 if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
-			 port->out_rd_ptr = port->out_buffer;
-		 break;
-	 case 12:
-	 {
 		int data = (*port->out_rd_ptr++) << 8;
 		data |= *port->out_rd_ptr++;
 		port->out_buf_count -= 2;
@@ -1200,8 +1222,8 @@
 		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
 		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
 			port->out_rd_ptr = port->out_buffer;
+		break;
 	}
-	break;
 	case 16:
 		port->out_buf_count -= 2;
 		tr_data.data = *(unsigned short *)port->out_rd_ptr;
@@ -1233,27 +1255,28 @@
 		break;
 	}
 }
+#endif
 
-static void start_dma_out(struct sync_port *port,
-			  const char *data, int count)
+#ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count)
 {
-	port->active_tr_descr->buf = (char *) virt_to_phys((char *) data);
+	port->active_tr_descr->buf = (char *)virt_to_phys((char *)data);
 	port->active_tr_descr->after = port->active_tr_descr->buf + count;
 	port->active_tr_descr->intr = 1;
 
 	port->active_tr_descr->eol = 1;
 	port->prev_tr_descr->eol = 0;
 
-	DEBUGTRDMA(printk(KERN_DEBUG "Inserting eolr:%p eol@:%p\n",
+	DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n",
 		port->prev_tr_descr, port->active_tr_descr));
 	port->prev_tr_descr = port->active_tr_descr;
-	port->active_tr_descr = phys_to_virt((int) port->active_tr_descr->next);
+	port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next);
 
 	if (!port->tr_running) {
 		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser,
 			rw_tr_cfg);
 
-		port->out_context.next = 0;
+		port->out_context.next = NULL;
 		port->out_context.saved_data =
 			(dma_descr_data *)virt_to_phys(port->prev_tr_descr);
 		port->out_context.saved_data_buf = port->prev_tr_descr->buf;
@@ -1263,57 +1286,58 @@
 
 		tr_cfg.tr_en = regk_sser_yes;
 		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma s\n"););
+		DEBUGTRDMA(pr_info("dma s\n"););
 	} else {
 		DMA_CONTINUE_DATA(port->regi_dmaout);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma c\n"););
+		DEBUGTRDMA(pr_info("dma c\n"););
 	}
 
 	port->tr_running = 1;
 }
 
-static void start_dma_in(sync_port *port)
+static void start_dma_in(struct sync_port *port)
 {
 	int i;
 	char *buf;
+	unsigned long flags;
+	spin_lock_irqsave(&port->lock, flags);
 	port->writep = port->flip;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-	if (port->writep > port->flip + port->in_buffer_size) {
-		panic("Offset too large in sync serial driver\n");
-		return;
-	}
-	buf = (char*)virt_to_phys(port->in_buffer);
+	buf = (char *)virt_to_phys(port->in_buffer);
 	for (i = 0; i < NBR_IN_DESCR; i++) {
 		port->in_descr[i].buf = buf;
 		port->in_descr[i].after = buf + port->inbufchunk;
 		port->in_descr[i].intr = 1;
-		port->in_descr[i].next = (dma_descr_data*)virt_to_phys(&port->in_descr[i+1]);
+		port->in_descr[i].next =
+			(dma_descr_data *)virt_to_phys(&port->in_descr[i+1]);
 		port->in_descr[i].buf = buf;
 		buf += port->inbufchunk;
 	}
 	/* Link the last descriptor to the first */
-	port->in_descr[i-1].next = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_descr[i-1].next =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_descr[i-1].eol = regk_sser_yes;
 	port->next_rx_desc = &port->in_descr[0];
 	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1];
-	port->in_context.saved_data = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_context.saved_data =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_context.saved_data_buf = port->in_descr[0].buf;
 	DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context));
 }
 
-#ifdef SYNC_SER_DMA
 static irqreturn_t tr_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
 	reg_dma_rw_stat stat;
 	int i;
 	int found = 0;
 	int stop_sser = 0;
 
 	for (i = 0; i < NBR_PORTS; i++) {
-		sync_port *port = &ports[i];
-		if (!port->enabled  || !port->use_dma)
+		struct sync_port *port = &ports[i];
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		/* IRQ active for the port? */
@@ -1338,19 +1362,20 @@
 			int sent;
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGTXINT(printk(KERN_DEBUG "%-4d - %-4d = %-4d\t"
-					  "in descr %p (ac: %p)\n",
-					  port->out_buf_count, sent,
-					  port->out_buf_count - sent,
-					  port->catch_tr_descr,
-					  port->active_tr_descr););
+			DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t"
+					   "in descr %p (ac: %p)\n",
+					   port->out_buf_count, sent,
+					   port->out_buf_count - sent,
+					   port->catch_tr_descr,
+					   port->active_tr_descr););
 			port->out_buf_count -= sent;
 			port->catch_tr_descr =
 				phys_to_virt((int) port->catch_tr_descr->next);
 			port->out_rd_ptr =
 				phys_to_virt((int) port->catch_tr_descr->buf);
 		} else {
-			int i, sent;
+			reg_sser_rw_tr_cfg tr_cfg;
+			int j, sent;
 			/* EOL handler.
 			 * Note that if an EOL was encountered during the irq
 			 * locked section of sync_ser_write the DMA will be
@@ -1358,11 +1383,11 @@
 			 * The remaining descriptors will be traversed by
 			 * the descriptor interrupts as usual.
 			 */
-			i = 0;
+			j = 0;
 			while (!port->catch_tr_descr->eol) {
 				sent = port->catch_tr_descr->after -
 					port->catch_tr_descr->buf;
-				DEBUGOUTBUF(printk(KERN_DEBUG
+				DEBUGOUTBUF(pr_info(
 					"traversing descr %p -%d (%d)\n",
 					port->catch_tr_descr,
 					sent,
@@ -1370,16 +1395,15 @@
 				port->out_buf_count -= sent;
 				port->catch_tr_descr = phys_to_virt(
 					(int)port->catch_tr_descr->next);
-				i++;
-				if (i >= NBR_OUT_DESCR) {
+				j++;
+				if (j >= NBR_OUT_DESCR) {
 					/* TODO: Reset and recover */
 					panic("sync_serial: missing eol");
 				}
 			}
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGOUTBUF(printk(KERN_DEBUG
-				"eol at descr %p -%d (%d)\n",
+			DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n",
 				port->catch_tr_descr,
 				sent,
 				port->out_buf_count));
@@ -1394,15 +1418,13 @@
 					OUT_BUFFER_SIZE)
 				port->out_rd_ptr = port->out_buffer;
 
-			reg_sser_rw_tr_cfg tr_cfg =
-				REG_RD(sser, port->regi_sser, rw_tr_cfg);
-			DEBUGTXINT(printk(KERN_DEBUG
+			tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+			DEBUGTXINT(pr_info(
 				"tr_int DMA stop %d, set catch @ %p\n",
 				port->out_buf_count,
 				port->active_tr_descr));
 			if (port->out_buf_count != 0)
-				printk(KERN_CRIT "sync_ser: buffer not "
-					"empty after eol.\n");
+				pr_err("sync_ser: buf not empty after eol\n");
 			port->catch_tr_descr = port->active_tr_descr;
 			port->tr_running = 0;
 			tr_cfg.tr_en = regk_sser_no;
@@ -1414,62 +1436,79 @@
 	return IRQ_RETVAL(found);
 } /* tr_interrupt */
 
+static inline void handle_rx_packet(struct sync_port *port)
+{
+	int idx;
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
+	unsigned long flags;
+
+	DEBUGRXINT(pr_info("!"));
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* If we overrun, the user experience is poor regardless of
+	 * whether we drop new or old data. It's much easier to get it
+	 * right when dropping new data, so let's do that.
+	 */
+	if ((port->writep + port->inbufchunk <=
+	     port->flip + port->in_buffer_size) &&
+	    (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) {
+		memcpy(port->writep,
+		       phys_to_virt((unsigned)port->next_rx_desc->buf),
+		       port->inbufchunk);
+		port->writep += port->inbufchunk;
+		if (port->writep >= port->flip + port->in_buffer_size)
+			port->writep = port->flip;
+
+		/* Timestamp the new data chunk. */
+		if (port->write_ts_idx == NBR_IN_DESCR)
+			port->write_ts_idx = 0;
+		idx = port->write_ts_idx++;
+		do_posix_clock_monotonic_gettime(&port->timestamp[idx]);
+		port->in_buffer_len += port->inbufchunk;
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	port->next_rx_desc->eol = 1;
+	port->prev_rx_desc->eol = 0;
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 0);
+	port->prev_rx_desc = port->next_rx_desc;
+	port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 1);
+	/* wake up the waiting process */
+	wake_up_interruptible(&port->in_wait_q);
+	DMA_CONTINUE(port->regi_dmain);
+	REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+}
+
 static irqreturn_t rx_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
 
 	int i;
 	int found = 0;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	DEBUG(pr_info("rx_interrupt\n"));
 
-		if (!port->enabled || !port->use_dma )
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		masked = REG_RD(dma, port->regi_dmain, r_masked_intr);
 
-		if (masked.data) /* Descriptor interrupt */
-		{
-			found = 1;
-			while (REG_RD(dma, port->regi_dmain, rw_data) !=
-			       virt_to_phys(port->next_rx_desc)) {
-				DEBUGRXINT(printk(KERN_DEBUG "!"));
-				if (port->writep + port->inbufchunk > port->flip + port->in_buffer_size) {
-					int first_size = port->flip + port->in_buffer_size - port->writep;
-					memcpy((char*)port->writep, phys_to_virt((unsigned)port->next_rx_desc->buf), first_size);
-					memcpy(port->flip, phys_to_virt((unsigned)port->next_rx_desc->buf+first_size), port->inbufchunk - first_size);
-					port->writep = port->flip + port->inbufchunk - first_size;
-				} else {
-					memcpy((char*)port->writep,
-					       phys_to_virt((unsigned)port->next_rx_desc->buf),
-					       port->inbufchunk);
-					port->writep += port->inbufchunk;
-					if (port->writep >= port->flip + port->in_buffer_size)
-						port->writep = port->flip;
-				}
-                                if (port->writep == port->readp)
-                                {
-				  port->full = 1;
-                                }
+		if (!masked.data)
+			continue;
 
-				port->next_rx_desc->eol = 1;
-				port->prev_rx_desc->eol = 0;
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 0);
-				port->prev_rx_desc = port->next_rx_desc;
-				port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 1);
-				/* wake up the waiting process */
-				wake_up_interruptible(&port->in_wait_q);
-				DMA_CONTINUE(port->regi_dmain);
-				REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
-
-			}
-		}
+		/* Descriptor interrupt */
+		found = 1;
+		while (REG_RD(dma, port->regi_dmain, rw_data) !=
+				virt_to_phys(port->next_rx_desc))
+			handle_rx_packet(port);
 	}
 	return IRQ_RETVAL(found);
 } /* rx_interrupt */
@@ -1478,75 +1517,83 @@
 #ifdef SYNC_SER_MANUAL
 static irqreturn_t manual_interrupt(int irq, void *dev_id)
 {
+	unsigned long flags;
 	int i;
 	int found = 0;
 	reg_sser_r_masked_intr masked;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
 
 		if (!port->enabled || port->use_dma)
-		{
 			continue;
-		}
 
 		masked = REG_RD(sser, port->regi_sser, r_masked_intr);
-		if (masked.rdav)	/* Data received? */
-		{
-			reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-			reg_sser_r_rec_data data = REG_RD(sser, port->regi_sser, r_rec_data);
+		/* Data received? */
+		if (masked.rdav) {
+			reg_sser_rw_rec_cfg rec_cfg =
+				REG_RD(sser, port->regi_sser, rw_rec_cfg);
+			reg_sser_r_rec_data data = REG_RD(sser,
+				port->regi_sser, r_rec_data);
 			found = 1;
 			/* Read data */
-			switch(rec_cfg.sample_size)
-			{
+			spin_lock_irqsave(&port->lock, flags);
+			switch (rec_cfg.sample_size) {
 			case 8:
 				*port->writep++ = data.data & 0xff;
 				break;
 			case 12:
 				*port->writep = (data.data & 0x0ff0) >> 4;
 				*(port->writep + 1) = data.data & 0x0f;
-				port->writep+=2;
+				port->writep += 2;
 				break;
 			case 16:
-				*(unsigned short*)port->writep = data.data;
-				port->writep+=2;
+				*(unsigned short *)port->writep = data.data;
+				port->writep += 2;
 				break;
 			case 24:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=3;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 3;
 				break;
 			case 32:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=4;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 4;
 				break;
 			}
 
-			if (port->writep >= port->flip + port->in_buffer_size) /* Wrap? */
+			/* Wrap? */
+			if (port->writep >= port->flip + port->in_buffer_size)
 				port->writep = port->flip;
 			if (port->writep == port->readp) {
-				/* receive buffer overrun, discard oldest data
-				 */
+				/* Receive buf overrun, discard oldest data */
 				port->readp++;
-				if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
+				/* Wrap? */
+				if (port->readp >= port->flip +
+						port->in_buffer_size)
 					port->readp = port->flip;
 			}
+			spin_unlock_irqrestore(&port->lock, flags);
 			if (sync_data_avail(port) >= port->inbufchunk)
-				wake_up_interruptible(&port->in_wait_q); /* Wake up application */
+				/* Wake up application */
+				wake_up_interruptible(&port->in_wait_q);
 		}
 
-		if (masked.trdy) /* Transmitter ready? */
-		{
+		/* Transmitter ready? */
+		if (masked.trdy) {
 			found = 1;
-			if (port->out_buf_count > 0) /* More data to send */
+			/* More data to send */
+			if (port->out_buf_count > 0)
 				send_word(port);
-			else /* transmission finished */
-			{
+			else {
+				/* Transmission finished */
 				reg_sser_rw_intr_mask intr_mask;
-				intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
+				intr_mask = REG_RD(sser, port->regi_sser,
+					rw_intr_mask);
 				intr_mask.trdy = 0;
-				REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
-				wake_up_interruptible(&port->out_wait_q); /* Wake up application */
+				REG_WR(sser, port->regi_sser,
+					rw_intr_mask, intr_mask);
+				/* Wake up application */
+				wake_up_interruptible(&port->out_wait_q);
 			}
 		}
 	}
@@ -1554,4 +1601,109 @@
 }
 #endif
 
+static int __init etrax_sync_serial_init(void)
+{
+#if 1
+	/* This code will be removed when we move to udev for all devices. */
+	syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0);
+	if (register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME)) {
+		pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR);
+		return -1;
+	}
+#else
+	/* Allocate dynamic major number. */
+	if (alloc_chrdev_region(&syncser_first, 0, minor_count, SYNCSER_NAME)) {
+		pr_err("Failed to allocate character device region\n");
+		return -1;
+	}
+#endif
+	syncser_cdev = cdev_alloc();
+	if (!syncser_cdev) {
+		pr_err("Failed to allocate cdev for syncser\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -1;
+	}
+	cdev_init(syncser_cdev, &syncser_fops);
+
+	/* Create a sysfs class for syncser */
+	syncser_class = class_create(THIS_MODULE, "syncser_class");
+
+	/* Initialize Ports */
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 0\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		class_destroy(syncser_class);
+		return -EIO;
+	}
+	initialize_port(0);
+	ports[0].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first, NULL,
+		      "%s%d", SYNCSER_NAME, 0);
+#endif
+
+#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 1\n");
+		unregister_chrdev_region(syncser_first, minor_count);
+		class_destroy(syncser_class);
+		return -EIO;
+	}
+	initialize_port(1);
+	ports[1].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first + 1, NULL,
+		      "%s%d", SYNCSER_NAME, 1);
+#endif
+
+	/* Add it to system */
+	if (cdev_add(syncser_cdev, syncser_first, minor_count) < 0) {
+		pr_err("Failed to add syncser as char device\n");
+		device_destroy(syncser_class, syncser_first);
+		class_destroy(syncser_class);
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+		return -1;
+	}
+
+	pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n",
+		SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first));
+
+	return 0;
+}
+
+static void __exit etrax_sync_serial_exit(void)
+{
+	int i;
+
+	device_destroy(syncser_class, syncser_first);
+	class_destroy(syncser_class);
+
+	if (syncser_cdev) {
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+	}
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+		if (port->init_irqs == dma_irq_setup) {
+			/* Free dma irqs and dma channels. */
+#ifdef SYNC_SER_DMA
+			artpec_free_dma(port->dma_in_nbr);
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+#endif
+		} else if (port->init_irqs == manual_irq_setup) {
+			/* Free manual irq. */
+			free_irq(port->syncser_intr_vect, port);
+		}
+	}
+
+	pr_info("ARTPEC synchronous serial port unregistered\n");
+}
+
 module_init(etrax_sync_serial_init);
+module_exit(etrax_sync_serial_exit);
+
+MODULE_LICENSE("GPL");
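
The receive path above (handle_rx_packet) enforces a strict drop-new policy: a
chunk that would cross the end of the flip buffer, or would overfill it, is
discarded instead of being split or allowed to clobber unread data. A minimal
sketch of the same policy in isolation (the ring type and names are
illustrative, not the driver's own):

	#include <stddef.h>
	#include <string.h>

	struct ring {
		char *base;	/* start of the circular buffer */
		char *writep;	/* next write position */
		size_t size;	/* total capacity in bytes */
		size_t len;	/* bytes currently buffered */
	};

	/* Returns 0 if the chunk was stored, -1 if it was dropped. */
	static int ring_put_chunk(struct ring *r, const char *src, size_t chunk)
	{
		/* Drop new data on overrun; never split a chunk at the end. */
		if (r->writep + chunk > r->base + r->size ||
		    r->len + chunk >= r->size)
			return -1;

		memcpy(r->writep, src, chunk);
		r->writep += chunk;
		if (r->writep >= r->base + r->size)
			r->writep = r->base;	/* wrap to the start */
		r->len += chunk;
		return 0;
	}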
diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c
index 610909b..02e33eb 100644
--- a/arch/cris/arch-v32/kernel/debugport.c
+++ b/arch/cris/arch-v32/kernel/debugport.c
@@ -3,7 +3,9 @@
  */
 
 #include <linux/console.h>
+#include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/string.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/reg_map.h>
 #include <hwregs/ser_defs.h>
@@ -65,6 +67,7 @@
   },
 #endif
 };
+
 static struct dbg_port *port =
 #if defined(CONFIG_ETRAX_DEBUG_PORT0)
 	&ports[0];
@@ -97,14 +100,19 @@
 #endif
 #endif
 
-static void
-start_port(struct dbg_port* p)
+static void start_port(struct dbg_port *p)
 {
-	if (!p)
+	/* Set up serial port registers */
+	reg_ser_rw_tr_ctrl tr_ctrl = {0};
+	reg_ser_rw_tr_dma_en tr_dma_en = {0};
+
+	reg_ser_rw_rec_ctrl rec_ctrl = {0};
+	reg_ser_rw_tr_baud_div tr_baud_div = {0};
+	reg_ser_rw_rec_baud_div rec_baud_div = {0};
+
+	if (!p || p->started)
 		return;
 
-	if (p->started)
-		return;
 	p->started = 1;
 
 	if (p->nbr == 1)
@@ -118,36 +126,24 @@
 		crisv32_pinmux_alloc_fixed(pinmux_ser4);
 #endif
 
-	/* Set up serial port registers */
-	reg_ser_rw_tr_ctrl tr_ctrl = {0};
-	reg_ser_rw_tr_dma_en tr_dma_en = {0};
-
-	reg_ser_rw_rec_ctrl rec_ctrl = {0};
-	reg_ser_rw_tr_baud_div tr_baud_div = {0};
-	reg_ser_rw_rec_baud_div rec_baud_div = {0};
-
 	tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493;
 	tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no;
 	tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8;
 	tr_ctrl.en = rec_ctrl.en = 1;
 
-	if (p->parity == 'O')
-	{
+	if (p->parity == 'O') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_odd;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
-	}
-	else if (p->parity == 'E')
-	{
+	} else if (p->parity == 'E') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_even;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
 	}
 
-	if (p->bits == 7)
-	{
+	if (p->bits == 7) {
 		tr_ctrl.data_bits = regk_ser_bits7;
 		rec_ctrl.data_bits = regk_ser_bits7;
 	}
@@ -161,8 +157,7 @@
 
 #ifdef CONFIG_ETRAX_KGDB
 /* Use polling to get a single character from the kernel debug port */
-int
-getDebugChar(void)
+int getDebugChar(void)
 {
 	reg_ser_rs_stat_din stat;
 	reg_ser_rw_ack_intr ack_intr = { 0 };
@@ -179,8 +174,7 @@
 }
 
 /* Use polling to put a single character to the kernel debug port */
-void
-putDebugChar(int val)
+void putDebugChar(int val)
 {
 	reg_ser_r_stat_din stat;
 	do {
@@ -190,12 +184,48 @@
 }
 #endif /* CONFIG_ETRAX_KGDB */
 
+static void __init early_putch(int c)
+{
+	reg_ser_r_stat_din stat;
+	/* Wait until transmitter is ready and send. */
+	do
+		stat = REG_RD(ser, port->instance, r_stat_din);
+	while (!stat.tr_rdy);
+	REG_WR_INT(ser, port->instance, rw_dout, c);
+}
+
+static void __init
+early_console_write(struct console *con, const char *s, unsigned n)
+{
+	extern void reset_watchdog(void);
+	int i;
+
+	/* Send data. */
+	for (i = 0; i < n; i++) {
+		/* TODO: the '\n' -> '\n\r' translation should be done at the
+		   receiver. Remove it when the serial driver removes it.   */
+		if (s[i] == '\n')
+			early_putch('\r');
+		early_putch(s[i]);
+		reset_watchdog();
+	}
+}
+
+static struct console early_console_dev __initdata = {
+	.name   = "early",
+	.write  = early_console_write,
+	.flags  = CON_PRINTBUFFER | CON_BOOT,
+	.index  = -1
+};
+
 /* Register console for printk's, etc. */
-int __init
-init_etrax_debug(void)
+int __init init_etrax_debug(void)
 {
         start_port(port);
 
+	/* Register an early console if a debug port was chosen.  */
+	register_console(&early_console_dev);
+
 #ifdef CONFIG_ETRAX_KGDB
 	start_port(kgdb_port);
 #endif /* CONFIG_ETRAX_KGDB */
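
The debugport change above registers a polled boot console. Consoles flagged
CON_BOOT are unregistered automatically by the console core once a real
console driver registers, so the busy-wait transmit path only ever carries
early boot output. A sketch of the pattern, with hypothetical my_early_*
names:

	#include <linux/console.h>
	#include <linux/init.h>

	/* Hypothetical polled transmit; a real one spins on a
	 * "transmitter ready" status bit, as early_putch() does. */
	static void __init my_early_putc(char c)
	{
		/* poll the status register, then write c to the data port */
	}

	static void __init my_early_write(struct console *con,
					  const char *s, unsigned int n)
	{
		unsigned int i;

		for (i = 0; i < n; i++) {
			if (s[i] == '\n')	/* LF -> CRLF for raw UARTs */
				my_early_putc('\r');
			my_early_putc(s[i]);
		}
	}

	static struct console my_early_console __initdata = {
		.name	= "early",
		.write	= my_early_write,
		.flags	= CON_PRINTBUFFER | CON_BOOT,
		.index	= -1,
	};

	/* In early arch setup: register_console(&my_early_console); */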
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index ee66866..eb74dab 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/cpufreq.h>
+#include <linux/mm.h>
 #include <asm/types.h>
 #include <asm/signal.h>
 #include <asm/io.h>
@@ -56,7 +57,6 @@
 }
 arch_initcall(etrax_init_cont_rotime);
 
-
 unsigned long timer_regs[NR_CPUS] =
 {
 	regi_timer0,
@@ -68,9 +68,8 @@
 extern int set_rtc_mmss(unsigned long nowtime);
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data);
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data);
 
 static struct notifier_block cris_time_freq_notifier_block = {
 	.notifier_call = cris_time_freq_notifier,
@@ -87,7 +86,6 @@
 	return ns;
 }
 
-
 /* From timer MDS describing the hardware watchdog:
  * 4.3.1 Watchdog Operation
  * The watchdog timer is an 8-bit timer with a configurable start value.
@@ -109,11 +107,18 @@
  * is used though, so set this really low. */
 #define WATCHDOG_MIN_FREE_PAGES 8
 
+/* for reliable NICE_DOGGY behaviour */
+static int bite_in_progress;
+
 void reset_watchdog(void)
 {
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	reg_timer_rw_wd_ctrl wd_ctrl = { 0 };
 
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+	if (unlikely(bite_in_progress))
+		return;
+#endif
 	/* Only keep watchdog happy as long as we have memory left! */
 	if(nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) {
 		/* Reset the watchdog with the inverse of the old key */
@@ -148,7 +153,9 @@
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	extern int cause_of_death;
 
+	nmi_enter();
 	oops_in_progress = 1;
+	bite_in_progress = 1;
 	printk(KERN_WARNING "Watchdog bite\n");
 
 	/* Check if forced restart or unexpected watchdog */
@@ -170,6 +177,7 @@
 	printk(KERN_WARNING "Oops: bitten by watchdog\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	printk("\n"); /* Flush mtdoops.  */
 #ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
 #endif
@@ -202,7 +210,7 @@
 	/* Reset watchdog otherwise it resets us! */
 	reset_watchdog();
 
-        /* Update statistics. */
+	/* Update statistics. */
 	update_process_times(user_mode(regs));
 
 	cris_do_profile(regs); /* Save profiling information */
@@ -213,7 +221,7 @@
 
 	/* Call the real timer interrupt handler */
 	xtime_update(1);
-        return IRQ_HANDLED;
+	return IRQ_HANDLED;
 }
 
 /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
@@ -293,14 +301,13 @@
 
 #ifdef CONFIG_CPU_FREQ
 	cpufreq_register_notifier(&cris_time_freq_notifier_block,
-		CPUFREQ_TRANSITION_NOTIFIER);
+				  CPUFREQ_TRANSITION_NOTIFIER);
 #endif
 }
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data)
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct cpufreq_freqs *freqs = data;
 	if (val == CPUFREQ_POSTCHANGE) {
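
The watchdog code in time.c depends on the keyed-reset scheme referenced in
the surrounding comments: every reset must write the bitwise inverse of the
previously written key, so runaway code that merely rewrites a constant value
cannot keep a wedged system alive. A sketch of the idea (the 7-bit key width
is an assumption for illustration, not taken from the timer manual):

	#define WD_KEY_MASK	0x7f	/* assumed key field width */

	static unsigned int wd_key = 0x3f;	/* arbitrary initial key */

	static void keep_watchdog_alive(void)
	{
		/* Present the inverse of the old key, as the hardware
		 * requires, then write it to the watchdog control
		 * register along with the enable bits. */
		wd_key = ~wd_key & WD_KEY_MASK;
	}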
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
index 0b5b70d..f0f335d 100644
--- a/arch/cris/arch-v32/lib/usercopy.c
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -26,8 +26,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -155,13 +154,13 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
-
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -321,11 +320,10 @@
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -468,3 +466,4 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);
diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c
index 38f29ee..05a0470 100644
--- a/arch/cris/arch-v32/mach-fs/pinmux.c
+++ b/arch/cris/arch-v32/mach-fs/pinmux.c
@@ -26,44 +26,15 @@
 
 static void crisv32_pinmux_set(int port);
 
-int crisv32_pinmux_init(void)
-{
-	static int initialized;
-
-	if (!initialized) {
-		reg_pinmux_rw_pa pa = REG_RD(pinmux, regi_pinmux, rw_pa);
-		initialized = 1;
-		REG_WR_INT(pinmux, regi_pinmux, rw_hwprot, 0);
-		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
-		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
-		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
-		crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
-	}
-
-	return 0;
-}
-
-int
-crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
+static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+				 enum pin_mode mode)
 {
 	int i;
-	unsigned long flags;
-
-	crisv32_pinmux_init();
-
-	if (port > PORTS || port < 0)
-		return -EINVAL;
-
-	spin_lock_irqsave(&pinmux_lock, flags);
 
 	for (i = first_pin; i <= last_pin; i++) {
 		if ((pins[port][i] != pinmux_none)
 		    && (pins[port][i] != pinmux_gpio)
 		    && (pins[port][i] != mode)) {
-			spin_unlock_irqrestore(&pinmux_lock, flags);
 #ifdef DEBUG
 			panic("Pinmux alloc failed!\n");
 #endif
@@ -75,10 +46,46 @@
 		pins[port][i] = mode;
 
 	crisv32_pinmux_set(port);
+
+	return 0;
+}
+
+static int crisv32_pinmux_init(void)
+{
+	static int initialized;
+
+	if (!initialized) {
+		reg_pinmux_rw_pa pa = REG_RD(pinmux, regi_pinmux, rw_pa);
+		initialized = 1;
+		REG_WR_INT(pinmux, regi_pinmux, rw_hwprot, 0);
+		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
+		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
+		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
+		__crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
+	}
+
+	return 0;
+}
+
+int crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+			 enum pin_mode mode)
+{
+	unsigned long flags;
+	int ret;
+
+	crisv32_pinmux_init();
+
+	if (port > PORTS || port < 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode);
 
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
-	return 0;
+	return ret;
 }
 
 int crisv32_pinmux_alloc_fixed(enum fixed_function function)
@@ -98,58 +105,58 @@
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
 		hwprot.ser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
 		hwprot.ser2 = regk_pinmux_yes;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
 		hwprot.ser3 = regk_pinmux_yes;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.sser0 = regk_pinmux_yes;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
 		hwprot.sser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
 		hwprot.ata0 = regk_pinmux_yes;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
 		hwprot.ata1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
 		hwprot.ata = regk_pinmux_yes;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
 		hwprot.eth1 = regk_pinmux_yes;
 		hwprot.eth1_mgm = regk_pinmux_yes;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.timer = regk_pinmux_yes;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -188,9 +195,19 @@
 #endif
 }
 
-int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
 {
 	int i;
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = pinmux_none;
+
+	crisv32_pinmux_set(port);
+	return 0;
+}
+
+int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
 	unsigned long flags;
 
 	crisv32_pinmux_init();
@@ -199,11 +216,7 @@
 		return -EINVAL;
 
 	spin_lock_irqsave(&pinmux_lock, flags);
-
-	for (i = first_pin; i <= last_pin; i++)
-		pins[port][i] = pinmux_none;
-
-	crisv32_pinmux_set(port);
+	__crisv32_pinmux_dealloc(port, first_pin, last_pin);
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
 	return 0;
@@ -226,58 +239,58 @@
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_dealloc(PORT_C, 4, 7);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7);
 		hwprot.ser1 = regk_pinmux_no;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 11);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11);
 		hwprot.ser2 = regk_pinmux_no;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 12, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15);
 		hwprot.ser3 = regk_pinmux_no;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_dealloc(PORT_C, 0, 3);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.sser0 = regk_pinmux_no;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
 		hwprot.sser1 = regk_pinmux_no;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_dealloc(PORT_D, 5, 7);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 15, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17);
 		hwprot.ata0 = regk_pinmux_no;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 17, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17);
 		hwprot.ata1 = regk_pinmux_no;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 11, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 3, 3);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 10);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 0, 2);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_dealloc(PORT_B, 0, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 8, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15);
 		hwprot.ata = regk_pinmux_no;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_dealloc(PORT_E, 0, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17);
 		hwprot.eth1 = regk_pinmux_no;
 		hwprot.eth1_mgm = regk_pinmux_no;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.timer = regk_pinmux_no;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -293,7 +306,8 @@
 	return ret;
 }
 
-void crisv32_pinmux_dump(void)
+#ifdef DEBUG
+static void crisv32_pinmux_dump(void)
 {
 	int i, j;
 
@@ -305,5 +319,5 @@
 			printk(KERN_DEBUG "  Pin %d = %d\n", j, pins[i][j]);
 	}
 }
-
+#endif
 __initcall(crisv32_pinmux_init);
diff --git a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
index c2b3036..09bf0c9 100644
--- a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
+++ b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
@@ -28,11 +28,9 @@
   pinmux_timer
 };
 
-int crisv32_pinmux_init(void);
 int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode);
 int crisv32_pinmux_alloc_fixed(enum fixed_function function);
 int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin);
 int crisv32_pinmux_dealloc_fixed(enum fixed_function function);
-void crisv32_pinmux_dump(void);
 
 #endif
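
The pinmux rework above follows the common locked-wrapper convention: each
public entry point takes pinmux_lock and calls a __-prefixed helper that
assumes the lock is held, so composite operations such as
crisv32_pinmux_alloc_fixed() can reuse the helpers without re-acquiring the
lock. A generic sketch of the pattern (the res_* names are illustrative):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(res_lock);

	/* Caller must hold res_lock. */
	static int __res_claim(int id)
	{
		/* ... update shared state for one resource ... */
		return 0;
	}

	int res_claim(int id)
	{
		unsigned long flags;
		int ret;

		spin_lock_irqsave(&res_lock, flags);
		ret = __res_claim(id);
		spin_unlock_irqrestore(&res_lock, flags);
		return ret;
	}

	/* Composite operation: take the lock once and use the helper
	 * twice, with no risk of nested acquisition. */
	int res_claim_pair(int a, int b)
	{
		unsigned long flags;
		int ret;

		spin_lock_irqsave(&res_lock, flags);
		ret = __res_claim(a);
		if (!ret)
			ret = __res_claim(b);
		spin_unlock_irqrestore(&res_lock, flags);
		return ret;
	}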
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index d5f1248..889f2de 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,8 +1,4 @@
 
-header-y += arch-v10/
-header-y += arch-v32/
-
-
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += cputime.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 7d47b36..01f66b8 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,8 +1,8 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += arch-v10/
-header-y += arch-v32/
+header-y += ../arch-v10/arch/
+header-y += ../arch-v32/arch/
 header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += byteorder.h
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 5868cee..3908b94 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -47,16 +47,16 @@
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 
-/* Userspace access functions */
-EXPORT_SYMBOL(__copy_user_zeroing);
-EXPORT_SYMBOL(__copy_user);
-
 #undef memcpy
 #undef memset
 extern void * memset(void *, int, __kernel_size_t);
 extern void * memcpy(void *, const void *, __kernel_size_t);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
+#ifdef CONFIG_ETRAX_ARCH_V32
+#undef strcmp
+EXPORT_SYMBOL(strcmp);
+#endif
 
 #ifdef CONFIG_ETRAX_FAST_TIMER
 /* Fast timer functions */
@@ -66,3 +66,4 @@
 EXPORT_SYMBOL(schedule_usleep);
 #endif
 EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_from_user);
diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index 0ffda73..da4c724 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c
@@ -14,6 +14,10 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/utsname.h>
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#endif
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -34,25 +38,24 @@
 
 void (*nmi_handler)(struct pt_regs *);
 
-void
-show_trace(unsigned long *stack)
+void show_trace(unsigned long *stack)
 {
 	unsigned long addr, module_start, module_end;
 	extern char _stext, _etext;
 	int i;
 
-	printk("\nCall Trace: ");
+	pr_err("\nCall Trace: ");
 
 	i = 1;
 	module_start = VMALLOC_START;
 	module_end = VMALLOC_END;
 
-	while (((long)stack & (THREAD_SIZE-1)) != 0) {
+	while (((long)stack & (THREAD_SIZE - 1)) != 0) {
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
@@ -68,10 +71,14 @@
 		if (((addr >= (unsigned long)&_stext) &&
 		     (addr <= (unsigned long)&_etext)) ||
 		    ((addr >= module_start) && (addr <= module_end))) {
+#ifdef CONFIG_KALLSYMS
+			print_ip_sym(addr);
+#else
 			if (i && ((i % 8) == 0))
-				printk("\n       ");
-			printk("[<%08lx>] ", addr);
+				pr_err("\n       ");
+			pr_err("[<%08lx>] ", addr);
 			i++;
+#endif
 		}
 	}
 }
@@ -111,21 +118,21 @@
 
 	stack = sp;
 
-	printk("\nStack from %08lx:\n       ", (unsigned long)stack);
+	pr_err("\nStack from %08lx:\n       ", (unsigned long)stack);
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (((long)stack & (THREAD_SIZE-1)) == 0)
 			break;
 		if (i && ((i % 8) == 0))
-			printk("\n       ");
+			pr_err("\n       ");
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
-		printk("%08lx ", addr);
+		pr_err("%08lx ", addr);
 	}
 	show_trace(sp);
 }
@@ -139,33 +146,32 @@
 	unsigned long *sp = (unsigned long *)rdusp();
 	int i;
 
-	printk("Stack dump [0x%08lx]:\n", (unsigned long)sp);
+	pr_err("Stack dump [0x%08lx]:\n", (unsigned long)sp);
 	for (i = 0; i < 16; i++)
-		printk("sp + %d: 0x%08lx\n", i*4, sp[i]);
+		pr_err("sp + %d: 0x%08lx\n", i*4, sp[i]);
 	return 0;
 }
 #endif
 
-void
-set_nmi_handler(void (*handler)(struct pt_regs *))
+void set_nmi_handler(void (*handler)(struct pt_regs *))
 {
 	nmi_handler = handler;
 	arch_enable_nmi();
 }
 
 #ifdef CONFIG_DEBUG_NMI_OOPS
-void
-oops_nmi_handler(struct pt_regs *regs)
+void oops_nmi_handler(struct pt_regs *regs)
 {
 	stop_watchdog();
 	oops_in_progress = 1;
-	printk("NMI!\n");
+	pr_err("NMI!\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	oops_exit();
+	pr_err("\n"); /* Flush mtdoops.  */
 }
 
-static int __init
-oops_nmi_register(void)
+static int __init oops_nmi_register(void)
 {
 	set_nmi_handler(oops_nmi_handler);
 	return 0;
@@ -180,8 +186,7 @@
  * similar to an Oops dump, and if the kernel is configured to be a nice
  * doggy, then halt instead of reboot.
  */
-void
-watchdog_bite_hook(struct pt_regs *regs)
+void watchdog_bite_hook(struct pt_regs *regs)
 {
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	local_irq_disable();
@@ -196,8 +201,7 @@
 }
 
 /* This is normally the Oops function. */
-void
-die_if_kernel(const char *str, struct pt_regs *regs, long err)
+void die_if_kernel(const char *str, struct pt_regs *regs, long err)
 {
 	if (user_mode(regs))
 		return;
@@ -211,13 +215,17 @@
 	stop_watchdog();
 #endif
 
+	oops_enter();
 	handle_BUG(regs);
 
-	printk("%s: %04lx\n", str, err & 0xffff);
+	pr_err("Linux %s %s\n", utsname()->release, utsname()->version);
+	pr_err("%s: %04lx\n", str, err & 0xffff);
 
 	show_registers(regs);
 
+	oops_exit();
 	oops_in_progress = 0;
+	pr_err("\n"); /* Flush mtdoops.  */
 
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
@@ -225,8 +233,7 @@
 	do_exit(SIGSEGV);
 }
 
-void __init
-trap_init(void)
+void __init trap_init(void)
 {
 	/* Nothing needs to be done */
 }
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index c81af5b..1e7fd45 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -11,13 +11,15 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <linux/kcore.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
 
 unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 
-void __init
-mem_init(void)
+void __init mem_init(void)
 {
 	BUG_ON(!mem_map);
 
@@ -31,10 +33,36 @@
 	mem_init_print_info(NULL);
 }
 
-/* free the pages occupied by initialization code */
+/* Free a range of init pages. Virtual addresses. */
 
-void 
-free_initmem(void)
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+
+	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+}
+
+/* Free the pages occupied by initialization code. */
+
+void free_initmem(void)
 {
 	free_initmem_default(-1);
 }
+
+/* Free the pages occupied by initrd code. */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_init_pages("initrd memory", start, end);
+}
+#endif
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index f9ca44b..80fdb99 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -76,10 +76,11 @@
  * Must be freed with iounmap.
  */
 
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
         return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0);
 }
+EXPORT_SYMBOL(ioremap_nocache);
 
 void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h
index 2635117..69952c1 100644
--- a/arch/hexagon/include/asm/cache.h
+++ b/arch/hexagon/include/asm/cache.h
@@ -1,7 +1,7 @@
 /*
  * Cache definitions for the Hexagon architecture
  *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2011,2014 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,8 @@
 #define L1_CACHE_SHIFT		(5)
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+
 #define __cacheline_aligned	__aligned(L1_CACHE_BYTES)
 #define ____cacheline_aligned	__aligned(L1_CACHE_BYTES)
 
diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
index 49e0896..b86f9f3 100644
--- a/arch/hexagon/include/asm/cacheflush.h
+++ b/arch/hexagon/include/asm/cacheflush.h
@@ -21,10 +21,7 @@
 #ifndef _ASM_CACHEFLUSH_H
 #define _ASM_CACHEFLUSH_H
 
-#include <linux/cache.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-#include <asm-generic/cacheflush.h>
+#include <linux/mm_types.h>
 
 /* Cache flushing:
  *
@@ -41,6 +38,20 @@
 #define LINESIZE	32
 #define LINEBITS	5
 
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
 /*
  * Flush Dcache range through current map.
  */
@@ -49,7 +60,6 @@
 /*
  * Flush Icache range through current map.
  */
-#undef flush_icache_range
 extern void flush_icache_range(unsigned long start, unsigned long end);
 
 /*
@@ -79,19 +89,11 @@
 	/*  generic_ptrace_pokedata doesn't wind up here, does it?  */
 }
 
-#undef copy_to_user_page
-static inline void copy_to_user_page(struct vm_area_struct *vma,
-					     struct page *page,
-					     unsigned long vaddr,
-					     void *dst, void *src, int len)
-{
-	memcpy(dst, src, len);
-	if (vma->vm_flags & VM_EXEC) {
-		flush_icache_range((unsigned long) dst,
-		(unsigned long) dst + len);
-	}
-}
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len);
 
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
 
 extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end);
 extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end);
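
The flush_* no-ops added above use the usual do { } while (0) form rather
than an empty definition: the expansion is a single statement, so it still
requires a trailing semicolon and nests safely in if/else chains. For
example:

	struct mm_struct;

	#define flush_cache_mm(mm)	do { } while (0)

	static void maybe_flush(struct mm_struct *mm, int dirty, int *skipped)
	{
		if (dirty)
			flush_cache_mm(mm);	/* one well-formed statement */
		else
			(*skipped)++;
	}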
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 7029899..66f5e9a 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -24,14 +24,9 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/vmalloc.h>
-#include <asm/string.h>
-#include <asm/mem-layout.h>
 #include <asm/iomap.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 
 /*
  * We don't have PCI yet.
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index 0e7c1db..6981949 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/bootmem.h>
 #include <linux/mmzone.h>
 #include <linux/mm.h>
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 7858663..110dab1 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -1,7 +1,7 @@
 /*
  * Kernel traps/events for Hexagon processor
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -423,7 +423,7 @@
 			 */
 			info.si_code = TRAP_BRKPT;
 			info.si_addr = (void __user *) pt_elr(regs);
-			send_sig_info(SIGTRAP, &info, current);
+			force_sig_info(SIGTRAP, &info, current);
 		} else {
 #ifdef CONFIG_KGDB
 			kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 44d8c47..5f268c1 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -1,7 +1,7 @@
 /*
  * Linker script for Hexagon kernel
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -59,7 +59,7 @@
 	INIT_DATA_SECTION(PAGE_SIZE)
 
 	_sdata = .;
-		RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE)
+		RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE)
 		RO_DATA_SECTION(PAGE_SIZE)
 	_edata = .;
 
diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c
index 0c76c80..a7c6d82 100644
--- a/arch/hexagon/mm/cache.c
+++ b/arch/hexagon/mm/cache.c
@@ -127,3 +127,13 @@
 	local_irq_restore(flags);
 	mb();
 }
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC)
+		flush_icache_range((unsigned long)dst,
+				   (unsigned long)dst + len);
+}
diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
index 5905fd5..d27d672 100644
--- a/arch/hexagon/mm/ioremap.c
+++ b/arch/hexagon/mm/ioremap.c
@@ -20,6 +20,7 @@
 
 #include <linux/io.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 14aa1c5..0ec484d 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -35,8 +35,8 @@
 
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
- * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
- * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly
  * more efficient.
  */
 #define __ia64_per_cpu_var(var) (*({					\
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 0000000..d2f99ca
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING              0
+#define PNV_THREAD_NAP                  1
+#define PNV_THREAD_SLEEP                2
+#define PNV_THREAD_WINKLE               3
+#define PNV_CORE_IDLE_LOCK_BIT          0x100
+#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif
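
The header packs the per-core idle bookkeeping into one 32-bit word: one bit
per hardware thread in the low byte plus a lock bit at 0x100. The
lwarx/stwcx. loop added to idle_power7.S later in this diff clears the
calling thread's bit and tests whether any sibling is still awake; a C
rendering of that update, using GCC atomic builtins purely for illustration:

	#include <stdint.h>

	#define PNV_CORE_IDLE_THREAD_BITS	0x0FF

	/* Clear this thread's bit in the core idle word; returns the
	 * remaining thread bits (0 means we are the last thread idling). */
	static uint32_t core_idle_enter(uint32_t *state, uint8_t thread_mask)
	{
		uint32_t oldv = __atomic_load_n(state, __ATOMIC_RELAXED);
		uint32_t newv;

		do {
			newv = oldv & ~(uint32_t)thread_mask;
		} while (!__atomic_compare_exchange_n(state, &oldv, newv, 0,
						      __ATOMIC_ACQ_REL,
						      __ATOMIC_RELAXED));

		return newv & PNV_CORE_IDLE_THREAD_BITS;
	}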
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5cd8d2f..eb95b67 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -56,6 +56,14 @@
 #define OPAL_HARDWARE_FROZEN	-13
 #define OPAL_WRONG_STATE	-14
 #define OPAL_ASYNC_COMPLETION	-15
+#define OPAL_I2C_TIMEOUT	-17
+#define OPAL_I2C_INVALID_CMD	-18
+#define OPAL_I2C_LBUS_PARITY	-19
+#define OPAL_I2C_BKEND_OVERRUN	-20
+#define OPAL_I2C_BKEND_ACCESS	-21
+#define OPAL_I2C_ARBT_LOST	-22
+#define OPAL_I2C_NACK_RCVD	-23
+#define OPAL_I2C_STOP_ERR	-24
 
 /* API Tokens (in r0) */
 #define OPAL_INVALID_CALL			-1
@@ -152,12 +160,25 @@
 #define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
+#define OPAL_SLW_SET_REG			100
 #define OPAL_REGISTER_DUMP_REGION		101
 #define OPAL_UNREGISTER_DUMP_REGION		102
 #define OPAL_WRITE_TPO				103
 #define OPAL_READ_TPO				104
 #define OPAL_IPMI_SEND				107
 #define OPAL_IPMI_RECV				108
+#define OPAL_I2C_REQUEST			109
+
+/* Device tree flags */
+
+/*
+ * Flags set in power-mgmt nodes of the device tree if the
+ * respective idle states are supported by the platform.
+ */
+#define OPAL_PM_NAP_ENABLED	0x00010000
+#define OPAL_PM_SLEEP_ENABLED	0x00020000
+#define OPAL_PM_WINKLE_ENABLED	0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000
 
 #ifndef __ASSEMBLY__
 
@@ -712,6 +733,24 @@
 	uint64_t 	line_len;
 } oppanel_line_t;
 
+/* OPAL I2C request */
+struct opal_i2c_request {
+	uint8_t	type;
+#define OPAL_I2C_RAW_READ	0
+#define OPAL_I2C_RAW_WRITE	1
+#define OPAL_I2C_SM_READ	2
+#define OPAL_I2C_SM_WRITE	3
+	uint8_t flags;
+#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
+	uint8_t	subaddr_sz;		/* Max 4 */
+	uint8_t reserved;
+	__be16 addr;			/* 7 or 10 bit address */
+	__be16 reserved2;
+	__be32 subaddr;		/* Sub-address if any */
+	__be32 size;			/* Data size */
+	__be64 buffer_ra;		/* Buffer real address */
+};
+
 /* /sys/firmware/opal */
 extern struct kobject *opal_kobj;
 
@@ -876,11 +915,14 @@
 int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+			 struct opal_i2c_request *oreq);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
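
The new opal_i2c_request structure is handed to firmware by real address,
with every multi-byte field in big-endian. A hypothetical sketch of filling
it for a one-byte sub-addressed read, based only on the layout above (the
0x50 device address and the token handling are illustrative):

	static int64_t example_opal_i2c_read(uint32_t bus_id, uint64_t token,
					     void *buf, uint32_t len)
	{
		struct opal_i2c_request req = {
			.type       = OPAL_I2C_SM_READ,
			.subaddr_sz = 1,		 /* one sub-address byte */
			.addr       = cpu_to_be16(0x50), /* example 7-bit address */
			.subaddr    = cpu_to_be32(0),
			.size       = cpu_to_be32(len),
			.buffer_ra  = cpu_to_be64(__pa(buf)), /* real address */
		};

		return opal_i2c_request(token, bus_id, &req);
	}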
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386c..e5f22c6 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,16 @@
 	u64 tm_scratch;                 /* TM scratch area for reclaim */
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	/* Per-core word tracking idle threads and a lock bit: [L][TTTTTTTT] */
+	u32 *core_idle_state_ptr;
+	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
+	/* Mask to indicate thread id in core */
+	u8 thread_mask;
+	/* Mask to denote subcore sibling threads */
+	u8 subcore_sibling_mask;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Exclusive emergency stack pointer for machine check exception. */
 	void *mc_emergency_sp;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a52877..03cd858 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -194,6 +194,7 @@
 
 #define PPC_INST_NAP			0x4c000364
 #define PPC_INST_SLEEP			0x4c0003a4
+#define PPC_INST_WINKLE			0x4c0003e4
 
 /* A2 specific instructions */
 #define PPC_INST_ERATWE			0x7c0001a6
@@ -375,6 +376,7 @@
 
 #define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
 #define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+#define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
 
 /* BHRB instructions */
 #define PPC_CLRBHRB		stringify_in_c(.long PPC_INST_CLRBHRB)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798..bf117d8 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -452,7 +452,8 @@
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
+extern unsigned long power7_winkle(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c998279..1c874fb 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,8 +118,10 @@
 #define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
 #ifdef __BIG_ENDIAN__
 #define MSR_		__MSR
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV)
 #else
 #define MSR_		(__MSR | MSR_LE)
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV | MSR_LE)
 #endif
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
@@ -371,6 +373,7 @@
 #define SPRN_DBAT7L	0x23F	/* Data BAT 7 Lower Register */
 #define SPRN_DBAT7U	0x23E	/* Data BAT 7 Upper Register */
 #define SPRN_PPR	0x380	/* SMT Thread status Register */
+#define SPRN_TSCR	0x399	/* Thread Switch Control Register */
 
 #define SPRN_DEC	0x016		/* Decrement Register */
 #define SPRN_DER	0x095		/* Debug Enable Regsiter */
@@ -728,6 +731,7 @@
 #define SPRN_BESCR	806	/* Branch event status and control register */
 #define   BESCR_GE	0x8000000000000000ULL /* Global Enable */
 #define SPRN_WORT	895	/* Workload optimization register - thread */
+#define SPRN_WORC	863	/* Workload optimization register - core */
 
 #define SPRN_PMC1	787
 #define SPRN_PMC2	788
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6240698..ff21b7a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@
 
 static inline int syscall_get_arch(void)
 {
-	return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+	int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+	arch |= __AUDIT_ARCH_LE;
+#endif
+	return arch;
 }
 #endif	/* _ASM_SYSCALL_H */
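
With this change a little-endian ppc64 kernel reports a distinct audit arch
value, which seccomp/audit filters must match explicitly. A small user-space
check of the resulting constants (values from include/uapi/linux/audit.h and
the ELF machine numbers):

	#include <stdint.h>
	#include <stdio.h>

	#define __AUDIT_ARCH_64BIT	0x80000000U
	#define __AUDIT_ARCH_LE		0x40000000U
	#define EM_PPC64		21

	int main(void)
	{
		uint32_t be64 = EM_PPC64 | __AUDIT_ARCH_64BIT;
		uint32_t le64 = be64 | __AUDIT_ARCH_LE;

		printf("AUDIT_ARCH_PPC64   = 0x%08x\n", be64); /* 0x80000015 */
		printf("ppc64le audit arch = 0x%08x\n", le64); /* 0xc0000015 */
		return 0;
	}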
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43..a0c071d 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -284,7 +284,7 @@
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 #endif /* __powerpc64__ */
@@ -297,7 +297,7 @@
 	might_fault();							\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
 
@@ -308,7 +308,7 @@
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 24d78e1..e624f96 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -726,5 +726,16 @@
 					arch.timing_last_enter.tv32.tbl));
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	DEFINE(PACA_CORE_IDLE_STATE_PTR,
+			offsetof(struct paca_struct, core_idle_state_ptr));
+	DEFINE(PACA_THREAD_IDLE_STATE,
+			offsetof(struct paca_struct, thread_idle_state));
+	DEFINE(PACA_THREAD_MASK,
+			offsetof(struct paca_struct, thread_mask));
+	DEFINE(PACA_SUBCORE_SIBLING_MASK,
+			offsetof(struct paca_struct, subcore_sibling_mask));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382..c2df815 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
+#include <asm/cpuidle.h>
 
 /*
  * We layout physical memory as follows:
@@ -101,23 +102,34 @@
 #ifdef CONFIG_PPC_P7_NAP
 BEGIN_FTR_SECTION
 	/* Running native on arch 2.06 or later, check if we are
-	 * waking up from nap. We only handle no state loss and
-	 * supervisor state loss. We do -not- handle hypervisor
-	 * state loss at this time.
+	 * waking up from nap/sleep/winkle.
 	 */
 	mfspr	r13,SPRN_SRR1
 	rlwinm.	r13,r13,47-31,30,31
 	beq	9f
 
-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal, we could try to use the
-	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * OPAL v3 based powernv platforms have new idle states
-	 * which fall in this catagory.
+	cmpwi	cr3,r13,2
+
+	/*
+	 * Check if the last bit of HSPRG0 is set. This indicates whether we
+	 * are waking up from winkle.
 	 */
-	bgt	cr1,8f
 	GET_PACA(r13)
+	clrldi	r5,r13,63
+	clrrdi	r13,r13,1
+	cmpwi	cr4,r5,1
+	mtspr	SPRN_HSPRG0,r13
+
+	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi   cr2,r0,PNV_THREAD_NAP
+	bgt     cr2,8f				/* Either sleep or winkle */
+
+	/* Waking up from nap should not cause hypervisor state loss */
+	bgt	cr3,.
+
+	/* Waking up from nap */
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +145,7 @@
 
 	/* Return SRR1 from power7_nap() */
 	mfspr	r3,SPRN_SRR1
-	beq	cr1,2f
+	beq	cr3,2f
 	b	power7_wakeup_noloss
 2:	b	power7_wakeup_loss
 
@@ -1382,6 +1394,7 @@
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
+	li	r3,PNV_THREAD_NAP
 	b	power7_enter_nap_mode
 4:
 #endif
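The wakeup path above decodes a pointer-tagging trick: the paca is at least
8-byte aligned, so the low bit of the pointer held in HSPRG0 is free, and the
SLW engine is programmed (by pnv_save_sprs_for_winkle() later in this series)
to restore HSPRG0 with that bit set after winkle. A standalone model of the
encode/decode, with illustrative names:

    #include <stdint.h>
    #include <assert.h>

    struct paca { long pad; } __attribute__((aligned(8)));

    int main(void)
    {
    	static struct paca p;

    	/* What firmware restores after winkle: paca pointer with bit 0 set */
    	uintptr_t hsprg0 = (uintptr_t)&p | 1;

    	/* What the 0x100 wakeup code does with it */
    	int from_winkle = hsprg0 & 1;			/* clrldi r5,r13,63 */
    	struct paca *paca = (void *)(hsprg0 & ~1UL);	/* clrrdi r13,r13,1 */

    	assert(from_winkle && paca == &p);
    	return 0;
    }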
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 18c0687..05adc8b 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,9 +18,25 @@
 #include <asm/hw_irq.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
+#include <asm/cpuidle.h>
+#include <asm/mmu-hash64.h>
 
 #undef DEBUG
 
+/*
+ * Use unused space in the interrupt stack to save and restore
+ * registers for winkle support.
+ */
+#define _SDR1	GPR3
+#define _RPR	GPR4
+#define _SPURR	GPR5
+#define _PURR	GPR6
+#define _TSCR	GPR7
+#define _DSCR	GPR8
+#define _AMOR	GPR9
+#define _WORT	GPR10
+#define _WORC	GPR11
+
 /* Idle state entry routines */
 
 #define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
@@ -37,8 +53,7 @@
 
 /*
  * Pass requested state in r3:
- * 	0 - nap
- * 	1 - sleep
+ *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
  * To check IRQ_HAPPENED in r4
  * 	0 - don't check
@@ -101,18 +116,105 @@
 	std	r9,_MSR(r1)
 	std	r1,PACAR1(r13)
 
-_GLOBAL(power7_enter_nap_mode)
+	/*
+	 * Go to real mode to do the nap, as required by the architecture.
+	 * Also, we need to be in real mode before setting hwthread_state,
+	 * because as soon as we do that, another thread can switch
+	 * the MMU context to the guest.
+	 */
+	LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+	li	r6, MSR_RI
+	andc	r6, r9, r6
+	LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+	mtmsrd	r6, 1		/* clear RI before setting SRR0/1 */
+	mtspr	SPRN_SRR0, r7
+	mtspr	SPRN_SRR1, r5
+	rfid
+
+	.globl	power7_enter_nap_mode
+power7_enter_nap_mode:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	/* Tell KVM we're napping */
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	cmpwi	cr0,r3,1
-	beq	2f
+	stb	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr3,r3,PNV_THREAD_SLEEP
+	bge	cr3,2f
 	IDLE_STATE_ENTER_SEQ(PPC_NAP)
 	/* No return */
-2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
+2:
+	/* Sleep or winkle */
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+	lwarx	r15,0,r14
+	andc	r15,r15,r7			/* Clear thread bit */
+
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then the current thread is the last thread of the core
+ * entering sleep. The last thread needs to execute the hardware bug
+ * workaround code if required by the platform.
+ * Make the workaround call unconditionally here. The branch below is
+ * patched out at boot, when the idle states are discovered, if the
+ * platform does not require the workaround.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+	beq	fastsleep_workaround_at_entry
+
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+common_enter: /* common code for all the threads entering sleep or winkle */
+	bgt	cr3,enter_winkle
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+	/* Fast sleep workaround */
+	li	r3,1
+	li	r4,1
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+
+	/* Clear Lock bit */
+	li	r0,0
+	lwsync
+	stw	r0,0(r14)
+	b	common_enter
+
+enter_winkle:
+	/*
+	 * Note: all registers (per-core, per-subcore and per-thread) are
+	 * saved here, since any thread in the core might be the first one
+	 * to wake up.
+	 */
+	mfspr	r3,SPRN_SDR1
+	std	r3,_SDR1(r1)
+	mfspr	r3,SPRN_RPR
+	std	r3,_RPR(r1)
+	mfspr	r3,SPRN_SPURR
+	std	r3,_SPURR(r1)
+	mfspr	r3,SPRN_PURR
+	std	r3,_PURR(r1)
+	mfspr	r3,SPRN_TSCR
+	std	r3,_TSCR(r1)
+	mfspr	r3,SPRN_DSCR
+	std	r3,_DSCR(r1)
+	mfspr	r3,SPRN_AMOR
+	std	r3,_AMOR(r1)
+	mfspr	r3,SPRN_WORT
+	std	r3,_WORT(r1)
+	mfspr	r3,SPRN_WORC
+	std	r3,_WORC(r1)
+	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
 
 _GLOBAL(power7_idle)
 	/* Now check if user or arch enabled NAP mode */
@@ -125,48 +227,21 @@
 
 _GLOBAL(power7_nap)
 	mr	r4,r3
-	li	r3,0
+	li	r3,PNV_THREAD_NAP
 	b	power7_powersave_common
 	/* No return */
 
 _GLOBAL(power7_sleep)
-	li	r3,1
+	li	r3,PNV_THREAD_SLEEP
 	li	r4,1
 	b	power7_powersave_common
 	/* No return */
 
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1  - stack pointer
- * r3  - opal token
- */
-opal_call_realmode:
-	mflr	r12
-	std	r12,_LINK(r1)
-	ld	r2,PACATOC(r13)
-	/* Set opal return address */
-	LOAD_REG_ADDR(r0,return_from_opal_call)
-	mtlr	r0
-	/* Handle endian-ness */
-	li	r0,MSR_LE
-	mfmsr	r12
-	andc	r12,r12,r0
-	mtspr	SPRN_HSRR1,r12
-	mr	r0,r3			/* Move opal token to r0 */
-	LOAD_REG_ADDR(r11,opal)
-	ld	r12,8(r11)
-	ld	r2,0(r11)
-	mtspr	SPRN_HSRR0,r12
-	hrfid
-
-return_from_opal_call:
-	FIXUP_ENDIAN
-	ld	r0,_LINK(r1)
-	mtlr	r0
-	blr
+_GLOBAL(power7_winkle)
+	li	r3,PNV_THREAD_WINKLE
+	li	r4,1
+	b	power7_powersave_common
+	/* No return */
 
 #define CHECK_HMI_INTERRUPT						\
 	mfspr	r0,SPRN_SRR1;						\
@@ -181,7 +256,7 @@
 	ld	r2,PACATOC(r13);					\
 	ld	r1,PACAR1(r13);						\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
+	li	r0,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
 	bl	opal_call_realmode;					\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
@@ -190,16 +265,190 @@
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
+	/*
+	 * Before entering any idle state, the NVGPRs are saved in the stack
+	 * and they are restored before switching to the process context. Hence
+	 * until they are restored, they are free to be used.
+	 *
+	 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+	 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+	 * wakeup reason if we branch to kvm_start_guest.
+	 */
 
+	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-	/* Time base re-sync */
-	li	r3,OPAL_RESYNC_TIMEBASE
-	bl	opal_call_realmode;
 
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	/*
+	 * The lock bit is set in one of two cases:
+	 * a. In the sleep/winkle entry path, the last thread is executing
+	 * the fastsleep workaround code.
+	 * b. In the wakeup path, another thread is executing the fastsleep
+	 * workaround undo code, resyncing the timebase or restoring context.
+	 * In either case, loop until the lock bit is cleared.
+	 */
+	bne	core_idle_lock_held
+
+	cmpwi	cr2,r15,0
+	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
+	and	r4,r4,r15
+	cmpwi	cr1,r4,0	/* Check if first in subcore */
+
+	/*
+	 * At this stage
+	 * cr1 - 0b0100 if first thread to wake up in the subcore
+	 * cr2 - 0b0100 if first thread to wake up in the core
+	 * cr3 - 0b0010 if waking up from sleep or winkle
+	 * cr4 - 0b0100 if waking up from winkle
+	 */
+
+	or	r15,r15,r7		/* Set thread bit */
+
+	beq	cr1,first_thread_in_subcore
+
+	/* Not first thread in subcore to wake up */
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+	b	common_exit
+
+core_idle_lock_held:
+	HMT_LOW
+core_idle_lock_loop:
+	lwz	r15,0(r14)
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_loop
+	HMT_MEDIUM
+	b	lwarx_loop2
+
+first_thread_in_subcore:
+	/* First thread in the subcore to wake up */
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+
+	/*
+	 * If waking up from sleep, subcore state is not lost. Hence
+	 * skip subcore state restore
+	 */
+	bne	cr4,subcore_state_restored
+
+	/* Restore per-subcore state */
+	ld      r4,_SDR1(r1)
+	mtspr   SPRN_SDR1,r4
+	ld      r4,_RPR(r1)
+	mtspr   SPRN_RPR,r4
+	ld	r4,_AMOR(r1)
+	mtspr	SPRN_AMOR,r4
+
+subcore_state_restored:
+	/*
+	 * Check if the thread is also the first thread in the core. If not,
+	 * skip to clear_lock.
+	 */
+	bne	cr2,clear_lock
+
+first_thread_in_core:
+
+	/*
+	 * First thread in the core waking up from fastsleep. It needs to
+	 * call the fastsleep workaround code if the platform requires it.
+	 * Call it unconditionally here. The below branch instruction will
+	 * be patched out when the idle states are discovered if platform
+	 * does not require workaround.
+	 */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+	b	fastsleep_workaround_at_exit
+
+timebase_resync:
+	/* Do timebase resync if we are waking up from sleep. Use cr3 value
+	 * set in exceptions-64s.S */
+	ble	cr3,clear_lock
+	/* Time base re-sync */
+	li	r0,OPAL_RESYNC_TIMEBASE
+	bl	opal_call_realmode;
 	/* TODO: Check r3 for failure */
 
+	/*
+	 * If waking up from sleep, per core state is not lost, skip to
+	 * clear_lock.
+	 */
+	bne	cr4,clear_lock
+
+	/* Restore per core state */
+	ld	r4,_TSCR(r1)
+	mtspr	SPRN_TSCR,r4
+	ld	r4,_WORC(r1)
+	mtspr	SPRN_WORC,r4
+
+clear_lock:
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	lwsync
+	stw	r15,0(r14)
+
+common_exit:
+	/*
+	 * Common to all threads.
+	 *
+	 * If waking up from sleep, hypervisor state is not lost. Hence
+	 * skip hypervisor state restore.
+	 */
+	bne	cr4,hypervisor_state_restored
+
+	/* Waking up from winkle */
+
+	/* Restore per thread state */
+	bl	__restore_cpu_power8
+
+	/* Restore SLB  from PACA */
+	ld	r8,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	li	r3, SLBSHADOW_SAVEAREA
+	LDX_BE	r5, r8, r3
+	addi	r3, r3, 8
+	LDX_BE	r6, r8, r3
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f
+	slbmte	r6,r5
+1:	addi	r8,r8,16
+	.endr
+
+	ld	r4,_SPURR(r1)
+	mtspr	SPRN_SPURR,r4
+	ld	r4,_PURR(r1)
+	mtspr	SPRN_PURR,r4
+	ld	r4,_DSCR(r1)
+	mtspr	SPRN_DSCR,r4
+	ld	r4,_WORT(r1)
+	mtspr	SPRN_WORT,r4
+
+hypervisor_state_restored:
+
+	li	r5,PNV_THREAD_RUNNING
+	stb     r5,PACA_THREAD_IDLE_STATE(r13)
+
+	mtspr	SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li      r0,KVM_HWTHREAD_IN_KERNEL
+	stb     r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz     r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi   r0,0
+	beq     6f
+	b       kvm_start_guest
+6:
+#endif
+
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -212,6 +461,13 @@
 	mtspr	SPRN_SRR0,r5
 	rfid
 
+fastsleep_workaround_at_exit:
+	li	r3,1
+	li	r4,0
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+	b	timebase_resync
+
 /*
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
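The lwarx/stwcx. loops above implement a small lock-word protocol on the
shared per-core word: bits 0-7 record which threads are awake, and bit 8 is a
lock taken by the last thread entering a deep state (to run the fastsleep
workaround) and by the first thread waking (to resync the timebase and
restore core state). A userspace C11 model of the entry side, a sketch only:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define PNV_CORE_IDLE_THREAD_BITS	0x0ff
    #define PNV_CORE_IDLE_LOCK_BIT	0x100

    /*
     * Clear our thread bit; if that leaves no thread awake, also take the
     * lock bit so the caller can run the fastsleep workaround before the
     * core actually stops. Returns true for the last thread, which must
     * clear the lock bit once the workaround is done.
     */
    static bool enter_deep_idle(atomic_uint *core_state, unsigned int thread_mask)
    {
    	unsigned int old = atomic_load(core_state), new;
    	bool last;

    	do {
    		new = old & ~thread_mask;
    		last = !(new & PNV_CORE_IDLE_THREAD_BITS);
    		if (last)
    			new |= PNV_CORE_IDLE_LOCK_BIT;
    	} while (!atomic_compare_exchange_weak(core_state, &old, new));

    	return last;
    }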
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8b2d2dc..8ec017c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -700,7 +700,6 @@
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
-	cpu_callin_map[cpu] = 1;
 
 	if (smp_ops->setup_cpu)
 		smp_ops->setup_cpu(cpu);
@@ -739,6 +738,14 @@
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 
+	/*
+	 * CPU must be marked active and online before we signal back to the
+	 * master, because the scheduler needs to see the cpu_online and
+	 * cpu_active bits set.
+	 */
+	smp_wmb();
+	cpu_callin_map[cpu] = 1;
+
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_ONLINE);
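Moving the cpu_callin_map store after set_cpu_online(), separated by
smp_wmb(), guarantees the boot CPU (which spins on the callin flag) can never
observe the callin while the online/active bits are still clear. The pairing,
modelled with C11 fences rather than kernel barriers:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool online;
    static atomic_bool callin;

    static void secondary_cpu(void)		/* writer */
    {
    	atomic_store_explicit(&online, true, memory_order_relaxed);
    	atomic_thread_fence(memory_order_release);	/* smp_wmb() */
    	atomic_store_explicit(&callin, true, memory_order_relaxed);
    }

    static void boot_cpu(void)			/* reader */
    {
    	while (!atomic_load_explicit(&callin, memory_order_relaxed))
    		;
    	atomic_thread_fence(memory_order_acquire);	/* pairs with the release */
    	/* 'online' is now guaranteed to read back as true */
    }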
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index dba3408..f162d0b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -177,7 +177,7 @@
 	}							\
 	ret = sprintf(buf, _fmt, _expr);			\
 e_free:								\
-	kfree(page);						\
+	kmem_cache_free(hv_page_cache, page);			\
 	return ret;						\
 }								\
 static DEVICE_ATTR_RO(_name)
@@ -217,11 +217,14 @@
 		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
 }
 
+DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+
 static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 					 u16 lpar, u64 *res,
 					 bool success_expected)
 {
-	unsigned long ret = -ENOMEM;
+	unsigned long ret;
 
 	/*
 	 * request_buffer and result_buffer are not required to be 4k aligned,
@@ -243,13 +246,11 @@
 	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
 	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
 
-	request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!request_buffer)
-		goto out;
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
 
-	result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!result_buffer)
-		goto out_free_request_buffer;
+	memset(request_buffer, 0, 4096);
+	memset(result_buffer, 0, 4096);
 
 	*request_buffer = (struct reqb) {
 		.buf = {
@@ -278,15 +279,11 @@
 				domain, offset, ix, lpar, ret, ret,
 				result_buffer->buf.detailed_rc,
 				result_buffer->buf.failing_request_ix);
-		goto out_free_result_buffer;
+		goto out;
 	}
 
 	*res = be64_to_cpu(result_buffer->result);
 
-out_free_result_buffer:
-	kfree(result_buffer);
-out_free_request_buffer:
-	kfree(request_buffer);
 out:
 	return ret;
 }
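Replacing the per-call slab allocations with static per-CPU, 4K-aligned
arrays removes two allocations (and their failure paths) from every counter
read, and the alignment guarantees the buffers handed to the hypervisor never
straddle a 4K boundary. The general shape of the pattern, sketched with an
illustrative buffer name; get_cpu_var() disables preemption and is
conventionally paired with put_cpu_var():

    #include <linux/percpu.h>
    #include <linux/string.h>

    DEFINE_PER_CPU(char, scratch_buf[4096]) __aligned(4096);

    static void with_scratch(void)
    {
    	char *buf = get_cpu_var(scratch_buf);	/* disables preemption */

    	memset(buf, 0, 4096);
    	/* ... hand buf to the hcall ... */
    	put_cpu_var(scratch_buf);		/* re-enables preemption */
    }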
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0a299be..54eca8b 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -158,6 +158,43 @@
 	blr
 #endif
 
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1  - stack pointer
+ * r0  - opal token
+ */
+_GLOBAL(opal_call_realmode)
+	mflr	r12
+	std	r12,PPC_LR_STKOFF(r1)
+	ld	r2,PACATOC(r13)
+	/* Set opal return address */
+	LOAD_REG_ADDR(r12,return_from_opal_call)
+	mtlr	r12
+
+	mfmsr	r12
+#ifdef __LITTLE_ENDIAN__
+	/* Handle endian-ness */
+	li	r11,MSR_LE
+	andc	r12,r12,r11
+#endif
+	mtspr	SPRN_HSRR1,r12
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+	FIXUP_ENDIAN
+#endif
+	ld	r12,PPC_LR_STKOFF(r1)
+	mtlr	r12
+	blr
+
 OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
@@ -247,6 +284,7 @@
 OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
 OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
 OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
 OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CXL_MODE);
@@ -254,3 +292,4 @@
 OPAL_CALL(opal_tpo_read,			OPAL_READ_TPO);
 OPAL_CALL(opal_ipmi_send,			OPAL_IPMI_SEND);
 OPAL_CALL(opal_ipmi_recv,			OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request,			OPAL_I2C_REQUEST);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cb0b6de..f10b9ec 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -9,8 +9,9 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define pr_fmt(fmt)	"opal: " fmt
 
+#include <linux/printk.h>
 #include <linux/types.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -625,6 +626,39 @@
 	return 0;
 }
 
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buf, loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+				       bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+static void opal_export_symmap(void)
+{
+	const __be64 *syms;
+	unsigned int size;
+	struct device_node *fw;
+	int rc;
+
+	fw = of_find_node_by_path("/ibm,opal/firmware");
+	if (!fw)
+		return;
+	syms = of_get_property(fw, "symbol-map", &size);
+	if (!syms || size != 2 * sizeof(__be64))
+		return;
+
+	/* Setup attributes */
+	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+	if (rc)
+		pr_warn("Error %d creating OPAL symbols file\n", rc);
+}
+
 static void __init opal_dump_region_init(void)
 {
 	void *addr;
@@ -653,6 +687,14 @@
 			of_platform_device_create(np, NULL, NULL);
 }
 
+static void opal_i2c_create_devs(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+		of_platform_device_create(np, NULL, NULL);
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -679,6 +721,9 @@
 		of_node_put(consoles);
 	}
 
+	/* Create i2c platform devices */
+	opal_i2c_create_devs();
+
 	/* Find all OPAL interrupts and request them */
 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
 	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -702,6 +747,8 @@
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
 	if (rc == 0) {
+		/* Export symbol map to userspace */
+		opal_export_symmap();
 		/* Setup dump region interface */
 		opal_dump_region_init();
 		/* Setup error log interface */
@@ -824,3 +871,4 @@
 EXPORT_SYMBOL_GPL(opal_rtc_write);
 EXPORT_SYMBOL_GPL(opal_tpo_read);
 EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
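opal_export_symmap() publishes the firmware's symbol table as a read-only
binary sysfs attribute under the opal kobject, so userspace tools (e.g. perf)
can resolve addresses inside OPAL. A minimal reader, assuming the resulting
path is /sys/firmware/opal/symbol_map:

    #include <stdio.h>

    int main(void)
    {
    	char buf[4096];
    	size_t n;
    	FILE *f = fopen("/sys/firmware/opal/symbol_map", "r");

    	if (!f) {
    		perror("symbol_map");
    		return 1;
    	}
    	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
    		fwrite(buf, 1, n, stdout);
    	fclose(f);
    	return 0;
    }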
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d1..604c48e 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@
 }
 #endif
 
+extern u32 pnv_get_supported_cpuidle_states(void);
+
 extern void pnv_lpc_init(void);
 
 bool cpu_core_split_required(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 30b1c3e..b700a32 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,8 +36,12 @@
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
 
 #include "powernv.h"
+#include "subcore.h"
 
 static void __init pnv_setup_arch(void)
 {
@@ -288,6 +292,168 @@
 }
 #endif /* CONFIG_PPC_POWERNV_RTAS */
 
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+	int cpu;
+	int rc;
+
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+	 * all cpus at boot. Read these register values on the current cpu
+	 * and use the same values across all cpus.
+	 */
+	uint64_t lpcr_val = mfspr(SPRN_LPCR);
+	uint64_t hid0_val = mfspr(SPRN_HID0);
+	uint64_t hid1_val = mfspr(SPRN_HID1);
+	uint64_t hid4_val = mfspr(SPRN_HID4);
+	uint64_t hid5_val = mfspr(SPRN_HID5);
+	uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+	for_each_possible_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+		/*
+		 * HSPRG0 is used to store the cpu's pointer to the paca.
+		 * Hence the last 3 bits are guaranteed to be 0. Program the
+		 * slw to restore HSPRG0 with the 63rd bit set, so that when
+		 * a thread wakes up at 0x100 we can use this bit to
+		 * distinguish between fastsleep and deep winkle.
+		 */
+		hsprg0_val |= 1;
+
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+		if (rc != 0)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+		if (rc != 0)
+			return rc;
+
+		/* HIDs are per core registers */
+		if (cpu_thread_in_core(cpu) == 0) {
+
+			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	u32 *core_idle_state;
+
+	/*
+	 * core_idle_state - Bits 0-7 track the idle state of each thread of
+	 * the core; bit 8 is the lock bit. Initially all thread bits are
+	 * set. They are cleared when a thread enters a deep idle state such
+	 * as sleep or winkle. The lock bit is initially cleared.
+	 * The lock bit has two purposes:
+	 * a. While the first thread is restoring core state, it prevents
+	 * other threads in the core from switching to process context.
+	 * b. While the last thread in the core is saving the core state, it
+	 * prevents a different thread from waking up.
+	 */
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+		*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].core_idle_state_ptr = core_idle_state;
+			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+			paca[cpu].thread_mask = 1 << j;
+		}
+	}
+
+	update_subcore_sibling_mask();
+
+	if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+		pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+	return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+static int __init pnv_init_idle_states(void)
+{
+	struct device_node *power_mgt;
+	int dt_idle_states;
+	const __be32 *idle_state_flags;
+	u32 len_flags, flags;
+	int i;
+
+	supported_cpuidle_states = 0;
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		return 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		return 0;
+
+	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		return 0;
+	}
+
+	idle_state_flags = of_get_property(power_mgt,
+			"ibm,cpu-idle-state-flags", &len_flags);
+	if (!idle_state_flags) {
+		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+		return 0;
+	}
+
+	dt_idle_states = len_flags / sizeof(u32);
+
+	for (i = 0; i < dt_idle_states; i++) {
+		flags = be32_to_cpu(idle_state_flags[i]);
+		supported_cpuidle_states |= flags;
+	}
+	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_entry,
+			PPC_INST_NOP);
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_exit,
+			PPC_INST_NOP);
+	}
+	pnv_alloc_idle_core_states();
+	return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
 static int __init pnv_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
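pnv_init_idle_states() simply ORs together the per-state flag words from
"ibm,cpu-idle-state-flags"; if the union lacks OPAL_PM_SLEEP_ENABLED_ER1, the
two branch sites in idle_power7.S are rewritten to NOPs at boot, so
unaffected hardware pays nothing for the workaround. A sketch of the
accumulation, with flag values assumed to mirror asm/opal.h:

    #include <stdio.h>

    #define OPAL_PM_NAP_ENABLED		0x00010000
    #define OPAL_PM_SLEEP_ENABLED		0x00020000
    #define OPAL_PM_WINKLE_ENABLED	0x00040000
    #define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000

    int main(void)
    {
    	/* One flag word per device-tree idle state, already byteswapped */
    	unsigned int dt_flags[] = { OPAL_PM_NAP_ENABLED,
    				    OPAL_PM_SLEEP_ENABLED_ER1 };
    	unsigned int supported = 0;

    	for (unsigned int i = 0; i < sizeof(dt_flags) / sizeof(dt_flags[0]); i++)
    		supported |= dt_flags[i];

    	printf("patch workaround branches to NOP: %s\n",
    	       (supported & OPAL_PM_SLEEP_ENABLED_ER1) ? "no" : "yes");
    	return 0;
    }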
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b716f66..fc34025 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -150,6 +150,7 @@
 {
 	unsigned int cpu;
 	unsigned long srr1;
+	u32 idle_states;
 
 	/* Standard hot unplug procedure */
 	local_irq_disable();
@@ -160,13 +161,23 @@
 	generic_set_cpu_dead(cpu);
 	smp_wmb();
 
+	idle_states = pnv_get_supported_cpuidle_states();
 	/* We don't want to take decrementer interrupts while we are offline,
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
+
 		ppc64_runlatch_off();
-		srr1 = power7_nap(1);
+
+		if (idle_states & OPAL_PM_WINKLE_ENABLED)
+			srr1 = power7_winkle();
+		else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+				(idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+			srr1 = power7_sleep();
+		else
+			srr1 = power7_nap(1);
+
 		ppc64_runlatch_on();
 
 		/*
@@ -198,13 +209,27 @@
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static int pnv_cpu_bootable(unsigned int nr)
+{
+	/*
+	 * Starting with POWER8, the subcore logic relies on all threads of a
+	 * core being booted so that they can participate in split mode
+	 * switches. So on those machines we ignore the smt_enabled_at_boot
+	 * setting (smt-enabled on the kernel command line).
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 1;
+
+	return smp_generic_cpu_bootable(nr);
+}
+
 static struct smp_ops_t pnv_smp_ops = {
 	.message_pass	= smp_muxed_ipi_message_pass,
 	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
 	.probe		= xics_smp_probe,
 	.kick_cpu	= pnv_smp_kick_cpu,
 	.setup_cpu	= pnv_smp_setup_cpu,
-	.cpu_bootable	= smp_generic_cpu_bootable,
+	.cpu_bootable	= pnv_cpu_bootable,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= pnv_smp_cpu_disable,
 	.cpu_die	= generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b..f60f80a 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -160,6 +160,18 @@
 	mb();
 }
 
+static void update_hid_in_slw(u64 hid0)
+{
+	u64 idle_states = pnv_get_supported_cpuidle_states();
+
+	if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+		/* OPAL call to patch slw with the new HID0 value */
+		u64 cpu_pir = hard_smp_processor_id();
+
+		opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+	}
+}
+
 static void unsplit_core(void)
 {
 	u64 hid0, mask;
@@ -179,6 +191,7 @@
 	hid0 = mfspr(SPRN_HID0);
 	hid0 &= ~HID0_POWER8_DYNLPARDIS;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	while (mfspr(SPRN_HID0) & mask)
 		cpu_relax();
@@ -215,6 +228,7 @@
 	hid0  = mfspr(SPRN_HID0);
 	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	/* Wait for it to happen */
 	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@
 	return true;
 }
 
+void update_subcore_sibling_mask(void)
+{
+	int cpu;
+	/*
+	 * Sibling mask for the first cpu of each subcore. Left-shift this
+	 * by the required number of bits to get the sibling mask for the
+	 * rest of the cpus.
+	 */
+	int sibling_mask_first_cpu =  (1 << threads_per_subcore) - 1;
+
+	for_each_possible_cpu(cpu) {
+		int tid = cpu_thread_in_core(cpu);
+		int offset = (tid / threads_per_subcore) * threads_per_subcore;
+		int mask = sibling_mask_first_cpu << offset;
+
+		paca[cpu].subcore_sibling_mask = mask;
+
+	}
+}
+
 static int cpu_update_split_mode(void *data)
 {
 	int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@
 		/* Make the new mode public */
 		subcores_per_core = new_mode;
 		threads_per_subcore = threads_per_core / subcores_per_core;
+		update_subcore_sibling_mask();
 
 		/* Make sure the new mode is written before we exit */
 		mb();
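update_subcore_sibling_mask() derives each cpu's mask from its thread id
alone: the subcore containing thread tid starts at
(tid / threads_per_subcore) * threads_per_subcore, and the base mask is
shifted to that offset. A standalone check of the arithmetic for an 8-thread
core split into 2-thread subcores:

    #include <stdio.h>

    int main(void)
    {
    	int threads_per_subcore = 2;
    	int first_mask = (1 << threads_per_subcore) - 1;	/* 0b11 */

    	for (int tid = 0; tid < 8; tid++) {
    		int offset = (tid / threads_per_subcore) * threads_per_subcore;

    		printf("tid %d -> subcore_sibling_mask 0x%02x\n",
    		       tid, first_mask << offset);
    	}
    	return 0;	/* 0x03 0x03 0x0c 0x0c 0x30 0x30 0xc0 0xc0 */
    }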
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc9..84e02ae 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -14,5 +14,12 @@
 #define SYNC_STEP_FINISHED	3	/* Set by secondary when split/unsplit is done */
 
 #ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
 void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d69f1cd..ba397bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -249,10 +249,6 @@
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
 
-config X86_INTEL_MPX
-	def_bool y
-	depends on CPU_SUP_INTEL
-
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
@@ -887,11 +883,11 @@
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
+	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
-	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+	def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+	depends on X86_LOCAL_APIC
 	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1594,6 +1590,32 @@
 
 	  If unsure, say Y.
 
+config X86_INTEL_MPX
+	prompt "Intel MPX (Memory Protection Extensions)"
+	def_bool n
+	depends on CPU_SUP_INTEL
+	---help---
+	  MPX provides hardware features that can be used in
+	  conjunction with compiler-instrumented code to check
+	  memory references.  It is designed to detect buffer
+	  overflow or underflow bugs.
+
+	  This option enables running applications which are
+	  instrumented or otherwise use MPX.  It does not use MPX
+	  itself inside the kernel or to protect the kernel
+	  against bad memory references.
+
+	  Enabling this option will make the kernel larger:
+	  ~8k of kernel text and 36 bytes of data on a 64-bit
+	  defconfig.  It adds a long to the 'mm_struct' which
+	  will increase the kernel memory overhead of each
+	  process and adds some branches to paths used during
+	  exec() and munmap().
+
+	  For details, see Documentation/x86/intel_mpx.txt
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906..9662290 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -94,30 +94,7 @@
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
 #endif /* CONFIG_TRACING */
 
-/* IOAPIC */
-#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
-extern unsigned long io_apic_irqs;
-
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-
-struct io_apic_irq_attr {
-	int ioapic;
-	int ioapic_pin;
-	int trigger;
-	int polarity;
-};
-
-static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
-					int ioapic, int ioapic_pin,
-					int trigger, int polarity)
-{
-	irq_attr->ioapic	= ioapic;
-	irq_attr->ioapic_pin	= ioapic_pin;
-	irq_attr->trigger	= trigger;
-	irq_attr->polarity	= polarity;
-}
-
+#ifdef CONFIG_IRQ_REMAP
 /* Intel specific interrupt remapping information */
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
@@ -131,14 +108,12 @@
 	u16 devid; /* Device ID for IRTE table */
 	u16 index; /* Index into IRTE table*/
 };
+#endif	/* CONFIG_IRQ_REMAP */
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
+#ifdef	CONFIG_X86_LOCAL_APIC
+struct irq_data;
+
 struct irq_cfg {
-	struct irq_pin_list	*irq_2_pin;
 	cpumask_var_t		domain;
 	cpumask_var_t		old_domain;
 	u8			vector;
@@ -150,18 +125,39 @@
 		struct irq_2_irte  irq_2_irte;
 	};
 #endif
+	union {
+#ifdef CONFIG_X86_IO_APIC
+		struct {
+			struct list_head	irq_2_pin;
+		};
+#endif
+	};
 };
 
+extern struct irq_cfg *irq_cfg(unsigned int irq);
+extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
+extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
+extern void setup_vector_irq(int cpu);
+#ifdef CONFIG_SMP
 extern void send_cleanup_vector(struct irq_cfg *);
+extern void irq_complete_move(struct irq_cfg *cfg);
+#else
+static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void irq_complete_move(struct irq_cfg *c) { }
+#endif
 
-struct irq_data;
-int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
-			  unsigned int *dest_id);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
-extern void setup_ioapic_dest(void);
-
-extern void enable_IO_APIC(void);
+extern int apic_retrigger_irq(struct irq_data *data);
+extern void apic_ack_edge(struct irq_data *data);
+extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			     unsigned int *dest_id);
+#else	/*  CONFIG_X86_LOCAL_APIC */
+static inline void lock_vector_lock(void) {}
+static inline void unlock_vector_lock(void) {}
+#endif	/* CONFIG_X86_LOCAL_APIC */
 
 /* Statistics */
 extern atomic_t irq_err_count;
@@ -185,7 +181,8 @@
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
+				    - FIRST_EXTERNAL_VECTOR])(void);
 #ifdef CONFIG_TRACING
 #define trace_interrupt interrupt
 #endif
@@ -195,17 +192,6 @@
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void setup_vector_irq(int cpu);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
-extern void __setup_vector_irq(int cpu);
-#else
-static inline void lock_vector_lock(void) {}
-static inline void unlock_vector_lock(void) {}
-static inline void __setup_vector_irq(int cpu) {}
-#endif
 
 #endif /* !ASSEMBLY_ */
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 1733ab4..bf006cc 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -132,6 +132,10 @@
 /* -1 if "noapic" boot option passed */
 extern int noioapicreroute;
 
+extern unsigned long io_apic_irqs;
+
+#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
+
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -139,18 +143,15 @@
 #define io_apic_assign_pci_irqs \
 	(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
 
-struct io_apic_irq_attr;
 struct irq_cfg;
 extern void ioapic_insert_resources(void);
+extern int arch_early_ioapic_init(void);
 
 extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
 				     unsigned int, int,
 				     struct io_apic_irq_attr *);
 extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
 
-extern void native_compose_msi_msg(struct pci_dev *pdev,
-				   unsigned int irq, unsigned int dest,
-				   struct msi_msg *msg, u8 hpet_id);
 extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
 
 extern int save_ioapic_entries(void);
@@ -160,6 +161,13 @@
 extern void setup_ioapic_ids_from_mpc(void);
 extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
 enum ioapic_domain_type {
 	IOAPIC_DOMAIN_INVALID,
 	IOAPIC_DOMAIN_LEGACY,
@@ -188,8 +196,10 @@
 extern u32 mp_pin_to_gsi(int ioapic, int pin);
 extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
 extern void mp_unmap_irq(int irq);
-extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-				      struct ioapic_domain_cfg *cfg);
+extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+			      struct ioapic_domain_cfg *cfg);
+extern int mp_unregister_ioapic(u32 gsi_base);
+extern int mp_ioapic_registered(u32 gsi_base);
 extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
 			    irq_hw_number_t hwirq);
 extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
@@ -227,19 +237,25 @@
 
 extern void io_apic_eoi(unsigned int apic, unsigned int vector);
 
-extern bool mp_should_keep_irq(struct device *dev);
-
+extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void setup_ioapic_dest(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
+extern void print_IO_APICs(void);
 #else  /* !CONFIG_X86_IO_APIC */
 
+#define IO_APIC_IRQ(x)		0
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static inline void ioapic_insert_resources(void) { }
+static inline int arch_early_ioapic_init(void) { return 0; }
+static inline void print_IO_APICs(void) {}
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
 static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
 static inline void mp_unmap_irq(int irq) { }
-static inline bool mp_should_keep_irq(struct device *dev) { return 1; }
 
 static inline int save_ioapic_entries(void)
 {
@@ -262,7 +278,6 @@
 #define native_io_apic_print_entries	NULL
 #define native_ioapic_set_affinity	NULL
 #define native_setup_ioapic_entry	NULL
-#define native_compose_msi_msg		NULL
 #define native_eoi_ioapic_pin		NULL
 #endif
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e..666c89e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -126,6 +126,12 @@
 
 #define NR_VECTORS			 256
 
+#ifdef CONFIG_X86_LOCAL_APIC
+#define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
+#else
+#define FIRST_SYSTEM_VECTOR		NR_VECTORS
+#endif
+
 #define FPU_IRQ				  13
 
 #define	FIRST_VM86_IRQ			   3
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0..4e370a5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,12 +96,15 @@
 #ifdef CONFIG_PCI_MSI
 /* implemented in arch/x86/kernel/apic/io_apic. */
 struct msi_desc;
+void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
+			    unsigned int dest, struct msi_msg *msg, u8 hpet_id);
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev);
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 		  unsigned int irq_base, unsigned int irq_offset);
 #else
+#define native_compose_msi_msg		NULL
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #endif
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195d..164e3f8 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index 46727eb..6e1aaf7 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -28,6 +28,13 @@
 	unsigned int  seg_not_present:1;
 	unsigned int  useable:1;
 #ifdef __x86_64__
+	/*
+	 * Because this bit is not present in 32-bit user code, user
+	 * programs can pass uninitialized values here.  Therefore, in
+	 * any context in which a user_desc comes from a 32-bit program,
+	 * the kernel must act as though lm == 0, regardless of the
+	 * actual value.
+	 */
 	unsigned int  lm:1;
 #endif
 };
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a142e77..4433a4b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -76,6 +76,19 @@
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #endif
 
+/*
+ * Locks related to IOAPIC hotplug
+ * Hotplug side:
+ *	->device_hotplug_lock
+ *		->acpi_ioapic_lock
+ *			->ioapic_lock
+ * Interrupt mapping side:
+ *	->acpi_ioapic_lock
+ *		->ioapic_mutex
+ *			->ioapic_lock
+ */
+static DEFINE_MUTEX(acpi_ioapic_lock);
+
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -395,10 +408,6 @@
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
 
-	/* Don't set up the ACPI SCI because it's already set up */
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
-
 	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
 	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
 	node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
@@ -411,7 +420,8 @@
 	if (irq < 0)
 		return irq;
 
-	if (enable_update_mptable)
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi)
 		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
 	return irq;
@@ -424,9 +434,6 @@
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return;
 
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return;
-
 	irq = mp_map_gsi_to_irq(gsi, 0);
 	if (irq > 0)
 		mp_unmap_irq(irq);
@@ -609,8 +616,10 @@
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
 		*irqp = gsi;
 	} else {
+		mutex_lock(&acpi_ioapic_lock);
 		irq = mp_map_gsi_to_irq(gsi,
 					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+		mutex_unlock(&acpi_ioapic_lock);
 		if (irq < 0)
 			return -1;
 		*irqp = irq;
@@ -650,7 +659,9 @@
 	int irq = gsi;
 
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	irq = mp_register_gsi(dev, gsi, trigger, polarity);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 
 	return irq;
@@ -659,7 +670,9 @@
 static void acpi_unregister_gsi_ioapic(u32 gsi)
 {
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	mp_unregister_gsi(gsi);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 }
 
@@ -690,6 +703,7 @@
 }
 EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __init acpi_set_irq_model_ioapic(void)
 {
 	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
@@ -697,6 +711,7 @@
 	__acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
 	acpi_ioapic = 1;
 }
+#endif
 
 /*
  *  ACPI based hotplug support for CPU
@@ -759,20 +774,74 @@
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	int ioapic_id;
+	u64 addr;
+	struct ioapic_domain_cfg cfg = {
+		.type = IOAPIC_DOMAIN_DYNAMIC,
+		.ops = &acpi_irqdomain_ops,
+	};
 
+	ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
+	if (ioapic_id < 0) {
+		unsigned long long uid;
+		acpi_status status;
+
+		status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+					       NULL, &uid);
+		if (ACPI_FAILURE(status)) {
+			acpi_handle_warn(handle, "failed to get IOAPIC ID.\n");
+			return -EINVAL;
+		}
+		ioapic_id = (int)uid;
+	}
+
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_register_ioapic);
 
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_unregister_ioapic(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
+/**
+ * acpi_ioapic_registered - Check whether IOAPIC associated with @gsi_base
+ *			    has been registered
+ * @handle:	ACPI handle of the IOAPIC device
+ * @gsi_base:	GSI base associated with the IOAPIC
+ *
+ * Assume caller holds some type of lock to serialize acpi_ioapic_registered()
+ * with acpi_register_ioapic()/acpi_unregister_ioapic().
+ */
+int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
+{
+	int ret = 0;
+
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_ioapic_registered(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
+
 static int __init acpi_parse_sbf(struct acpi_table_header *table)
 {
 	struct acpi_table_boot *sb;
@@ -1185,7 +1254,9 @@
 			/*
 			 * Parse MADT IO-APIC entries
 			 */
+			mutex_lock(&acpi_ioapic_lock);
 			error = acpi_parse_madt_ioapic_entries();
+			mutex_unlock(&acpi_ioapic_lock);
 			if (!error) {
 				acpi_set_irq_model_ioapic();
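The new acpi_ioapic_lock serializes MADT parsing, GSI registration and
unregistration, and IOAPIC hotplug against interrupt-mapping lookups, with
the nesting pinned by the lock-ordering comment added near the top of the
file. A sketch of the hotplug-side order; the lock and ACPI calls are the
kernel's, the wrapper function is illustrative only:

    #include <linux/acpi.h>
    #include <linux/device.h>

    /* Follow the documented nesting when hot-adding an IOAPIC. */
    static void ioapic_hotadd_sketch(acpi_handle handle, u64 addr, u32 gsi_base)
    {
    	lock_device_hotplug();			/* ->device_hotplug_lock */
    	acpi_register_ioapic(handle, addr, gsi_base);
    	/* which takes ->acpi_ioapic_lock, then ->ioapic_lock */
    	unlock_device_hotplug();
    }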
 
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index dcb5b15..8bb12dd 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,10 +2,12 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
+obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc04..29b5b18 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -196,7 +196,7 @@
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
-int first_system_vector = 0xfe;
+int first_system_vector = FIRST_SYSTEM_VECTOR;
 
 /*
  * Debug level, exported for io_apic.c
@@ -1930,7 +1930,7 @@
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static inline void __smp_spurious_interrupt(void)
+static inline void __smp_spurious_interrupt(u8 vector)
 {
 	u32 v;
 
@@ -1939,30 +1939,32 @@
 	 * if it is a vectored one.  Just in case...
 	 * Spurious interrupts should not be ACKed.
 	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+	if (v & (1 << (vector & 0x1f)))
 		ack_APIC_irq();
 
 	inc_irq_stat(irq_spurious_count);
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	pr_info("spurious APIC interrupt on CPU#%d, "
-		"should never happen.\n", smp_processor_id());
+	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
+		"should never happen.\n", vector, smp_processor_id());
 }
 
 __visible void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
-	__smp_spurious_interrupt();
+	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
 
 __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
+	u8 vector = ~regs->orig_ax;
+
 	entering_irq();
-	trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
-	__smp_spurious_interrupt();
-	trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+	trace_spurious_apic_entry(vector);
+	__smp_spurious_interrupt(vector);
+	trace_spurious_apic_exit(vector);
 	exiting_irq();
 }
 
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
new file mode 100644
index 0000000..816f36e
--- /dev/null
+++ b/arch/x86/kernel/apic/htirq.c
@@ -0,0 +1,107 @@
+/*
+ * Support Hypertransport IRQ
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/htirq.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/hypertransport.h>
+
+/*
+ * Hypertransport interrupt support
+ */
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	struct ht_irq_msg msg;
+
+	fetch_ht_irq_msg(irq, &msg);
+
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+	write_ht_irq_msg(irq, &msg);
+}
+
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	target_ht_irq(data->irq, dest, cfg->vector);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip ht_irq_chip = {
+	.name			= "PCI-HT",
+	.irq_mask		= mask_ht_irq,
+	.irq_unmask		= unmask_ht_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= ht_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
+	int err;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
+
+	write_ht_irq_msg(irq, &msg);
+
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for HT\n", irq);
+
+	return 0;
+}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index a6745e7..3f5f604 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -32,15 +32,11 @@
 #include <linux/module.h>
 #include <linux/syscore_ops.h>
 #include <linux/irqdomain.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
 #include <linux/slab.h>
 #include <linux/bootmem.h>
-#include <linux/dmar.h>
-#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -52,17 +48,12 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
-#include <asm/hpet.h>
 #include <asm/hw_irq.h>
 
 #include <asm/apic.h>
 
-#define __apicdebuginit(type) static type __init
-
 #define	for_each_ioapic(idx)		\
 	for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
 #define	for_each_ioapic_reverse(idx)	\
@@ -74,7 +65,7 @@
 		for_each_pin((idx), (pin))
 
 #define for_each_irq_pin(entry, head) \
-	for (entry = head; entry; entry = entry->next)
+	list_for_each_entry(entry, &head, list)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -83,7 +74,6 @@
 int sis_apic_bug = -1;
 
 static DEFINE_RAW_SPINLOCK(ioapic_lock);
-static DEFINE_RAW_SPINLOCK(vector_lock);
 static DEFINE_MUTEX(ioapic_mutex);
 static unsigned int ioapic_dynirq_base;
 static int ioapic_initialized;
@@ -112,6 +102,7 @@
 	struct ioapic_domain_cfg irqdomain_cfg;
 	struct irq_domain *irqdomain;
 	struct mp_pin_info *pin_info;
+	struct resource *iomem_res;
 } ioapics[MAX_IO_APICS];
 
 #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
@@ -205,8 +196,6 @@
 }
 early_param("noapic", parse_noapic);
 
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
-
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
 {
@@ -228,8 +217,8 @@
 }
 
 struct irq_pin_list {
+	struct list_head list;
 	int apic, pin;
-	struct irq_pin_list *next;
 };
 
 static struct irq_pin_list *alloc_irq_pin_list(int node)
@@ -237,7 +226,26 @@
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
-int __init arch_early_irq_init(void)
+static void alloc_ioapic_saved_registers(int idx)
+{
+	size_t size;
+
+	if (ioapics[idx].saved_registers)
+		return;
+
+	size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
+	ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
+	if (!ioapics[idx].saved_registers)
+		pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
+}
+
+static void free_ioapic_saved_registers(int idx)
+{
+	kfree(ioapics[idx].saved_registers);
+	ioapics[idx].saved_registers = NULL;
+}
+
+int __init arch_early_ioapic_init(void)
 {
 	struct irq_cfg *cfg;
 	int i, node = cpu_to_node(0);
@@ -245,13 +253,8 @@
 	if (!nr_legacy_irqs())
 		io_apic_irqs = ~0UL;
 
-	for_each_ioapic(i) {
-		ioapics[i].saved_registers =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				ioapics[i].nr_registers, GFP_KERNEL);
-		if (!ioapics[i].saved_registers)
-			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
-	}
+	for_each_ioapic(i)
+		alloc_ioapic_saved_registers(i);
 
 	/*
 	 * For legacy IRQ's, start with assigning irq0 to irq15 to
@@ -266,61 +269,6 @@
 	return 0;
 }
 
-static inline struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq_get_chip_data(irq);
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-
-	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
-	if (!cfg)
-		return NULL;
-	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
-		goto out_cfg;
-	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
-		goto out_domain;
-	return cfg;
-out_domain:
-	free_cpumask_var(cfg->domain);
-out_cfg:
-	kfree(cfg);
-	return NULL;
-}
-
-static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
-{
-	if (!cfg)
-		return;
-	irq_set_chip_data(at, NULL);
-	free_cpumask_var(cfg->domain);
-	free_cpumask_var(cfg->old_domain);
-	kfree(cfg);
-}
-
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
-{
-	int res = irq_alloc_desc_at(at, node);
-	struct irq_cfg *cfg;
-
-	if (res < 0) {
-		if (res != -EEXIST)
-			return NULL;
-		cfg = irq_cfg(at);
-		if (cfg)
-			return cfg;
-	}
-
-	cfg = alloc_irq_cfg(at, node);
-	if (cfg)
-		irq_set_chip_data(at, cfg);
-	else
-		irq_free_desc(at);
-	return cfg;
-}
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -445,15 +393,12 @@
  */
 static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *entry;
 
 	/* don't allow duplicates */
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
+	for_each_irq_pin(entry, cfg->irq_2_pin)
 		if (entry->apic == apic && entry->pin == pin)
 			return 0;
-		last = &entry->next;
-	}
 
 	entry = alloc_irq_pin_list(node);
 	if (!entry) {
@@ -464,22 +409,19 @@
 	entry->apic = apic;
 	entry->pin = pin;
 
-	*last = entry;
+	list_add_tail(&entry->list, &cfg->irq_2_pin);
 	return 0;
 }
 
 static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *tmp, *entry;
 
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin)
+	list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
 		if (entry->apic == apic && entry->pin == pin) {
-			*last = entry->next;
+			list_del(&entry->list);
 			kfree(entry);
 			return;
-		} else {
-			last = &entry->next;
 		}
 }
 
@@ -559,7 +501,7 @@
 
 static void mask_ioapic_irq(struct irq_data *data)
 {
-	mask_ioapic(data->chip_data);
+	mask_ioapic(irqd_cfg(data));
 }
 
 static void __unmask_ioapic(struct irq_cfg *cfg)
@@ -578,7 +520,7 @@
 
 static void unmask_ioapic_irq(struct irq_data *data)
 {
-	unmask_ioapic(data->chip_data);
+	unmask_ioapic(irqd_cfg(data));
 }
 
 /*
@@ -1164,8 +1106,7 @@
  * Find a specific PCI IRQ entry.
  * Not an __init, possibly needed by modules
  */
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
-				struct io_apic_irq_attr *irq_attr)
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int irq, i, best_ioapic = -1, best_idx = -1;
 
@@ -1219,195 +1160,11 @@
 		return -1;
 
 out:
-	irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
-			IOAPIC_MAP_ALLOC);
-	if (irq > 0)
-		set_io_apic_irq_attr(irq_attr, best_ioapic,
-				     mp_irqs[best_idx].dstirq,
-				     irq_trigger(best_idx),
-				     irq_polarity(best_idx));
-	return irq;
+	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+			 IOAPIC_MAP_ALLOC);
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	raw_spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	raw_spin_unlock(&vector_lock);
-}
-
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
-	cpumask_var_t tmp_mask;
-
-	if (cfg->move_in_progress)
-		return -EBUSY;
-
-	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
-		return -ENOMEM;
-
-	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
-	cpumask_clear(cfg->old_domain);
-	cpu = cpumask_first_and(mask, cpu_online_mask);
-	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
-
-		apic->vector_allocation_domain(cpu, tmp_mask, mask);
-
-		if (cpumask_subset(tmp_mask, cfg->domain)) {
-			err = 0;
-			if (cpumask_equal(tmp_mask, cfg->domain))
-				break;
-			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
-			 */
-			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
-			break;
-		}
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 16;
-		if (vector >= first_system_vector) {
-			offset = (offset + 1) % 16;
-			vector = FIRST_EXTERNAL_VECTOR + offset;
-		}
-
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
-			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
-			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
-			continue;
-		}
-
-		if (test_bit(vector, used_vectors))
-			goto next;
-
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
-			if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED)
-				goto next;
-		}
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (cfg->vector) {
-			cpumask_copy(cfg->old_domain, cfg->domain);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-		}
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cpumask_copy(cfg->domain, tmp_mask);
-		err = 0;
-		break;
-	}
-	free_cpumask_var(tmp_mask);
-	return err;
-}
-
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	int err;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, cfg, mask);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
-{
-	int cpu, vector;
-
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
-		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-
-	cfg->vector = 0;
-	cpumask_clear(cfg->domain);
-
-	if (likely(!cfg->move_in_progress))
-		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
-		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-			if (per_cpu(vector_irq, cpu)[vector] != irq)
-				continue;
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-			break;
-		}
-	}
-	cfg->move_in_progress = 0;
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
-	/* Mark the inuse vectors */
-	for_each_active_irq(irq) {
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-	}
-	raw_spin_unlock(&vector_lock);
-}
-
 static struct irq_chip ioapic_chip;
 
 #ifdef CONFIG_X86_32
@@ -1496,7 +1253,7 @@
 					 &dest)) {
 		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1510,7 +1267,7 @@
 	if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
 		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1641,7 +1398,7 @@
 	raw_spin_lock_init(&ioapic_lock);
 }
 
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+static void __init print_IO_APIC(int ioapic_idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -1698,7 +1455,7 @@
 	x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
 }
 
-__apicdebuginit(void) print_IO_APICs(void)
+void __init print_IO_APICs(void)
 {
 	int ioapic_idx;
 	struct irq_cfg *cfg;
@@ -1731,8 +1488,7 @@
 		cfg = irq_cfg(irq);
 		if (!cfg)
 			continue;
-		entry = cfg->irq_2_pin;
-		if (!entry)
+		if (list_empty(&cfg->irq_2_pin))
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
 		for_each_irq_pin(entry, cfg->irq_2_pin)
@@ -1743,205 +1499,6 @@
 	printk(KERN_INFO ".................................... done.\n");
 }
 
-__apicdebuginit(void) print_APIC_field(int base)
-{
-	int i;
-
-	printk(KERN_DEBUG);
-
-	for (i = 0; i < 8; i++)
-		pr_cont("%08x", apic_read(base + i*0x10));
-
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int i, v, ver, maxlvt;
-	u64 icr;
-
-	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		if (!APIC_XAPIC(ver)) {
-			v = apic_read(APIC_ARBPRI);
-			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			       v & APIC_ARBPRI_MASK);
-		}
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	/*
-	 * Remote read supported only in the 82489DX and local APIC for
-	 * Pentium processors.
-	 */
-	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
-		v = apic_read(APIC_RRR);
-		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	if (!x2apic_enabled()) {
-		v = apic_read(APIC_DFR);
-		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	}
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_field(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_field(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_field(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-
-	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
-		v = apic_read(APIC_EFEAT);
-		maxlvt = (v >> 16) & 0xff;
-		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
-		v = apic_read(APIC_ECTRL);
-		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
-		for (i = 0; i < maxlvt; i++) {
-			v = apic_read(APIC_EILVTn(i));
-			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
-		}
-	}
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APICs(int maxcpu)
-{
-	int cpu;
-
-	if (!maxcpu)
-		return;
-
-	preempt_disable();
-	for_each_online_cpu(cpu) {
-		if (cpu >= maxcpu)
-			break;
-		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
-	}
-	preempt_enable();
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (!nr_legacy_irqs())
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	raw_spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-static int __initdata show_lapic = 1;
-static __init int setup_show_lapic(char *arg)
-{
-	int num = -1;
-
-	if (strcmp(arg, "all") == 0) {
-		show_lapic = CONFIG_NR_CPUS;
-	} else {
-		get_option(&arg, &num);
-		if (num >= 0)
-			show_lapic = num;
-	}
-
-	return 1;
-}
-__setup("show_lapic=", setup_show_lapic);
-
-__apicdebuginit(int) print_ICs(void)
-{
-	if (apic_verbosity == APIC_QUIET)
-		return 0;
-
-	print_PIC();
-
-	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
-		return 0;
-
-	print_local_APICs(show_lapic);
-	print_IO_APICs();
-
-	return 0;
-}
-
-late_initcall(print_ICs);
-
-
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -2244,26 +1801,12 @@
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
-	__unmask_ioapic(data->chip_data);
+	__unmask_ioapic(irqd_cfg(data));
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
 }
 
-static int ioapic_retrigger_irq(struct irq_data *data)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned long flags;
-	int cpu;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
-	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
@@ -2273,113 +1816,6 @@
  * races.
  */
 
-#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-
-	ack_APIC_irq();
-	irq_enter();
-	exit_idle();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		int irq;
-		unsigned int irr;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __this_cpu_read(vector_irq[vector]);
-
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		raw_spin_lock(&desc->lock);
-
-		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
-		 */
-		if (cfg->move_in_progress)
-			goto unlock;
-
-		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-			goto unlock;
-
-		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		/*
-		 * Check if the vector that needs to be cleanedup is
-		 * registered at the cpu's IRR. If so, then this is not
-		 * the best time to clean it up. Lets clean it up in the
-		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
-		 * to myself.
-		 */
-		if (irr  & (1 << (vector % 32))) {
-			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
-			goto unlock;
-		}
-		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
-unlock:
-		raw_spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
-{
-	unsigned me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	me = smp_processor_id();
-
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-		send_cleanup_vector(cfg);
-}
-
-static void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
-void irq_force_complete_move(int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	if (!cfg)
-		return;
-
-	__irq_complete_move(cfg, cfg->vector);
-}
-#else
-static inline void irq_complete_move(struct irq_cfg *cfg) { }
-#endif
-
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -2400,41 +1836,6 @@
 	}
 }
 
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			  unsigned int *dest_id)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int irq = data->irq;
-	int err;
-
-	if (!config_enabled(CONFIG_SMP))
-		return -EPERM;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -EINVAL;
-
-	err = assign_irq_vector(irq, cfg, mask);
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
-	if (err) {
-		if (assign_irq_vector(irq, cfg, data->affinity))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	cpumask_copy(data->affinity, mask);
-
-	return 0;
-}
-
-
 int native_ioapic_set_affinity(struct irq_data *data,
 			       const struct cpumask *mask,
 			       bool force)
@@ -2447,24 +1848,17 @@
 		return -EPERM;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = __ioapic_set_affinity(data, mask, &dest);
+	ret = apic_set_affinity(data, mask, &dest);
 	if (!ret) {
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		__target_IO_APIC_irq(irq, dest, irqd_cfg(data));
 		ret = IRQ_SET_MASK_OK_NOCOPY;
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return ret;
 }
 
-static void ack_apic_edge(struct irq_data *data)
-{
-	irq_complete_move(data->chip_data);
-	irq_move_irq(data);
-	ack_APIC_irq();
-}
-
 atomic_t irq_mis_count;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -2547,9 +1941,9 @@
 }
 #endif
 
-static void ack_apic_level(struct irq_data *data)
+static void ack_ioapic_level(struct irq_data *data)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	int i, irq = data->irq;
 	unsigned long v;
 	bool masked;
@@ -2619,10 +2013,10 @@
 	.irq_startup		= startup_ioapic_irq,
 	.irq_mask		= mask_ioapic_irq,
 	.irq_unmask		= unmask_ioapic_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_eoi		= ack_apic_level,
+	.irq_ack		= apic_ack_edge,
+	.irq_eoi		= ack_ioapic_level,
 	.irq_set_affinity	= native_ioapic_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
+	.irq_retrigger		= apic_retrigger_irq,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -2965,6 +2359,16 @@
 	return 0;
 }
 
+static void ioapic_destroy_irqdomain(int idx)
+{
+	if (ioapics[idx].irqdomain) {
+		irq_domain_remove(ioapics[idx].irqdomain);
+		ioapics[idx].irqdomain = NULL;
+	}
+	kfree(ioapics[idx].pin_info);
+	ioapics[idx].pin_info = NULL;
+}
+
 void __init setup_IO_APIC(void)
 {
 	int ioapic;
@@ -3044,399 +2448,6 @@
 
 device_initcall(ioapic_init_ops);
 
-/*
- * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
- */
-int arch_setup_hwirq(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	int ret;
-
-	cfg = alloc_irq_cfg(irq, node);
-	if (!cfg)
-		return -ENOMEM;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (!ret)
-		irq_set_chip_data(irq, cfg);
-	else
-		free_irq_cfg(irq, cfg);
-	return ret;
-}
-
-void arch_teardown_hwirq(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	free_remapped_irq(irq);
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq, cfg);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	free_irq_cfg(irq, cfg);
-}
-
-/*
- * MSI message composition
- */
-void native_compose_msi_msg(struct pci_dev *pdev,
-			    unsigned int irq, unsigned int dest,
-			    struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	msg->address_hi = MSI_ADDR_BASE_HI;
-
-	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
-
-	msg->address_lo =
-		MSI_ADDR_BASE_LO |
-		((apic->irq_dest_mode == 0) ?
-			MSI_ADDR_DEST_MODE_PHYSICAL:
-			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU:
-			MSI_ADDR_REDIRECTION_LOWPRI) |
-		MSI_ADDR_DEST_ID(dest);
-
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED:
-			MSI_DATA_DELIVERY_LOWPRI) |
-		MSI_DATA_VECTOR(cfg->vector);
-}
-
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
-			   struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-
-	return 0;
-}
-
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	__get_cached_msi_msg(data->msi_desc, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	__pci_write_msi_msg(data->msi_desc, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name			= "PCI-MSI",
-	.irq_unmask		= pci_msi_unmask_irq,
-	.irq_mask		= pci_msi_mask_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-		  unsigned int irq_base, unsigned int irq_offset)
-{
-	struct irq_chip *chip = &msi_chip;
-	struct msi_msg msg;
-	unsigned int irq = irq_base + irq_offset;
-	int ret;
-
-	ret = msi_compose_msg(dev, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-
-	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
-
-	/*
-	 * MSI-X message is written per-IRQ, the offset is always 0.
-	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
-	 */
-	if (!irq_offset)
-		pci_write_msi_msg(irq, &msg);
-
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
-
-	return 0;
-}
-
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct msi_desc *msidesc;
-	unsigned int irq;
-	int node, ret;
-
-	/* Multiple MSI vectors only supported with interrupt remapping */
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
-	node = dev_to_node(&dev->dev);
-
-	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = irq_alloc_hwirq(node);
-		if (!irq)
-			return -ENOSPC;
-
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0) {
-			irq_free_hwirq(irq);
-			return ret;
-		}
-
-	}
-	return 0;
-}
-
-void native_teardown_msi_irq(unsigned int irq)
-{
-	irq_free_hwirq(irq);
-}
-
-#ifdef CONFIG_DMAR_TABLE
-static int
-dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		      bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest, irq = data->irq;
-	struct msi_msg msg;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip dmar_msi_type = {
-	.name			= "DMAR_MSI",
-	.irq_unmask		= dmar_msi_unmask,
-	.irq_mask		= dmar_msi_mask,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= dmar_msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-				      "edge");
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_HPET_TIMER
-
-static int hpet_msi_set_affinity(struct irq_data *data,
-				 const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	hpet_msi_read(data->handler_data, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	hpet_msi_write(data->handler_data, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip hpet_msi_type = {
-	.name = "HPET_MSI",
-	.irq_unmask = hpet_msi_unmask,
-	.irq_mask = hpet_msi_mask,
-	.irq_ack = ack_apic_edge,
-	.irq_set_affinity = hpet_msi_set_affinity,
-	.irq_retrigger = ioapic_retrigger_irq,
-	.flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
-{
-	struct irq_chip *chip = &hpet_msi_type;
-	struct msi_msg msg;
-	int ret;
-
-	ret = msi_compose_msg(NULL, irq, &msg, id);
-	if (ret < 0)
-		return ret;
-
-	hpet_msi_write(irq_get_handler_data(irq), &msg);
-	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	target_ht_irq(data->irq, dest, cfg->vector);
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	struct ht_irq_msg msg;
-	unsigned dest;
-	int err;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(dest) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-
-	write_ht_irq_msg(irq, &msg);
-
-	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-				      handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-
-	return 0;
-}
-#endif /* CONFIG_HT_IRQ */
-
 static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
@@ -3451,7 +2462,7 @@
 	return ret;
 }
 
-static int __init io_apic_get_redir_entries(int ioapic)
+static int io_apic_get_redir_entries(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3476,28 +2487,8 @@
 	return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
 }
 
-int __init arch_probe_nr_irqs(void)
-{
-	int nr;
-
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-	/*
-	 * for MSI and HT dyn irq
-	 */
-	nr += gsi_top * 16;
-#endif
-	if (nr < nr_irqs)
-		nr_irqs = nr;
-
-	return 0;
-}
-
 #ifdef CONFIG_X86_32
-static int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
 	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3572,30 +2563,63 @@
 	return apic_id;
 }
 
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
 	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return io_apic_get_unique_id(nr_ioapics, id);
+		return io_apic_get_unique_id(idx, id);
 	else
 		return id;
 }
 #else
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
-	int i;
+	union IO_APIC_reg_00 reg_00;
 	DECLARE_BITMAP(used, 256);
+	unsigned long flags;
+	u8 new_id;
+	int i;
 
 	bitmap_zero(used, 256);
 	for_each_ioapic(i)
 		__set_bit(mpc_ioapic_id(i), used);
+
+	/* Hand out the requested id if available */
 	if (!test_bit(id, used))
 		return id;
-	return find_first_zero_bit(used, 256);
+
+	/*
+	 * Read the current id from the ioapic and keep it if
+	 * available.
+	 */
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	new_id = reg_00.bits.ID;
+	if (!test_bit(new_id, used)) {
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+			 idx, new_id, id);
+		return new_id;
+	}
+
+	/*
+	 * Get the next free id and write it to the ioapic.
+	 */
+	new_id = find_first_zero_bit(used, 256);
+	reg_00.bits.ID = new_id;
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(idx, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	/* Sanity check */
+	BUG_ON(reg_00.bits.ID != new_id);
+
+	return new_id;
 }
 #endif
 
-static int __init io_apic_get_version(int ioapic)
+static int io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3702,6 +2726,7 @@
 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
 		num++;
+		ioapics[i].iomem_res = res;
 	}
 
 	ioapic_resources = res;
@@ -3799,21 +2824,7 @@
 	return gsi - gsi_cfg->gsi_base;
 }
 
-static __init int bad_ioapic(unsigned long address)
-{
-	if (nr_ioapics >= MAX_IO_APICS) {
-		pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
-			MAX_IO_APICS, nr_ioapics);
-		return 1;
-	}
-	if (!address) {
-		pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
-		return 1;
-	}
-	return 0;
-}
-
-static __init int bad_ioapic_register(int idx)
+static int bad_ioapic_register(int idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -3832,32 +2843,61 @@
 	return 0;
 }
 
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-			       struct ioapic_domain_cfg *cfg)
+static int find_free_ioapic_entry(void)
 {
-	int idx = 0;
-	int entries;
+	int idx;
+
+	for (idx = 0; idx < MAX_IO_APICS; idx++)
+		if (ioapics[idx].nr_registers == 0)
+			return idx;
+
+	return MAX_IO_APICS;
+}
+
+/**
+ * mp_register_ioapic - Register an IOAPIC device
+ * @id:		hardware IOAPIC ID
+ * @address:	physical address of IOAPIC register area
+ * @gsi_base:	base of GSI associated with the IOAPIC
+ * @cfg:	configuration information for the IOAPIC
+ */
+int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+		       struct ioapic_domain_cfg *cfg)
+{
+	bool hotplug = !!ioapic_initialized;
 	struct mp_ioapic_gsi *gsi_cfg;
+	int idx, ioapic, entries;
+	u32 gsi_end;
 
-	if (bad_ioapic(address))
-		return;
+	if (!address) {
+		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
+		return -EINVAL;
+	}
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].mp_config.apicaddr == address) {
+			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
+				address, ioapic);
+			return -EEXIST;
+		}
 
-	idx = nr_ioapics;
+	idx = find_free_ioapic_entry();
+	if (idx >= MAX_IO_APICS) {
+		pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+			MAX_IO_APICS, idx);
+		return -ENOSPC;
+	}
 
 	ioapics[idx].mp_config.type = MP_IOAPIC;
 	ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
 	ioapics[idx].mp_config.apicaddr = address;
-	ioapics[idx].irqdomain = NULL;
-	ioapics[idx].irqdomain_cfg = *cfg;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-
 	if (bad_ioapic_register(idx)) {
 		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
-		return;
+		return -ENODEV;
 	}
 
-	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
+	ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
 
 	/*
@@ -3865,24 +2905,112 @@
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
 	entries = io_apic_get_redir_entries(idx);
+	gsi_end = gsi_base + entries - 1;
+	for_each_ioapic(ioapic) {
+		gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+		if ((gsi_base >= gsi_cfg->gsi_base &&
+		     gsi_base <= gsi_cfg->gsi_end) ||
+		    (gsi_end >= gsi_cfg->gsi_base &&
+		     gsi_end <= gsi_cfg->gsi_end)) {
+			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
+				gsi_base, gsi_end,
+				gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOSPC;
+		}
+	}
 	gsi_cfg = mp_ioapic_gsi_routing(idx);
 	gsi_cfg->gsi_base = gsi_base;
-	gsi_cfg->gsi_end = gsi_base + entries - 1;
+	gsi_cfg->gsi_end = gsi_end;
+
+	ioapics[idx].irqdomain = NULL;
+	ioapics[idx].irqdomain_cfg = *cfg;
 
 	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
+	 * If mp_register_ioapic() is called during early boot while walking
+	 * the ACPI/SFI/DT tables, it is too early to create the irqdomain
+	 * (the bootmem allocator is still in use), so delay it to setup_IO_APIC().
 	 */
-	ioapics[idx].nr_registers = entries;
+	if (hotplug) {
+		if (mp_irqdomain_create(idx)) {
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOMEM;
+		}
+		alloc_ioapic_saved_registers(idx);
+	}
 
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
+	if (nr_ioapics <= idx)
+		nr_ioapics = idx + 1;
+
+	/* Set nr_registers to mark entry present */
+	ioapics[idx].nr_registers = entries;
 
 	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
 		idx, mpc_ioapic_id(idx),
 		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
 		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
-	nr_ioapics++;
+	return 0;
+}
+
+int mp_unregister_ioapic(u32 gsi_base)
+{
+	int ioapic, pin;
+	int found = 0;
+	struct mp_pin_info *pin_info;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
+			found = 1;
+			break;
+		}
+	if (!found) {
+		pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
+		return -ENODEV;
+	}
+
+	for_each_pin(ioapic, pin) {
+		pin_info = mp_pin_info(ioapic, pin);
+		if (pin_info->count) {
+			pr_warn("pin%d on IOAPIC%d is still in use.\n",
+				pin, ioapic);
+			return -EBUSY;
+		}
+	}
+
+	ioapics[ioapic].nr_registers = 0;
+	ioapics[ioapic].nr_registers  = 0;
+	ioapic_destroy_irqdomain(ioapic);
+	free_ioapic_saved_registers(ioapic);
+	if (ioapics[ioapic].iomem_res)
+		release_resource(ioapics[ioapic].iomem_res);
+	clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
+	memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
+
+	return 0;
+}
+
+int mp_ioapic_registered(u32 gsi_base)
+{
+	int ioapic;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
+			return 1;
+
+	return 0;
+}
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
 }
 
 int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
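
A note on the GSI range check added to mp_register_ioapic() above: it
flags a conflict when either endpoint of the new [gsi_base, gsi_end]
range falls inside an existing range. For reference, the canonical
overlap predicate for closed ranges is shorter and also covers a new
range that strictly contains an existing one; a sketch with illustrative
naming, not part of the patch:

	#include <linux/types.h>

	/* [a, b] and [c, d] overlap iff a <= d && c <= b. */
	static bool gsi_ranges_overlap(u32 a, u32 b, u32 c, u32 d)
	{
		return a <= d && c <= b;
	}
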
@@ -3931,7 +3059,7 @@
 
 	ioapic_mask_entry(ioapic, pin);
 	__remove_pin_from_irq(cfg, ioapic, pin);
-	WARN_ON(cfg->irq_2_pin != NULL);
+	WARN_ON(!list_empty(&cfg->irq_2_pin));
 	arch_teardown_hwirq(virq);
 }
 
@@ -3964,18 +3092,6 @@
 	return ret;
 }
 
-bool mp_should_keep_irq(struct device *dev)
-{
-	if (dev->power.is_prepared)
-		return true;
-#ifdef	CONFIG_PM
-	if (dev->power.runtime_status == RPM_SUSPENDING)
-		return true;
-#endif
-
-	return false;
-}
-
 /* Enable IOAPIC early just for system timer */
 void __init pre_init_apic_IRQ0(void)
 {
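
The io_apic.c hunks above convert cfg->irq_2_pin from a hand-rolled
singly linked chain, threaded through a struct irq_pin_list **last
cursor, to the generic list_head API. A minimal sketch of the resulting
add path; the entry layout and duplicate check mirror
__add_pin_to_irq_node(), while the standalone helper packaging is
illustrative:

	#include <linux/errno.h>
	#include <linux/list.h>
	#include <linux/slab.h>

	struct irq_pin_list {
		struct list_head list;	/* links the entry into cfg->irq_2_pin */
		int apic, pin;
	};

	/* "head" is cfg->irq_2_pin, set up earlier with INIT_LIST_HEAD(). */
	static int add_pin_to_list(struct list_head *head, int node,
				   int apic, int pin)
	{
		struct irq_pin_list *entry;

		/* don't allow duplicates */
		list_for_each_entry(entry, head, list)
			if (entry->apic == apic && entry->pin == pin)
				return 0;

		entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node);
		if (!entry)
			return -ENOMEM;

		entry->apic = apic;
		entry->pin = pin;
		list_add_tail(&entry->list, head);	/* keep insertion order */
		return 0;
	}

Removal pairs list_for_each_entry_safe() with list_del(), since the entry
being freed must not be used to find its successor, and the old emptiness
test (cfg->irq_2_pin != NULL) becomes list_empty(&cfg->irq_2_pin), which
is why alloc_irq_cfg() in the new vector.c initializes the list head.
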
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
new file mode 100644
index 0000000..d6ba2d6
--- /dev/null
+++ b/arch/x86/kernel/apic/msi.c
@@ -0,0 +1,286 @@
+/*
+ * Support for MSI, HPET and DMAR interrupts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+#include <linux/msi.h>
+#include <asm/msidef.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/irq_remapping.h>
+
+void native_compose_msi_msg(struct pci_dev *pdev,
+			    unsigned int irq, unsigned int dest,
+			    struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+
+	if (x2apic_enabled())
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((apic->irq_dest_mode == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL :
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU :
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED :
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+}
+
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+	return 0;
+}
+
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	__get_cached_msi_msg(data->msi_desc, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	__pci_write_msi_msg(data->msi_desc, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_unmask		= pci_msi_unmask_irq,
+	.irq_mask		= pci_msi_mask_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
+{
+	struct irq_chip *chip = &msi_chip;
+	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
+	int ret;
+
+	ret = msi_compose_msg(dev, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * An MSI-X message is written per IRQ, so the offset is always 0. An
+	 * MSI message covers a contiguous group of IRQs, written for the 0th IRQ.
+	 */
+	if (!irq_offset)
+		pci_write_msi_msg(irq, &msg);
+
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+	return 0;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_desc *msidesc;
+	unsigned int irq;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
+	node = dev_to_node(&dev->dev);
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = irq_alloc_hwirq(node);
+		if (!irq)
+			return -ENOSPC;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0) {
+			irq_free_hwirq(irq);
+			return ret;
+		}
+
+	}
+	return 0;
+}
+
+void native_teardown_msi_irq(unsigned int irq)
+{
+	irq_free_hwirq(irq);
+}
+
+#ifdef CONFIG_DMAR_TABLE
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest, irq = data->irq;
+	struct msi_msg msg;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip dmar_msi_type = {
+	.name			= "DMAR_MSI",
+	.irq_unmask		= dmar_msi_unmask,
+	.irq_mask		= dmar_msi_mask,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= dmar_msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif
+
+/*
+ * HPET MSI support
+ */
+#ifdef CONFIG_HPET_TIMER
+
+static int hpet_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	hpet_msi_read(data->handler_data, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(data->handler_data, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.irq_unmask = hpet_msi_unmask,
+	.irq_mask = hpet_msi_mask,
+	.irq_ack = apic_ack_edge,
+	.irq_set_affinity = hpet_msi_set_affinity,
+	.irq_retrigger = apic_retrigger_irq,
+	.flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+{
+	struct irq_chip *chip = &hpet_msi_type;
+	struct msi_msg msg;
+	int ret;
+
+	ret = msi_compose_msg(NULL, irq, &msg, id);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq_get_handler_data(irq), &msg);
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+	return 0;
+}
+#endif
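
The three set_affinity implementations in the new msi.c share one step:
once apic_set_affinity() has picked a new vector and destination, only
two fields of the cached message change. A sketch of that common step,
using the <asm/msidef.h> masks that appear in the diff; the helper
itself is illustrative, not part of the patch:

	#include <linux/types.h>
	#include <linux/msi.h>
	#include <asm/msidef.h>

	/*
	 * Re-point an MSI message at a new vector/destination. All other
	 * address and data bits (trigger mode, delivery mode, ...) are kept.
	 */
	static void retarget_msi_msg(struct msi_msg *msg, u8 vector,
				     unsigned int dest)
	{
		msg->data &= ~MSI_DATA_VECTOR_MASK;
		msg->data |= MSI_DATA_VECTOR(vector);
		msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK;
		msg->address_lo |= MSI_ADDR_DEST_ID(dest);
	}

Only dmar_msi_set_affinity() rewrites address_hi as well, because the
DMAR message carries the extended destination ID there.
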
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
new file mode 100644
index 0000000..6cedd79
--- /dev/null
+++ b/arch/x86/kernel/apic/vector.c
@@ -0,0 +1,719 @@
+/*
+ * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/irq_remapping.h>
+
+static DEFINE_RAW_SPINLOCK(vector_lock);
+
+void lock_vector_lock(void)
+{
+	/* Used to ensure that the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	raw_spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+	raw_spin_unlock(&vector_lock);
+}
+
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+{
+	return irq_data->chip_data;
+}
+
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+
+	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+	if (!cfg)
+		return NULL;
+	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+		goto out_cfg;
+	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+		goto out_domain;
+#ifdef	CONFIG_X86_IO_APIC
+	INIT_LIST_HEAD(&cfg->irq_2_pin);
+#endif
+	return cfg;
+out_domain:
+	free_cpumask_var(cfg->domain);
+out_cfg:
+	kfree(cfg);
+	return NULL;
+}
+
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+{
+	int res = irq_alloc_desc_at(at, node);
+	struct irq_cfg *cfg;
+
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = irq_cfg(at);
+		if (cfg)
+			return cfg;
+	}
+
+	cfg = alloc_irq_cfg(at, node);
+	if (cfg)
+		irq_set_chip_data(at, cfg);
+	else
+		irq_free_desc(at);
+	return cfg;
+}
+
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+{
+	if (!cfg)
+		return;
+	irq_set_chip_data(at, NULL);
+	free_cpumask_var(cfg->domain);
+	free_cpumask_var(cfg->old_domain);
+	kfree(cfg);
+}
+
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+	static int current_offset = VECTOR_OFFSET_START % 16;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
+
+	if (cfg->move_in_progress)
+		return -EBUSY;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	/* Only try and allocate irqs on cpus that are present */
+	err = -ENOSPC;
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, vector, offset;
+
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * The new cpumask using the vector is a proper subset
+			 * of the mask currently in use, so clean up the vector
+			 * allocation for the members that are no longer used.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
+		}
+
+		vector = current_vector;
+		offset = current_offset;
+next:
+		vector += 16;
+		if (vector >= first_system_vector) {
+			offset = (offset + 1) % 16;
+			vector = FIRST_EXTERNAL_VECTOR + offset;
+		}
+
+		if (unlikely(current_vector == vector)) {
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+			continue;
+		}
+
+		if (test_bit(vector, used_vectors))
+			goto next;
+
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+			if (per_cpu(vector_irq, new_cpu)[vector] >
+			    VECTOR_UNDEFINED)
+				goto next;
+		}
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (cfg->vector) {
+			cpumask_copy(cfg->old_domain, cfg->domain);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+		}
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
+	}
+	free_cpumask_var(tmp_mask);
+
+	return err;
+}
+
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	int err;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	err = __assign_irq_vector(irq, cfg, mask);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return err;
+}
+
+void clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+	int cpu, vector;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+
+	cfg->vector = 0;
+	cpumask_clear(cfg->domain);
+
+	if (likely(!cfg->move_in_progress)) {
+		raw_spin_unlock_irqrestore(&vector_lock, flags);
+		return;
+	}
+
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+		     vector++) {
+			if (per_cpu(vector_irq, cpu)[vector] != irq)
+				continue;
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+			break;
+		}
+	}
+	cfg->move_in_progress = 0;
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+	int nr;
+
+	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+		nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+	/*
+	 * for MSI and HT dynamic irqs
+	 */
+	if (gsi_top <= NR_IRQS_LEGACY)
+		nr +=  8 * nr_cpu_ids;
+	else
+		nr += gsi_top * 16;
+#endif
+	if (nr < nr_irqs)
+		nr_irqs = nr;
+
+	return nr_legacy_irqs();
+}
+
+int __init arch_early_irq_init(void)
+{
+	return arch_early_ioapic_init();
+}
+
+static void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/*
+	 * vector_lock will make sure that we don't run into irq vector
+	 * assignments that might be happening on another cpu in parallel,
+	 * while we set up our initial vector to irq mappings.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Mark the in-use vectors */
+	for_each_active_irq(irq) {
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+	}
+	raw_spin_unlock(&vector_lock);
+}
+
+/*
+ * Set up the vector to irq mappings.
+ */
+void setup_vector_irq(int cpu)
+{
+	int irq;
+
+	/*
+	 * On most platforms the legacy PIC delivers interrupts to the boot
+	 * cpu, but there are certain platforms where PIC interrupts are
+	 * delivered to multiple cpus. If the legacy IRQs are handled by the
+	 * legacy PIC, then for a new cpu that is coming online set up the
+	 * static legacy vector to irq mapping:
+	 */
+	for (irq = 0; irq < nr_legacy_irqs(); irq++)
+		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
+
+	__setup_vector_irq(cpu);
+}
+
+int apic_retrigger_irq(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned long flags;
+	int cpu;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+
+void apic_ack_edge(struct irq_data *data)
+{
+	irq_complete_move(irqd_cfg(data));
+	irq_move_irq(data);
+	ack_APIC_irq();
+}
+
+/*
+ * Either sets data->affinity to a valid value and returns the
+ * ->cpu_mask_to_apicid of that in dest_id, or returns an error
+ * code and leaves data->affinity untouched.
+ */
+int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -EPERM;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
+
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i),
+					    IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+
+	ack_APIC_irq();
+	irq_enter();
+	exit_idle();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		int irq;
+		unsigned int irr;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+
+		irq = __this_cpu_read(vector_irq[vector]);
+
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		raw_spin_lock(&desc->lock);
+
+		/*
+		 * Check if the irq migration is in progress. If so, we
+		 * haven't received the cleanup request yet for this irq.
+		 */
+		if (cfg->move_in_progress)
+			goto unlock;
+
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+			goto unlock;
+
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		/*
+		 * Check if the vector that needs to be cleaned up is
+		 * still registered in the cpu's IRR. If so, this is not
+		 * the best time to clean it up. Clean it up on the
+		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+		 * to ourselves.
+		 */
+		if (irr & (1 << (vector % 32))) {
+			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+			goto unlock;
+		}
+		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
+unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+{
+	unsigned me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	me = smp_processor_id();
+
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
+}
+
+void irq_complete_move(struct irq_cfg *cfg)
+{
+	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+}
+#endif
+
+/*
+ * Dynamic irq allocation and deallocation. Should be replaced by irq domains!
+ */
+int arch_setup_hwirq(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	int ret;
+
+	cfg = alloc_irq_cfg(irq, node);
+	if (!cfg)
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (!ret)
+		irq_set_chip_data(irq, cfg);
+	else
+		free_irq_cfg(irq, cfg);
+	return ret;
+}
+
+void arch_teardown_hwirq(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	free_remapped_irq(irq);
+	clear_irq_vector(irq, cfg);
+	free_irq_cfg(irq, cfg);
+}
+
+static void __init print_APIC_field(int base)
+{
+	int i;
+
+	printk(KERN_DEBUG);
+
+	for (i = 0; i < 8; i++)
+		pr_cont("%08x", apic_read(base + i*0x10));
+
+	pr_cont("\n");
+}
+
+static void __init print_local_APIC(void *dummy)
+{
+	unsigned int i, v, ver, maxlvt;
+	u64 icr;
+
+	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
+		 smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+	v = apic_read(APIC_LVR);
+	pr_info("... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = lapic_get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			pr_debug("... APIC ARBPRI: %08x (%02x)\n",
+				 v, v & APIC_ARBPRI_MASK);
+		}
+		v = apic_read(APIC_PROCPRI);
+		pr_debug("... APIC PROCPRI: %08x\n", v);
+	}
+
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		pr_debug("... APIC RRR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_LDR);
+	pr_debug("... APIC LDR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		pr_debug("... APIC DFR: %08x\n", v);
+	}
+	v = apic_read(APIC_SPIV);
+	pr_debug("... APIC SPIV: %08x\n", v);
+
+	pr_debug("... APIC ISR field:\n");
+	print_APIC_field(APIC_ISR);
+	pr_debug("... APIC TMR field:\n");
+	print_APIC_field(APIC_TMR);
+	pr_debug("... APIC IRR field:\n");
+	print_APIC_field(APIC_IRR);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		/* Due to the Pentium erratum 3AP. */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		pr_debug("... APIC ESR: %08x\n", v);
+	}
+
+	icr = apic_icr_read();
+	pr_debug("... APIC ICR: %08x\n", (u32)icr);
+	pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+	v = apic_read(APIC_LVTT);
+	pr_debug("... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {
+		/* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		pr_debug("... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	pr_debug("... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	pr_debug("... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {
+		/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		pr_debug("... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	pr_debug("... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	pr_debug("... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	pr_debug("... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		pr_debug("... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		pr_debug("... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			pr_debug("... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
+	pr_cont("\n");
+}
+
+static void __init print_local_APICs(int maxcpu)
+{
+	int cpu;
+
+	if (!maxcpu)
+		return;
+
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu >= maxcpu)
+			break;
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	}
+	preempt_enable();
+}
+
+static void __init print_PIC(void)
+{
+	unsigned int v;
+	unsigned long flags;
+
+	if (!nr_legacy_irqs())
+		return;
+
+	pr_debug("\nprinting PIC contents\n");
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	pr_debug("... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	pr_debug("... PIC  IRR: %04x\n", v);
+
+	outb(0x0b, 0xa0);
+	outb(0x0b, 0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a, 0xa0);
+	outb(0x0a, 0x20);
+
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	pr_debug("... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	pr_debug("... PIC ELCR: %04x\n", v);
+}
+
+static int show_lapic __initdata = 1;
+static __init int setup_show_lapic(char *arg)
+{
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+	} else {
+		get_option(&arg, &num);
+		if (num >= 0)
+			show_lapic = num;
+	}
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+static int __init print_ICs(void)
+{
+	if (apic_verbosity == APIC_QUIET)
+		return 0;
+
+	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic && !apic_from_smp_config())
+		return 0;
+
+	print_local_APICs(show_lapic);
+	print_IO_APICs();
+
+	return 0;
+}
+
+late_initcall(print_ICs);
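
A side note on the IRR test in the move-cleanup handler at the top of this
file: the local APIC presents its 256-bit IRR as eight 32-bit registers
spaced 0x10 apart (the same layout print_APIC_field() walks with
base + i*0x10), so vector V is tracked in word V/32 at bit V%32. A minimal
userspace sketch of that arithmetic, with an illustrative vector value:

#include <stdio.h>

#define APIC_IRR 0x200	/* architectural offset of the first IRR word */

int main(void)
{
	unsigned int vector = 0x61;	/* illustrative vector */
	unsigned int reg = APIC_IRR + (vector / 32) * 0x10;
	unsigned int bit = vector % 32;

	/* Mirrors the handler's irr & (1 << (vector % 32)) test. */
	printf("vector 0x%x -> register 0x%x, bit %u\n", vector, reg, bit);
	return 0;
}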
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 08f3fed..10b8d3e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -276,6 +276,17 @@
 	return box;
 }
 
+/*
+ * Use the uncore_pmu_event_init pmu event_init callback
+ * as a detection point for uncore events.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+static bool is_uncore_event(struct perf_event *event)
+{
+	return event->pmu->event_init == uncore_pmu_event_init;
+}
+
 static int
 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
 {
@@ -290,13 +301,18 @@
 		return -EINVAL;
 
 	n = box->n_events;
-	box->event_list[n] = leader;
-	n++;
+
+	if (is_uncore_event(leader)) {
+		box->event_list[n] = leader;
+		n++;
+	}
+
 	if (!dogrp)
 		return n;
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF)
+		if (!is_uncore_event(event) ||
+		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		if (n >= max_count)
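
The is_uncore_event() helper added above is a small type-detection idiom:
rather than adding a discriminator field to struct perf_event, an event's
origin is recognized by comparing its pmu->event_init callback against the
known initializer. A standalone sketch of the same idiom, using
illustrative names rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct pmu { int (*event_init)(void *event); };
struct event { struct pmu *pmu; };

static int uncore_init(void *event) { (void)event; return 0; }
static int other_init(void *event) { (void)event; return 0; }

/* An event belongs to us iff it was initialized by our callback. */
static bool is_uncore(struct event *e)
{
	return e->pmu->event_init == uncore_init;
}

int main(void)
{
	struct pmu uncore = { .event_init = uncore_init };
	struct pmu cpu = { .event_init = other_init };
	struct event a = { .pmu = &uncore }, b = { .pmu = &cpu };

	printf("a: %d, b: %d\n", is_uncore(&a), is_uncore(&b));
	return 0;
}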
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f5ab56d..aceb2f9 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -28,6 +28,7 @@
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/cpu.h>
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 1cf7c97..000d419 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -732,10 +732,10 @@
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
       .endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 90878aa..9ebaf63 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -740,10 +740,10 @@
 ENTRY(irq_entries_start)
 	INTR_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -8
       .endif
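
The new .rept bound in both entry files is plain ceiling division: the
stubs are emitted in 32-byte-aligned groups of seven, and (n + 6)/7 rounds
up so that every vector in [FIRST_EXTERNAL_VECTOR, FIRST_SYSTEM_VECTOR)
gets a stub. A quick check of the arithmetic, using 0x20 and 0xef as
assumed values for the two bounds (they are not shown in this diff):

#include <stdio.h>

int main(void)
{
	unsigned int first_external = 0x20;	/* assumed FIRST_EXTERNAL_VECTOR */
	unsigned int first_system = 0xef;	/* assumed FIRST_SYSTEM_VECTOR */
	unsigned int n = first_system - first_external;
	unsigned int groups = (n + 6) / 7;	/* ceil(n / 7) */

	printf("%u stubs emitted in %u groups of 7\n", n, groups);
	return 0;
}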
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee..70e181e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -99,32 +99,9 @@
 	x86_init.irqs.intr_init();
 }
 
-/*
- * Setup the vector to irq mappings.
- */
-void setup_vector_irq(int cpu)
-{
-#ifndef CONFIG_X86_IO_APIC
-	int irq;
-
-	/*
-	 * On most of the platforms, legacy PIC delivers the interrupts on the
-	 * boot cpu. But there are certain platforms where PIC interrupts are
-	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
-	 * legacy PIC, for the new cpu that is coming online, setup the static
-	 * legacy vector to irq mapping:
-	 */
-	for (irq = 0; irq < nr_legacy_irqs(); irq++)
-		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
-#endif
-
-	__setup_vector_irq(cpu);
-}
-
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -144,7 +121,6 @@
 
 	/* IPI used for rebooting/stopping */
 	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif
 #endif /* CONFIG_SMP */
 }
 
@@ -159,7 +135,7 @@
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+#ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -197,10 +173,17 @@
 	 * 'special' SMP interrupts)
 	 */
 	i = FIRST_EXTERNAL_VECTOR;
-	for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifndef CONFIG_X86_LOCAL_APIC
+#define first_system_vector NR_VECTORS
+#endif
+	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
 		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
+#ifdef CONFIG_X86_LOCAL_APIC
+	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
+		set_intr_gate(i, spurious_interrupt);
+#endif
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
 		setup_irq(2, &irq2);
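
The reworked loop above splits gate installation at first_system_vector:
clear bits below the boundary receive real device entry stubs, while clear
bits above it (on local APIC builds) fall back to spurious_interrupt. A
userspace sketch of that split, with plain loops standing in for
for_each_clear_bit_from() and illustrative sizes:

#include <stdio.h>

#define NR 16	/* stand-in for NR_VECTORS */

int main(void)
{
	int used[NR] = { [3] = 1, [7] = 1 };	/* pre-claimed vectors */
	const char *gate[NR] = { 0 };
	int first_system = 12, i;	/* stand-in for first_system_vector */

	for (i = 0; i < first_system; i++)
		if (!used[i])
			gate[i] = "device";
	for (; i < NR; i++)
		if (!used[i])
			gate[i] = "spurious";

	for (i = 0; i < NR; i++)
		printf("%2d: %s\n", i, gate[i] ? gate[i] : "claimed");
	return 0;
}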
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 72e8e31..469b23d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -20,6 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/cacheflush.h>
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4859810..415480d 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/io_apic.h>
 #include <asm/debugreg.h>
 #include <asm/kexec-bzimage64.h>
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 17962e6..bae6c60 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7a8f584..6d7022c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1084,7 +1084,6 @@
 {
 	unsigned int i;
 
-	preempt_disable();
 	smp_cpu_index_default();
 
 	/*
@@ -1102,22 +1101,19 @@
 	}
 	set_cpu_sibling_map(0);
 
-
 	if (smp_sanity_check(max_cpus) < 0) {
 		pr_info("SMP disabled\n");
 		disable_smp();
-		goto out;
+		return;
 	}
 
 	default_setup_apic_routing();
 
-	preempt_disable();
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
-	preempt_enable();
 
 	connect_bsp_APIC();
 
@@ -1151,8 +1147,6 @@
 		uv_system_init();
 
 	set_mtrr_aps_delayed_init();
-out:
-	preempt_enable();
 }
 
 void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 3e551ee..4e942f3 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -55,12 +55,6 @@
 	if (info->seg_not_present)
 		return false;
 
-#ifdef CONFIG_X86_64
-	/* The L bit makes no sense for data. */
-	if (info->lm)
-		return false;
-#endif
-
 	return true;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a9ae205..88900e2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -331,7 +331,7 @@
 		break; /* Success, it was handled */
 	case 1: /* Bound violation. */
 		info = mpx_generate_siginfo(regs, xsave_buf);
-		if (PTR_ERR(info)) {
+		if (IS_ERR(info)) {
 			/*
 			 * We failed to decode the MPX instruction.  Act as if
 			 * the exception was not caused by MPX.
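
The traps.c hunk is a genuine bug fix, not a cleanup: PTR_ERR() is only a
cast, so it is non-zero for every valid pointer as well, whereas IS_ERR()
tests whether the value lies in the small reserved error range. A minimal
userspace re-implementation of the two macros (MAX_ERRNO as in the usual
kernel definition) that shows the difference:

#include <stdio.h>

#define MAX_ERRNO 4095

static long ptr_err(const void *p) { return (long)p; }

static int is_err(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	int x;
	void *valid = &x;
	void *err = (void *)(long)-22;	/* like ERR_PTR(-EINVAL) */

	/* A valid pointer is "true" under PTR_ERR() but not under IS_ERR(). */
	printf("valid: ptr_err=%ld is_err=%d\n", ptr_err(valid), is_err(valid));
	printf("err:   ptr_err=%ld is_err=%d\n", ptr_err(err), is_err(err));
	return 0;
}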
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index aae9413..c1c1544 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -841,7 +841,7 @@
 {
 	unsigned int i;
 
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+	for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index b9958c3..44b9271 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -210,6 +210,9 @@
 {
 	int polarity;
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
 		polarity = 0; /* active high */
 	else
@@ -224,13 +227,18 @@
 	if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
 		return -EBUSY;
 
+	dev->irq_managed = 1;
+
 	return 0;
 }
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0)
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
+		dev->irq_managed = 0;
+	}
 }
 
 struct pci_ops intel_mid_pci_ops = {
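
The irq_managed flag introduced here makes the enable/disable pair
idempotent and balanced: enable becomes a no-op when a mapping already
exists, and disable only tears down what enable set up. A stripped-down
model of that contract, with stubs standing in for mp_map_gsi_to_irq()
and mp_unmap_irq():

#include <stdio.h>

struct dev { int irq; int irq_managed; };

static int enable(struct dev *d)
{
	if (d->irq_managed && d->irq > 0)
		return 0;	/* already mapped: don't map twice */
	d->irq = 42;		/* pretend the mapping succeeded */
	d->irq_managed = 1;
	return 0;
}

static void disable(struct dev *d)
{
	if (d->irq_managed && d->irq > 0) {
		d->irq = 0;	/* only undo our own mapping */
		d->irq_managed = 0;
	}
}

int main(void)
{
	struct dev d = { 0, 0 };

	enable(&d);
	enable(&d);	/* second call is a no-op */
	disable(&d);
	disable(&d);	/* so is a second disable */
	printf("irq=%d managed=%d\n", d.irq, d.irq_managed);
	return 0;
}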
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index eb500c2..5dc6ca5 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1200,11 +1200,12 @@
 #ifdef CONFIG_X86_IO_APIC
 			struct pci_dev *temp_dev;
 			int irq;
-			struct io_apic_irq_attr irq_attr;
+
+			if (dev->irq_managed && dev->irq > 0)
+				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-						PCI_SLOT(dev->devfn),
-						pin - 1, &irq_attr);
+						PCI_SLOT(dev->devfn), pin - 1);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1218,7 +1219,7 @@
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn),
-						pin - 1, &irq_attr);
+						pin - 1);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1228,6 +1229,7 @@
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				dev->irq_managed = 1;
 				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
@@ -1254,11 +1256,24 @@
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
 	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
-	    dev->irq) {
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
 		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index b233681..0ce6736 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@
 		       unsigned long mmr_offset, int limit)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
@@ -198,13 +198,13 @@
 uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 		    bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest;
 	unsigned long mmr_value, mmr_offset;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
+	if (apic_set_affinity(data, mask, &dest))
 		return -1;
 
 	mmr_value = 0;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 7cc4e33..5277a0e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -413,6 +413,9 @@
 		return 0;
 	}
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry) {
 		/*
@@ -456,6 +459,7 @@
 		return rc;
 	}
 	dev->irq = rc;
+	dev->irq_managed = 1;
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -478,7 +482,7 @@
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin)
+	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
 	/* Keep IOAPIC pin configuration when suspending */
@@ -506,6 +510,9 @@
 	 */
 
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
-	if (gsi >= 0 && dev->irq > 0)
+	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
+		dev->irq = 0;
+		dev->irq_managed = 0;
+	}
 }
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index ef58f46..342942f 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -125,13 +125,12 @@
 	}
 
 	header = (struct acpi_subtable_header *)obj->buffer.pointer;
-	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
+	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
 		map_lapic_id(header, acpi_id, &apic_id);
-	} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
+	else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
 		map_lsapic_id(header, type, acpi_id, &apic_id);
-	} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
+	else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
 		map_x2apic_id(header, type, acpi_id, &apic_id);
-	}
 
 exit:
 	kfree(buffer.pointer);
@@ -164,7 +163,7 @@
 		 * For example,
 		 *
 		 * Scope (_PR)
-                 * {
+		 * {
 		 *     Processor (CPU0, 0x00, 0x00000410, 0x06) {}
 		 *     Processor (CPU1, 0x01, 0x00000410, 0x06) {}
 		 *     Processor (CPU2, 0x02, 0x00000410, 0x06) {}
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 2ba8f02..782a0d1 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -200,7 +200,7 @@
 
 	status = acpi_resource_to_address64(ares, &addr);
 	if (ACPI_FAILURE(status))
-		return true;
+		return false;
 
 	res->start = addr.minimum;
 	res->end = addr.maximum;
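
The resource.c change flips the helper to the conventional bool contract
where true means "the output was filled in"; returning true on
ACPI_FAILURE let callers consume an uninitialized struct resource. A tiny
model of the corrected convention, with illustrative names:

#include <stdbool.h>
#include <stdio.h>

struct res { long start, end; };

static bool decode(bool fw_ok, struct res *out)
{
	if (!fw_ok)
		return false;	/* failure: leave *out untouched */
	out->start = 0x1000;
	out->end = 0x1fff;
	return true;
}

int main(void)
{
	struct res r;

	if (decode(true, &r))
		printf("res [%#lx-%#lx]\n", r.start, r.end);
	if (!decode(false, &r))
		printf("decode failed, resource not consumed\n");
	return 0;
}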
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index c913906..0f6b229 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
+ *  (C) 2001-2004  Dave Jones.
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1008,7 +1008,7 @@
 module_param(enable, int, 0644);
 MODULE_PARM_DESC(enable, "Enable driver");
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index f910272..e6f24b2 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -300,7 +300,7 @@
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones, "
 		"Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index e61e224..37c5742 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -1,7 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
  *  (C) 2003 Dave Jones on behalf of SuSE Labs.
- *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
  *  Based upon datasheets & sample CPUs kindly provided by AMD.
@@ -701,7 +700,7 @@
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 1a07b59..e56d632 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -378,8 +378,7 @@
 }
 
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Dave Jones, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
 		"with ICH-M southbridges.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index e9248bb..aedec09 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -16,13 +16,10 @@
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
+#include <asm/opal.h>
 #include <asm/runlatch.h>
 
-/* Flags and constants used in PowerNV platform */
-
 #define MAX_POWERNV_IDLE_STATES	8
-#define IDLE_USE_INST_NAP	0x00010000 /* Use nap instruction */
-#define IDLE_USE_INST_SLEEP	0x00020000 /* Use sleep instruction */
 
 struct cpuidle_driver powernv_idle_driver = {
 	.name             = "powernv_idle",
@@ -197,7 +194,7 @@
 		 * target residency to be 10x exit_latency
 		 */
 		latency_ns = be32_to_cpu(idle_state_latency[i]);
-		if (flags & IDLE_USE_INST_NAP) {
+		if (flags & OPAL_PM_NAP_ENABLED) {
 			/* Add NAP state */
 			strcpy(powernv_states[nr_idle_states].name, "Nap");
 			strcpy(powernv_states[nr_idle_states].desc, "Nap");
@@ -210,7 +207,8 @@
 			nr_idle_states++;
 		}
 
-		if (flags & IDLE_USE_INST_SLEEP) {
+		if (flags & OPAL_PM_SLEEP_ENABLED ||
+			flags & OPAL_PM_SLEEP_ENABLED_ER1) {
 			/* Add FASTSLEEP state */
 			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
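
The cpuidle rework drops the driver's private IDLE_USE_INST_* flags in
favour of the OPAL_PM_* values exported by firmware, so the per-state
flags word from the device tree is tested directly. A sketch of that
flag-driven state registration; the bit values below are placeholders,
not the real OPAL_PM_* definitions:

#include <stdio.h>

#define PM_NAP		0x1	/* placeholder for OPAL_PM_NAP_ENABLED */
#define PM_SLEEP	0x2	/* placeholder for OPAL_PM_SLEEP_ENABLED */
#define PM_SLEEP_ER1	0x4	/* placeholder for OPAL_PM_SLEEP_ENABLED_ER1 */

int main(void)
{
	unsigned int flags[] = { PM_NAP, PM_SLEEP_ER1, PM_NAP | PM_SLEEP };
	int i, nr_states = 0;

	for (i = 0; i < 3; i++) {
		if (flags[i] & PM_NAP)
			printf("state %d: Nap\n", nr_states++);
		if (flags[i] & (PM_SLEEP | PM_SLEEP_ER1))
			printf("state %d: FastSleep\n", nr_states++);
	}
	return 0;
}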
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c1351d9..91a488c 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1072,4 +1072,15 @@
 	  This support is also available as a module.  If so, the module
 	  will be called scx200_acb.
 
+config I2C_OPAL
+	tristate "IBM OPAL I2C driver"
+	depends on PPC_POWERNV
+	default y
+	help
+	  This exposes the PowerNV platform i2c busses to the Linux i2c
+	  layer; the driver is based on the OPAL interfaces.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called i2c-opal.
+
 endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 5e6c822..56388f6 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -102,6 +102,7 @@
 obj-$(CONFIG_I2C_BCM_KONA)	+= i2c-bcm-kona.o
 obj-$(CONFIG_I2C_CROS_EC_TUNNEL)	+= i2c-cros-ec-tunnel.o
 obj-$(CONFIG_I2C_ELEKTOR)	+= i2c-elektor.o
+obj-$(CONFIG_I2C_OPAL)		+= i2c-opal.o
 obj-$(CONFIG_I2C_PCA_ISA)	+= i2c-pca-isa.o
 obj-$(CONFIG_I2C_SIBYTE)	+= i2c-sibyte.o
 obj-$(CONFIG_SCx200_ACB)	+= scx200_acb.o
diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c
new file mode 100644
index 0000000..16f90b1
--- /dev/null
+++ b/drivers/i2c/busses/i2c-opal.c
@@ -0,0 +1,294 @@
+/*
+ * IBM OPAL I2C driver
+ * Copyright (C) 2014 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/firmware.h>
+#include <asm/opal.h>
+
+static int i2c_opal_translate_error(int rc)
+{
+	switch (rc) {
+	case OPAL_NO_MEM:
+		return -ENOMEM;
+	case OPAL_PARAMETER:
+		return -EINVAL;
+	case OPAL_I2C_ARBT_LOST:
+		return -EAGAIN;
+	case OPAL_I2C_TIMEOUT:
+		return -ETIMEDOUT;
+	case OPAL_I2C_NACK_RCVD:
+		return -ENXIO;
+	case OPAL_I2C_STOP_ERR:
+		return -EBUSY;
+	default:
+		return -EIO;
+	}
+}
+
+static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req)
+{
+	struct opal_msg msg;
+	int token, rc;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		if (token != -ERESTARTSYS)
+			pr_err("Failed to get the async token\n");
+
+		return token;
+	}
+
+	rc = opal_i2c_request(token, bus_id, req);
+	if (rc != OPAL_ASYNC_COMPLETION) {
+		rc = i2c_opal_translate_error(rc);
+		goto exit;
+	}
+
+	rc = opal_async_wait_response(token, &msg);
+	if (rc)
+		goto exit;
+
+	rc = be64_to_cpu(msg.params[1]);
+	if (rc != OPAL_SUCCESS) {
+		rc = i2c_opal_translate_error(rc);
+		goto exit;
+	}
+
+exit:
+	opal_async_release_token(token);
+	return rc;
+}
+
+static int i2c_opal_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+				int num)
+{
+	unsigned long opal_id = (unsigned long)adap->algo_data;
+	struct opal_i2c_request req;
+	int rc, i;
+
+	/*
+	 * We only support fairly simple combinations of one or two
+	 * messages here.
+	 */
+	memset(&req, 0, sizeof(req));
+	switch (num) {
+	case 0:
+		return 0;
+	case 1:
+		req.type = (msgs[0].flags & I2C_M_RD) ?
+			OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+		req.addr = cpu_to_be16(msgs[0].addr);
+		req.size = cpu_to_be32(msgs[0].len);
+		req.buffer_ra = cpu_to_be64(__pa(msgs[0].buf));
+		break;
+	case 2:
+		/* For two messages, we basically support only simple
+		 * smbus transactions of a write plus a read. We might
+		 * want to also allow two writes, but we'd have to bounce
+		 * the data into a single buffer.
+		 */
+		if ((msgs[0].flags & I2C_M_RD) || !(msgs[1].flags & I2C_M_RD))
+			return -EOPNOTSUPP;
+		if (msgs[0].len > 4)
+			return -EOPNOTSUPP;
+		if (msgs[0].addr != msgs[1].addr)
+			return -EOPNOTSUPP;
+		req.type = OPAL_I2C_SM_READ;
+		req.addr = cpu_to_be16(msgs[0].addr);
+		req.subaddr_sz = msgs[0].len;
+		for (i = 0; i < msgs[0].len; i++)
+			req.subaddr = (req.subaddr << 8) | msgs[0].buf[i];
+		req.subaddr = cpu_to_be32(req.subaddr);
+		req.size = cpu_to_be32(msgs[1].len);
+		req.buffer_ra = cpu_to_be64(__pa(msgs[1].buf));
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	rc = i2c_opal_send_request(opal_id, &req);
+	if (rc)
+		return rc;
+
+	return num;
+}
+
+static int i2c_opal_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+			       unsigned short flags, char read_write,
+			       u8 command, int size, union i2c_smbus_data *data)
+{
+	unsigned long opal_id = (unsigned long)adap->algo_data;
+	struct opal_i2c_request req;
+	u8 local[2];
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+
+	req.addr = cpu_to_be16(addr);
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+		req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+		req.size = cpu_to_be32(1);
+		/* Fall through */
+	case I2C_SMBUS_QUICK:
+		req.type = (read_write == I2C_SMBUS_READ) ?
+			OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+		req.size = cpu_to_be32(1);
+		req.subaddr = cpu_to_be32(command);
+		req.subaddr_sz = 1;
+		req.type = (read_write == I2C_SMBUS_READ) ?
+			OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		if (!read_write) {
+			local[0] = data->word & 0xff;
+			local[1] = (data->word >> 8) & 0xff;
+		}
+		req.buffer_ra = cpu_to_be64(__pa(local));
+		req.size = cpu_to_be32(2);
+		req.subaddr = cpu_to_be32(command);
+		req.subaddr_sz = 1;
+		req.type = (read_write == I2C_SMBUS_READ) ?
+			OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		req.buffer_ra = cpu_to_be64(__pa(&data->block[1]));
+		req.size = cpu_to_be32(data->block[0]);
+		req.subaddr = cpu_to_be32(command);
+		req.subaddr_sz = 1;
+		req.type = (read_write == I2C_SMBUS_READ) ?
+			OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = i2c_opal_send_request(opal_id, &req);
+	if (!rc && read_write && size == I2C_SMBUS_WORD_DATA) {
+		data->word = ((u16)local[1]) << 8;
+		data->word |= local[0];
+	}
+
+	return rc;
+}
+
+static u32 i2c_opal_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+	       I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+	       I2C_FUNC_SMBUS_I2C_BLOCK;
+}
+
+static const struct i2c_algorithm i2c_opal_algo = {
+	.master_xfer	= i2c_opal_master_xfer,
+	.smbus_xfer	= i2c_opal_smbus_xfer,
+	.functionality	= i2c_opal_func,
+};
+
+static int i2c_opal_probe(struct platform_device *pdev)
+{
+	struct i2c_adapter	*adapter;
+	const char		*pname;
+	u32			opal_id;
+	int			rc;
+
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
+	rc = of_property_read_u32(pdev->dev.of_node, "ibm,opal-id", &opal_id);
+	if (rc) {
+		dev_err(&pdev->dev, "Missing ibm,opal-id property !\n");
+		return -EIO;
+	}
+
+	adapter = devm_kzalloc(&pdev->dev, sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	adapter->algo = &i2c_opal_algo;
+	adapter->algo_data = (void *)(unsigned long)opal_id;
+	adapter->dev.parent = &pdev->dev;
+	adapter->dev.of_node = of_node_get(pdev->dev.of_node);
+	pname = of_get_property(pdev->dev.of_node, "ibm,port-name", NULL);
+	if (pname)
+		strlcpy(adapter->name, pname, sizeof(adapter->name));
+	else
+		strlcpy(adapter->name, "opal", sizeof(adapter->name));
+
+	platform_set_drvdata(pdev, adapter);
+	rc = i2c_add_adapter(adapter);
+	if (rc)
+		dev_err(&pdev->dev, "Failed to register the i2c adapter\n");
+
+	return rc;
+}
+
+static int i2c_opal_remove(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+
+	i2c_del_adapter(adapter);
+
+	return 0;
+}
+
+static const struct of_device_id i2c_opal_of_match[] = {
+	{
+		.compatible = "ibm,opal-i2c",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, i2c_opal_of_match);
+
+static struct platform_driver i2c_opal_driver = {
+	.probe	= i2c_opal_probe,
+	.remove	= i2c_opal_remove,
+	.driver	= {
+		.name		= "i2c-opal",
+		.of_match_table	= i2c_opal_of_match,
+	},
+};
+
+static int __init i2c_opal_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return -ENODEV;
+
+	return platform_driver_register(&i2c_opal_driver);
+}
+module_init(i2c_opal_init);
+
+static void __exit i2c_opal_exit(void)
+{
+	return platform_driver_unregister(&i2c_opal_driver);
+}
+module_exit(i2c_opal_exit);
+
+MODULE_AUTHOR("Neelesh Gupta <neelegup@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM OPAL I2C driver");
+MODULE_LICENSE("GPL");
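
One detail of i2c_opal_master_xfer() worth illustrating is the
sub-address packing for the write-then-read case: up to four written
bytes are folded MSB-first into a single integer before the conversion
to big endian. A standalone demonstration with a two-byte register
address:

#include <stdio.h>

int main(void)
{
	unsigned char buf[] = { 0x12, 0x34 };	/* like msgs[0].buf, len 2 */
	unsigned int subaddr = 0;
	unsigned int i;

	for (i = 0; i < sizeof(buf); i++)
		subaddr = (subaddr << 8) | buf[i];

	printf("subaddr = 0x%x\n", subaddr);	/* prints 0x1234 */
	return 0;
}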
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 10641b7..dafb3c5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -22,7 +22,6 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-#include <linux/llist.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 #include <target/target_core_base.h>
@@ -36,11 +35,17 @@
 #define	ISERT_MAX_CONN		8
 #define ISER_MAX_RX_CQ_LEN	(ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
 #define ISER_MAX_TX_CQ_LEN	(ISERT_QP_MAX_REQ_DTOS  * ISERT_MAX_CONN)
+#define ISER_MAX_CQ_LEN		(ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
+				 ISERT_MAX_CONN)
+
+int isert_debug_level = 0;
+module_param_named(debug_level, isert_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)");
 
 static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(device_list);
-static struct workqueue_struct *isert_rx_wq;
 static struct workqueue_struct *isert_comp_wq;
+static struct workqueue_struct *isert_release_wq;
 
 static void
 isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
@@ -54,19 +59,32 @@
 	       struct isert_rdma_wr *wr);
 static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
+static int
+isert_rdma_post_recvl(struct isert_conn *isert_conn);
+static int
+isert_rdma_accept(struct isert_conn *isert_conn);
+struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
+
+static inline bool
+isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
+{
+	return (conn->pi_support &&
+		cmd->prot_op != TARGET_PROT_NORMAL);
+}
+
 
 static void
 isert_qp_event_callback(struct ib_event *e, void *context)
 {
 	struct isert_conn *isert_conn = (struct isert_conn *)context;
 
-	pr_err("isert_qp_event_callback event: %d\n", e->event);
+	isert_err("conn %p event: %d\n", isert_conn, e->event);
 	switch (e->event) {
 	case IB_EVENT_COMM_EST:
 		rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST);
 		break;
 	case IB_EVENT_QP_LAST_WQE_REACHED:
-		pr_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED:\n");
+		isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n");
 		break;
 	default:
 		break;
@@ -80,39 +98,41 @@
 
 	ret = ib_query_device(ib_dev, devattr);
 	if (ret) {
-		pr_err("ib_query_device() failed: %d\n", ret);
+		isert_err("ib_query_device() failed: %d\n", ret);
 		return ret;
 	}
-	pr_debug("devattr->max_sge: %d\n", devattr->max_sge);
-	pr_debug("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
+	isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
+	isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
 
 	return 0;
 }
 
 static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id,
-		    u8 protection)
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
 {
 	struct isert_device *device = isert_conn->conn_device;
 	struct ib_qp_init_attr attr;
-	int ret, index, min_index = 0;
+	struct isert_comp *comp;
+	int ret, i, min = 0;
 
 	mutex_lock(&device_list_mutex);
-	for (index = 0; index < device->cqs_used; index++)
-		if (device->cq_active_qps[index] <
-		    device->cq_active_qps[min_index])
-			min_index = index;
-	device->cq_active_qps[min_index]++;
-	pr_debug("isert_conn_setup_qp: Using min_index: %d\n", min_index);
+	for (i = 0; i < device->comps_used; i++)
+		if (device->comps[i].active_qps <
+		    device->comps[min].active_qps)
+			min = i;
+	comp = &device->comps[min];
+	comp->active_qps++;
+	isert_info("conn %p, using comp %p min_index: %d\n",
+		   isert_conn, comp, min);
 	mutex_unlock(&device_list_mutex);
 
 	memset(&attr, 0, sizeof(struct ib_qp_init_attr));
 	attr.event_handler = isert_qp_event_callback;
 	attr.qp_context = isert_conn;
-	attr.send_cq = device->dev_tx_cq[min_index];
-	attr.recv_cq = device->dev_rx_cq[min_index];
+	attr.send_cq = comp->cq;
+	attr.recv_cq = comp->cq;
 	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
-	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS;
+	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
 	/*
 	 * FIXME: Use devattr.max_sge - 2 for max_send_sge as
 	 * work-around for RDMA_READs with ConnectX-2.
@@ -126,29 +146,29 @@
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
-	if (protection)
+	if (device->pi_capable)
 		attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
 
-	pr_debug("isert_conn_setup_qp cma_id->device: %p\n",
-		 cma_id->device);
-	pr_debug("isert_conn_setup_qp conn_pd->device: %p\n",
-		 isert_conn->conn_pd->device);
-
 	ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr);
 	if (ret) {
-		pr_err("rdma_create_qp failed for cma_id %d\n", ret);
-		return ret;
+		isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+		goto err;
 	}
 	isert_conn->conn_qp = cma_id->qp;
-	pr_debug("rdma_create_qp() returned success >>>>>>>>>>>>>>>>>>>>>>>>>.\n");
 
 	return 0;
+err:
+	mutex_lock(&device_list_mutex);
+	comp->active_qps--;
+	mutex_unlock(&device_list_mutex);
+
+	return ret;
 }
 
 static void
 isert_cq_event_callback(struct ib_event *e, void *context)
 {
-	pr_debug("isert_cq_event_callback event: %d\n", e->event);
+	isert_dbg("event: %d\n", e->event);
 }
 
 static int
@@ -182,6 +202,7 @@
 	}
 
 	isert_conn->conn_rx_desc_head = 0;
+
 	return 0;
 
 dma_map_fail:
@@ -193,6 +214,8 @@
 	kfree(isert_conn->conn_rx_descs);
 	isert_conn->conn_rx_descs = NULL;
 fail:
+	isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
+
 	return -ENOMEM;
 }
 
@@ -216,27 +239,23 @@
 	isert_conn->conn_rx_descs = NULL;
 }
 
-static void isert_cq_tx_work(struct work_struct *);
-static void isert_cq_tx_callback(struct ib_cq *, void *);
-static void isert_cq_rx_work(struct work_struct *);
-static void isert_cq_rx_callback(struct ib_cq *, void *);
+static void isert_cq_work(struct work_struct *);
+static void isert_cq_callback(struct ib_cq *, void *);
 
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
 	struct ib_device *ib_dev = device->ib_device;
-	struct isert_cq_desc *cq_desc;
 	struct ib_device_attr *dev_attr;
-	int ret = 0, i, j;
-	int max_rx_cqe, max_tx_cqe;
+	int ret = 0, i;
+	int max_cqe;
 
 	dev_attr = &device->dev_attr;
 	ret = isert_query_device(ib_dev, dev_attr);
 	if (ret)
 		return ret;
 
-	max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe);
-	max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe);
+	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
 
 	/* assign function handlers */
 	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
@@ -254,55 +273,38 @@
 	device->pi_capable = dev_attr->device_cap_flags &
 			     IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
 
-	device->cqs_used = min_t(int, num_online_cpus(),
-				 device->ib_device->num_comp_vectors);
-	device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
-	pr_debug("Using %d CQs, device %s supports %d vectors support "
-		 "Fast registration %d pi_capable %d\n",
-		 device->cqs_used, device->ib_device->name,
-		 device->ib_device->num_comp_vectors, device->use_fastreg,
-		 device->pi_capable);
-	device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
-				device->cqs_used, GFP_KERNEL);
-	if (!device->cq_desc) {
-		pr_err("Unable to allocate device->cq_desc\n");
+	device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
+					device->ib_device->num_comp_vectors));
+	isert_info("Using %d CQs, %s supports %d vectors support "
+		   "Fast registration %d pi_capable %d\n",
+		   device->comps_used, device->ib_device->name,
+		   device->ib_device->num_comp_vectors, device->use_fastreg,
+		   device->pi_capable);
+
+	device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
+				GFP_KERNEL);
+	if (!device->comps) {
+		isert_err("Unable to allocate completion contexts\n");
 		return -ENOMEM;
 	}
-	cq_desc = device->cq_desc;
 
-	for (i = 0; i < device->cqs_used; i++) {
-		cq_desc[i].device = device;
-		cq_desc[i].cq_index = i;
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-		INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work);
-		device->dev_rx_cq[i] = ib_create_cq(device->ib_device,
-						isert_cq_rx_callback,
-						isert_cq_event_callback,
-						(void *)&cq_desc[i],
-						max_rx_cqe, i);
-		if (IS_ERR(device->dev_rx_cq[i])) {
-			ret = PTR_ERR(device->dev_rx_cq[i]);
-			device->dev_rx_cq[i] = NULL;
+		comp->device = device;
+		INIT_WORK(&comp->work, isert_cq_work);
+		comp->cq = ib_create_cq(device->ib_device,
+					isert_cq_callback,
+					isert_cq_event_callback,
+					(void *)comp,
+					max_cqe, i);
+		if (IS_ERR(comp->cq)) {
+			ret = PTR_ERR(comp->cq);
+			comp->cq = NULL;
 			goto out_cq;
 		}
 
-		INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work);
-		device->dev_tx_cq[i] = ib_create_cq(device->ib_device,
-						isert_cq_tx_callback,
-						isert_cq_event_callback,
-						(void *)&cq_desc[i],
-						max_tx_cqe, i);
-		if (IS_ERR(device->dev_tx_cq[i])) {
-			ret = PTR_ERR(device->dev_tx_cq[i]);
-			device->dev_tx_cq[i] = NULL;
-			goto out_cq;
-		}
-
-		ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP);
-		if (ret)
-			goto out_cq;
-
-		ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP);
+		ret = ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
 		if (ret)
 			goto out_cq;
 	}
@@ -310,19 +312,15 @@
 	return 0;
 
 out_cq:
-	for (j = 0; j < i; j++) {
-		cq_desc = &device->cq_desc[j];
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-		if (device->dev_rx_cq[j]) {
-			cancel_work_sync(&cq_desc->cq_rx_work);
-			ib_destroy_cq(device->dev_rx_cq[j]);
-		}
-		if (device->dev_tx_cq[j]) {
-			cancel_work_sync(&cq_desc->cq_tx_work);
-			ib_destroy_cq(device->dev_tx_cq[j]);
+		if (comp->cq) {
+			cancel_work_sync(&comp->work);
+			ib_destroy_cq(comp->cq);
 		}
 	}
-	kfree(device->cq_desc);
+	kfree(device->comps);
 
 	return ret;
 }
@@ -330,21 +328,18 @@
 static void
 isert_free_device_ib_res(struct isert_device *device)
 {
-	struct isert_cq_desc *cq_desc;
 	int i;
 
-	for (i = 0; i < device->cqs_used; i++) {
-		cq_desc = &device->cq_desc[i];
+	isert_info("device %p\n", device);
 
-		cancel_work_sync(&cq_desc->cq_rx_work);
-		cancel_work_sync(&cq_desc->cq_tx_work);
-		ib_destroy_cq(device->dev_rx_cq[i]);
-		ib_destroy_cq(device->dev_tx_cq[i]);
-		device->dev_rx_cq[i] = NULL;
-		device->dev_tx_cq[i] = NULL;
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
+
+		cancel_work_sync(&comp->work);
+		ib_destroy_cq(comp->cq);
+		comp->cq = NULL;
 	}
-
-	kfree(device->cq_desc);
+	kfree(device->comps);
 }
 
 static void
@@ -352,6 +347,7 @@
 {
 	mutex_lock(&device_list_mutex);
 	device->refcount--;
+	isert_info("device %p refcount %d\n", device, device->refcount);
 	if (!device->refcount) {
 		isert_free_device_ib_res(device);
 		list_del(&device->dev_node);
@@ -370,6 +366,8 @@
 	list_for_each_entry(device, &device_list, dev_node) {
 		if (device->ib_device->node_guid == cma_id->device->node_guid) {
 			device->refcount++;
+			isert_info("Found iser device %p refcount %d\n",
+				   device, device->refcount);
 			mutex_unlock(&device_list_mutex);
 			return device;
 		}
@@ -393,6 +391,8 @@
 
 	device->refcount++;
 	list_add_tail(&device->dev_node, &device_list);
+	isert_info("Created a new iser device %p refcount %d\n",
+		   device, device->refcount);
 	mutex_unlock(&device_list_mutex);
 
 	return device;
@@ -407,7 +407,7 @@
 	if (list_empty(&isert_conn->conn_fr_pool))
 		return;
 
-	pr_debug("Freeing conn %p fastreg pool", isert_conn);
+	isert_info("Freeing conn %p fastreg pool", isert_conn);
 
 	list_for_each_entry_safe(fr_desc, tmp,
 				 &isert_conn->conn_fr_pool, list) {
@@ -425,87 +425,97 @@
 	}
 
 	if (i < isert_conn->conn_fr_pool_size)
-		pr_warn("Pool still has %d regions registered\n",
+		isert_warn("Pool still has %d regions registered\n",
 			isert_conn->conn_fr_pool_size - i);
 }
 
 static int
+isert_create_pi_ctx(struct fast_reg_descriptor *desc,
+		    struct ib_device *device,
+		    struct ib_pd *pd)
+{
+	struct ib_mr_init_attr mr_init_attr;
+	struct pi_context *pi_ctx;
+	int ret;
+
+	pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+	if (!pi_ctx) {
+		isert_err("Failed to allocate pi context\n");
+		return -ENOMEM;
+	}
+
+	pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
+					    ISCSI_ISER_SG_TABLESIZE);
+	if (IS_ERR(pi_ctx->prot_frpl)) {
+		isert_err("Failed to allocate prot frpl err=%ld\n",
+			  PTR_ERR(pi_ctx->prot_frpl));
+		ret = PTR_ERR(pi_ctx->prot_frpl);
+		goto err_pi_ctx;
+	}
+
+	pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+	if (IS_ERR(pi_ctx->prot_mr)) {
+		isert_err("Failed to allocate prot frmr err=%ld\n",
+			  PTR_ERR(pi_ctx->prot_mr));
+		ret = PTR_ERR(pi_ctx->prot_mr);
+		goto err_prot_frpl;
+	}
+	desc->ind |= ISERT_PROT_KEY_VALID;
+
+	memset(&mr_init_attr, 0, sizeof(mr_init_attr));
+	mr_init_attr.max_reg_descriptors = 2;
+	mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
+	pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+	if (IS_ERR(pi_ctx->sig_mr)) {
+		isert_err("Failed to allocate signature enabled mr err=%ld\n",
+			  PTR_ERR(pi_ctx->sig_mr));
+		ret = PTR_ERR(pi_ctx->sig_mr);
+		goto err_prot_mr;
+	}
+
+	desc->pi_ctx = pi_ctx;
+	desc->ind |= ISERT_SIG_KEY_VALID;
+	desc->ind &= ~ISERT_PROTECTED;
+
+	return 0;
+
+err_prot_mr:
+	ib_dereg_mr(desc->pi_ctx->prot_mr);
+err_prot_frpl:
+	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+err_pi_ctx:
+	kfree(desc->pi_ctx);
+
+	return ret;
+}
+
+static int
 isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
-		     struct fast_reg_descriptor *fr_desc, u8 protection)
+		     struct fast_reg_descriptor *fr_desc)
 {
 	int ret;
 
 	fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
 							 ISCSI_ISER_SG_TABLESIZE);
 	if (IS_ERR(fr_desc->data_frpl)) {
-		pr_err("Failed to allocate data frpl err=%ld\n",
-		       PTR_ERR(fr_desc->data_frpl));
+		isert_err("Failed to allocate data frpl err=%ld\n",
+			  PTR_ERR(fr_desc->data_frpl));
 		return PTR_ERR(fr_desc->data_frpl);
 	}
 
 	fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
 	if (IS_ERR(fr_desc->data_mr)) {
-		pr_err("Failed to allocate data frmr err=%ld\n",
-		       PTR_ERR(fr_desc->data_mr));
+		isert_err("Failed to allocate data frmr err=%ld\n",
+			  PTR_ERR(fr_desc->data_mr));
 		ret = PTR_ERR(fr_desc->data_mr);
 		goto err_data_frpl;
 	}
-	pr_debug("Create fr_desc %p page_list %p\n",
-		 fr_desc, fr_desc->data_frpl->page_list);
 	fr_desc->ind |= ISERT_DATA_KEY_VALID;
 
-	if (protection) {
-		struct ib_mr_init_attr mr_init_attr = {0};
-		struct pi_context *pi_ctx;
-
-		fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL);
-		if (!fr_desc->pi_ctx) {
-			pr_err("Failed to allocate pi context\n");
-			ret = -ENOMEM;
-			goto err_data_mr;
-		}
-		pi_ctx = fr_desc->pi_ctx;
-
-		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
-						    ISCSI_ISER_SG_TABLESIZE);
-		if (IS_ERR(pi_ctx->prot_frpl)) {
-			pr_err("Failed to allocate prot frpl err=%ld\n",
-			       PTR_ERR(pi_ctx->prot_frpl));
-			ret = PTR_ERR(pi_ctx->prot_frpl);
-			goto err_pi_ctx;
-		}
-
-		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
-		if (IS_ERR(pi_ctx->prot_mr)) {
-			pr_err("Failed to allocate prot frmr err=%ld\n",
-			       PTR_ERR(pi_ctx->prot_mr));
-			ret = PTR_ERR(pi_ctx->prot_mr);
-			goto err_prot_frpl;
-		}
-		fr_desc->ind |= ISERT_PROT_KEY_VALID;
-
-		mr_init_attr.max_reg_descriptors = 2;
-		mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
-		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
-		if (IS_ERR(pi_ctx->sig_mr)) {
-			pr_err("Failed to allocate signature enabled mr err=%ld\n",
-			       PTR_ERR(pi_ctx->sig_mr));
-			ret = PTR_ERR(pi_ctx->sig_mr);
-			goto err_prot_mr;
-		}
-		fr_desc->ind |= ISERT_SIG_KEY_VALID;
-	}
-	fr_desc->ind &= ~ISERT_PROTECTED;
+	isert_dbg("Created fr_desc %p\n", fr_desc);
 
 	return 0;
-err_prot_mr:
-	ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
-err_prot_frpl:
-	ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
-err_pi_ctx:
-	kfree(fr_desc->pi_ctx);
-err_data_mr:
-	ib_dereg_mr(fr_desc->data_mr);
+
 err_data_frpl:
 	ib_free_fast_reg_page_list(fr_desc->data_frpl);
 
@@ -513,7 +523,7 @@
 }
 
 static int
-isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)
+isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
 {
 	struct fast_reg_descriptor *fr_desc;
 	struct isert_device *device = isert_conn->conn_device;
@@ -531,16 +541,15 @@
 	for (i = 0; i < tag_num; i++) {
 		fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
 		if (!fr_desc) {
-			pr_err("Failed to allocate fast_reg descriptor\n");
+			isert_err("Failed to allocate fast_reg descriptor\n");
 			ret = -ENOMEM;
 			goto err;
 		}
 
 		ret = isert_create_fr_desc(device->ib_device,
-					   isert_conn->conn_pd, fr_desc,
-					   pi_support);
+					   isert_conn->conn_pd, fr_desc);
 		if (ret) {
-			pr_err("Failed to create fastreg descriptor err=%d\n",
+			isert_err("Failed to create fastreg descriptor err=%d\n",
 			       ret);
 			kfree(fr_desc);
 			goto err;
@@ -550,7 +559,7 @@
 		isert_conn->conn_fr_pool_size++;
 	}
 
-	pr_debug("Creating conn %p fastreg pool size=%d",
+	isert_dbg("Creating conn %p fastreg pool size=%d",
 		 isert_conn, isert_conn->conn_fr_pool_size);
 
 	return 0;
@@ -563,47 +572,45 @@
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
-	struct iscsi_np *np = cma_id->context;
-	struct isert_np *isert_np = np->np_context;
+	struct isert_np *isert_np = cma_id->context;
+	struct iscsi_np *np = isert_np->np;
 	struct isert_conn *isert_conn;
 	struct isert_device *device;
 	struct ib_device *ib_dev = cma_id->device;
 	int ret = 0;
-	u8 pi_support;
 
 	spin_lock_bh(&np->np_thread_lock);
 	if (!np->enabled) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("iscsi_np is not enabled, reject connect request\n");
+		isert_dbg("iscsi_np is not enabled, reject connect request\n");
 		return rdma_reject(cma_id, NULL, 0);
 	}
 	spin_unlock_bh(&np->np_thread_lock);
 
-	pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n",
+	isert_dbg("cma_id: %p, portal: %p\n",
 		 cma_id, cma_id->context);
 
 	isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
 	if (!isert_conn) {
-		pr_err("Unable to allocate isert_conn\n");
+		isert_err("Unable to allocate isert_conn\n");
 		return -ENOMEM;
 	}
 	isert_conn->state = ISER_CONN_INIT;
 	INIT_LIST_HEAD(&isert_conn->conn_accept_node);
 	init_completion(&isert_conn->conn_login_comp);
+	init_completion(&isert_conn->login_req_comp);
 	init_completion(&isert_conn->conn_wait);
-	init_completion(&isert_conn->conn_wait_comp_err);
 	kref_init(&isert_conn->conn_kref);
 	mutex_init(&isert_conn->conn_mutex);
 	spin_lock_init(&isert_conn->conn_lock);
 	INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
 
-	cma_id->context = isert_conn;
 	isert_conn->conn_cm_id = cma_id;
 
 	isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
 					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
 	if (!isert_conn->login_buf) {
-		pr_err("Unable to allocate isert_conn->login_buf\n");
+		isert_err("Unable to allocate isert_conn->login_buf\n");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -611,7 +618,7 @@
 	isert_conn->login_req_buf = isert_conn->login_buf;
 	isert_conn->login_rsp_buf = isert_conn->login_buf +
 				    ISCSI_DEF_MAX_RECV_SEG_LEN;
-	pr_debug("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
+	isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
 		 isert_conn->login_buf, isert_conn->login_req_buf,
 		 isert_conn->login_rsp_buf);
 
@@ -621,7 +628,7 @@
 
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
 	if (ret) {
-		pr_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
+		isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
 		       ret);
 		isert_conn->login_req_dma = 0;
 		goto out_login_buf;
@@ -633,7 +640,7 @@
 
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
 	if (ret) {
-		pr_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
+		isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
 		       ret);
 		isert_conn->login_rsp_dma = 0;
 		goto out_req_dma_map;
@@ -649,13 +656,13 @@
 	isert_conn->initiator_depth = min_t(u8,
 				event->param.conn.initiator_depth,
 				device->dev_attr.max_qp_init_rd_atom);
-	pr_debug("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+	isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
 
 	isert_conn->conn_device = device;
 	isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
 	if (IS_ERR(isert_conn->conn_pd)) {
 		ret = PTR_ERR(isert_conn->conn_pd);
-		pr_err("ib_alloc_pd failed for conn %p: ret=%d\n",
+		isert_err("ib_alloc_pd failed for conn %p: ret=%d\n",
 		       isert_conn, ret);
 		goto out_pd;
 	}
@@ -664,20 +671,20 @@
 					   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(isert_conn->conn_mr)) {
 		ret = PTR_ERR(isert_conn->conn_mr);
-		pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
+		isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
 		       isert_conn, ret);
 		goto out_mr;
 	}
 
-	pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi;
-	if (pi_support && !device->pi_capable) {
-		pr_err("Protection information requested but not supported, "
-		       "rejecting connect request\n");
-		ret = rdma_reject(cma_id, NULL, 0);
-		goto out_mr;
-	}
+	ret = isert_conn_setup_qp(isert_conn, cma_id);
+	if (ret)
+		goto out_conn_dev;
 
-	ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support);
+	ret = isert_rdma_post_recvl(isert_conn);
+	if (ret)
+		goto out_conn_dev;
+
+	ret = isert_rdma_accept(isert_conn);
 	if (ret)
 		goto out_conn_dev;
 
@@ -685,7 +692,7 @@
 	list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
 	mutex_unlock(&isert_np->np_accept_mutex);
 
-	pr_debug("isert_connect_request() up np_sem np: %p\n", np);
+	isert_info("np %p: Allow accept_np to continue\n", np);
 	up(&isert_np->np_sem);
 	return 0;
 
@@ -705,6 +712,7 @@
 	kfree(isert_conn->login_buf);
 out:
 	kfree(isert_conn);
+	rdma_reject(cma_id, NULL, 0);
 	return ret;
 }
 
@@ -713,25 +721,26 @@
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct isert_device *device = isert_conn->conn_device;
-	int cq_index;
 
-	pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p\n", isert_conn);
 
 	if (device && device->use_fastreg)
 		isert_conn_free_fastreg_pool(isert_conn);
 
-	if (isert_conn->conn_qp) {
-		cq_index = ((struct isert_cq_desc *)
-			isert_conn->conn_qp->recv_cq->cq_context)->cq_index;
-		pr_debug("isert_connect_release: cq_index: %d\n", cq_index);
-		isert_conn->conn_device->cq_active_qps[cq_index]--;
-
-		rdma_destroy_qp(isert_conn->conn_cm_id);
-	}
-
 	isert_free_rx_descriptors(isert_conn);
 	rdma_destroy_id(isert_conn->conn_cm_id);
 
+	if (isert_conn->conn_qp) {
+		struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context;
+
+		isert_dbg("dec completion context %p active_qps\n", comp);
+		mutex_lock(&device_list_mutex);
+		comp->active_qps--;
+		mutex_unlock(&device_list_mutex);
+
+		ib_destroy_qp(isert_conn->conn_qp);
+	}
+
 	ib_dereg_mr(isert_conn->conn_mr);
 	ib_dealloc_pd(isert_conn->conn_pd);
 
@@ -747,16 +756,24 @@
 
 	if (device)
 		isert_device_try_release(device);
-
-	pr_debug("Leaving isert_connect_release >>>>>>>>>>>>\n");
 }
 
 static void
 isert_connected_handler(struct rdma_cm_id *cma_id)
 {
-	struct isert_conn *isert_conn = cma_id->context;
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
-	kref_get(&isert_conn->conn_kref);
+	isert_info("conn %p\n", isert_conn);
+
+	if (!kref_get_unless_zero(&isert_conn->conn_kref)) {
+		isert_warn("conn %p connect_release is running\n", isert_conn);
+		return;
+	}
+
+	mutex_lock(&isert_conn->conn_mutex);
+	if (isert_conn->state != ISER_CONN_FULL_FEATURE)
+		isert_conn->state = ISER_CONN_UP;
+	mutex_unlock(&isert_conn->conn_mutex);
 }
 
 static void
@@ -765,8 +782,8 @@
 	struct isert_conn *isert_conn = container_of(kref,
 				struct isert_conn, conn_kref);
 
-	pr_debug("Calling isert_connect_release for final kref %s/%d\n",
-		 current->comm, current->pid);
+	isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm,
+		   current->pid);
 
 	isert_connect_release(isert_conn);
 }
@@ -777,75 +794,111 @@
 	kref_put(&isert_conn->conn_kref, isert_release_conn_kref);
 }
 
+/**
+ * isert_conn_terminate() - Initiate connection termination
+ * @isert_conn: isert connection struct
+ *
+ * Notes:
+ * In case the connection state is FULL_FEATURE, move state
+ * to TERMINATING and start the teardown sequence (rdma_disconnect).
+ * In case the connection state is UP, complete flush as well.
+ *
+ * This routine must be called with conn_mutex held. Thus it is
+ * safe to call multiple times.
+ */
 static void
-isert_disconnect_work(struct work_struct *work)
+isert_conn_terminate(struct isert_conn *isert_conn)
 {
-	struct isert_conn *isert_conn = container_of(work,
-				struct isert_conn, conn_logout_work);
+	int err;
 
-	pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
-	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->state == ISER_CONN_UP)
+	switch (isert_conn->state) {
+	case ISER_CONN_TERMINATING:
+		break;
+	case ISER_CONN_UP:
+	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
+		isert_info("Terminating conn %p state %d\n",
+			   isert_conn, isert_conn->state);
 		isert_conn->state = ISER_CONN_TERMINATING;
-
-	if (isert_conn->post_recv_buf_count == 0 &&
-	    atomic_read(&isert_conn->post_send_buf_count) == 0) {
-		mutex_unlock(&isert_conn->conn_mutex);
-		goto wake_up;
+		err = rdma_disconnect(isert_conn->conn_cm_id);
+		if (err)
+			isert_warn("Failed rdma_disconnect isert_conn %p\n",
+				   isert_conn);
+		break;
+	default:
+		isert_warn("conn %p teminating in state %d\n",
+			   isert_conn, isert_conn->state);
 	}
-	if (!isert_conn->conn_cm_id) {
-		mutex_unlock(&isert_conn->conn_mutex);
-		isert_put_conn(isert_conn);
-		return;
-	}
-
-	if (isert_conn->disconnect) {
-		/* Send DREQ/DREP towards our initiator */
-		rdma_disconnect(isert_conn->conn_cm_id);
-	}
-
-	mutex_unlock(&isert_conn->conn_mutex);
-
-wake_up:
-	complete(&isert_conn->conn_wait);
 }
 
 static int
-isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
+isert_np_cma_handler(struct isert_np *isert_np,
+		     enum rdma_cm_event_type event)
 {
-	struct isert_conn *isert_conn;
+	isert_dbg("isert np %p, handling event %d\n", isert_np, event);
 
-	if (!cma_id->qp) {
-		struct isert_np *isert_np = cma_id->context;
-
+	switch (event) {
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		isert_np->np_cm_id = NULL;
-		return -1;
+		break;
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+		isert_np->np_cm_id = isert_setup_id(isert_np);
+		if (IS_ERR(isert_np->np_cm_id)) {
+			isert_err("isert np %p setup id failed: %ld\n",
+				  isert_np, PTR_ERR(isert_np->np_cm_id));
+			isert_np->np_cm_id = NULL;
+		}
+		break;
+	default:
+		isert_err("isert np %p Unexpected event %d\n",
+			  isert_np, event);
 	}
 
-	isert_conn = (struct isert_conn *)cma_id->context;
+	return -1;
+}
 
-	isert_conn->disconnect = disconnect;
-	INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
-	schedule_work(&isert_conn->conn_logout_work);
+static int
+isert_disconnected_handler(struct rdma_cm_id *cma_id,
+			   enum rdma_cm_event_type event)
+{
+	struct isert_np *isert_np = cma_id->context;
+	struct isert_conn *isert_conn;
+
+	if (isert_np->np_cm_id == cma_id)
+		return isert_np_cma_handler(cma_id->context, event);
+
+	isert_conn = cma_id->qp->qp_context;
+
+	mutex_lock(&isert_conn->conn_mutex);
+	isert_conn_terminate(isert_conn);
+	mutex_unlock(&isert_conn->conn_mutex);
+
+	isert_info("conn %p completing conn_wait\n", isert_conn);
+	complete(&isert_conn->conn_wait);
 
 	return 0;
 }
 
+static void
+isert_connect_error(struct rdma_cm_id *cma_id)
+{
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
+
+	isert_put_conn(isert_conn);
+}
+
 static int
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
 	int ret = 0;
-	bool disconnect = false;
 
-	pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",
-		 event->event, event->status, cma_id->context, cma_id);
+	isert_info("event %d status %d id %p np %p\n", event->event,
+		   event->status, cma_id, cma_id->context);
 
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		ret = isert_connect_request(cma_id, event);
 		if (ret)
-			pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n",
-				event->event, ret);
+			isert_err("failed handle connect request %d\n", ret);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		isert_connected_handler(cma_id);
@@ -853,13 +906,16 @@
 	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
 	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
-		disconnect = true;
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
-		ret = isert_disconnected_handler(cma_id, disconnect);
+		ret = isert_disconnected_handler(cma_id, event->event);
 		break;
+	case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
+	case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_CONNECT_ERROR:
+		isert_connect_error(cma_id);
+		break;
 	default:
-		pr_err("Unhandled RDMA CMA event: %d\n", event->event);
+		isert_err("Unhandled RDMA CMA event: %d\n", event->event);
 		break;
 	}
 
@@ -876,7 +932,7 @@
 
 	for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) {
 		rx_desc		= &isert_conn->conn_rx_descs[rx_head];
-		rx_wr->wr_id	= (unsigned long)rx_desc;
+		rx_wr->wr_id	= (uintptr_t)rx_desc;
 		rx_wr->sg_list	= &rx_desc->rx_sg;
 		rx_wr->num_sge	= 1;
 		rx_wr->next	= rx_wr + 1;
@@ -890,10 +946,10 @@
 	ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr,
 				&rx_wr_failed);
 	if (ret) {
-		pr_err("ib_post_recv() failed with ret: %d\n", ret);
+		isert_err("ib_post_recv() failed with ret: %d\n", ret);
 		isert_conn->post_recv_buf_count -= count;
 	} else {
-		pr_debug("isert_post_recv(): Posted %d RX buffers\n", count);
+		isert_dbg("isert_post_recv(): Posted %d RX buffers\n", count);
 		isert_conn->conn_rx_desc_head = rx_head;
 	}
 	return ret;
@@ -910,19 +966,15 @@
 				      ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
 	send_wr.next	= NULL;
-	send_wr.wr_id	= (unsigned long)tx_desc;
+	send_wr.wr_id	= (uintptr_t)tx_desc;
 	send_wr.sg_list	= tx_desc->tx_sg;
 	send_wr.num_sge	= tx_desc->num_sge;
 	send_wr.opcode	= IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
-	atomic_inc(&isert_conn->post_send_buf_count);
-
 	ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed);
-	if (ret) {
-		pr_err("ib_post_send() failed, ret: %d\n", ret);
-		atomic_dec(&isert_conn->post_send_buf_count);
-	}
+	if (ret)
+		isert_err("ib_post_send() failed, ret: %d\n", ret);
 
 	return ret;
 }
@@ -945,7 +997,7 @@
 
 	if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) {
 		tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
-		pr_debug("tx_desc %p lkey mismatch, fixing\n", tx_desc);
+		isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
 	}
 }
 
@@ -959,7 +1011,7 @@
 	dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc,
 			ISER_HEADERS_LEN, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(ib_dev, dma_addr)) {
-		pr_err("ib_dma_mapping_error() failed\n");
+		isert_err("ib_dma_mapping_error() failed\n");
 		return -ENOMEM;
 	}
 
@@ -968,40 +1020,24 @@
 	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
 	tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
 
-	pr_debug("isert_init_tx_hdrs: Setup tx_sg[0].addr: 0x%llx length: %u"
-		 " lkey: 0x%08x\n", tx_desc->tx_sg[0].addr,
-		 tx_desc->tx_sg[0].length, tx_desc->tx_sg[0].lkey);
+	isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
+		  tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
+		  tx_desc->tx_sg[0].lkey);
 
 	return 0;
 }
 
 static void
 isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
-		   struct ib_send_wr *send_wr, bool coalesce)
+		   struct ib_send_wr *send_wr)
 {
 	struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
 
 	isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	send_wr->opcode = IB_WR_SEND;
 	send_wr->sg_list = &tx_desc->tx_sg[0];
 	send_wr->num_sge = isert_cmd->tx_desc.num_sge;
-	/*
-	 * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED
-	 * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls.
-	 */
-	mutex_lock(&isert_conn->conn_mutex);
-	if (coalesce && isert_conn->state == ISER_CONN_UP &&
-	    ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) {
-		tx_desc->llnode_active = true;
-		llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist);
-		mutex_unlock(&isert_conn->conn_mutex);
-		return;
-	}
-	isert_conn->conn_comp_batch = 0;
-	tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist);
-	mutex_unlock(&isert_conn->conn_mutex);
-
 	send_wr->send_flags = IB_SEND_SIGNALED;
 }
 
@@ -1017,22 +1053,21 @@
 	sge.length = ISER_RX_LOGIN_SIZE;
 	sge.lkey = isert_conn->conn_mr->lkey;
 
-	pr_debug("Setup sge: addr: %llx length: %d 0x%08x\n",
+	isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
 		sge.addr, sge.length, sge.lkey);
 
 	memset(&rx_wr, 0, sizeof(struct ib_recv_wr));
-	rx_wr.wr_id = (unsigned long)isert_conn->login_req_buf;
+	rx_wr.wr_id = (uintptr_t)isert_conn->login_req_buf;
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 
 	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail);
 	if (ret) {
-		pr_err("ib_post_recv() failed: %d\n", ret);
+		isert_err("ib_post_recv() failed: %d\n", ret);
 		isert_conn->post_recv_buf_count--;
 	}
 
-	pr_debug("ib_post_recv(): returned success >>>>>>>>>>>>>>>>>>>>>>>>\n");
 	return ret;
 }
 
@@ -1072,13 +1107,9 @@
 		if (login->login_complete) {
 			if (!conn->sess->sess_ops->SessionType &&
 			    isert_conn->conn_device->use_fastreg) {
-				/* Normal Session and fastreg is used */
-				u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi;
-
-				ret = isert_conn_create_fastreg_pool(isert_conn,
-								     pi_support);
+				ret = isert_conn_create_fastreg_pool(isert_conn);
 				if (ret) {
-					pr_err("Conn: %p failed to create"
+					isert_err("Conn: %p failed to create"
 					       " fastreg pool\n", isert_conn);
 					return ret;
 				}
@@ -1092,7 +1123,10 @@
 			if (ret)
 				return ret;
 
-			isert_conn->state = ISER_CONN_UP;
+			/* Now we are in FULL_FEATURE phase */
+			mutex_lock(&isert_conn->conn_mutex);
+			isert_conn->state = ISER_CONN_FULL_FEATURE;
+			mutex_unlock(&isert_conn->conn_mutex);
 			goto post_send;
 		}
 
@@ -1109,18 +1143,17 @@
 }
 
 static void
-isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
-		   struct isert_conn *isert_conn)
+isert_rx_login_req(struct isert_conn *isert_conn)
 {
+	struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf;
+	int rx_buflen = isert_conn->login_req_len;
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct iscsi_login *login = conn->conn_login;
 	int size;
 
-	if (!login) {
-		pr_err("conn->conn_login is NULL\n");
-		dump_stack();
-		return;
-	}
+	isert_info("conn %p\n", isert_conn);
+
+	WARN_ON_ONCE(!login);
 
 	if (login->first_request) {
 		struct iscsi_login_req *login_req =
@@ -1146,8 +1179,9 @@
 	memcpy(&login->req[0], (void *)&rx_desc->iscsi_header, ISCSI_HDR_LEN);
 
 	size = min(rx_buflen, MAX_KEY_VALUE_PAIRS);
-	pr_debug("Using login payload size: %d, rx_buflen: %d MAX_KEY_VALUE_PAIRS: %d\n",
-		 size, rx_buflen, MAX_KEY_VALUE_PAIRS);
+	isert_dbg("Using login payload size: %d, rx_buflen: %d "
+		  "MAX_KEY_VALUE_PAIRS: %d\n", size, rx_buflen,
+		  MAX_KEY_VALUE_PAIRS);
 	memcpy(login->req_buf, &rx_desc->data[0], size);
 
 	if (login->first_request) {
@@ -1166,7 +1200,7 @@
 
 	cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE);
 	if (!cmd) {
-		pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");
+		isert_err("Unable to allocate iscsi_cmd + isert_cmd\n");
 		return NULL;
 	}
 	isert_cmd = iscsit_priv_cmd(cmd);
@@ -1209,8 +1243,8 @@
 	sg = &cmd->se_cmd.t_data_sg[0];
 	sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
 
-	pr_debug("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
-		 sg, sg_nents, &rx_desc->data[0], imm_data_len);
+	isert_dbg("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
+		  sg, sg_nents, &rx_desc->data[0], imm_data_len);
 
 	sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len);
 
@@ -1254,13 +1288,15 @@
 	 * FIXME: Unexpected unsolicited_data out
 	 */
 	if (!cmd->unsolicited_data) {
-		pr_err("Received unexpected solicited data payload\n");
+		isert_err("Received unexpected solicited data payload\n");
 		dump_stack();
 		return -1;
 	}
 
-	pr_debug("Unsolicited DataOut unsol_data_len: %u, write_data_done: %u, data_length: %u\n",
-		 unsol_data_len, cmd->write_data_done, cmd->se_cmd.data_length);
+	isert_dbg("Unsolicited DataOut unsol_data_len: %u, "
+		  "write_data_done: %u, data_length: %u\n",
+		  unsol_data_len,  cmd->write_data_done,
+		  cmd->se_cmd.data_length);
 
 	sg_off = cmd->write_data_done / PAGE_SIZE;
 	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
@@ -1270,12 +1306,13 @@
 	 * FIXME: Non page-aligned unsolicited_data out
 	 */
 	if (page_off) {
-		pr_err("Received unexpected non-page aligned data payload\n");
+		isert_err("unexpected non-page aligned data payload\n");
 		dump_stack();
 		return -1;
 	}
-	pr_debug("Copying DataOut: sg_start: %p, sg_off: %u sg_nents: %u from %p %u\n",
-		 sg_start, sg_off, sg_nents, &rx_desc->data[0], unsol_data_len);
+	isert_dbg("Copying DataOut: sg_start: %p, sg_off: %u "
+		  "sg_nents: %u from %p %u\n", sg_start, sg_off,
+		  sg_nents, &rx_desc->data[0], unsol_data_len);
 
 	sg_copy_from_buffer(sg_start, sg_nents, &rx_desc->data[0],
 			    unsol_data_len);
@@ -1322,8 +1359,8 @@
 
 	text_in = kzalloc(payload_length, GFP_KERNEL);
 	if (!text_in) {
-		pr_err("Unable to allocate text_in of payload_length: %u\n",
-		       payload_length);
+		isert_err("Unable to allocate text_in of payload_length: %u\n",
+			  payload_length);
 		return -ENOMEM;
 	}
 	cmd->text_in_ptr = text_in;
@@ -1348,8 +1385,8 @@
 
 	if (sess->sess_ops->SessionType &&
 	   (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) {
-		pr_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
-		       " ignoring\n", opcode);
+		isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
+			  " ignoring\n", opcode);
 		return 0;
 	}
 
@@ -1395,10 +1432,6 @@
 			break;
 
 		ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr);
-		if (ret > 0)
-			wait_for_completion_timeout(&conn->conn_logout_comp,
-						    SECONDS_FOR_LOGOUT_COMP *
-						    HZ);
 		break;
 	case ISCSI_OP_TEXT:
 		cmd = isert_allocate_cmd(conn);
@@ -1410,7 +1443,7 @@
 					    rx_desc, (struct iscsi_text *)hdr);
 		break;
 	default:
-		pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode);
+		isert_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode);
 		dump_stack();
 		break;
 	}
@@ -1431,23 +1464,23 @@
 		if (iser_hdr->flags & ISER_RSV) {
 			read_stag = be32_to_cpu(iser_hdr->read_stag);
 			read_va = be64_to_cpu(iser_hdr->read_va);
-			pr_debug("ISER_RSV: read_stag: 0x%08x read_va: 0x%16llx\n",
-				 read_stag, (unsigned long long)read_va);
+			isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
+				  read_stag, (unsigned long long)read_va);
 		}
 		if (iser_hdr->flags & ISER_WSV) {
 			write_stag = be32_to_cpu(iser_hdr->write_stag);
 			write_va = be64_to_cpu(iser_hdr->write_va);
-			pr_debug("ISER_WSV: write__stag: 0x%08x write_va: 0x%16llx\n",
-				 write_stag, (unsigned long long)write_va);
+			isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
+				  write_stag, (unsigned long long)write_va);
 		}
 
-		pr_debug("ISER ISCSI_CTRL PDU\n");
+		isert_dbg("ISER ISCSI_CTRL PDU\n");
 		break;
 	case ISER_HELLO:
-		pr_err("iSER Hello message\n");
+		isert_err("iSER Hello message\n");
 		break;
 	default:
-		pr_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+		isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
 		break;
 	}
 
@@ -1457,7 +1490,7 @@
 
 static void
 isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
-		    unsigned long xfer_len)
+		    u32 xfer_len)
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct iscsi_hdr *hdr;
@@ -1467,34 +1500,43 @@
 	if ((char *)desc == isert_conn->login_req_buf) {
 		rx_dma = isert_conn->login_req_dma;
 		rx_buflen = ISER_RX_LOGIN_SIZE;
-		pr_debug("ISER login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
+		isert_dbg("login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
 			 rx_dma, rx_buflen);
 	} else {
 		rx_dma = desc->dma_addr;
 		rx_buflen = ISER_RX_PAYLOAD_SIZE;
-		pr_debug("ISER req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
+		isert_dbg("req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
 			 rx_dma, rx_buflen);
 	}
 
 	ib_dma_sync_single_for_cpu(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE);
 
 	hdr = &desc->iscsi_header;
-	pr_debug("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
+	isert_dbg("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
 		 hdr->opcode, hdr->itt, hdr->flags,
 		 (int)(xfer_len - ISER_HEADERS_LEN));
 
-	if ((char *)desc == isert_conn->login_req_buf)
-		isert_rx_login_req(desc, xfer_len - ISER_HEADERS_LEN,
-				   isert_conn);
-	else
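+	/*
+	 * Only login requests after the first one are processed inline
+	 * here; the initial request is handled by isert_get_login_rx()
+	 * once login_req_comp is completed below.
+	 */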
+	if ((char *)desc == isert_conn->login_req_buf) {
+		isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN;
+		if (isert_conn->conn) {
+			struct iscsi_login *login = isert_conn->conn->conn_login;
+
+			if (login && !login->first_request)
+				isert_rx_login_req(isert_conn);
+		}
+		mutex_lock(&isert_conn->conn_mutex);
+		complete(&isert_conn->login_req_comp);
+		mutex_unlock(&isert_conn->conn_mutex);
+	} else {
 		isert_rx_do_work(desc, isert_conn);
+	}
 
 	ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen,
 				      DMA_FROM_DEVICE);
 
 	isert_conn->post_recv_buf_count--;
-	pr_debug("iSERT: Decremented post_recv_buf_count: %d\n",
-		 isert_conn->post_recv_buf_count);
+	isert_dbg("Decremented post_recv_buf_count: %d\n",
+		  isert_conn->post_recv_buf_count);
 
 	if ((char *)desc == isert_conn->login_req_buf)
 		return;
@@ -1505,7 +1547,7 @@
 				ISERT_MIN_POSTED_RX);
 		err = isert_post_recv(isert_conn, count);
 		if (err) {
-			pr_err("isert_post_recv() count: %d failed, %d\n",
+			isert_err("isert_post_recv() count: %d failed, %d\n",
 			       count, err);
 		}
 	}
@@ -1534,12 +1576,12 @@
 	data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
 					data->dma_dir);
 	if (unlikely(!data->dma_nents)) {
-		pr_err("Cmd: unable to dma map SGs %p\n", sg);
+		isert_err("Cmd: unable to dma map SGs %p\n", sg);
 		return -EINVAL;
 	}
 
-	pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
-		 isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
+	isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+		  isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
 
 	return 0;
 }
@@ -1560,21 +1602,21 @@
 {
 	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 
-	pr_debug("isert_unmap_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	if (wr->data.sg) {
-		pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd);
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
 		isert_unmap_data_buf(isert_conn, &wr->data);
 	}
 
 	if (wr->send_wr) {
-		pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd);
+		isert_dbg("Cmd %p free send_wr\n", isert_cmd);
 		kfree(wr->send_wr);
 		wr->send_wr = NULL;
 	}
 
 	if (wr->ib_sge) {
-		pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd);
+		isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
 		kfree(wr->ib_sge);
 		wr->ib_sge = NULL;
 	}
@@ -1586,11 +1628,10 @@
 	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 	LIST_HEAD(unmap_list);
 
-	pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	if (wr->fr_desc) {
-		pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n",
-			 isert_cmd, wr->fr_desc);
+		isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, wr->fr_desc);
 		if (wr->fr_desc->ind & ISERT_PROTECTED) {
 			isert_unmap_data_buf(isert_conn, &wr->prot);
 			wr->fr_desc->ind &= ~ISERT_PROTECTED;
@@ -1602,7 +1643,7 @@
 	}
 
 	if (wr->data.sg) {
-		pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd);
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
 		isert_unmap_data_buf(isert_conn, &wr->data);
 	}
 
@@ -1618,7 +1659,7 @@
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct isert_device *device = isert_conn->conn_device;
 
-	pr_debug("Entering isert_put_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	switch (cmd->iscsi_opcode) {
 	case ISCSI_OP_SCSI_CMD:
@@ -1668,7 +1709,7 @@
 		 * associated cmd->se_cmd needs to be released.
 		 */
 		if (cmd->se_cmd.se_tfo != NULL) {
-			pr_debug("Calling transport_generic_free_cmd from"
+			isert_dbg("Calling transport_generic_free_cmd from"
 				 " isert_put_cmd for 0x%02x\n",
 				 cmd->iscsi_opcode);
 			transport_generic_free_cmd(&cmd->se_cmd, 0);
@@ -1687,7 +1728,7 @@
 isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev)
 {
 	if (tx_desc->dma_addr != 0) {
-		pr_debug("Calling ib_dma_unmap_single for tx_desc->dma_addr\n");
+		isert_dbg("unmap single for tx_desc->dma_addr\n");
 		ib_dma_unmap_single(ib_dev, tx_desc->dma_addr,
 				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
 		tx_desc->dma_addr = 0;
@@ -1699,7 +1740,7 @@
 		     struct ib_device *ib_dev, bool comp_err)
 {
 	if (isert_cmd->pdu_buf_dma != 0) {
-		pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n");
+		isert_dbg("unmap single for isert_cmd->pdu_buf_dma\n");
 		ib_dma_unmap_single(ib_dev, isert_cmd->pdu_buf_dma,
 				    isert_cmd->pdu_buf_len, DMA_TO_DEVICE);
 		isert_cmd->pdu_buf_dma = 0;
@@ -1717,7 +1758,7 @@
 
 	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
 	if (ret) {
-		pr_err("ib_check_mr_status failed, ret %d\n", ret);
+		isert_err("ib_check_mr_status failed, ret %d\n", ret);
 		goto fail_mr_status;
 	}
 
@@ -1740,12 +1781,12 @@
 		do_div(sec_offset_err, block_size);
 		se_cmd->bad_sector = sec_offset_err + se_cmd->t_task_lba;
 
-		pr_err("isert: PI error found type %d at sector 0x%llx "
-		       "expected 0x%x vs actual 0x%x\n",
-		       mr_status.sig_err.err_type,
-		       (unsigned long long)se_cmd->bad_sector,
-		       mr_status.sig_err.expected,
-		       mr_status.sig_err.actual);
+		isert_err("PI error found type %d at sector 0x%llx "
+			  "expected 0x%x vs actual 0x%x\n",
+			  mr_status.sig_err.err_type,
+			  (unsigned long long)se_cmd->bad_sector,
+			  mr_status.sig_err.expected,
+			  mr_status.sig_err.actual);
 		ret = 1;
 	}
 
@@ -1801,7 +1842,7 @@
 	cmd->write_data_done = wr->data.len;
 	wr->send_wr_num = 0;
 
-	pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
+	isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
 	spin_lock_bh(&cmd->istate_lock);
 	cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
 	cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
@@ -1823,36 +1864,22 @@
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 
+	isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
+
 	switch (cmd->i_state) {
 	case ISTATE_SEND_TASKMGTRSP:
-		pr_debug("Calling iscsit_tmr_post_handler >>>>>>>>>>>>>>>>>\n");
-
-		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_tmr_post_handler(cmd, cmd->conn);
-
+	case ISTATE_SEND_REJECT:   /* FALLTHRU */
+	case ISTATE_SEND_TEXTRSP:  /* FALLTHRU */
 		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
-		break;
-	case ISTATE_SEND_REJECT:
-		pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");
-		atomic_dec(&isert_conn->post_send_buf_count);
-
-		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+		isert_completion_put(&isert_cmd->tx_desc, isert_cmd,
+				     ib_dev, false);
 		break;
 	case ISTATE_SEND_LOGOUTRSP:
-		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
-
-		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_logout_post_handler(cmd, cmd->conn);
 		break;
-	case ISTATE_SEND_TEXTRSP:
-		atomic_dec(&isert_conn->post_send_buf_count);
-		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
-		break;
 	default:
-		pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state);
+		isert_err("Unknown i_state %d\n", cmd->i_state);
 		dump_stack();
 		break;
 	}
@@ -1865,7 +1892,6 @@
 			  struct ib_device *ib_dev)
 {
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 
 	if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
 	    cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
@@ -1878,267 +1904,151 @@
 		return;
 	}
 
-	/**
-	 * If send_wr_num is 0 this means that we got
-	 * RDMA completion and we cleared it and we should
-	 * simply decrement the response post. else the
-	 * response is incorporated in send_wr_num, just
-	 * sub it.
-	 **/
-	if (wr->send_wr_num)
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	else
-		atomic_dec(&isert_conn->post_send_buf_count);
-
 	cmd->i_state = ISTATE_SENT_STATUS;
 	isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
 }
 
 static void
-__isert_send_completion(struct iser_tx_desc *tx_desc,
-		        struct isert_conn *isert_conn)
-{
-	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
-	struct isert_rdma_wr *wr;
-
-	if (!isert_cmd) {
-		atomic_dec(&isert_conn->post_send_buf_count);
-		isert_unmap_tx_desc(tx_desc, ib_dev);
-		return;
-	}
-	wr = &isert_cmd->rdma_wr;
-
-	switch (wr->iser_ib_op) {
-	case ISER_IB_RECV:
-		pr_err("isert_send_completion: Got ISER_IB_RECV\n");
-		dump_stack();
-		break;
-	case ISER_IB_SEND:
-		pr_debug("isert_send_completion: Got ISER_IB_SEND\n");
-		isert_response_completion(tx_desc, isert_cmd,
-					  isert_conn, ib_dev);
-		break;
-	case ISER_IB_RDMA_WRITE:
-		pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-		isert_completion_rdma_write(tx_desc, isert_cmd);
-		break;
-	case ISER_IB_RDMA_READ:
-		pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n");
-
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-		isert_completion_rdma_read(tx_desc, isert_cmd);
-		break;
-	default:
-		pr_err("Unknown wr->iser_ib_op: 0x%02x\n", wr->iser_ib_op);
-		dump_stack();
-		break;
-	}
-}
-
-static void
 isert_send_completion(struct iser_tx_desc *tx_desc,
 		      struct isert_conn *isert_conn)
 {
-	struct llist_node *llnode = tx_desc->comp_llnode_batch;
-	struct iser_tx_desc *t;
-	/*
-	 * Drain coalesced completion llist starting from comp_llnode_batch
-	 * setup in isert_init_send_wr(), and then complete trailing tx_desc.
-	 */
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		__isert_send_completion(t, isert_conn);
-	}
-	__isert_send_completion(tx_desc, isert_conn);
-}
-
-static void
-isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev)
-{
-	struct llist_node *llnode;
-	struct isert_rdma_wr *wr;
-	struct iser_tx_desc *t;
-
-	mutex_lock(&isert_conn->conn_mutex);
-	llnode = llist_del_all(&isert_conn->conn_comp_llist);
-	isert_conn->conn_comp_batch = 0;
-	mutex_unlock(&isert_conn->conn_mutex);
-
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		wr = &t->isert_cmd->rdma_wr;
-
-		/**
-		 * If send_wr_num is 0 this means that we got
-		 * RDMA completion and we cleared it and we should
-		 * simply decrement the response post. else the
-		 * response is incorporated in send_wr_num, just
-		 * sub it.
-		 **/
-		if (wr->send_wr_num)
-			atomic_sub(wr->send_wr_num,
-				   &isert_conn->post_send_buf_count);
-		else
-			atomic_dec(&isert_conn->post_send_buf_count);
-
-		isert_completion_put(t, t->isert_cmd, ib_dev, true);
-	}
-}
-
-static void
-isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn)
-{
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
-	struct llist_node *llnode = tx_desc->comp_llnode_batch;
 	struct isert_rdma_wr *wr;
-	struct iser_tx_desc *t;
 
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		wr = &t->isert_cmd->rdma_wr;
-
-		/**
-		 * If send_wr_num is 0 this means that we got
-		 * RDMA completion and we cleared it and we should
-		 * simply decrement the response post. else the
-		 * response is incorporated in send_wr_num, just
-		 * sub it.
-		 **/
-		if (wr->send_wr_num)
-			atomic_sub(wr->send_wr_num,
-				   &isert_conn->post_send_buf_count);
-		else
-			atomic_dec(&isert_conn->post_send_buf_count);
-
-		isert_completion_put(t, t->isert_cmd, ib_dev, true);
-	}
-	tx_desc->comp_llnode_batch = NULL;
-
-	if (!isert_cmd)
+	if (!isert_cmd) {
 		isert_unmap_tx_desc(tx_desc, ib_dev);
-	else
-		isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
-}
-
-static void
-isert_cq_rx_comp_err(struct isert_conn *isert_conn)
-{
-	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-	struct iscsi_conn *conn = isert_conn->conn;
-
-	if (isert_conn->post_recv_buf_count)
 		return;
-
-	isert_cq_drain_comp_llist(isert_conn, ib_dev);
-
-	if (conn->sess) {
-		target_sess_cmd_list_set_waiting(conn->sess->se_sess);
-		target_wait_for_sess_cmds(conn->sess->se_sess);
 	}
+	wr = &isert_cmd->rdma_wr;
 
-	while (atomic_read(&isert_conn->post_send_buf_count))
-		msleep(3000);
+	isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
 
-	mutex_lock(&isert_conn->conn_mutex);
-	isert_conn->state = ISER_CONN_DOWN;
-	mutex_unlock(&isert_conn->conn_mutex);
+	switch (wr->iser_ib_op) {
+	case ISER_IB_RECV:
+		isert_err("Got ISER_IB_RECV\n");
+		dump_stack();
+		break;
+	case ISER_IB_SEND:
+		isert_response_completion(tx_desc, isert_cmd,
+					  isert_conn, ib_dev);
+		break;
+	case ISER_IB_RDMA_WRITE:
+		isert_completion_rdma_write(tx_desc, isert_cmd);
+		break;
+	case ISER_IB_RDMA_READ:
+		isert_completion_rdma_read(tx_desc, isert_cmd);
+		break;
+	default:
+		isert_err("Unknown wr->iser_ib_op: 0x%x\n", wr->iser_ib_op);
+		dump_stack();
+		break;
+	}
+}
 
-	iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+/**
+ * is_isert_tx_desc() - Indicate if the completion wr_id
+ *     is a TX descriptor or not.
+ * @isert_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwise it is a TX.
+ */
+static inline bool
+is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
+{
+	void *start = isert_conn->conn_rx_descs;
+	int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs);
 
-	complete(&isert_conn->conn_wait_comp_err);
+	if (wr_id >= start && wr_id < start + len)
+		return false;
+
+	return true;
 }
 
 static void
-isert_cq_tx_work(struct work_struct *work)
+isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
 {
-	struct isert_cq_desc *cq_desc = container_of(work,
-				struct isert_cq_desc, cq_tx_work);
-	struct isert_device *device = cq_desc->device;
-	int cq_index = cq_desc->cq_index;
-	struct ib_cq *tx_cq = device->dev_tx_cq[cq_index];
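+	/*
+	 * A flushed beacon (posted in isert_wait4flush) indicates that
+	 * all preceding flush errors were consumed, so wake the waiter.
+	 */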
+	if (wc->wr_id == ISER_BEACON_WRID) {
+		isert_info("conn %p completing conn_wait_comp_err\n",
+			   isert_conn);
+		complete(&isert_conn->conn_wait_comp_err);
+	} else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
+		struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+		struct isert_cmd *isert_cmd;
+		struct iser_tx_desc *desc;
+
+		desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+		isert_cmd = desc->isert_cmd;
+		if (!isert_cmd)
+			isert_unmap_tx_desc(desc, ib_dev);
+		else
+			isert_completion_put(desc, isert_cmd, ib_dev, true);
+	} else {
+		isert_conn->post_recv_buf_count--;
+		if (!isert_conn->post_recv_buf_count)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+	}
+}
+
+static void
+isert_handle_wc(struct ib_wc *wc)
+{
 	struct isert_conn *isert_conn;
 	struct iser_tx_desc *tx_desc;
-	struct ib_wc wc;
-
-	while (ib_poll_cq(tx_cq, 1, &wc) == 1) {
-		tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id;
-		isert_conn = wc.qp->qp_context;
-
-		if (wc.status == IB_WC_SUCCESS) {
-			isert_send_completion(tx_desc, isert_conn);
-		} else {
-			pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
-			pr_debug("TX wc.status: 0x%08x\n", wc.status);
-			pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err);
-
-			if (wc.wr_id != ISER_FASTREG_LI_WRID) {
-				if (tx_desc->llnode_active)
-					continue;
-
-				atomic_dec(&isert_conn->post_send_buf_count);
-				isert_cq_tx_comp_err(tx_desc, isert_conn);
-			}
-		}
-	}
-
-	ib_req_notify_cq(tx_cq, IB_CQ_NEXT_COMP);
-}
-
-static void
-isert_cq_tx_callback(struct ib_cq *cq, void *context)
-{
-	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
-
-	queue_work(isert_comp_wq, &cq_desc->cq_tx_work);
-}
-
-static void
-isert_cq_rx_work(struct work_struct *work)
-{
-	struct isert_cq_desc *cq_desc = container_of(work,
-			struct isert_cq_desc, cq_rx_work);
-	struct isert_device *device = cq_desc->device;
-	int cq_index = cq_desc->cq_index;
-	struct ib_cq *rx_cq = device->dev_rx_cq[cq_index];
-	struct isert_conn *isert_conn;
 	struct iser_rx_desc *rx_desc;
-	struct ib_wc wc;
-	unsigned long xfer_len;
 
-	while (ib_poll_cq(rx_cq, 1, &wc) == 1) {
-		rx_desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
-		isert_conn = wc.qp->qp_context;
-
-		if (wc.status == IB_WC_SUCCESS) {
-			xfer_len = (unsigned long)wc.byte_len;
-			isert_rx_completion(rx_desc, isert_conn, xfer_len);
+	isert_conn = wc->qp->qp_context;
+	if (likely(wc->status == IB_WC_SUCCESS)) {
+		if (wc->opcode == IB_WC_RECV) {
+			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
+			isert_rx_completion(rx_desc, isert_conn, wc->byte_len);
 		} else {
-			pr_debug("RX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
-			if (wc.status != IB_WC_WR_FLUSH_ERR) {
-				pr_debug("RX wc.status: 0x%08x\n", wc.status);
-				pr_debug("RX wc.vendor_err: 0x%08x\n",
-					 wc.vendor_err);
-			}
-			isert_conn->post_recv_buf_count--;
-			isert_cq_rx_comp_err(isert_conn);
+			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+			isert_send_completion(tx_desc, isert_conn);
 		}
-	}
+	} else {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			isert_err("wr id %llx status %d vend_err %x\n",
+				  wc->wr_id, wc->status, wc->vendor_err);
+		else
+			isert_dbg("flush error: wr id %llx\n", wc->wr_id);
 
-	ib_req_notify_cq(rx_cq, IB_CQ_NEXT_COMP);
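+		/* fast-reg WRs (ISER_FASTREG_LI_WRID) carry no descriptor */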
+		if (wc->wr_id != ISER_FASTREG_LI_WRID)
+			isert_cq_comp_err(isert_conn, wc);
+	}
 }
 
 static void
-isert_cq_rx_callback(struct ib_cq *cq, void *context)
+isert_cq_work(struct work_struct *work)
 {
-	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
+	enum { isert_poll_budget = 65536 };
+	struct isert_comp *comp = container_of(work, struct isert_comp,
+					       work);
+	struct ib_wc *const wcs = comp->wcs;
+	int i, n, completed = 0;
 
-	queue_work(isert_rx_wq, &cq_desc->cq_rx_work);
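+	/*
+	 * Poll in batches of ARRAY_SIZE(comp->wcs) and cap the total work
+	 * per invocation (isert_poll_budget) so one busy CQ cannot
+	 * monopolize the workqueue before the CQ is re-armed below.
+	 */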
+	while ((n = ib_poll_cq(comp->cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
+		for (i = 0; i < n; i++)
+			isert_handle_wc(&wcs[i]);
+
+		completed += n;
+		if (completed >= isert_poll_budget)
+			break;
+	}
+
+	ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
+}
+
+static void
+isert_cq_callback(struct ib_cq *cq, void *context)
+{
+	struct isert_comp *comp = context;
+
+	queue_work(isert_comp_wq, &comp->work);
 }
 
 static int
@@ -2147,13 +2057,10 @@
 	struct ib_send_wr *wr_failed;
 	int ret;
 
-	atomic_inc(&isert_conn->post_send_buf_count);
-
 	ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr,
 			   &wr_failed);
 	if (ret) {
-		pr_err("ib_post_send failed with %d\n", ret);
-		atomic_dec(&isert_conn->post_send_buf_count);
+		isert_err("ib_post_send failed with %d\n", ret);
 		return ret;
 	}
 	return ret;
@@ -2200,9 +2107,9 @@
 		isert_cmd->tx_desc.num_sge = 2;
 	}
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("Posting SCSI Response\n");
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2231,8 +2138,16 @@
 	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
 	struct isert_device *device = isert_conn->conn_device;
 
-	if (device->pi_capable)
-		return TARGET_PROT_ALL;
+	if (conn->tpg->tpg_attrib.t10_pi) {
+		if (device->pi_capable) {
+			isert_info("conn %p PI offload enabled\n", isert_conn);
+			isert_conn->pi_support = true;
+			return TARGET_PROT_ALL;
+		}
+	}
+
+	isert_info("conn %p PI offload disabled\n", isert_conn);
+	isert_conn->pi_support = false;
 
 	return TARGET_PROT_NORMAL;
 }
@@ -2250,9 +2165,9 @@
 			       &isert_cmd->tx_desc.iscsi_header,
 			       nopout_response);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting NOPIN Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2268,9 +2183,9 @@
 	iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)
 				&isert_cmd->tx_desc.iscsi_header);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Logout Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2286,9 +2201,9 @@
 	iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *)
 				  &isert_cmd->tx_desc.iscsi_header);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Task Management Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2318,9 +2233,9 @@
 	tx_dsg->lkey	= isert_conn->conn_mr->lkey;
 	isert_cmd->tx_desc.num_sge = 2;
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Reject\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2358,9 +2273,9 @@
 		tx_dsg->lkey	= isert_conn->conn_mr->lkey;
 		isert_cmd->tx_desc.num_sge = 2;
 	}
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Text Reject\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2383,30 +2298,31 @@
 
 	send_wr->sg_list = ib_sge;
 	send_wr->num_sge = sg_nents;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	/*
 	 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
 	 */
 	for_each_sg(sg_start, tmp_sg, sg_nents, i) {
-		pr_debug("ISER RDMA from SGL dma_addr: 0x%16llx dma_len: %u, page_off: %u\n",
-			 (unsigned long long)tmp_sg->dma_address,
-			 tmp_sg->length, page_off);
+		isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, "
+			  "page_off: %u\n",
+			  (unsigned long long)tmp_sg->dma_address,
+			  tmp_sg->length, page_off);
 
 		ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
 		ib_sge->length = min_t(u32, data_left,
 				ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
 		ib_sge->lkey = isert_conn->conn_mr->lkey;
 
-		pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u lkey: %08x\n",
-			 ib_sge->addr, ib_sge->length, ib_sge->lkey);
+		isert_dbg("RDMA ib_sge: addr: 0x%llx  length: %u lkey: %x\n",
+			  ib_sge->addr, ib_sge->length, ib_sge->lkey);
 		page_off = 0;
 		data_left -= ib_sge->length;
 		ib_sge++;
-		pr_debug("Incrementing ib_sge pointer to %p\n", ib_sge);
+		isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
 	}
 
-	pr_debug("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
-		 send_wr->sg_list, send_wr->num_sge);
+	isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
+		  send_wr->sg_list, send_wr->num_sge);
 
 	return sg_nents;
 }
@@ -2438,7 +2354,7 @@
 
 	ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
 	if (!ib_sge) {
-		pr_warn("Unable to allocate ib_sge\n");
+		isert_warn("Unable to allocate ib_sge\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
@@ -2448,7 +2364,7 @@
 	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
 				GFP_KERNEL);
 	if (!wr->send_wr) {
-		pr_debug("Unable to allocate wr->send_wr\n");
+		isert_dbg("Unable to allocate wr->send_wr\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
@@ -2512,9 +2428,9 @@
 			chunk_start = start_addr;
 		end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
 
-		pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n",
-			 i, (unsigned long long)tmp_sg->dma_address,
-			 tmp_sg->length);
+		isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
+			  i, (unsigned long long)tmp_sg->dma_address,
+			  tmp_sg->length);
 
 		if ((end_addr & ~PAGE_MASK) && i < last_ent) {
 			new_chunk = 0;
@@ -2525,8 +2441,8 @@
 		page = chunk_start & PAGE_MASK;
 		do {
 			fr_pl[n_pages++] = page;
-			pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n",
-				 n_pages - 1, page);
+			isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
+				  n_pages - 1, page);
 			page += PAGE_SIZE;
 		} while (page < end_addr);
 	}
@@ -2534,6 +2450,21 @@
 	return n_pages;
 }
 
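+/*
+ * Prepare a LOCAL_INV WR for @mr and bump the 8-bit key portion of the
+ * rkey so the next fast registration of this MR uses a fresh key.
+ */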
+static inline void
+isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+	u32 rkey;
+
+	memset(inv_wr, 0, sizeof(*inv_wr));
+	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+	inv_wr->opcode = IB_WR_LOCAL_INV;
+	inv_wr->ex.invalidate_rkey = mr->rkey;
+
+	/* Bump the key */
+	rkey = ib_inc_rkey(mr->rkey);
+	ib_update_fast_reg_key(mr, rkey);
+}
+
 static int
 isert_fast_reg_mr(struct isert_conn *isert_conn,
 		  struct fast_reg_descriptor *fr_desc,
@@ -2548,15 +2479,13 @@
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	int ret, pagelist_len;
 	u32 page_off;
-	u8 key;
 
 	if (mem->dma_nents == 1) {
 		sge->lkey = isert_conn->conn_mr->lkey;
 		sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
 		sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
-		pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n",
-			 __func__, __LINE__, sge->addr, sge->length,
-			 sge->lkey);
+		isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
+			 sge->addr, sge->length, sge->lkey);
 		return 0;
 	}
 
@@ -2572,21 +2501,15 @@
 
 	page_off = mem->offset % PAGE_SIZE;
 
-	pr_debug("Use fr_desc %p sg_nents %d offset %u\n",
-		 fr_desc, mem->nents, mem->offset);
+	isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
+		  fr_desc, mem->nents, mem->offset);
 
 	pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
 					     &frpl->page_list[0]);
 
-	if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.ex.invalidate_rkey = mr->rkey;
+	if (!(fr_desc->ind & ind)) {
+		isert_inv_rkey(&inv_wr, mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(mr, ++key);
 	}
 
 	/* Prepare FASTREG WR */
@@ -2608,7 +2531,7 @@
 
 	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
 	if (ret) {
-		pr_err("fast registration failed, ret:%d\n", ret);
+		isert_err("fast registration failed, ret:%d\n", ret);
 		return ret;
 	}
 	fr_desc->ind &= ~ind;
@@ -2617,9 +2540,8 @@
 	sge->addr = frpl->page_list[0] + page_off;
 	sge->length = mem->len;
 
-	pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n",
-		 __func__, __LINE__, sge->addr, sge->length,
-		 sge->lkey);
+	isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
+		  sge->addr, sge->length, sge->lkey);
 
 	return ret;
 }
@@ -2665,7 +2587,7 @@
 		isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
 		break;
 	default:
-		pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);
+		isert_err("Unsupported PI operation %d\n", se_cmd->prot_op);
 		return -EINVAL;
 	}
 
@@ -2681,17 +2603,16 @@
 }
 
 static int
-isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd,
-		 struct fast_reg_descriptor *fr_desc,
-		 struct ib_sge *data_sge, struct ib_sge *prot_sge,
-		 struct ib_sge *sig_sge)
+isert_reg_sig_mr(struct isert_conn *isert_conn,
+		 struct se_cmd *se_cmd,
+		 struct isert_rdma_wr *rdma_wr,
+		 struct fast_reg_descriptor *fr_desc)
 {
 	struct ib_send_wr sig_wr, inv_wr;
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	struct pi_context *pi_ctx = fr_desc->pi_ctx;
 	struct ib_sig_attrs sig_attrs;
 	int ret;
-	u32 key;
 
 	memset(&sig_attrs, 0, sizeof(sig_attrs));
 	ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
@@ -2701,26 +2622,20 @@
 	sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
 
 	if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+		isert_inv_rkey(&inv_wr, pi_ctx->sig_mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
 	}
 
 	memset(&sig_wr, 0, sizeof(sig_wr));
 	sig_wr.opcode = IB_WR_REG_SIG_MR;
 	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-	sig_wr.sg_list = data_sge;
+	sig_wr.sg_list = &rdma_wr->ib_sg[DATA];
 	sig_wr.num_sge = 1;
 	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
 	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
 	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
 	if (se_cmd->t_prot_sg)
-		sig_wr.wr.sig_handover.prot = prot_sge;
+		sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT];
 
 	if (!wr)
 		wr = &sig_wr;
@@ -2729,39 +2644,98 @@
 
 	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
 	if (ret) {
-		pr_err("fast registration failed, ret:%d\n", ret);
+		isert_err("fast registration failed, ret:%d\n", ret);
 		goto err;
 	}
 	fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
 
-	sig_sge->lkey = pi_ctx->sig_mr->lkey;
-	sig_sge->addr = 0;
-	sig_sge->length = se_cmd->data_length;
+	rdma_wr->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
+	rdma_wr->ib_sg[SIG].addr = 0;
+	rdma_wr->ib_sg[SIG].length = se_cmd->data_length;
 	if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
 	    se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
 		/*
 		 * We have protection guards on the wire
 		 * so we need to set a larger transfer
 		 */
-		sig_sge->length += se_cmd->prot_length;
+		rdma_wr->ib_sg[SIG].length += se_cmd->prot_length;
 
-	pr_debug("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
-		 sig_sge->addr, sig_sge->length,
-		 sig_sge->lkey);
+	isert_dbg("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
+		  rdma_wr->ib_sg[SIG].addr, rdma_wr->ib_sg[SIG].length,
+		  rdma_wr->ib_sg[SIG].lkey);
 err:
 	return ret;
 }
 
 static int
+isert_handle_prot_cmd(struct isert_conn *isert_conn,
+		      struct isert_cmd *isert_cmd,
+		      struct isert_rdma_wr *wr)
+{
+	struct isert_device *device = isert_conn->conn_device;
+	struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
+	int ret;
+
+	if (!wr->fr_desc->pi_ctx) {
+		ret = isert_create_pi_ctx(wr->fr_desc,
+					  device->ib_device,
+					  isert_conn->conn_pd);
+		if (ret) {
+			isert_err("conn %p failed to allocate pi_ctx\n",
+				  isert_conn);
+			return ret;
+		}
+	}
+
+	if (se_cmd->t_prot_sg) {
+		ret = isert_map_data_buf(isert_conn, isert_cmd,
+					 se_cmd->t_prot_sg,
+					 se_cmd->t_prot_nents,
+					 se_cmd->prot_length,
+					 0, wr->iser_ib_op, &wr->prot);
+		if (ret) {
+			isert_err("conn %p failed to map protection buffer\n",
+				  isert_conn);
+			return ret;
+		}
+
+		memset(&wr->ib_sg[PROT], 0, sizeof(wr->ib_sg[PROT]));
+		ret = isert_fast_reg_mr(isert_conn, wr->fr_desc, &wr->prot,
+					ISERT_PROT_KEY_VALID, &wr->ib_sg[PROT]);
+		if (ret) {
+			isert_err("conn %p failed to fast reg mr\n",
+				  isert_conn);
+			goto unmap_prot_cmd;
+		}
+	}
+
+	ret = isert_reg_sig_mr(isert_conn, se_cmd, wr, wr->fr_desc);
+	if (ret) {
+		isert_err("conn %p failed to fast reg mr\n",
+			  isert_conn);
+		goto unmap_prot_cmd;
+	}
+	wr->fr_desc->ind |= ISERT_PROTECTED;
+
+	return 0;
+
+unmap_prot_cmd:
+	if (se_cmd->t_prot_sg)
+		isert_unmap_data_buf(isert_conn, &wr->prot);
+
+	return ret;
+}
+
+static int
 isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	       struct isert_rdma_wr *wr)
 {
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
 	struct isert_conn *isert_conn = conn->context;
-	struct ib_sge data_sge;
-	struct ib_send_wr *send_wr;
 	struct fast_reg_descriptor *fr_desc = NULL;
+	struct ib_send_wr *send_wr;
+	struct ib_sge *ib_sg;
 	u32 offset;
 	int ret = 0;
 	unsigned long flags;
@@ -2775,8 +2749,7 @@
 	if (ret)
 		return ret;
 
-	if (wr->data.dma_nents != 1 ||
-	    se_cmd->prot_op != TARGET_PROT_NORMAL) {
+	if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
 		spin_lock_irqsave(&isert_conn->conn_lock, flags);
 		fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
 					   struct fast_reg_descriptor, list);
@@ -2786,38 +2759,21 @@
 	}
 
 	ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data,
-				ISERT_DATA_KEY_VALID, &data_sge);
+				ISERT_DATA_KEY_VALID, &wr->ib_sg[DATA]);
 	if (ret)
 		goto unmap_cmd;
 
-	if (se_cmd->prot_op != TARGET_PROT_NORMAL) {
-		struct ib_sge prot_sge, sig_sge;
-
-		if (se_cmd->t_prot_sg) {
-			ret = isert_map_data_buf(isert_conn, isert_cmd,
-						 se_cmd->t_prot_sg,
-						 se_cmd->t_prot_nents,
-						 se_cmd->prot_length,
-						 0, wr->iser_ib_op, &wr->prot);
-			if (ret)
-				goto unmap_cmd;
-
-			ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot,
-						ISERT_PROT_KEY_VALID, &prot_sge);
-			if (ret)
-				goto unmap_prot_cmd;
-		}
-
-		ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc,
-				       &data_sge, &prot_sge, &sig_sge);
+	if (isert_prot_cmd(isert_conn, se_cmd)) {
+		ret = isert_handle_prot_cmd(isert_conn, isert_cmd, wr);
 		if (ret)
-			goto unmap_prot_cmd;
+			goto unmap_cmd;
 
-		fr_desc->ind |= ISERT_PROTECTED;
-		memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge));
-	} else
-		memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge));
+		ib_sg = &wr->ib_sg[SIG];
+	} else {
+		ib_sg = &wr->ib_sg[DATA];
+	}
 
+	memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
 	wr->ib_sge = &wr->s_ib_sge;
 	wr->send_wr_num = 1;
 	memset(&wr->s_send_wr, 0, sizeof(*send_wr));
@@ -2827,12 +2783,12 @@
 	send_wr = &isert_cmd->rdma_wr.s_send_wr;
 	send_wr->sg_list = &wr->s_ib_sge;
 	send_wr->num_sge = 1;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
 		send_wr->opcode = IB_WR_RDMA_WRITE;
 		send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
 		send_wr->wr.rdma.rkey = isert_cmd->read_stag;
-		send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ?
+		send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
 				      0 : IB_SEND_SIGNALED;
 	} else {
 		send_wr->opcode = IB_WR_RDMA_READ;
@@ -2842,9 +2798,7 @@
 	}
 
 	return 0;
-unmap_prot_cmd:
-	if (se_cmd->t_prot_sg)
-		isert_unmap_data_buf(isert_conn, &wr->prot);
+
 unmap_cmd:
 	if (fr_desc) {
 		spin_lock_irqsave(&isert_conn->conn_lock, flags);
@@ -2867,16 +2821,17 @@
 	struct ib_send_wr *wr_failed;
 	int rc;
 
-	pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n",
+	isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
 		 isert_cmd, se_cmd->data_length);
+
 	wr->iser_ib_op = ISER_IB_RDMA_WRITE;
 	rc = device->reg_rdma_mem(conn, cmd, wr);
 	if (rc) {
-		pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	if (se_cmd->prot_op == TARGET_PROT_NORMAL) {
+	if (!isert_prot_cmd(isert_conn, se_cmd)) {
 		/*
 		 * Build isert_conn->tx_desc for iSCSI response PDU and attach
 		 */
@@ -2886,24 +2841,20 @@
 				     &isert_cmd->tx_desc.iscsi_header);
 		isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
 		isert_init_send_wr(isert_conn, isert_cmd,
-				   &isert_cmd->tx_desc.send_wr, false);
+				   &isert_cmd->tx_desc.send_wr);
 		isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
 		wr->send_wr_num += 1;
 	}
 
-	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
-
 	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-	if (rc) {
-		pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	}
+	if (rc)
+		isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
 
-	if (se_cmd->prot_op == TARGET_PROT_NORMAL)
-		pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
+	if (!isert_prot_cmd(isert_conn, se_cmd))
+		isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
 			 "READ\n", isert_cmd);
 	else
-		pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
+		isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
 			 isert_cmd);
 
 	return 1;
@@ -2920,23 +2871,20 @@
 	struct ib_send_wr *wr_failed;
 	int rc;
 
-	pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
+	isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
 		 isert_cmd, se_cmd->data_length, cmd->write_data_done);
 	wr->iser_ib_op = ISER_IB_RDMA_READ;
 	rc = device->reg_rdma_mem(conn, cmd, wr);
 	if (rc) {
-		pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
-
 	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-	if (rc) {
-		pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	}
-	pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
+	if (rc)
+		isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
+
+	isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
 		 isert_cmd);
 
 	return 0;
@@ -2952,7 +2900,7 @@
 		ret = isert_put_nopin(cmd, conn, false);
 		break;
 	default:
-		pr_err("Unknown immediate state: 0x%02x\n", state);
+		isert_err("Unknown immediate state: 0x%02x\n", state);
 		ret = -EINVAL;
 		break;
 	}
@@ -2963,15 +2911,14 @@
 static int
 isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 {
+	struct isert_conn *isert_conn = conn->context;
 	int ret;
 
 	switch (state) {
 	case ISTATE_SEND_LOGOUTRSP:
 		ret = isert_put_logout_rsp(cmd, conn);
-		if (!ret) {
-			pr_debug("Returning iSER Logout -EAGAIN\n");
-			ret = -EAGAIN;
-		}
+		if (!ret)
+			isert_conn->logout_posted = true;
 		break;
 	case ISTATE_SEND_NOPIN:
 		ret = isert_put_nopin(cmd, conn, true);
@@ -2993,7 +2940,7 @@
 		ret = isert_put_response(conn, cmd);
 		break;
 	default:
-		pr_err("Unknown response state: 0x%02x\n", state);
+		isert_err("Unknown response state: 0x%02x\n", state);
 		ret = -EINVAL;
 		break;
 	}
@@ -3001,27 +2948,64 @@
 	return ret;
 }
 
+struct rdma_cm_id *
+isert_setup_id(struct isert_np *isert_np)
+{
+	struct iscsi_np *np = isert_np->np;
+	struct rdma_cm_id *id;
+	struct sockaddr *sa;
+	int ret;
+
+	sa = (struct sockaddr *)&np->np_sockaddr;
+	isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa);
+
+	id = rdma_create_id(isert_cma_handler, isert_np,
+			    RDMA_PS_TCP, IB_QPT_RC);
+	if (IS_ERR(id)) {
+		isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
+		ret = PTR_ERR(id);
+		goto out;
+	}
+	isert_dbg("id %p context %p\n", id, id->context);
+
+	ret = rdma_bind_addr(id, sa);
+	if (ret) {
+		isert_err("rdma_bind_addr() failed: %d\n", ret);
+		goto out_id;
+	}
+
+	ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG);
+	if (ret) {
+		isert_err("rdma_listen() failed: %d\n", ret);
+		goto out_id;
+	}
+
+	return id;
+out_id:
+	rdma_destroy_id(id);
+out:
+	return ERR_PTR(ret);
+}
+
 static int
 isert_setup_np(struct iscsi_np *np,
 	       struct __kernel_sockaddr_storage *ksockaddr)
 {
 	struct isert_np *isert_np;
 	struct rdma_cm_id *isert_lid;
-	struct sockaddr *sa;
 	int ret;
 
 	isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL);
 	if (!isert_np) {
-		pr_err("Unable to allocate struct isert_np\n");
+		isert_err("Unable to allocate struct isert_np\n");
 		return -ENOMEM;
 	}
 	sema_init(&isert_np->np_sem, 0);
 	mutex_init(&isert_np->np_accept_mutex);
 	INIT_LIST_HEAD(&isert_np->np_accept_list);
 	init_completion(&isert_np->np_login_comp);
+	isert_np->np = np;
 
-	sa = (struct sockaddr *)ksockaddr;
-	pr_debug("ksockaddr: %p, sa: %p\n", ksockaddr, sa);
 	/*
 	 * Setup the np->np_sockaddr from the passed sockaddr setup
 	 * in iscsi_target_configfs.c code..
@@ -3029,37 +3013,20 @@
 	memcpy(&np->np_sockaddr, ksockaddr,
 	       sizeof(struct __kernel_sockaddr_storage));
 
-	isert_lid = rdma_create_id(isert_cma_handler, np, RDMA_PS_TCP,
-				IB_QPT_RC);
+	isert_lid = isert_setup_id(isert_np);
 	if (IS_ERR(isert_lid)) {
-		pr_err("rdma_create_id() for isert_listen_handler failed: %ld\n",
-		       PTR_ERR(isert_lid));
 		ret = PTR_ERR(isert_lid);
 		goto out;
 	}
 
-	ret = rdma_bind_addr(isert_lid, sa);
-	if (ret) {
-		pr_err("rdma_bind_addr() for isert_lid failed: %d\n", ret);
-		goto out_lid;
-	}
-
-	ret = rdma_listen(isert_lid, ISERT_RDMA_LISTEN_BACKLOG);
-	if (ret) {
-		pr_err("rdma_listen() for isert_lid failed: %d\n", ret);
-		goto out_lid;
-	}
-
 	isert_np->np_cm_id = isert_lid;
 	np->np_context = isert_np;
-	pr_debug("Setup isert_lid->context: %p\n", isert_lid->context);
 
 	return 0;
 
-out_lid:
-	rdma_destroy_id(isert_lid);
 out:
 	kfree(isert_np);
+
 	return ret;
 }
 
@@ -3075,16 +3042,12 @@
 	cp.retry_count = 7;
 	cp.rnr_retry_count = 7;
 
-	pr_debug("Before rdma_accept >>>>>>>>>>>>>>>>>>>>.\n");
-
 	ret = rdma_accept(cm_id, &cp);
 	if (ret) {
-		pr_err("rdma_accept() failed with: %d\n", ret);
+		isert_err("rdma_accept() failed with: %d\n", ret);
 		return ret;
 	}
 
-	pr_debug("After rdma_accept >>>>>>>>>>>>>>>>>>>>>.\n");
-
 	return 0;
 }
 
@@ -3094,7 +3057,15 @@
 	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
 	int ret;
 
-	pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn);
+	isert_info("before login_req comp conn: %p\n", isert_conn);
+	ret = wait_for_completion_interruptible(&isert_conn->login_req_comp);
+	if (ret) {
+		isert_err("isert_conn %p interrupted before got login req\n",
+			  isert_conn);
+		return ret;
+	}
+	reinit_completion(&isert_conn->login_req_comp);
+
 	/*
 	 * For login requests after the first PDU, isert_rx_login_req() will
 	 * kick schedule_delayed_work(&conn->login_work) as the packet is
@@ -3104,11 +3075,15 @@
 	if (!login->first_request)
 		return 0;
 
+	isert_rx_login_req(isert_conn);
+
+	isert_info("before conn_login_comp conn: %p\n", conn);
 	ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
 	if (ret)
 		return ret;
 
-	pr_debug("isert_get_login_rx processing login->req: %p\n", login->req);
+	isert_info("processing login->req: %p\n", login->req);
+
 	return 0;
 }
 
@@ -3161,7 +3136,7 @@
 	spin_lock_bh(&np->np_thread_lock);
 	if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("np_thread_state %d for isert_accept_np\n",
+		isert_dbg("np_thread_state %d for isert_accept_np\n",
 			 np->np_thread_state);
 		/**
 		 * No point in stalling here when np_thread
@@ -3186,17 +3161,10 @@
 	isert_conn->conn = conn;
 	max_accept = 0;
 
-	ret = isert_rdma_post_recvl(isert_conn);
-	if (ret)
-		return ret;
-
-	ret = isert_rdma_accept(isert_conn);
-	if (ret)
-		return ret;
-
 	isert_set_conn_info(np, conn, isert_conn);
 
-	pr_debug("Processing isert_accept_np: isert_conn: %p\n", isert_conn);
+	isert_dbg("Processing isert_conn: %p\n", isert_conn);
+
 	return 0;
 }
 
@@ -3204,25 +3172,103 @@
 isert_free_np(struct iscsi_np *np)
 {
 	struct isert_np *isert_np = (struct isert_np *)np->np_context;
+	struct isert_conn *isert_conn, *n;
 
 	if (isert_np->np_cm_id)
 		rdma_destroy_id(isert_np->np_cm_id);
 
+	/*
+	 * FIXME: We don't have a good way to ensure that no connections
+	 * are left hanging here after completing RDMA establishment but
+	 * before starting the iscsi login process. Work around this by
+	 * cleaning up whatever piled up in np_accept_list.
+	 */
+	mutex_lock(&isert_np->np_accept_mutex);
+	if (!list_empty(&isert_np->np_accept_list)) {
+		isert_info("Still have isert connections, cleaning up...\n");
+		list_for_each_entry_safe(isert_conn, n,
+					 &isert_np->np_accept_list,
+					 conn_accept_node) {
+			isert_info("cleaning isert_conn %p state (%d)\n",
+				   isert_conn, isert_conn->state);
+			isert_connect_release(isert_conn);
+		}
+	}
+	mutex_unlock(&isert_np->np_accept_mutex);
+
 	np->np_context = NULL;
 	kfree(isert_np);
 }
 
+static void isert_release_work(struct work_struct *work)
+{
+	struct isert_conn *isert_conn = container_of(work,
+						     struct isert_conn,
+						     release_work);
+
+	isert_info("Starting release conn %p\n", isert_conn);
+
+	wait_for_completion(&isert_conn->conn_wait);
+
+	mutex_lock(&isert_conn->conn_mutex);
+	isert_conn->state = ISER_CONN_DOWN;
+	mutex_unlock(&isert_conn->conn_mutex);
+
+	isert_info("Destroying conn %p\n", isert_conn);
+	isert_put_conn(isert_conn);
+}
+
+static void
+isert_wait4logout(struct isert_conn *isert_conn)
+{
+	struct iscsi_conn *conn = isert_conn->conn;
+
+	isert_info("conn %p\n", isert_conn);
+
+	if (isert_conn->logout_posted) {
+		isert_info("conn %p wait for conn_logout_comp\n", isert_conn);
+		wait_for_completion_timeout(&conn->conn_logout_comp,
+					    SECONDS_FOR_LOGOUT_COMP * HZ);
+	}
+}
+
+static void
+isert_wait4cmds(struct iscsi_conn *conn)
+{
+	isert_info("iscsi_conn %p\n", conn);
+
+	if (conn->sess) {
+		target_sess_cmd_list_set_waiting(conn->sess->se_sess);
+		target_wait_for_sess_cmds(conn->sess->se_sess);
+	}
+}
+
+static void
+isert_wait4flush(struct isert_conn *isert_conn)
+{
+	struct ib_recv_wr *bad_wr;
+
+	isert_info("conn %p\n", isert_conn);
+
+	init_completion(&isert_conn->conn_wait_comp_err);
+	isert_conn->beacon.wr_id = ISER_BEACON_WRID;
+	/* post an indication that all flush errors were consumed */
+	if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) {
+		isert_err("conn %p failed to post beacon", isert_conn);
+		return;
+	}
+
+	wait_for_completion(&isert_conn->conn_wait_comp_err);
+}
+
 static void isert_wait_conn(struct iscsi_conn *conn)
 {
 	struct isert_conn *isert_conn = conn->context;
 
-	pr_debug("isert_wait_conn: Starting \n");
+	isert_info("Starting conn %p\n", isert_conn);
 
 	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->conn_cm_id && !isert_conn->disconnect) {
-		pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
-		rdma_disconnect(isert_conn->conn_cm_id);
-	}
 	/*
 	 * Only wait for conn_wait_comp_err if the isert_conn made it
 	 * into full feature phase..
@@ -3231,14 +3277,15 @@
 		mutex_unlock(&isert_conn->conn_mutex);
 		return;
 	}
-	if (isert_conn->state == ISER_CONN_UP)
-		isert_conn->state = ISER_CONN_TERMINATING;
+	isert_conn_terminate(isert_conn);
 	mutex_unlock(&isert_conn->conn_mutex);
 
-	wait_for_completion(&isert_conn->conn_wait_comp_err);
+	isert_wait4cmds(conn);
+	isert_wait4flush(isert_conn);
+	isert_wait4logout(isert_conn);
 
-	wait_for_completion(&isert_conn->conn_wait);
-	isert_put_conn(isert_conn);
+	INIT_WORK(&isert_conn->release_work, isert_release_work);
+	queue_work(isert_release_wq, &isert_conn->release_work);
 }
 
 static void isert_free_conn(struct iscsi_conn *conn)
@@ -3273,35 +3320,39 @@
 {
 	int ret;
 
-	isert_rx_wq = alloc_workqueue("isert_rx_wq", 0, 0);
-	if (!isert_rx_wq) {
-		pr_err("Unable to allocate isert_rx_wq\n");
+	isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0);
+	if (!isert_comp_wq) {
+		isert_err("Unable to allocate isert_comp_wq\n");
+		ret = -ENOMEM;
 		return -ENOMEM;
 	}
 
-	isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0);
-	if (!isert_comp_wq) {
-		pr_err("Unable to allocate isert_comp_wq\n");
+	isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND,
+					WQ_UNBOUND_MAX_ACTIVE);
+	if (!isert_release_wq) {
+		isert_err("Unable to allocate isert_release_wq\n");
 		ret = -ENOMEM;
-		goto destroy_rx_wq;
+		goto destroy_comp_wq;
 	}
 
 	iscsit_register_transport(&iser_target_transport);
-	pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n");
+	isert_info("iSER_TARGET[0] - Loaded iser_target_transport\n");
+
 	return 0;
 
-destroy_rx_wq:
-	destroy_workqueue(isert_rx_wq);
+destroy_comp_wq:
+	destroy_workqueue(isert_comp_wq);
+
 	return ret;
 }
 
 static void __exit isert_exit(void)
 {
 	flush_scheduled_work();
+	destroy_workqueue(isert_release_wq);
 	destroy_workqueue(isert_comp_wq);
-	destroy_workqueue(isert_rx_wq);
 	iscsit_unregister_transport(&iser_target_transport);
-	pr_debug("iSER_TARGET[0] - Released iser_target_transport\n");
+	isert_info("iSER_TARGET[0] - Released iser_target_transport\n");
 }
 
 MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 04f51f7..8dc8415 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -4,9 +4,37 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 
+#define DRV_NAME	"isert"
+#define PFX		DRV_NAME ": "
+
+#define isert_dbg(fmt, arg...)				 \
+	do {						 \
+		if (unlikely(isert_debug_level > 2))	 \
+			printk(KERN_DEBUG PFX "%s: " fmt,\
+				__func__ , ## arg);	 \
+	} while (0)
+
+#define isert_warn(fmt, arg...)				\
+	do {						\
+		if (unlikely(isert_debug_level > 0))	\
+			pr_warn(PFX "%s: " fmt,         \
+				__func__ , ## arg);	\
+	} while (0)
+
+#define isert_info(fmt, arg...)				\
+	do {						\
+		if (unlikely(isert_debug_level > 1))	\
+			pr_info(PFX "%s: " fmt,         \
+				__func__ , ## arg);	\
+	} while (0)
+
+#define isert_err(fmt, arg...) \
+	pr_err(PFX "%s: " fmt, __func__ , ## arg)
+
 #define ISERT_RDMA_LISTEN_BACKLOG	10
 #define ISCSI_ISER_SG_TABLESIZE		256
 #define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL
+#define ISER_BEACON_WRID               0xfffffffffffffffeULL
 
 enum isert_desc_type {
 	ISCSI_TX_CONTROL,
@@ -23,6 +51,7 @@
 enum iser_conn_state {
 	ISER_CONN_INIT,
 	ISER_CONN_UP,
+	ISER_CONN_FULL_FEATURE,
 	ISER_CONN_TERMINATING,
 	ISER_CONN_DOWN,
 };
@@ -44,9 +73,6 @@
 	struct ib_sge	tx_sg[2];
 	int		num_sge;
 	struct isert_cmd *isert_cmd;
-	struct llist_node *comp_llnode_batch;
-	struct llist_node comp_llnode;
-	bool		llnode_active;
 	struct ib_send_wr send_wr;
 } __packed;
 
@@ -81,6 +107,12 @@
 	enum dma_data_direction dma_dir;
 };
 
+enum {
+	DATA = 0,
+	PROT = 1,
+	SIG = 2,
+};
+
 struct isert_rdma_wr {
 	struct list_head	wr_list;
 	struct isert_cmd	*isert_cmd;
@@ -90,6 +122,7 @@
 	int			send_wr_num;
 	struct ib_send_wr	*send_wr;
 	struct ib_send_wr	s_send_wr;
+	struct ib_sge		ib_sg[3];
 	struct isert_data_buf	data;
 	struct isert_data_buf	prot;
 	struct fast_reg_descriptor *fr_desc;
@@ -117,14 +150,15 @@
 struct isert_conn {
 	enum iser_conn_state	state;
 	int			post_recv_buf_count;
-	atomic_t		post_send_buf_count;
 	u32			responder_resources;
 	u32			initiator_depth;
+	bool			pi_support;
 	u32			max_sge;
 	char			*login_buf;
 	char			*login_req_buf;
 	char			*login_rsp_buf;
 	u64			login_req_dma;
+	int			login_req_len;
 	u64			login_rsp_dma;
 	unsigned int		conn_rx_desc_head;
 	struct iser_rx_desc	*conn_rx_descs;
@@ -132,13 +166,13 @@
 	struct iscsi_conn	*conn;
 	struct list_head	conn_accept_node;
 	struct completion	conn_login_comp;
+	struct completion	login_req_comp;
 	struct iser_tx_desc	conn_login_tx_desc;
 	struct rdma_cm_id	*conn_cm_id;
 	struct ib_pd		*conn_pd;
 	struct ib_mr		*conn_mr;
 	struct ib_qp		*conn_qp;
 	struct isert_device	*conn_device;
-	struct work_struct	conn_logout_work;
 	struct mutex		conn_mutex;
 	struct completion	conn_wait;
 	struct completion	conn_wait_comp_err;
@@ -147,31 +181,38 @@
 	int			conn_fr_pool_size;
 	/* lock to protect fastreg pool */
 	spinlock_t		conn_lock;
-#define ISERT_COMP_BATCH_COUNT	8
-	int			conn_comp_batch;
-	struct llist_head	conn_comp_llist;
-	bool                    disconnect;
+	struct work_struct	release_work;
+	struct ib_recv_wr       beacon;
+	bool                    logout_posted;
 };
 
 #define ISERT_MAX_CQ 64
 
-struct isert_cq_desc {
-	struct isert_device	*device;
-	int			cq_index;
-	struct work_struct	cq_rx_work;
-	struct work_struct	cq_tx_work;
+/**
+ * struct isert_comp - iSER completion context
+ *
+ * @device:     pointer to device handle
+ * @cq:         completion queue
+ * @wcs:        work completion array
+ * @active_qps: Number of active QPs attached
+ *              to completion context
+ * @work:       completion work handle
+ */
+struct isert_comp {
+	struct isert_device     *device;
+	struct ib_cq		*cq;
+	struct ib_wc		 wcs[16];
+	int                      active_qps;
+	struct work_struct	 work;
 };
 
 struct isert_device {
 	int			use_fastreg;
 	bool			pi_capable;
-	int			cqs_used;
 	int			refcount;
-	int			cq_active_qps[ISERT_MAX_CQ];
 	struct ib_device	*ib_device;
-	struct ib_cq		*dev_rx_cq[ISERT_MAX_CQ];
-	struct ib_cq		*dev_tx_cq[ISERT_MAX_CQ];
-	struct isert_cq_desc	*cq_desc;
+	struct isert_comp	*comps;
+	int                     comps_used;
 	struct list_head	dev_node;
 	struct ib_device_attr	dev_attr;
 	int			(*reg_rdma_mem)(struct iscsi_conn *conn,
@@ -182,6 +223,7 @@
 };
 
 struct isert_np {
+	struct iscsi_np         *np;
 	struct semaphore	np_sem;
 	struct rdma_cm_id	*np_cm_id;
 	struct mutex		np_accept_mutex;
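
The logging macros above hang off a single isert_debug_level knob:
isert_err() is unconditional, while warn/info/dbg unlock at levels 1, 2 and
3 respectively. A hypothetical usage sketch — the module-parameter wiring
here is an assumption, not part of this hunk:

	static int isert_debug_level;	/* assumed: 0=err, 1=+warn, 2=+info, 3=+dbg */
	module_param_named(debug_level, isert_debug_level, int, 0644);

	/* illustrative call sites */
	isert_err("always printed, ret: %d\n", ret);
	isert_info("conn %p up\n", isert_conn);	/* level > 1 only */
	isert_dbg("wr_id 0x%llx\n", wc->wr_id);	/* level > 2 only */
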
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b205f76..9802485 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4071,7 +4071,7 @@
 	int devid;
 	int ret;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4134,7 +4134,7 @@
 	if (!config_enabled(CONFIG_SMP))
 		return -1;
 
-	cfg       = data->chip_data;
+	cfg       = irqd_cfg(data);
 	irq       = data->irq;
 	irte_info = &cfg->irq_2_irte;
 
@@ -4172,7 +4172,7 @@
 	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4191,7 +4191,7 @@
 	struct irq_cfg *cfg;
 	union irte irte;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return;
 
@@ -4220,7 +4220,7 @@
 	if (!pdev)
 		return -EINVAL;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4240,7 +4240,7 @@
 	if (!pdev)
 		return -EINVAL;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4263,7 +4263,7 @@
 	struct irq_cfg *cfg;
 	int index, devid;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 27541d4..a55b207 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -54,7 +54,7 @@
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	return cfg ? &cfg->irq_2_iommu : NULL;
 }
 
@@ -85,7 +85,7 @@
 {
 	struct ir_table *table = iommu->ir_table;
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned int mask = 0;
 	unsigned long flags;
 	int index;
@@ -153,7 +153,7 @@
 static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
 	if (!irq_iommu)
@@ -1050,7 +1050,7 @@
 intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 			  bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest, irq = data->irq;
 	struct irte irte;
 	int err;
@@ -1105,7 +1105,7 @@
 	u16 sub_handle = 0;
 	int ir_index;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 
 	ir_index = map_irq_to_irte_handle(irq, &sub_handle);
 	BUG_ON(ir_index == -1);
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2c3f5ad..89c4846 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -298,7 +298,7 @@
 
 void free_remapped_irq(int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!remap_ops || !remap_ops->free_irq)
 		return;
@@ -311,7 +311,7 @@
 			      unsigned int irq, unsigned int dest,
 			      struct msi_msg *msg, u8 hpet_id)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!irq_remapped(cfg))
 		native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -364,7 +364,7 @@
 static void ir_ack_apic_level(struct irq_data *data)
 {
 	ack_APIC_irq();
-	eoi_ioapic_irq(data->irq, data->chip_data);
+	eoi_ioapic_irq(data->irq, irqd_cfg(data));
 }
 
 static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index cca4721..51fd6b5 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -34,7 +34,8 @@
 /*
  * Initialises a CXL context.
  */
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping)
 {
 	int i;
 
@@ -42,6 +43,8 @@
 	ctx->afu = afu;
 	ctx->master = master;
 	ctx->pid = NULL; /* Set in start work ioctl */
+	mutex_init(&ctx->mapping_lock);
+	ctx->mapping = mapping;
 
 	/*
 	 * Allocate the segment table before we put it in the IDR so that we
@@ -82,12 +85,12 @@
 	 * Allocating IDR! We better make sure everything's setup that
 	 * dereferences from it.
 	 */
+	mutex_lock(&afu->contexts_lock);
 	idr_preload(GFP_KERNEL);
-	spin_lock(&afu->contexts_lock);
 	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
 		      ctx->afu->num_procs, GFP_NOWAIT);
-	spin_unlock(&afu->contexts_lock);
 	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
 	if (i < 0)
 		return i;
 
@@ -147,6 +150,12 @@
 	afu_release_irqs(ctx);
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
 	wake_up_all(&ctx->wq);
+
+	/* Release Problem State Area mapping */
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->mapping)
+		unmap_mapping_range(ctx->mapping, 0, 0, 1);
+	mutex_unlock(&ctx->mapping_lock);
 }
 
 /*
@@ -168,21 +177,22 @@
 	struct cxl_context *ctx;
 	int tmp;
 
-	rcu_read_lock();
-	idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
 		/*
 		 * Anything done in here needs to be setup before the IDR is
 		 * created and torn down after the IDR removed
 		 */
 		__detach_context(ctx);
-	rcu_read_unlock();
+	}
+	mutex_unlock(&afu->contexts_lock);
 }
 
 void cxl_context_free(struct cxl_context *ctx)
 {
-	spin_lock(&ctx->afu->contexts_lock);
+	mutex_lock(&ctx->afu->contexts_lock);
 	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-	spin_unlock(&ctx->afu->contexts_lock);
+	mutex_unlock(&ctx->afu->contexts_lock);
 	synchronize_rcu();
 
 	free_page((u64)ctx->sstp);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b5b6bda..28078f8 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -351,7 +351,7 @@
 	struct device *chardev_s, *chardev_m, *chardev_d;
 	struct idr contexts_idr;
 	struct dentry *debugfs;
-	spinlock_t contexts_lock;
+	struct mutex contexts_lock;
 	struct mutex spa_mutex;
 	spinlock_t afu_cntl_lock;
 
@@ -398,6 +398,10 @@
 	phys_addr_t psn_phys;
 	u64 psn_size;
 
+	/* Used to unmap any mmaps when force detaching */
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+
 	spinlock_t sste_lock; /* Protects segment table entries */
 	struct cxl_sste *sstp;
 	u64 sstp0, sstp1;
@@ -599,7 +603,8 @@
 void init_cxl_native(void);
 
 struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
 
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 378b099..e9f2f10 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -77,7 +77,7 @@
 		goto err_put_afu;
 	}
 
-	if ((rc = cxl_context_init(ctx, afu, master)))
+	if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
 		goto err_put_afu;
 
 	pr_devel("afu_open pe: %i\n", ctx->pe);
@@ -113,6 +113,10 @@
 		 __func__, ctx->pe);
 	cxl_context_detach(ctx);
 
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
 	put_device(&ctx->afu->dev);
 
 	/*
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 9a5a442..f2b37b4 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -277,6 +277,7 @@
 				  u64 cmd, u64 pe_state)
 {
 	u64 state;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
 	WARN_ON(!ctx->afu->enabled);
 
@@ -286,6 +287,10 @@
 	smp_mb();
 	cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
 	while (1) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+			return -EBUSY;
+		}
 		state = be64_to_cpup(ctx->afu->sw_command_status);
 		if (state == ~0ULL) {
 			pr_err("cxl: Error adding process element to AFU\n");
@@ -610,13 +615,6 @@
 	return 0;
 }
 
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) .  Terminate
- * & remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
 {
 	if (!ctx->pe_inserted)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 10c98ab..0f2cc9f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -502,7 +502,7 @@
 	afu->dev.release = cxl_release_afu;
 	afu->slice = slice;
 	idr_init(&afu->contexts_idr);
-	spin_lock_init(&afu->contexts_lock);
+	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
 	mutex_init(&afu->spa_mutex);
 
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index ce7ec06..461bdbd 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -121,7 +121,7 @@
 	int rc;
 
 	/* Not safe to reset if it is currently in use */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr)) {
 		rc = -EBUSY;
 		goto err;
@@ -132,7 +132,7 @@
 
 	rc = count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
@@ -247,7 +247,7 @@
 	int rc = -EBUSY;
 
 	/* can't change this if we have a user */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr))
 		goto err;
 
@@ -271,7 +271,7 @@
 	afu->current_mode = 0;
 	afu->num_procs = 0;
 
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 
 	if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
 		return rc;
@@ -280,7 +280,7 @@
 
 	return count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
index 028ba5d6..687e9aa 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -326,21 +326,27 @@
 			}
 			avail = vrh->vring.avail;
 			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   avail->flags, avail->idx & (num - 1));
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx) & (num - 1));
 			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   avail->flags, avail->idx);
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx));
 			for (j = 0; j < num; j++)
 				seq_printf(s, "avail ring[%d] %d\n",
 					   j, avail->ring[j]);
 			used = vrh->vring.used;
 			seq_printf(s, "used flags 0x%x idx %d\n",
-				   used->flags, used->idx & (num - 1));
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx) & (num - 1));
 			seq_printf(s, "used flags 0x%x idx %d\n",
-				   used->flags, used->idx);
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx));
 			for (j = 0; j < num; j++)
 				seq_printf(s, "used ring[%d] id %d len %d\n",
-					   j, used->ring[j].id,
-					   used->ring[j].len);
+					   j, vringh32_to_cpu(vrh,
+							      used->ring[j].id),
+					   vringh32_to_cpu(vrh,
+							   used->ring[j].len));
 		}
 	}
 	mutex_unlock(&mdev->mic_mutex);
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index cced842..7a8f1c5 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -67,7 +67,7 @@
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y
-	depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
+	depends on PCI && X86_LOCAL_APIC
 	help
 	   This allows native hypertransport devices to use interrupts.
 
@@ -110,13 +110,6 @@
 
 	  If unsure, say N.
 
-config PCI_IOAPIC
-	bool "PCI IO-APIC hotplug support" if X86
-	depends on PCI
-	depends on ACPI
-	depends on X86_IO_APIC
-	default !X86
-
 config PCI_LABEL
 	def_bool y if (DMI || ACPI)
 	select NLS
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index e04fe2d..73e4af4 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -13,8 +13,6 @@
 # Build PCI Express stuff if needed
 obj-$(CONFIG_PCIEPORTBUS) += pcie/
 
-obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
-
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
 ifdef CONFIG_HOTPLUG_PCI
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 3efaf4c..96c5c72 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -36,6 +36,7 @@
 #include <linux/wait.h>
 #include "../pci.h"
 #include <asm/pci_x86.h>		/* for struct irq_routing_table */
+#include <asm/io_apic.h>
 #include "ibmphp.h"
 
 #define attn_on(sl)  ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON)
@@ -155,13 +156,10 @@
 	for (loop = 0; loop < len; loop++) {
 		if ((*cur_slot)->number == rtable->slots[loop].slot &&
 		    (*cur_slot)->bus == rtable->slots[loop].bus) {
-			struct io_apic_irq_attr irq_attr;
-
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-						(int) (*cur_slot)->device, i,
-						&irq_attr);
+						(int) (*cur_slot)->device, i);
 
 			debug("(*cur_slot)->irq[0] = %x\n",
 					(*cur_slot)->irq[0]);
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
deleted file mode 100644
index f6219d3..0000000
--- a/drivers/pci/ioapic.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * IOAPIC/IOxAPIC/IOSAPIC driver
- *
- * Copyright (C) 2009 Fujitsu Limited.
- * (c) Copyright 2009 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * This driver manages PCI I/O APICs added by hotplug after boot.  We try to
- * claim all I/O APIC PCI devices, but those present at boot were registered
- * when we parsed the ACPI MADT, so we'll fail when we try to re-register
- * them.
- */
-
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/acpi.h>
-#include <linux/slab.h>
-
-struct ioapic {
-	acpi_handle	handle;
-	u32		gsi_base;
-};
-
-static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
-{
-	acpi_handle handle;
-	acpi_status status;
-	unsigned long long gsb;
-	struct ioapic *ioapic;
-	int ret;
-	char *type;
-	struct resource *res;
-
-	handle = ACPI_HANDLE(&dev->dev);
-	if (!handle)
-		return -EINVAL;
-
-	status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb);
-	if (ACPI_FAILURE(status))
-		return -EINVAL;
-
-	/*
-	 * The previous code in acpiphp evaluated _MAT if _GSB failed, but
-	 * ACPI spec 4.0 sec 6.2.2 requires _GSB for hot-pluggable I/O APICs.
-	 */
-
-	ioapic = kzalloc(sizeof(*ioapic), GFP_KERNEL);
-	if (!ioapic)
-		return -ENOMEM;
-
-	ioapic->handle = handle;
-	ioapic->gsi_base = (u32) gsb;
-
-	if (dev->class == PCI_CLASS_SYSTEM_PIC_IOAPIC)
-		type = "IOAPIC";
-	else
-		type = "IOxAPIC";
-
-	ret = pci_enable_device(dev);
-	if (ret < 0)
-		goto exit_free;
-
-	pci_set_master(dev);
-
-	if (pci_request_region(dev, 0, type))
-		goto exit_disable;
-
-	res = &dev->resource[0];
-	if (acpi_register_ioapic(ioapic->handle, res->start, ioapic->gsi_base))
-		goto exit_release;
-
-	pci_set_drvdata(dev, ioapic);
-	dev_info(&dev->dev, "%s at %pR, GSI %u\n", type, res, ioapic->gsi_base);
-	return 0;
-
-exit_release:
-	pci_release_region(dev, 0);
-exit_disable:
-	pci_disable_device(dev);
-exit_free:
-	kfree(ioapic);
-	return -ENODEV;
-}
-
-static void ioapic_remove(struct pci_dev *dev)
-{
-	struct ioapic *ioapic = pci_get_drvdata(dev);
-
-	acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base);
-	pci_release_region(dev, 0);
-	pci_disable_device(dev);
-	kfree(ioapic);
-}
-
-
-static const struct pci_device_id ioapic_devices[] = {
-	{ PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOAPIC, ~0) },
-	{ PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOXAPIC, ~0) },
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, ioapic_devices);
-
-static struct pci_driver ioapic_driver = {
-	.name		= "ioapic",
-	.id_table	= ioapic_devices,
-	.probe		= ioapic_probe,
-	.remove		= ioapic_remove,
-};
-
-static int __init ioapic_init(void)
-{
-	return pci_register_driver(&ioapic_driver);
-}
-module_init(ioapic_init);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 8156b4c..3925db1 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -42,28 +42,6 @@
 
 #include "lustre_patchless_compat.h"
 
-# define LOCK_FS_STRUCT(fs)	spin_lock(&(fs)->lock)
-# define UNLOCK_FS_STRUCT(fs)	spin_unlock(&(fs)->lock)
-
-static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-				 struct dentry *dentry)
-{
-	struct path path;
-	struct path old_pwd;
-
-	path.mnt = mnt;
-	path.dentry = dentry;
-	LOCK_FS_STRUCT(fs);
-	old_pwd = fs->pwd;
-	path_get(&path);
-	fs->pwd = path;
-	UNLOCK_FS_STRUCT(fs);
-
-	if (old_pwd.dentry)
-		path_put(&old_pwd);
-}
-
-
 /*
  * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
  * ATTR_* attributes (see bug 13828)
@@ -110,8 +88,6 @@
 #define cfs_bio_io_error(a, b)   bio_io_error((a))
 #define cfs_bio_endio(a, b, c)    bio_endio((a), (c))
 
-#define cfs_fs_pwd(fs)       ((fs)->pwd.dentry)
-#define cfs_fs_mnt(fs)       ((fs)->pwd.mnt)
 #define cfs_path_put(nd)     path_put(&(nd)->path)
 
 
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 407718a..1ac7a70 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -661,7 +661,7 @@
 	int mode;
 	int err;
 
-	mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
+	mode = (0755 & ~current_umask()) | S_IFDIR;
 	op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
 				     strlen(filename), mode, LUSTRE_OPC_MKDIR,
 				     lump);
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 6e423aa..a3367bf 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -2372,21 +2372,6 @@
 	return buf;
 }
 
-static char *ll_d_path(struct dentry *dentry, char *buf, int bufsize)
-{
-	char *path = NULL;
-
-	struct path p;
-
-	p.dentry = dentry;
-	p.mnt = current->fs->root.mnt;
-	path_get(&p);
-	path = d_path(&p, buf, bufsize);
-	path_put(&p);
-
-	return path;
-}
-
 void ll_dirty_page_discard_warn(struct page *page, int ioret)
 {
 	char *buf, *path = NULL;
@@ -2398,7 +2383,7 @@
 	if (buf != NULL) {
 		dentry = d_find_alias(page->mapping->host);
 		if (dentry != NULL)
-			path = ll_d_path(dentry, buf, PAGE_SIZE);
+			path = dentry_path_raw(dentry, buf, PAGE_SIZE);
 	}
 
 	CDEBUG(D_WARNING,
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 73e58d2..f8ec322 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -609,6 +609,7 @@
 
 	return ret;
 r2t_out:
+	iscsit_unregister_transport(&iscsi_target_transport);
 	kmem_cache_destroy(lio_r2t_cache);
 ooo_out:
 	kmem_cache_destroy(lio_ooo_cache);
diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
index 302eb3b..09a522b 100644
--- a/drivers/target/iscsi/iscsi_target_core.h
+++ b/drivers/target/iscsi/iscsi_target_core.h
@@ -790,7 +790,6 @@
 	void			*np_context;
 	struct iscsit_transport *np_transport;
 	struct list_head	np_list;
-	struct iscsi_tpg_np	*tpg_np;
 } ____cacheline_aligned;
 
 struct iscsi_tpg_np {
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 480f2e0..713c0c1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -281,7 +281,6 @@
 {
 	struct iscsi_session *sess = NULL;
 	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
-	enum target_prot_op sup_pro_ops;
 	int ret;
 
 	sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL);
@@ -343,9 +342,8 @@
 		kfree(sess);
 		return -ENOMEM;
 	}
-	sup_pro_ops = conn->conn_transport->iscsit_get_sup_prot_ops(conn);
 
-	sess->se_sess = transport_init_session(sup_pro_ops);
+	sess->se_sess = transport_init_session(TARGET_PROT_NORMAL);
 	if (IS_ERR(sess->se_sess)) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
@@ -1161,6 +1159,7 @@
 	}
 	kfree(conn->sess->sess_ops);
 	kfree(conn->sess);
+	conn->sess = NULL;
 
 old_sess_out:
 	iscsi_stop_login_thread_timer(np);
@@ -1204,6 +1203,9 @@
 		conn->sock = NULL;
 	}
 
+	if (conn->conn_transport->iscsit_wait_conn)
+		conn->conn_transport->iscsit_wait_conn(conn);
+
 	if (conn->conn_transport->iscsit_free_conn)
 		conn->conn_transport->iscsit_free_conn(conn);
 
@@ -1364,6 +1366,9 @@
 	}
 	login->zero_tsih = zero_tsih;
 
+	conn->sess->se_sess->sup_prot_ops =
+		conn->conn_transport->iscsit_get_sup_prot_ops(conn);
+
 	tpg = conn->tpg;
 	if (!tpg) {
 		pr_err("Unable to locate struct iscsi_conn->tpg\n");
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index c3cb5c1..9053a3c 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -501,7 +501,6 @@
 	init_completion(&tpg_np->tpg_np_comp);
 	kref_init(&tpg_np->tpg_np_kref);
 	tpg_np->tpg_np		= np;
-	np->tpg_np		= tpg_np;
 	tpg_np->tpg		= tpg;
 
 	spin_lock(&tpg->tpg_np_lock);
diff --git a/drivers/target/iscsi/iscsi_target_transport.c b/drivers/target/iscsi/iscsi_target_transport.c
index 882728f..08217d6 100644
--- a/drivers/target/iscsi/iscsi_target_transport.c
+++ b/drivers/target/iscsi/iscsi_target_transport.c
@@ -26,8 +26,7 @@
 
 void iscsit_put_transport(struct iscsit_transport *t)
 {
-	if (t->owner)
-		module_put(t->owner);
+	module_put(t->owner);
 }
 
 int iscsit_register_transport(struct iscsit_transport *t)
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7c6a95b..bcd88ec 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1356,15 +1356,15 @@
 	struct iscsi_conn *conn,
 	struct iscsi_data_count *count)
 {
-	int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
+	int ret, iov_len;
 	struct kvec *iov_p;
 	struct msghdr msg;
 
 	if (!conn || !conn->sock || !conn->conn_ops)
 		return -1;
 
-	if (data <= 0) {
-		pr_err("Data length is: %d\n", data);
+	if (count->data_length <= 0) {
+		pr_err("Data length is: %d\n", count->data_length);
 		return -1;
 	}
 
@@ -1373,20 +1373,16 @@
 	iov_p = count->iov;
 	iov_len = count->iov_count;
 
-	while (total_tx < data) {
-		tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
-					(data - total_tx));
-		if (tx_loop <= 0) {
-			pr_debug("tx_loop: %d total_tx %d\n",
-				tx_loop, total_tx);
-			return tx_loop;
-		}
-		total_tx += tx_loop;
-		pr_debug("tx_loop: %d, total_tx: %d, data: %d\n",
-					tx_loop, total_tx, data);
+	ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
+			     count->data_length);
+	if (ret != count->data_length) {
+		pr_err("Unexpected ret: %d send data %d\n",
+		       ret, count->data_length);
+		return -EPIPE;
 	}
+	pr_debug("ret: %d, sent data: %d\n", ret, count->data_length);
 
-	return total_tx;
+	return ret;
 }
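
tx_data() now sends in a single call and treats a short send as fatal rather
than retrying. For reference, kernel_sendmsg() is declared in linux/net.h
taking the socket, msghdr, kvec array, vector count and total length; on a
blocking socket anything short of the full length means the connection is no
longer usable, hence the -EPIPE. Condensed:

	/* int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
	 *                    struct kvec *vec, size_t num, size_t size); */
	ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
			     count->data_length);
	if (ret != count->data_length)
		return -EPIPE;	/* short send: tear the connection down */
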
 
 int rx_data(
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 4d1b722..7172a71 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -138,7 +138,7 @@
 		set_host_byte(sc, DID_TRANSPORT_DISRUPTED);
 		goto out_done;
 	}
-	tl_nexus = tl_hba->tl_nexus;
+	tl_nexus = tl_tpg->tl_nexus;
 	if (!tl_nexus) {
 		scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus"
 				" does not exist\n");
@@ -218,16 +218,26 @@
  * to struct scsi_device
  */
 static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
-			      struct tcm_loop_nexus *tl_nexus,
 			      int lun, int task, enum tcm_tmreq_table tmr)
 {
 	struct se_cmd *se_cmd = NULL;
 	struct se_session *se_sess;
 	struct se_portal_group *se_tpg;
+	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
 	struct tcm_loop_tmr *tl_tmr = NULL;
 	int ret = TMR_FUNCTION_FAILED, rc;
 
+	/*
+	 * Locate the tl_nexus and se_sess pointers
+	 */
+	tl_nexus = tl_tpg->tl_nexus;
+	if (!tl_nexus) {
+		pr_err("Unable to perform device reset without"
+				" active I_T Nexus\n");
+		return ret;
+	}
+
 	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL);
 	if (!tl_cmd) {
 		pr_err("Unable to allocate memory for tl_cmd\n");
@@ -243,7 +253,7 @@
 
 	se_cmd = &tl_cmd->tl_se_cmd;
 	se_tpg = &tl_tpg->tl_se_tpg;
-	se_sess = tl_nexus->se_sess;
+	se_sess = tl_tpg->tl_nexus->se_sess;
 	/*
 	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
 	 */
@@ -288,7 +298,6 @@
 static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
-	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tpg *tl_tpg;
 	int ret = FAILED;
 
@@ -296,21 +305,8 @@
 	 * Locate the tcm_loop_hba_t pointer
 	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
-	/*
-	 * Locate the tl_nexus and se_sess pointers
-	 */
-	tl_nexus = tl_hba->tl_nexus;
-	if (!tl_nexus) {
-		pr_err("Unable to perform device reset without"
-				" active I_T Nexus\n");
-		return FAILED;
-	}
-
-	/*
-	 * Locate the tl_tpg pointer from TargetID in sc->device->id
-	 */
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
-	ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
+	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
 				 sc->request->tag, TMR_ABORT_TASK);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
@@ -322,7 +318,6 @@
 static int tcm_loop_device_reset(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
-	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tpg *tl_tpg;
 	int ret = FAILED;
 
@@ -330,20 +325,9 @@
 	 * Locate the tcm_loop_hba_t pointer
 	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
-	/*
-	 * Locate the tl_nexus and se_sess pointers
-	 */
-	tl_nexus = tl_hba->tl_nexus;
-	if (!tl_nexus) {
-		pr_err("Unable to perform device reset without"
-				" active I_T Nexus\n");
-		return FAILED;
-	}
-	/*
-	 * Locate the tl_tpg pointer from TargetID in sc->device->id
-	 */
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
-	ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
+
+	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
 				 0, TMR_LUN_RESET);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
@@ -940,8 +924,8 @@
 	struct tcm_loop_nexus *tl_nexus;
 	int ret = -ENOMEM;
 
-	if (tl_tpg->tl_hba->tl_nexus) {
-		pr_debug("tl_tpg->tl_hba->tl_nexus already exists\n");
+	if (tl_tpg->tl_nexus) {
+		pr_debug("tl_tpg->tl_nexus already exists\n");
 		return -EEXIST;
 	}
 	se_tpg = &tl_tpg->tl_se_tpg;
@@ -976,7 +960,7 @@
 	 */
 	__transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
 			tl_nexus->se_sess, tl_nexus);
-	tl_tpg->tl_hba->tl_nexus = tl_nexus;
+	tl_tpg->tl_nexus = tl_nexus;
 	pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated"
 		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
 		name);
@@ -992,12 +976,8 @@
 {
 	struct se_session *se_sess;
 	struct tcm_loop_nexus *tl_nexus;
-	struct tcm_loop_hba *tl_hba = tpg->tl_hba;
 
-	if (!tl_hba)
-		return -ENODEV;
-
-	tl_nexus = tl_hba->tl_nexus;
+	tl_nexus = tpg->tl_nexus;
 	if (!tl_nexus)
 		return -ENODEV;
 
@@ -1013,13 +993,13 @@
 	}
 
 	pr_debug("TCM_Loop_ConfigFS: Removing I_T Nexus to emulated"
-		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
+		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba),
 		tl_nexus->se_sess->se_node_acl->initiatorname);
 	/*
 	 * Release the SCSI I_T Nexus to the emulated SAS Target Port
 	 */
 	transport_deregister_session(tl_nexus->se_sess);
-	tpg->tl_hba->tl_nexus = NULL;
+	tpg->tl_nexus = NULL;
 	kfree(tl_nexus);
 	return 0;
 }
@@ -1035,7 +1015,7 @@
 	struct tcm_loop_nexus *tl_nexus;
 	ssize_t ret;
 
-	tl_nexus = tl_tpg->tl_hba->tl_nexus;
+	tl_nexus = tl_tpg->tl_nexus;
 	if (!tl_nexus)
 		return -ENODEV;
 
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index 54c59d0..6ae49f2 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -27,11 +27,6 @@
 };
 
 struct tcm_loop_nexus {
-	int it_nexus_active;
-	/*
-	 * Pointer to Linux/SCSI HBA from linux/include/scsi_host.h
-	 */
-	struct scsi_host *sh;
 	/*
 	 * Pointer to TCM session for I_T Nexus
 	 */
@@ -51,6 +46,7 @@
 	atomic_t tl_tpg_port_count;
 	struct se_portal_group tl_se_tpg;
 	struct tcm_loop_hba *tl_hba;
+	struct tcm_loop_nexus *tl_nexus;
 };
 
 struct tcm_loop_hba {
@@ -59,7 +55,6 @@
 	struct se_hba_s *se_hba;
 	struct se_lun *tl_hba_lun;
 	struct se_port *tl_hba_lun_sep;
-	struct tcm_loop_nexus *tl_nexus;
 	struct device dev;
 	struct Scsi_Host *sh;
 	struct tcm_loop_tpg tl_hba_tpgs[TL_TPGS_PER_HBA];
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 79f9296..75d89ad 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -50,6 +50,19 @@
 #include "target_core_rd.h"
 #include "target_core_xcopy.h"
 
+#define TB_CIT_SETUP(_name, _item_ops, _group_ops, _attrs)		\
+static void target_core_setup_##_name##_cit(struct se_subsystem_api *sa) \
+{									\
+	struct target_backend_cits *tbc = &sa->tb_cits;			\
+	struct config_item_type *cit = &tbc->tb_##_name##_cit;		\
+									\
+	cit->ct_item_ops = _item_ops;					\
+	cit->ct_group_ops = _group_ops;					\
+	cit->ct_attrs = _attrs;						\
+	cit->ct_owner = sa->owner;					\
+	pr_debug("Setup generic %s\n", __stringify(_name));		\
+}
+
 extern struct t10_alua_lu_gp *default_lu_gp;
 
 static LIST_HEAD(g_tf_list);
@@ -126,48 +139,57 @@
 
 	pr_debug("Target_Core_ConfigFS: REGISTER -> group: %p name:"
 			" %s\n", group, name);
-	/*
-	 * Below are some hardcoded request_module() calls to automatically
-	 * local fabric modules when the following is called:
-	 *
-	 * mkdir -p /sys/kernel/config/target/$MODULE_NAME
-	 *
-	 * Note that this does not limit which TCM fabric module can be
-	 * registered, but simply provids auto loading logic for modules with
-	 * mkdir(2) system calls with known TCM fabric modules.
-	 */
-	if (!strncmp(name, "iscsi", 5)) {
-		/*
-		 * Automatically load the LIO Target fabric module when the
-		 * following is called:
-		 *
-		 * mkdir -p $CONFIGFS/target/iscsi
-		 */
-		ret = request_module("iscsi_target_mod");
-		if (ret < 0) {
-			pr_err("request_module() failed for"
-				" iscsi_target_mod.ko: %d\n", ret);
-			return ERR_PTR(-EINVAL);
-		}
-	} else if (!strncmp(name, "loopback", 8)) {
-		/*
-		 * Automatically load the tcm_loop fabric module when the
-		 * following is called:
-		 *
-		 * mkdir -p $CONFIGFS/target/loopback
-		 */
-		ret = request_module("tcm_loop");
-		if (ret < 0) {
-			pr_err("request_module() failed for"
-				" tcm_loop.ko: %d\n", ret);
-			return ERR_PTR(-EINVAL);
-		}
-	}
 
 	tf = target_core_get_fabric(name);
 	if (!tf) {
-		pr_err("target_core_get_fabric() failed for %s\n",
+		pr_err("target_core_register_fabric() trying autoload for %s\n",
 			name);
+
+		/*
+		 * Below are some hardcoded request_module() calls to
+		 * automatically load fabric modules when the following is
+		 * called:
+		 *
+		 * mkdir -p /sys/kernel/config/target/$MODULE_NAME
+		 *
+		 * Note that this does not limit which TCM fabric module can
+		 * be registered, but simply provides auto-loading logic for
+		 * known TCM fabric modules on mkdir(2) system calls.
+		 */
+
+		if (!strncmp(name, "iscsi", 5)) {
+			/*
+			 * Automatically load the LIO Target fabric module when the
+			 * following is called:
+			 *
+			 * mkdir -p $CONFIGFS/target/iscsi
+			 */
+			ret = request_module("iscsi_target_mod");
+			if (ret < 0) {
+				pr_err("request_module() failed for"
+				       " iscsi_target_mod.ko: %d\n", ret);
+				return ERR_PTR(-EINVAL);
+			}
+		} else if (!strncmp(name, "loopback", 8)) {
+			/*
+			 * Automatically load the tcm_loop fabric module when the
+			 * following is called:
+			 *
+			 * mkdir -p $CONFIGFS/target/loopback
+			 */
+			ret = request_module("tcm_loop");
+			if (ret < 0) {
+				pr_err("request_module() failed for"
+				       " tcm_loop.ko: %d\n", ret);
+				return ERR_PTR(-EINVAL);
+			}
+		}
+
+		tf = target_core_get_fabric(name);
+	}
+
+	if (!tf) {
+		pr_err("target_core_get_fabric() failed for %s\n",
+		       name);
 		return ERR_PTR(-EINVAL);
 	}
 	pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:"
@@ -562,198 +584,21 @@
 // Stop functions called by external Target Fabrics Modules
 //############################################################################*/
 
-/* Start functions for struct config_item_type target_core_dev_attrib_cit */
-
-#define DEF_DEV_ATTRIB_SHOW(_name)					\
-static ssize_t target_core_dev_show_attr_##_name(			\
-	struct se_dev_attrib *da,					\
-	char *page)							\
-{									\
-	return snprintf(page, PAGE_SIZE, "%u\n",			\
-		(u32)da->da_dev->dev_attrib._name);			\
-}
-
-#define DEF_DEV_ATTRIB_STORE(_name)					\
-static ssize_t target_core_dev_store_attr_##_name(			\
-	struct se_dev_attrib *da,					\
-	const char *page,						\
-	size_t count)							\
-{									\
-	unsigned long val;						\
-	int ret;							\
-									\
-	ret = kstrtoul(page, 0, &val);				\
-	if (ret < 0) {							\
-		pr_err("kstrtoul() failed with"		\
-			" ret: %d\n", ret);				\
-		return -EINVAL;						\
-	}								\
-	ret = se_dev_set_##_name(da->da_dev, (u32)val);			\
-									\
-	return (!ret) ? count : -EINVAL;				\
-}
-
-#define DEF_DEV_ATTRIB(_name)						\
-DEF_DEV_ATTRIB_SHOW(_name);						\
-DEF_DEV_ATTRIB_STORE(_name);
-
-#define DEF_DEV_ATTRIB_RO(_name)					\
-DEF_DEV_ATTRIB_SHOW(_name);
+/* Start functions for struct config_item_type tb_dev_attrib_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_attrib, se_dev_attrib);
-#define SE_DEV_ATTR(_name, _mode)					\
-static struct target_core_dev_attrib_attribute				\
-			target_core_dev_attrib_##_name =		\
-		__CONFIGFS_EATTR(_name, _mode,				\
-		target_core_dev_show_attr_##_name,			\
-		target_core_dev_store_attr_##_name);
-
-#define SE_DEV_ATTR_RO(_name);						\
-static struct target_core_dev_attrib_attribute				\
-			target_core_dev_attrib_##_name =		\
-	__CONFIGFS_EATTR_RO(_name,					\
-	target_core_dev_show_attr_##_name);
-
-DEF_DEV_ATTRIB(emulate_model_alias);
-SE_DEV_ATTR(emulate_model_alias, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_dpo);
-SE_DEV_ATTR(emulate_dpo, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_fua_write);
-SE_DEV_ATTR(emulate_fua_write, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_fua_read);
-SE_DEV_ATTR(emulate_fua_read, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_write_cache);
-SE_DEV_ATTR(emulate_write_cache, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_ua_intlck_ctrl);
-SE_DEV_ATTR(emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tas);
-SE_DEV_ATTR(emulate_tas, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tpu);
-SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tpws);
-SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_caw);
-SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_3pc);
-SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(pi_prot_type);
-SE_DEV_ATTR(pi_prot_type, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_pi_prot_type);
-SE_DEV_ATTR_RO(hw_pi_prot_type);
-
-DEF_DEV_ATTRIB(pi_prot_format);
-SE_DEV_ATTR(pi_prot_format, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(enforce_pr_isids);
-SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(is_nonrot);
-SE_DEV_ATTR(is_nonrot, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_rest_reord);
-SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(force_pr_aptpl);
-SE_DEV_ATTR(force_pr_aptpl, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_block_size);
-SE_DEV_ATTR_RO(hw_block_size);
-
-DEF_DEV_ATTRIB(block_size);
-SE_DEV_ATTR(block_size, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_max_sectors);
-SE_DEV_ATTR_RO(hw_max_sectors);
-
-DEF_DEV_ATTRIB(fabric_max_sectors);
-SE_DEV_ATTR(fabric_max_sectors, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(optimal_sectors);
-SE_DEV_ATTR(optimal_sectors, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_queue_depth);
-SE_DEV_ATTR_RO(hw_queue_depth);
-
-DEF_DEV_ATTRIB(queue_depth);
-SE_DEV_ATTR(queue_depth, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_unmap_lba_count);
-SE_DEV_ATTR(max_unmap_lba_count, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_unmap_block_desc_count);
-SE_DEV_ATTR(max_unmap_block_desc_count, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(unmap_granularity);
-SE_DEV_ATTR(unmap_granularity, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(unmap_granularity_alignment);
-SE_DEV_ATTR(unmap_granularity_alignment, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_write_same_len);
-SE_DEV_ATTR(max_write_same_len, S_IRUGO | S_IWUSR);
-
 CONFIGFS_EATTR_OPS(target_core_dev_attrib, se_dev_attrib, da_group);
 
-static struct configfs_attribute *target_core_dev_attrib_attrs[] = {
-	&target_core_dev_attrib_emulate_model_alias.attr,
-	&target_core_dev_attrib_emulate_dpo.attr,
-	&target_core_dev_attrib_emulate_fua_write.attr,
-	&target_core_dev_attrib_emulate_fua_read.attr,
-	&target_core_dev_attrib_emulate_write_cache.attr,
-	&target_core_dev_attrib_emulate_ua_intlck_ctrl.attr,
-	&target_core_dev_attrib_emulate_tas.attr,
-	&target_core_dev_attrib_emulate_tpu.attr,
-	&target_core_dev_attrib_emulate_tpws.attr,
-	&target_core_dev_attrib_emulate_caw.attr,
-	&target_core_dev_attrib_emulate_3pc.attr,
-	&target_core_dev_attrib_pi_prot_type.attr,
-	&target_core_dev_attrib_hw_pi_prot_type.attr,
-	&target_core_dev_attrib_pi_prot_format.attr,
-	&target_core_dev_attrib_enforce_pr_isids.attr,
-	&target_core_dev_attrib_force_pr_aptpl.attr,
-	&target_core_dev_attrib_is_nonrot.attr,
-	&target_core_dev_attrib_emulate_rest_reord.attr,
-	&target_core_dev_attrib_hw_block_size.attr,
-	&target_core_dev_attrib_block_size.attr,
-	&target_core_dev_attrib_hw_max_sectors.attr,
-	&target_core_dev_attrib_fabric_max_sectors.attr,
-	&target_core_dev_attrib_optimal_sectors.attr,
-	&target_core_dev_attrib_hw_queue_depth.attr,
-	&target_core_dev_attrib_queue_depth.attr,
-	&target_core_dev_attrib_max_unmap_lba_count.attr,
-	&target_core_dev_attrib_max_unmap_block_desc_count.attr,
-	&target_core_dev_attrib_unmap_granularity.attr,
-	&target_core_dev_attrib_unmap_granularity_alignment.attr,
-	&target_core_dev_attrib_max_write_same_len.attr,
-	NULL,
-};
-
 static struct configfs_item_operations target_core_dev_attrib_ops = {
 	.show_attribute		= target_core_dev_attrib_attr_show,
 	.store_attribute	= target_core_dev_attrib_attr_store,
 };
 
-static struct config_item_type target_core_dev_attrib_cit = {
-	.ct_item_ops		= &target_core_dev_attrib_ops,
-	.ct_attrs		= target_core_dev_attrib_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL);
 
-/* End functions for struct config_item_type target_core_dev_attrib_cit */
+/* End functions for struct config_item_type tb_dev_attrib_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_wwn_cit */
+/*  Start functions for struct config_item_type tb_dev_wwn_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_wwn, t10_wwn);
 #define SE_DEV_WWN_ATTR(_name, _mode)					\
@@ -984,15 +829,11 @@
 	.store_attribute	= target_core_dev_wwn_attr_store,
 };
 
-static struct config_item_type target_core_dev_wwn_cit = {
-	.ct_item_ops		= &target_core_dev_wwn_ops,
-	.ct_attrs		= target_core_dev_wwn_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_wwn, &target_core_dev_wwn_ops, NULL, target_core_dev_wwn_attrs);
 
-/*  End functions for struct config_item_type target_core_dev_wwn_cit */
+/*  End functions for struct config_item_type tb_dev_wwn_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_pr_cit */
+/*  Start functions for struct config_item_type tb_dev_pr_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_pr, se_device);
 #define SE_DEV_PR_ATTR(_name, _mode)					\
@@ -1453,15 +1294,11 @@
 	.store_attribute	= target_core_dev_pr_attr_store,
 };
 
-static struct config_item_type target_core_dev_pr_cit = {
-	.ct_item_ops		= &target_core_dev_pr_ops,
-	.ct_attrs		= target_core_dev_pr_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_pr, &target_core_dev_pr_ops, NULL, target_core_dev_pr_attrs);
 
-/*  End functions for struct config_item_type target_core_dev_pr_cit */
+/*  End functions for struct config_item_type tb_dev_pr_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_cit */
+/*  Start functions for struct config_item_type tb_dev_cit */
 
 static ssize_t target_core_show_dev_info(void *p, char *page)
 {
@@ -1925,7 +1762,7 @@
 	.store	= target_core_store_dev_lba_map,
 };
 
-static struct configfs_attribute *lio_core_dev_attrs[] = {
+static struct configfs_attribute *target_core_dev_attrs[] = {
 	&target_core_attr_dev_info.attr,
 	&target_core_attr_dev_control.attr,
 	&target_core_attr_dev_alias.attr,
@@ -1984,13 +1821,9 @@
 	.store_attribute	= target_core_dev_store,
 };
 
-static struct config_item_type target_core_dev_cit = {
-	.ct_item_ops		= &target_core_dev_item_ops,
-	.ct_attrs		= lio_core_dev_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev, &target_core_dev_item_ops, NULL, target_core_dev_attrs);
 
-/* End functions for struct config_item_type target_core_dev_cit */
+/* End functions for struct config_item_type tb_dev_cit */
 
 /* Start functions for struct config_item_type target_core_alua_lu_gp_cit */
 
@@ -2670,7 +2503,7 @@
 
 /* End functions for struct config_item_type target_core_alua_tg_pt_gp_cit */
 
-/* Start functions for struct config_item_type target_core_alua_tg_pt_gps_cit */
+/* Start functions for struct config_item_type tb_alua_tg_pt_gps_cit */
 
 static struct config_group *target_core_alua_create_tg_pt_gp(
 	struct config_group *group,
@@ -2721,12 +2554,9 @@
 	.drop_item		= &target_core_alua_drop_tg_pt_gp,
 };
 
-static struct config_item_type target_core_alua_tg_pt_gps_cit = {
-	.ct_group_ops		= &target_core_alua_tg_pt_gps_group_ops,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_alua_tg_pt_gps, NULL, &target_core_alua_tg_pt_gps_group_ops, NULL);
 
-/* End functions for struct config_item_type target_core_alua_tg_pt_gps_cit */
+/* End functions for struct config_item_type tb_alua_tg_pt_gps_cit */
 
 /* Start functions for struct config_item_type target_core_alua_cit */
 
@@ -2744,7 +2574,7 @@
 
 /* End functions for struct config_item_type target_core_alua_cit */
 
-/* Start functions for struct config_item_type target_core_stat_cit */
+/* Start functions for struct config_item_type tb_dev_stat_cit */
 
 static struct config_group *target_core_stat_mkdir(
 	struct config_group *group,
@@ -2765,12 +2595,9 @@
 	.drop_item		= &target_core_stat_rmdir,
 };
 
-static struct config_item_type target_core_stat_cit = {
-	.ct_group_ops		= &target_core_stat_group_ops,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_stat, NULL, &target_core_stat_group_ops, NULL);
 
-/* End functions for struct config_item_type target_core_stat_cit */
+/* End functions for struct config_item_type tb_dev_stat_cit */
 
 /* Start functions for struct config_item_type target_core_hba_cit */
 
@@ -2806,17 +2633,17 @@
 	if (!dev_cg->default_groups)
 		goto out_free_device;
 
-	config_group_init_type_name(dev_cg, name, &target_core_dev_cit);
+	config_group_init_type_name(dev_cg, name, &t->tb_cits.tb_dev_cit);
 	config_group_init_type_name(&dev->dev_attrib.da_group, "attrib",
-			&target_core_dev_attrib_cit);
+			&t->tb_cits.tb_dev_attrib_cit);
 	config_group_init_type_name(&dev->dev_pr_group, "pr",
-			&target_core_dev_pr_cit);
+			&t->tb_cits.tb_dev_pr_cit);
 	config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn",
-			&target_core_dev_wwn_cit);
+			&t->tb_cits.tb_dev_wwn_cit);
 	config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group,
-			"alua", &target_core_alua_tg_pt_gps_cit);
+			"alua", &t->tb_cits.tb_dev_alua_tg_pt_gps_cit);
 	config_group_init_type_name(&dev->dev_stat_grps.stat_group,
-			"statistics", &target_core_stat_cit);
+			"statistics", &t->tb_cits.tb_dev_stat_cit);
 
 	dev_cg->default_groups[0] = &dev->dev_attrib.da_group;
 	dev_cg->default_groups[1] = &dev->dev_pr_group;
@@ -3110,6 +2937,17 @@
 
 /* Stop functions for struct config_item_type target_core_hba_cit */
 
+void target_core_setup_sub_cits(struct se_subsystem_api *sa)
+{
+	target_core_setup_dev_cit(sa);
+	target_core_setup_dev_attrib_cit(sa);
+	target_core_setup_dev_pr_cit(sa);
+	target_core_setup_dev_wwn_cit(sa);
+	target_core_setup_dev_alua_tg_pt_gps_cit(sa);
+	target_core_setup_dev_stat_cit(sa);
+}
+EXPORT_SYMBOL(target_core_setup_sub_cits);
+
 static int __init target_core_init_configfs(void)
 {
 	struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
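
For reference, TB_CIT_SETUP() is pure token pasting; expanding
TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL) from the
hunk above by hand gives:

	static void target_core_setup_dev_attrib_cit(struct se_subsystem_api *sa)
	{
		struct target_backend_cits *tbc = &sa->tb_cits;
		struct config_item_type *cit = &tbc->tb_dev_attrib_cit;

		cit->ct_item_ops = &target_core_dev_attrib_ops;
		cit->ct_group_ops = NULL;
		cit->ct_attrs = NULL;
		cit->ct_owner = sa->owner;
		pr_debug("Setup generic %s\n", __stringify(dev_attrib));
	}
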
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index c45f9e9..7653cfb 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -659,6 +659,7 @@
 			dev, dev->dev_attrib.max_unmap_lba_count);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_unmap_lba_count);
 
 int se_dev_set_max_unmap_block_desc_count(
 	struct se_device *dev,
@@ -670,6 +671,7 @@
 			dev, dev->dev_attrib.max_unmap_block_desc_count);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_unmap_block_desc_count);
 
 int se_dev_set_unmap_granularity(
 	struct se_device *dev,
@@ -680,6 +682,7 @@
 			dev, dev->dev_attrib.unmap_granularity);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_unmap_granularity);
 
 int se_dev_set_unmap_granularity_alignment(
 	struct se_device *dev,
@@ -690,6 +693,7 @@
 			dev, dev->dev_attrib.unmap_granularity_alignment);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment);
 
 int se_dev_set_max_write_same_len(
 	struct se_device *dev,
@@ -700,6 +704,7 @@
 			dev, dev->dev_attrib.max_write_same_len);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_write_same_len);
 
 static void dev_set_t10_wwn_model_alias(struct se_device *dev)
 {
@@ -738,6 +743,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_model_alias);
 
 int se_dev_set_emulate_dpo(struct se_device *dev, int flag)
 {
@@ -753,6 +759,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_dpo);
 
 int se_dev_set_emulate_fua_write(struct se_device *dev, int flag)
 {
@@ -760,17 +767,12 @@
 		pr_err("Illegal value %d\n", flag);
 		return -EINVAL;
 	}
-
-	if (flag &&
-	    dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("emulate_fua_write not supported for pSCSI\n");
-		return -EINVAL;
-	}
 	dev->dev_attrib.emulate_fua_write = flag;
 	pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n",
 			dev, dev->dev_attrib.emulate_fua_write);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_fua_write);
 
 int se_dev_set_emulate_fua_read(struct se_device *dev, int flag)
 {
@@ -786,6 +788,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
 
 int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 {
@@ -794,11 +797,6 @@
 		return -EINVAL;
 	}
 	if (flag &&
-	    dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("emulate_write_cache not supported for pSCSI\n");
-		return -EINVAL;
-	}
-	if (flag &&
 	    dev->transport->get_write_cache) {
 		pr_err("emulate_write_cache not supported for this device\n");
 		return -EINVAL;
@@ -809,6 +807,7 @@
 			dev, dev->dev_attrib.emulate_write_cache);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_write_cache);
 
 int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag)
 {
@@ -829,6 +828,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_ua_intlck_ctrl);
 
 int se_dev_set_emulate_tas(struct se_device *dev, int flag)
 {
@@ -849,6 +849,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tas);
 
 int se_dev_set_emulate_tpu(struct se_device *dev, int flag)
 {
@@ -870,6 +871,7 @@
 				dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tpu);
 
 int se_dev_set_emulate_tpws(struct se_device *dev, int flag)
 {
@@ -891,6 +893,7 @@
 				dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tpws);
 
 int se_dev_set_emulate_caw(struct se_device *dev, int flag)
 {
@@ -904,6 +907,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_caw);
 
 int se_dev_set_emulate_3pc(struct se_device *dev, int flag)
 {
@@ -917,6 +921,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_3pc);
 
 int se_dev_set_pi_prot_type(struct se_device *dev, int flag)
 {
@@ -970,6 +975,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_pi_prot_type);
 
 int se_dev_set_pi_prot_format(struct se_device *dev, int flag)
 {
@@ -1005,6 +1011,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_pi_prot_format);
 
 int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag)
 {
@@ -1017,6 +1024,7 @@
 		(dev->dev_attrib.enforce_pr_isids) ? "Enabled" : "Disabled");
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_enforce_pr_isids);
 
 int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag)
 {
@@ -1034,6 +1042,7 @@
 	pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_force_pr_aptpl);
 
 int se_dev_set_is_nonrot(struct se_device *dev, int flag)
 {
@@ -1046,6 +1055,7 @@
 	       dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_is_nonrot);
 
 int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag)
 {
@@ -1058,6 +1068,7 @@
 	pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_rest_reord);
 
 /*
  * Note, this can only be called on unexported SE Device Object.
@@ -1076,31 +1087,21 @@
 		return -EINVAL;
 	}
 
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
+	if (queue_depth > dev->dev_attrib.queue_depth) {
 		if (queue_depth > dev->dev_attrib.hw_queue_depth) {
-			pr_err("dev[%p]: Passed queue_depth: %u"
-				" exceeds TCM/SE_Device TCQ: %u\n",
-				dev, queue_depth,
+			pr_err("dev[%p]: Passed queue_depth:"
+				" %u exceeds TCM/SE_Device MAX"
+				" TCQ: %u\n", dev, queue_depth,
 				dev->dev_attrib.hw_queue_depth);
 			return -EINVAL;
 		}
-	} else {
-		if (queue_depth > dev->dev_attrib.queue_depth) {
-			if (queue_depth > dev->dev_attrib.hw_queue_depth) {
-				pr_err("dev[%p]: Passed queue_depth:"
-					" %u exceeds TCM/SE_Device MAX"
-					" TCQ: %u\n", dev, queue_depth,
-					dev->dev_attrib.hw_queue_depth);
-				return -EINVAL;
-			}
-		}
 	}
-
 	dev->dev_attrib.queue_depth = dev->queue_depth = queue_depth;
 	pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n",
 			dev, queue_depth);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_queue_depth);
 
 int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors)
 {
@@ -1123,22 +1124,12 @@
 				DA_STATUS_MAX_SECTORS_MIN);
 		return -EINVAL;
 	}
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		if (fabric_max_sectors > dev->dev_attrib.hw_max_sectors) {
-			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
-				" greater than TCM/SE_Device max_sectors:"
-				" %u\n", dev, fabric_max_sectors,
-				dev->dev_attrib.hw_max_sectors);
-			 return -EINVAL;
-		}
-	} else {
-		if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
-			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
-				" greater than DA_STATUS_MAX_SECTORS_MAX:"
-				" %u\n", dev, fabric_max_sectors,
-				DA_STATUS_MAX_SECTORS_MAX);
-			return -EINVAL;
-		}
+	if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
+		pr_err("dev[%p]: Passed fabric_max_sectors: %u"
+			" greater than DA_STATUS_MAX_SECTORS_MAX:"
+			" %u\n", dev, fabric_max_sectors,
+			DA_STATUS_MAX_SECTORS_MAX);
+		return -EINVAL;
 	}
 	/*
 	 * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
@@ -1155,6 +1146,7 @@
 			dev, fabric_max_sectors);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_fabric_max_sectors);
 
 int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors)
 {
@@ -1164,11 +1156,6 @@
 			dev, dev->export_count);
 		return -EINVAL;
 	}
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("dev[%p]: Passed optimal_sectors cannot be"
-				" changed for TCM/pSCSI\n", dev);
-		return -EINVAL;
-	}
 	if (optimal_sectors > dev->dev_attrib.fabric_max_sectors) {
 		pr_err("dev[%p]: Passed optimal_sectors %u cannot be"
 			" greater than fabric_max_sectors: %u\n", dev,
@@ -1181,6 +1168,7 @@
 			dev, optimal_sectors);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_optimal_sectors);
 
 int se_dev_set_block_size(struct se_device *dev, u32 block_size)
 {
@@ -1201,13 +1189,6 @@
 		return -EINVAL;
 	}
 
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("dev[%p]: Not allowed to change block_size for"
-			" Physical Device, use for Linux/SCSI to change"
-			" block_size for underlying hardware\n", dev);
-		return -EINVAL;
-	}
-
 	dev->dev_attrib.block_size = block_size;
 	pr_debug("dev[%p]: SE Device block_size changed to %u\n",
 			dev, block_size);
@@ -1218,6 +1199,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_block_size);
 
 struct se_lun *core_dev_add_lun(
 	struct se_portal_group *tpg,
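
Note: the EXPORT_SYMBOL() additions above make the se_dev_set_*() attribute
setters callable from backend modules, and the pSCSI-only branches they
carried are dropped (pSCSI now exposes only read-only hw_* attributes; see
target_core_pscsi.c below). The simplified queue-depth rule, restated as a
standalone paraphrase (check_queue_depth() is not a kernel symbol):

	/* A requested depth may exceed the soft default only up to
	 * the hardware limit. */
	static int check_queue_depth(u32 requested, u32 soft, u32 hw)
	{
		if (requested > soft && requested > hw)
			return -EINVAL;
		return 0;
	}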
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 72c83d9..c2aea09 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -37,6 +37,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_file.h"
 
@@ -934,6 +935,42 @@
 	return sbc_parse_cdb(cmd, &fd_sbc_ops);
 }
 
+DEF_TB_DEFAULT_ATTRIBS(fileio);
+
+static struct configfs_attribute *fileio_backend_dev_attrs[] = {
+	&fileio_dev_attrib_emulate_model_alias.attr,
+	&fileio_dev_attrib_emulate_dpo.attr,
+	&fileio_dev_attrib_emulate_fua_write.attr,
+	&fileio_dev_attrib_emulate_fua_read.attr,
+	&fileio_dev_attrib_emulate_write_cache.attr,
+	&fileio_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&fileio_dev_attrib_emulate_tas.attr,
+	&fileio_dev_attrib_emulate_tpu.attr,
+	&fileio_dev_attrib_emulate_tpws.attr,
+	&fileio_dev_attrib_emulate_caw.attr,
+	&fileio_dev_attrib_emulate_3pc.attr,
+	&fileio_dev_attrib_pi_prot_type.attr,
+	&fileio_dev_attrib_hw_pi_prot_type.attr,
+	&fileio_dev_attrib_pi_prot_format.attr,
+	&fileio_dev_attrib_enforce_pr_isids.attr,
+	&fileio_dev_attrib_is_nonrot.attr,
+	&fileio_dev_attrib_emulate_rest_reord.attr,
+	&fileio_dev_attrib_force_pr_aptpl.attr,
+	&fileio_dev_attrib_hw_block_size.attr,
+	&fileio_dev_attrib_block_size.attr,
+	&fileio_dev_attrib_hw_max_sectors.attr,
+	&fileio_dev_attrib_fabric_max_sectors.attr,
+	&fileio_dev_attrib_optimal_sectors.attr,
+	&fileio_dev_attrib_hw_queue_depth.attr,
+	&fileio_dev_attrib_queue_depth.attr,
+	&fileio_dev_attrib_max_unmap_lba_count.attr,
+	&fileio_dev_attrib_max_unmap_block_desc_count.attr,
+	&fileio_dev_attrib_unmap_granularity.attr,
+	&fileio_dev_attrib_unmap_granularity_alignment.attr,
+	&fileio_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api fileio_template = {
 	.name			= "fileio",
 	.inquiry_prod		= "FILEIO",
@@ -957,6 +994,11 @@
 
 static int __init fileio_module_init(void)
 {
+	struct target_backend_cits *tbc = &fileio_template.tb_cits;
+
+	target_core_setup_sub_cits(&fileio_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = fileio_backend_dev_attrs;
+
 	return transport_subsystem_register(&fileio_template);
 }
 
diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c
index a25051a..ff95f95 100644
--- a/drivers/target/target_core_hba.c
+++ b/drivers/target/target_core_hba.c
@@ -36,6 +36,7 @@
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
 #include <target/target_core_fabric.h>
+#include <target/target_core_configfs.h>
 
 #include "target_core_internal.h"
 
@@ -137,8 +138,7 @@
 	return hba;
 
 out_module_put:
-	if (hba->transport->owner)
-		module_put(hba->transport->owner);
+	module_put(hba->transport->owner);
 	hba->transport = NULL;
 out_free_hba:
 	kfree(hba);
@@ -159,8 +159,7 @@
 	pr_debug("CORE_HBA[%d] - Detached HBA from Generic Target"
 			" Core\n", hba->hba_id);
 
-	if (hba->transport->owner)
-		module_put(hba->transport->owner);
+	module_put(hba->transport->owner);
 
 	hba->transport = NULL;
 	kfree(hba);
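
Note: both dropped owner checks above rely on module_put() accepting a NULL
module, which it does — paraphrasing the helper in kernel/module.c:

	void module_put(struct module *module)
	{
		if (module) {
			/* drop the reference and wake up any waiter */
		}
	}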
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 7e6b857..3efff94 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -41,6 +41,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_iblock.h"
 
@@ -858,6 +859,42 @@
 	return q->flush_flags & REQ_FLUSH;
 }
 
+DEF_TB_DEFAULT_ATTRIBS(iblock);
+
+static struct configfs_attribute *iblock_backend_dev_attrs[] = {
+	&iblock_dev_attrib_emulate_model_alias.attr,
+	&iblock_dev_attrib_emulate_dpo.attr,
+	&iblock_dev_attrib_emulate_fua_write.attr,
+	&iblock_dev_attrib_emulate_fua_read.attr,
+	&iblock_dev_attrib_emulate_write_cache.attr,
+	&iblock_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&iblock_dev_attrib_emulate_tas.attr,
+	&iblock_dev_attrib_emulate_tpu.attr,
+	&iblock_dev_attrib_emulate_tpws.attr,
+	&iblock_dev_attrib_emulate_caw.attr,
+	&iblock_dev_attrib_emulate_3pc.attr,
+	&iblock_dev_attrib_pi_prot_type.attr,
+	&iblock_dev_attrib_hw_pi_prot_type.attr,
+	&iblock_dev_attrib_pi_prot_format.attr,
+	&iblock_dev_attrib_enforce_pr_isids.attr,
+	&iblock_dev_attrib_is_nonrot.attr,
+	&iblock_dev_attrib_emulate_rest_reord.attr,
+	&iblock_dev_attrib_force_pr_aptpl.attr,
+	&iblock_dev_attrib_hw_block_size.attr,
+	&iblock_dev_attrib_block_size.attr,
+	&iblock_dev_attrib_hw_max_sectors.attr,
+	&iblock_dev_attrib_fabric_max_sectors.attr,
+	&iblock_dev_attrib_optimal_sectors.attr,
+	&iblock_dev_attrib_hw_queue_depth.attr,
+	&iblock_dev_attrib_queue_depth.attr,
+	&iblock_dev_attrib_max_unmap_lba_count.attr,
+	&iblock_dev_attrib_max_unmap_block_desc_count.attr,
+	&iblock_dev_attrib_unmap_granularity.attr,
+	&iblock_dev_attrib_unmap_granularity_alignment.attr,
+	&iblock_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api iblock_template = {
 	.name			= "iblock",
 	.inquiry_prod		= "IBLOCK",
@@ -883,6 +920,11 @@
 
 static int __init iblock_module_init(void)
 {
+	struct target_backend_cits *tbc = &iblock_template.tb_cits;
+
+	target_core_setup_sub_cits(&iblock_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = iblock_backend_dev_attrs;
+
 	return transport_subsystem_register(&iblock_template);
 }
 
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index e31f42f..60381db 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -18,34 +18,6 @@
 		struct se_lun *);
 void	core_dev_unexport(struct se_device *, struct se_portal_group *,
 		struct se_lun *);
-int	se_dev_set_task_timeout(struct se_device *, u32);
-int	se_dev_set_max_unmap_lba_count(struct se_device *, u32);
-int	se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
-int	se_dev_set_unmap_granularity(struct se_device *, u32);
-int	se_dev_set_unmap_granularity_alignment(struct se_device *, u32);
-int	se_dev_set_max_write_same_len(struct se_device *, u32);
-int	se_dev_set_emulate_model_alias(struct se_device *, int);
-int	se_dev_set_emulate_dpo(struct se_device *, int);
-int	se_dev_set_emulate_fua_write(struct se_device *, int);
-int	se_dev_set_emulate_fua_read(struct se_device *, int);
-int	se_dev_set_emulate_write_cache(struct se_device *, int);
-int	se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
-int	se_dev_set_emulate_tas(struct se_device *, int);
-int	se_dev_set_emulate_tpu(struct se_device *, int);
-int	se_dev_set_emulate_tpws(struct se_device *, int);
-int	se_dev_set_emulate_caw(struct se_device *, int);
-int	se_dev_set_emulate_3pc(struct se_device *, int);
-int	se_dev_set_pi_prot_type(struct se_device *, int);
-int	se_dev_set_pi_prot_format(struct se_device *, int);
-int	se_dev_set_enforce_pr_isids(struct se_device *, int);
-int	se_dev_set_force_pr_aptpl(struct se_device *, int);
-int	se_dev_set_is_nonrot(struct se_device *, int);
-int	se_dev_set_emulate_rest_reord(struct se_device *dev, int);
-int	se_dev_set_queue_depth(struct se_device *, u32);
-int	se_dev_set_max_sectors(struct se_device *, u32);
-int	se_dev_set_fabric_max_sectors(struct se_device *, u32);
-int	se_dev_set_optimal_sectors(struct se_device *, u32);
-int	se_dev_set_block_size(struct se_device *, u32);
 struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32);
 void	core_dev_del_lun(struct se_portal_group *, struct se_lun *);
 struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 4c261c3..d56f2aa 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -76,7 +76,7 @@
 };
 
 static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *,
-			struct t10_pr_registration *, int);
+					      struct t10_pr_registration *, int, int);
 
 static sense_reason_t
 target_scsi2_reservation_check(struct se_cmd *cmd)
@@ -1177,7 +1177,7 @@
 		 *    service action with the SERVICE ACTION RESERVATION KEY
 		 *    field set to zero (see 5.7.11.3).
 		 */
-		__core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0);
+		__core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0, 1);
 		ret = 1;
 		/*
 		 * For 'All Registrants' reservation types, all existing
@@ -1219,7 +1219,8 @@
 
 	pr_reg->pr_reg_deve->def_pr_registered = 0;
 	pr_reg->pr_reg_deve->pr_res_key = 0;
-	list_del(&pr_reg->pr_reg_list);
+	if (!list_empty(&pr_reg->pr_reg_list))
+		list_del(&pr_reg->pr_reg_list);
 	/*
 	 * Caller accessing *pr_reg using core_scsi3_locate_pr_reg(),
 	 * so call core_scsi3_put_pr_reg() to decrement our reference.
@@ -1271,6 +1272,7 @@
 {
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_res_holder;
+	bool free_reg = false;
 	/*
 	 * If the passed se_node_acl matches the reservation holder,
 	 * release the reservation.
@@ -1278,13 +1280,18 @@
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
 	if ((pr_res_holder != NULL) &&
-	    (pr_res_holder->pr_reg_nacl == nacl))
-		__core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0);
+	    (pr_res_holder->pr_reg_nacl == nacl)) {
+		__core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0, 1);
+		free_reg = true;
+	}
 	spin_unlock(&dev->dev_reservation_lock);
 	/*
 	 * Release any registration associated with the struct se_node_acl.
 	 */
 	spin_lock(&pr_tmpl->registration_lock);
+	if (pr_res_holder && free_reg)
+		__core_scsi3_free_registration(dev, pr_res_holder, NULL, 0);
+
 	list_for_each_entry_safe(pr_reg, pr_reg_tmp,
 			&pr_tmpl->registration_list, pr_reg_list) {
 
@@ -1307,7 +1314,7 @@
 	if (pr_res_holder != NULL) {
 		struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 		__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-				pr_res_holder, 0);
+						  pr_res_holder, 0, 0);
 	}
 	spin_unlock(&dev->dev_reservation_lock);
 
@@ -1429,14 +1436,12 @@
 	struct target_core_fabric_ops *tmp_tf_ops;
 	unsigned char *buf;
 	unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident;
-	char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN];
+	char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
 	sense_reason_t ret;
 	u32 tpdl, tid_len = 0;
 	int dest_local_nexus;
 	u32 dest_rtpi = 0;
 
-	memset(dest_iport, 0, 64);
-
 	local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Allocate a struct pr_transport_id_holder and setup the
@@ -2105,13 +2110,13 @@
 		/*
 		 * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus.
 		 */
-		pr_holder = core_scsi3_check_implicit_release(
-				cmd->se_dev, pr_reg);
+		type = pr_reg->pr_res_type;
+		pr_holder = core_scsi3_check_implicit_release(cmd->se_dev,
+							      pr_reg);
 		if (pr_holder < 0) {
 			ret = TCM_RESERVATION_CONFLICT;
 			goto out;
 		}
-		type = pr_reg->pr_res_type;
 
 		spin_lock(&pr_tmpl->registration_lock);
 		/*
@@ -2269,6 +2274,7 @@
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
 	if (pr_res_holder) {
+		int pr_res_type = pr_res_holder->pr_res_type;
 		/*
 		 * From spc4r17 Section 5.7.9: Reserving:
 		 *
@@ -2279,7 +2285,9 @@
 		 * the logical unit, then the command shall be completed with
 		 * RESERVATION CONFLICT status.
 		 */
-		if (pr_res_holder != pr_reg) {
+		if ((pr_res_holder != pr_reg) &&
+		    (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) &&
+		    (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) {
 			struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 			pr_err("SPC-3 PR: Attempted RESERVE from"
 				" [%s]: %s while reservation already held by"
@@ -2385,23 +2393,59 @@
 	struct se_device *dev,
 	struct se_node_acl *se_nacl,
 	struct t10_pr_registration *pr_reg,
-	int explicit)
+	int explicit,
+	int unreg)
 {
 	struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
 	char i_buf[PR_REG_ISID_ID_LEN];
+	int pr_res_type = 0, pr_res_scope = 0;
 
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN);
 	/*
 	 * Go ahead and release the current PR reservation holder.
+	 * If an All Registrants reservation is currently active and
+	 * an unregister operation is requested, replace the current
+	 * dev_pr_res_holder with another active registration.
 	 */
-	dev->dev_pr_res_holder = NULL;
+	if (dev->dev_pr_res_holder) {
+		pr_res_type = dev->dev_pr_res_holder->pr_res_type;
+		pr_res_scope = dev->dev_pr_res_holder->pr_res_scope;
+		dev->dev_pr_res_holder->pr_res_type = 0;
+		dev->dev_pr_res_holder->pr_res_scope = 0;
+		dev->dev_pr_res_holder->pr_res_holder = 0;
+		dev->dev_pr_res_holder = NULL;
+	}
+	if (!unreg)
+		goto out;
 
-	pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
-		" reservation holder TYPE: %s ALL_TG_PT: %d\n",
-		tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit",
-		core_scsi3_pr_dump_type(pr_reg->pr_res_type),
-		(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
+	spin_lock(&dev->t10_pr.registration_lock);
+	list_del_init(&pr_reg->pr_reg_list);
+	/*
+	 * If the I_T nexus is a reservation holder, the persistent reservation
+	 * is of an all registrants type, and the I_T nexus is the last remaining
+	 * registered I_T nexus, then the device server shall also release the
+	 * persistent reservation.
+	 */
+	if (!list_empty(&dev->t10_pr.registration_list) &&
+	    ((pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) ||
+	     (pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))) {
+		dev->dev_pr_res_holder =
+			list_entry(dev->t10_pr.registration_list.next,
+				   struct t10_pr_registration, pr_reg_list);
+		dev->dev_pr_res_holder->pr_res_type = pr_res_type;
+		dev->dev_pr_res_holder->pr_res_scope = pr_res_scope;
+		dev->dev_pr_res_holder->pr_res_holder = 1;
+	}
+	spin_unlock(&dev->t10_pr.registration_lock);
+out:
+	if (!dev->dev_pr_res_holder) {
+		pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
+			" reservation holder TYPE: %s ALL_TG_PT: %d\n",
+			tfo->get_fabric_name(), (explicit) ? "explicit" :
+			"implicit", core_scsi3_pr_dump_type(pr_res_type),
+			(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
+	}
 	pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n",
 		tfo->get_fabric_name(), se_nacl->initiatorname,
 		i_buf);
@@ -2532,7 +2576,7 @@
 	 *    server shall not establish a unit attention condition.
 	 */
 	__core_scsi3_complete_pro_release(dev, se_sess->se_node_acl,
-			pr_reg, 1);
+					  pr_reg, 1, 0);
 
 	spin_unlock(&dev->dev_reservation_lock);
 
@@ -2620,7 +2664,7 @@
 	if (pr_res_holder) {
 		struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 		__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-			pr_res_holder, 0);
+						  pr_res_holder, 0, 0);
 	}
 	spin_unlock(&dev->dev_reservation_lock);
 	/*
@@ -2679,7 +2723,7 @@
 	 */
 	if (dev->dev_pr_res_holder)
 		__core_scsi3_complete_pro_release(dev, nacl,
-				dev->dev_pr_res_holder, 0);
+						  dev->dev_pr_res_holder, 0, 0);
 
 	dev->dev_pr_res_holder = pr_reg;
 	pr_reg->pr_res_holder = 1;
@@ -2924,8 +2968,8 @@
 	 */
 	if (pr_reg_n != pr_res_holder)
 		__core_scsi3_complete_pro_release(dev,
-				pr_res_holder->pr_reg_nacl,
-				dev->dev_pr_res_holder, 0);
+						  pr_res_holder->pr_reg_nacl,
+						  dev->dev_pr_res_holder, 0, 0);
 	/*
 	 * b) Remove the registrations for all I_T nexuses identified
 	 *    by the SERVICE ACTION RESERVATION KEY field, except the
@@ -3059,7 +3103,7 @@
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	unsigned char *buf;
 	unsigned char *initiator_str;
-	char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN];
+	char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
 	u32 tid_len, tmp_tid_len;
 	int new_reg = 0, type, scope, matching_iname;
 	sense_reason_t ret;
@@ -3071,7 +3115,6 @@
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
 
-	memset(dest_iport, 0, 64);
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	se_tpg = se_sess->se_tpg;
 	tf_ops = se_tpg->se_tpg_tfo;
@@ -3389,7 +3432,7 @@
 	 *    holder (i.e., the I_T nexus on which the
 	 */
 	__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-			dev->dev_pr_res_holder, 0);
+					  dev->dev_pr_res_holder, 0, 0);
 	/*
 	 * g) Move the persistent reservation to the specified I_T nexus using
 	 *    the same scope and type as the persistent reservation released in
@@ -3837,7 +3880,8 @@
 	unsigned char *buf;
 	u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len;
 	u32 off = 8; /* off into first Full Status descriptor */
-	int format_code = 0;
+	int format_code = 0, pr_res_type = 0, pr_res_scope = 0;
+	bool all_reg = false;
 
 	if (cmd->data_length < 8) {
 		pr_err("PRIN SA READ_FULL_STATUS SCSI Data Length: %u"
@@ -3854,6 +3898,19 @@
 	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
 	buf[3] = (dev->t10_pr.pr_generation & 0xff);
 
+	spin_lock(&dev->dev_reservation_lock);
+	if (dev->dev_pr_res_holder) {
+		struct t10_pr_registration *pr_holder = dev->dev_pr_res_holder;
+
+		if (pr_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG ||
+		    pr_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG) {
+			all_reg = true;
+			pr_res_type = pr_holder->pr_res_type;
+			pr_res_scope = pr_holder->pr_res_scope;
+		}
+	}
+	spin_unlock(&dev->dev_reservation_lock);
+
 	spin_lock(&pr_tmpl->registration_lock);
 	list_for_each_entry_safe(pr_reg, pr_reg_tmp,
 			&pr_tmpl->registration_list, pr_reg_list) {
@@ -3901,14 +3958,20 @@
 		 * reservation holder for PR_HOLDER bit.
 		 *
 		 * Also, if this registration is the reservation
-		 * holder, fill in SCOPE and TYPE in the next byte.
+		 * holder or there is an All Registrants reservation
+		 * active, fill in SCOPE and TYPE in the next byte.
 		 */
 		if (pr_reg->pr_res_holder) {
 			buf[off++] |= 0x01;
 			buf[off++] = (pr_reg->pr_res_scope & 0xf0) |
 				     (pr_reg->pr_res_type & 0x0f);
-		} else
+		} else if (all_reg) {
+			buf[off++] |= 0x01;
+			buf[off++] = (pr_res_scope & 0xf0) |
+				     (pr_res_type & 0x0f);
+		} else {
 			off += 2;
+		}
 
 		off += 4; /* Skip over reserved area */
 		/*
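
Note: the 'unreg' parameter threaded through
__core_scsi3_complete_pro_release() distinguishes UNREGISTER (unreg=1) from
the plain RELEASE and PREEMPT paths (unreg=0); only the former may hand the
reservation to a surviving registrant. The rule the hand-off hunk encodes,
restated as an illustrative predicate (not a kernel symbol):

	static bool reservation_survives_unregister(int pr_res_type,
						    bool others_registered)
	{
		/* An All Registrants reservation outlives the departing
		 * I_T nexus while any other registration remains. */
		return others_registered &&
		       (pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG ||
			pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG);
	}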
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c8291f..74873e4 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -44,6 +44,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_alua.h"
 #include "target_core_pscsi.h"
@@ -1165,6 +1166,26 @@
 	kfree(pt);
 }
 
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_pi_prot_type);
+TB_DEV_ATTR_RO(pscsi, hw_pi_prot_type);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_block_size);
+TB_DEV_ATTR_RO(pscsi, hw_block_size);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_max_sectors);
+TB_DEV_ATTR_RO(pscsi, hw_max_sectors);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_queue_depth);
+TB_DEV_ATTR_RO(pscsi, hw_queue_depth);
+
+static struct configfs_attribute *pscsi_backend_dev_attrs[] = {
+	&pscsi_dev_attrib_hw_pi_prot_type.attr,
+	&pscsi_dev_attrib_hw_block_size.attr,
+	&pscsi_dev_attrib_hw_max_sectors.attr,
+	&pscsi_dev_attrib_hw_queue_depth.attr,
+	NULL,
+};
+
 static struct se_subsystem_api pscsi_template = {
 	.name			= "pscsi",
 	.owner			= THIS_MODULE,
@@ -1185,6 +1206,11 @@
 
 static int __init pscsi_module_init(void)
 {
+	struct target_backend_cits *tbc = &pscsi_template.tb_cits;
+
+	target_core_setup_sub_cits(&pscsi_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = pscsi_backend_dev_attrs;
+
 	return transport_subsystem_register(&pscsi_template);
 }
 
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index b920db3..60ebd17 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -34,6 +34,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_rd.h"
 
@@ -632,6 +633,42 @@
 	return sbc_parse_cdb(cmd, &rd_sbc_ops);
 }
 
+DEF_TB_DEFAULT_ATTRIBS(rd_mcp);
+
+static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = {
+	&rd_mcp_dev_attrib_emulate_model_alias.attr,
+	&rd_mcp_dev_attrib_emulate_dpo.attr,
+	&rd_mcp_dev_attrib_emulate_fua_write.attr,
+	&rd_mcp_dev_attrib_emulate_fua_read.attr,
+	&rd_mcp_dev_attrib_emulate_write_cache.attr,
+	&rd_mcp_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&rd_mcp_dev_attrib_emulate_tas.attr,
+	&rd_mcp_dev_attrib_emulate_tpu.attr,
+	&rd_mcp_dev_attrib_emulate_tpws.attr,
+	&rd_mcp_dev_attrib_emulate_caw.attr,
+	&rd_mcp_dev_attrib_emulate_3pc.attr,
+	&rd_mcp_dev_attrib_pi_prot_type.attr,
+	&rd_mcp_dev_attrib_hw_pi_prot_type.attr,
+	&rd_mcp_dev_attrib_pi_prot_format.attr,
+	&rd_mcp_dev_attrib_enforce_pr_isids.attr,
+	&rd_mcp_dev_attrib_is_nonrot.attr,
+	&rd_mcp_dev_attrib_emulate_rest_reord.attr,
+	&rd_mcp_dev_attrib_force_pr_aptpl.attr,
+	&rd_mcp_dev_attrib_hw_block_size.attr,
+	&rd_mcp_dev_attrib_block_size.attr,
+	&rd_mcp_dev_attrib_hw_max_sectors.attr,
+	&rd_mcp_dev_attrib_fabric_max_sectors.attr,
+	&rd_mcp_dev_attrib_optimal_sectors.attr,
+	&rd_mcp_dev_attrib_hw_queue_depth.attr,
+	&rd_mcp_dev_attrib_queue_depth.attr,
+	&rd_mcp_dev_attrib_max_unmap_lba_count.attr,
+	&rd_mcp_dev_attrib_max_unmap_block_desc_count.attr,
+	&rd_mcp_dev_attrib_unmap_granularity.attr,
+	&rd_mcp_dev_attrib_unmap_granularity_alignment.attr,
+	&rd_mcp_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api rd_mcp_template = {
 	.name			= "rd_mcp",
 	.inquiry_prod		= "RAMDISK-MCP",
@@ -653,8 +690,12 @@
 
 int __init rd_module_init(void)
 {
+	struct target_backend_cits *tbc = &rd_mcp_template.tb_cits;
 	int ret;
 
+	target_core_setup_sub_cits(&rd_mcp_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = rd_mcp_backend_dev_attrs;
+
 	ret = transport_subsystem_register(&rd_mcp_template);
 	if (ret < 0) {
 		return ret;
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9a1b314..8bfa61c 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -28,6 +28,8 @@
 #include <target/target_core_base.h>
 #include <target/target_core_fabric.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
+
 #include <linux/target_core_user.h>
 
 /*
@@ -1092,6 +1094,42 @@
 	return ret;
 }
 
+DEF_TB_DEFAULT_ATTRIBS(tcmu);
+
+static struct configfs_attribute *tcmu_backend_dev_attrs[] = {
+	&tcmu_dev_attrib_emulate_model_alias.attr,
+	&tcmu_dev_attrib_emulate_dpo.attr,
+	&tcmu_dev_attrib_emulate_fua_write.attr,
+	&tcmu_dev_attrib_emulate_fua_read.attr,
+	&tcmu_dev_attrib_emulate_write_cache.attr,
+	&tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&tcmu_dev_attrib_emulate_tas.attr,
+	&tcmu_dev_attrib_emulate_tpu.attr,
+	&tcmu_dev_attrib_emulate_tpws.attr,
+	&tcmu_dev_attrib_emulate_caw.attr,
+	&tcmu_dev_attrib_emulate_3pc.attr,
+	&tcmu_dev_attrib_pi_prot_type.attr,
+	&tcmu_dev_attrib_hw_pi_prot_type.attr,
+	&tcmu_dev_attrib_pi_prot_format.attr,
+	&tcmu_dev_attrib_enforce_pr_isids.attr,
+	&tcmu_dev_attrib_is_nonrot.attr,
+	&tcmu_dev_attrib_emulate_rest_reord.attr,
+	&tcmu_dev_attrib_force_pr_aptpl.attr,
+	&tcmu_dev_attrib_hw_block_size.attr,
+	&tcmu_dev_attrib_block_size.attr,
+	&tcmu_dev_attrib_hw_max_sectors.attr,
+	&tcmu_dev_attrib_fabric_max_sectors.attr,
+	&tcmu_dev_attrib_optimal_sectors.attr,
+	&tcmu_dev_attrib_hw_queue_depth.attr,
+	&tcmu_dev_attrib_queue_depth.attr,
+	&tcmu_dev_attrib_max_unmap_lba_count.attr,
+	&tcmu_dev_attrib_max_unmap_block_desc_count.attr,
+	&tcmu_dev_attrib_unmap_granularity.attr,
+	&tcmu_dev_attrib_unmap_granularity_alignment.attr,
+	&tcmu_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api tcmu_template = {
 	.name			= "user",
 	.inquiry_prod		= "USER",
@@ -1112,6 +1150,7 @@
 
 static int __init tcmu_module_init(void)
 {
+	struct target_backend_cits *tbc = &tcmu_template.tb_cits;
 	int ret;
 
 	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
@@ -1134,6 +1173,9 @@
 		goto out_unreg_device;
 	}
 
+	target_core_setup_sub_cits(&tcmu_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs;
+
 	ret = transport_subsystem_register(&tcmu_template);
 	if (ret)
 		goto out_unreg_genl;
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index b46c706..e98b424 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -435,7 +435,6 @@
 		 * allowed. thus jiffies are updated properly.
 		 */
 		preempt_disable();
-		tick_nohz_idle_enter();
 		/* mwait until target jiffies is reached */
 		while (time_before(jiffies, target_jiffies)) {
 			unsigned long ecx = 1;
@@ -451,7 +450,6 @@
 			start_critical_timings();
 			atomic_inc(&idle_wakeup_counter);
 		}
-		tick_nohz_idle_exit();
 		preempt_enable();
 	}
 	del_timer_sync(&wakeup_timer);
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 5174eba..3bb02c6 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <uapi/linux/virtio_config.h>
 
 static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
 {
@@ -28,13 +29,14 @@
 
 /* Returns vring->num if empty, -ve on error. */
 static inline int __vringh_get_head(const struct vringh *vrh,
-				    int (*getu16)(u16 *val, const u16 *p),
+				    int (*getu16)(const struct vringh *vrh,
+						  u16 *val, const __virtio16 *p),
 				    u16 *last_avail_idx)
 {
 	u16 avail_idx, i, head;
 	int err;
 
-	err = getu16(&avail_idx, &vrh->vring.avail->idx);
+	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
 	if (err) {
 		vringh_bad("Failed to access avail idx at %p",
 			   &vrh->vring.avail->idx);
@@ -49,7 +51,7 @@
 
 	i = *last_avail_idx & (vrh->vring.num - 1);
 
-	err = getu16(&head, &vrh->vring.avail->ring[i]);
+	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
 	if (err) {
 		vringh_bad("Failed to read head: idx %d address %p",
 			   *last_avail_idx, &vrh->vring.avail->ring[i]);
@@ -144,28 +146,32 @@
 }
 
 /* No reason for this code to be inline. */
-static int move_to_indirect(int *up_next, u16 *i, void *addr,
+static int move_to_indirect(const struct vringh *vrh,
+			    int *up_next, u16 *i, void *addr,
 			    const struct vring_desc *desc,
 			    struct vring_desc **descs, int *desc_max)
 {
+	u32 len;
+
 	/* Indirect tables can't have indirect. */
 	if (*up_next != -1) {
 		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
 		return -EINVAL;
 	}
 
-	if (unlikely(desc->len % sizeof(struct vring_desc))) {
+	len = vringh32_to_cpu(vrh, desc->len);
+	if (unlikely(len % sizeof(struct vring_desc))) {
 		vringh_bad("Strange indirect len %u", desc->len);
 		return -EINVAL;
 	}
 
 	/* We will check this when we follow it! */
-	if (desc->flags & VRING_DESC_F_NEXT)
-		*up_next = desc->next;
+	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
+		*up_next = vringh16_to_cpu(vrh, desc->next);
 	else
 		*up_next = -2;
 	*descs = addr;
-	*desc_max = desc->len / sizeof(struct vring_desc);
+	*desc_max = len / sizeof(struct vring_desc);
 
 	/* Now, start at the first indirect. */
 	*i = 0;
@@ -287,22 +293,25 @@
 		if (unlikely(err))
 			goto fail;
 
-		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+		if (unlikely(desc.flags &
+			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
+			u64 a = vringh64_to_cpu(vrh, desc.addr);
+
 			/* Make sure it's OK, and get offset. */
-			len = desc.len;
-			if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
+			len = vringh32_to_cpu(vrh, desc.len);
+			if (!rcheck(vrh, a, &len, &range, getrange)) {
 				err = -EINVAL;
 				goto fail;
 			}
 
-			if (unlikely(len != desc.len)) {
+			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
 				slow = true;
 				/* We need to save this range to use offset */
 				slowrange = range;
 			}
 
-			addr = (void *)(long)(desc.addr + range.offset);
-			err = move_to_indirect(&up_next, &i, addr, &desc,
+			addr = (void *)(long)(a + range.offset);
+			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
 					       &descs, &desc_max);
 			if (err)
 				goto fail;
@@ -315,7 +324,7 @@
 			goto fail;
 		}
 
-		if (desc.flags & VRING_DESC_F_WRITE)
+		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
 			iov = wiov;
 		else {
 			iov = riov;
@@ -336,12 +345,14 @@
 
 	again:
 		/* Make sure it's OK, and get offset. */
-		len = desc.len;
-		if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
+		len = vringh32_to_cpu(vrh, desc.len);
+		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
+			    getrange)) {
 			err = -EINVAL;
 			goto fail;
 		}
-		addr = (void *)(unsigned long)(desc.addr + range.offset);
+		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
+					       range.offset);
 
 		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
 			err = resize_iovec(iov, gfp);
@@ -353,14 +364,16 @@
 		iov->iov[iov->used].iov_len = len;
 		iov->used++;
 
-		if (unlikely(len != desc.len)) {
-			desc.len -= len;
-			desc.addr += len;
+		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
+			desc.len = cpu_to_vringh32(vrh,
+				   vringh32_to_cpu(vrh, desc.len) - len);
+			desc.addr = cpu_to_vringh64(vrh,
+				    vringh64_to_cpu(vrh, desc.addr) + len);
 			goto again;
 		}
 
-		if (desc.flags & VRING_DESC_F_NEXT) {
-			i = desc.next;
+		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
+			i = vringh16_to_cpu(vrh, desc.next);
 		} else {
 			/* Just in case we need to finish traversing above. */
 			if (unlikely(up_next > 0)) {
@@ -387,7 +400,8 @@
 static inline int __vringh_complete(struct vringh *vrh,
 				    const struct vring_used_elem *used,
 				    unsigned int num_used,
-				    int (*putu16)(u16 *p, u16 val),
+				    int (*putu16)(const struct vringh *vrh,
+						  __virtio16 *p, u16 val),
 				    int (*putused)(struct vring_used_elem *dst,
 						   const struct vring_used_elem
 						   *src, unsigned num))
@@ -420,7 +434,7 @@
 	/* Make sure buffer is written before we update index. */
 	virtio_wmb(vrh->weak_barriers);
 
-	err = putu16(&vrh->vring.used->idx, used_idx + num_used);
+	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
 	if (err) {
 		vringh_bad("Failed to update used index at %p",
 			   &vrh->vring.used->idx);
@@ -433,7 +447,9 @@
 
 
 static inline int __vringh_need_notify(struct vringh *vrh,
-				       int (*getu16)(u16 *val, const u16 *p))
+				       int (*getu16)(const struct vringh *vrh,
+						     u16 *val,
+						     const __virtio16 *p))
 {
 	bool notify;
 	u16 used_event;
@@ -447,7 +463,7 @@
 	/* Old-style, without event indices. */
 	if (!vrh->event_indices) {
 		u16 flags;
-		err = getu16(&flags, &vrh->vring.avail->flags);
+		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
 		if (err) {
 			vringh_bad("Failed to get flags at %p",
 				   &vrh->vring.avail->flags);
@@ -457,7 +473,7 @@
 	}
 
 	/* Modern: we know when other side wants to know. */
-	err = getu16(&used_event, &vring_used_event(&vrh->vring));
+	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
 	if (err) {
 		vringh_bad("Failed to get used event idx at %p",
 			   &vring_used_event(&vrh->vring));
@@ -478,20 +494,22 @@
 }
 
 static inline bool __vringh_notify_enable(struct vringh *vrh,
-					  int (*getu16)(u16 *val, const u16 *p),
-					  int (*putu16)(u16 *p, u16 val))
+					  int (*getu16)(const struct vringh *vrh,
+							u16 *val, const __virtio16 *p),
+					  int (*putu16)(const struct vringh *vrh,
+							__virtio16 *p, u16 val))
 {
 	u16 avail;
 
 	if (!vrh->event_indices) {
 		/* Old-school; update flags. */
-		if (putu16(&vrh->vring.used->flags, 0) != 0) {
+		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
 			vringh_bad("Clearing used flags %p",
 				   &vrh->vring.used->flags);
 			return true;
 		}
 	} else {
-		if (putu16(&vring_avail_event(&vrh->vring),
+		if (putu16(vrh, &vring_avail_event(&vrh->vring),
 			   vrh->last_avail_idx) != 0) {
 			vringh_bad("Updating avail event index %p",
 				   &vring_avail_event(&vrh->vring));
@@ -503,7 +521,7 @@
 	 * sure it's written, then check again. */
 	virtio_mb(vrh->weak_barriers);
 
-	if (getu16(&avail, &vrh->vring.avail->idx) != 0) {
+	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
 		vringh_bad("Failed to check avail idx at %p",
 			   &vrh->vring.avail->idx);
 		return true;
@@ -516,11 +534,13 @@
 }
 
 static inline void __vringh_notify_disable(struct vringh *vrh,
-					   int (*putu16)(u16 *p, u16 val))
+					   int (*putu16)(const struct vringh *vrh,
+							 __virtio16 *p, u16 val))
 {
 	if (!vrh->event_indices) {
 		/* Old-school; update flags. */
-		if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) {
+		if (putu16(vrh, &vrh->vring.used->flags,
+			   VRING_USED_F_NO_NOTIFY)) {
 			vringh_bad("Setting used flags %p",
 				   &vrh->vring.used->flags);
 		}
@@ -528,14 +548,18 @@
 }
 
 /* Userspace access helpers: in this case, addresses are really userspace. */
-static inline int getu16_user(u16 *val, const u16 *p)
+static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
 {
-	return get_user(*val, (__force u16 __user *)p);
+	__virtio16 v = 0;
+	int rc = get_user(v, (__force __virtio16 __user *)p);
+	*val = vringh16_to_cpu(vrh, v);
+	return rc;
 }
 
-static inline int putu16_user(u16 *p, u16 val)
+static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	return put_user(val, (__force u16 __user *)p);
+	__virtio16 v = cpu_to_vringh16(vrh, val);
+	return put_user(v, (__force __virtio16 __user *)p);
 }
 
 static inline int copydesc_user(void *dst, const void *src, size_t len)
@@ -577,7 +601,7 @@
  * Returns an error if num is invalid: you should check pointers
  * yourself!
  */
-int vringh_init_user(struct vringh *vrh, u32 features,
+int vringh_init_user(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc __user *desc,
 		     struct vring_avail __user *avail,
@@ -589,6 +613,7 @@
 		return -EINVAL;
 	}
 
+	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
 	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
 	vrh->weak_barriers = weak_barriers;
 	vrh->completed = 0;
@@ -729,8 +754,8 @@
 {
 	struct vring_used_elem used;
 
-	used.id = head;
-	used.len = len;
+	used.id = cpu_to_vringh32(vrh, head);
+	used.len = cpu_to_vringh32(vrh, len);
 	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
 }
 EXPORT_SYMBOL(vringh_complete_user);
@@ -792,15 +817,16 @@
 EXPORT_SYMBOL(vringh_need_notify_user);
 
 /* Kernelspace access helpers. */
-static inline int getu16_kern(u16 *val, const u16 *p)
+static inline int getu16_kern(const struct vringh *vrh,
+			      u16 *val, const __virtio16 *p)
 {
-	*val = ACCESS_ONCE(*p);
+	*val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
 	return 0;
 }
 
-static inline int putu16_kern(u16 *p, u16 val)
+static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	ACCESS_ONCE(*p) = val;
+	ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
 	return 0;
 }
 
@@ -836,7 +862,7 @@
  *
  * Returns an error if num is invalid.
  */
-int vringh_init_kern(struct vringh *vrh, u32 features,
+int vringh_init_kern(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc *desc,
 		     struct vring_avail *avail,
@@ -848,6 +874,7 @@
 		return -EINVAL;
 	}
 
+	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
 	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
 	vrh->weak_barriers = weak_barriers;
 	vrh->completed = 0;
@@ -962,8 +989,8 @@
 {
 	struct vring_used_elem used;
 
-	used.id = head;
-	used.len = len;
+	used.id = cpu_to_vringh32(vrh, head);
+	used.len = cpu_to_vringh32(vrh, len);
 
 	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
 }
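
Note: every vringh16/32/64 conversion above implements the virtio 1.0 rule
that ring fields are little-endian on the wire, while legacy (pre-1.0)
rings use the guest's native byte order — hence the new vrh->little_endian
flag derived from VIRTIO_F_VERSION_1. A self-contained userspace model of
the 16-bit helper (an illustration, not the kernel function):

	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint16_t model_vringh16_to_cpu(int little_endian, uint16_t raw)
	{
		/* virtio 1.0: field is LE on the wire; legacy: no swap. */
		return little_endian ? le16toh(raw) : raw;
	}

	int main(void)
	{
		uint16_t wire = htole16(0x0102);

		printf("1.0: %#x  legacy: %#x\n",
		       model_vringh16_to_cpu(1, wire),
		       model_vringh16_to_cpu(0, wire));
		return 0;
	}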
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f226658..b9f70df 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -162,6 +162,27 @@
 	spin_unlock_irq(&dev->config_lock);
 }
 
+static int virtio_finalize_features(struct virtio_device *dev)
+{
+	int ret = dev->config->finalize_features(dev);
+	unsigned status;
+
+	if (ret)
+		return ret;
+
+	if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
+		return 0;
+
+	add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+	status = dev->config->get_status(dev);
+	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
+		dev_err(&dev->dev, "virtio: device refuses features: %x\n",
+			status);
+		return -ENODEV;
+	}
+	return 0;
+}
+
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
@@ -170,7 +191,6 @@
 	u64 device_features;
 	u64 driver_features;
 	u64 driver_features_legacy;
-	unsigned status;
 
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
@@ -208,21 +228,10 @@
 		if (device_features & (1ULL << i))
 			__virtio_set_bit(dev, i);
 
-	err = dev->config->finalize_features(dev);
+	err = virtio_finalize_features(dev);
 	if (err)
 		goto err;
 
-	if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
-		add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
-		status = dev->config->get_status(dev);
-		if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
-			dev_err(_d, "virtio: device refuses features: %x\n",
-			       status);
-			err = -ENODEV;
-			goto err;
-		}
-	}
-
 	err = drv->probe(dev);
 	if (err)
 		goto err;
@@ -372,7 +381,7 @@
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
-	ret = dev->config->finalize_features(dev);
+	ret = virtio_finalize_features(dev);
 	if (ret)
 		goto err;
 
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 953057d..2ef9529 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -458,7 +458,44 @@
 	return virtio_device_restore(&vp_dev->vdev);
 }
 
-const struct dev_pm_ops virtio_pci_pm_ops = {
+static const struct dev_pm_ops virtio_pci_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
 };
 #endif
+
+
+/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
+static const struct pci_device_id virtio_pci_id_table[] = {
+	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
+
+static int virtio_pci_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	return virtio_pci_legacy_probe(pci_dev, id);
+}
+
+static void virtio_pci_remove(struct pci_dev *pci_dev)
+{
+	virtio_pci_legacy_remove(pci_dev);
+}
+
+static struct pci_driver virtio_pci_driver = {
+	.name		= "virtio-pci",
+	.id_table	= virtio_pci_id_table,
+	.probe		= virtio_pci_probe,
+	.remove		= virtio_pci_remove,
+#ifdef CONFIG_PM_SLEEP
+	.driver.pm	= &virtio_pci_pm_ops,
+#endif
+};
+
+module_pci_driver(virtio_pci_driver);
+
+MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
+MODULE_DESCRIPTION("virtio-pci");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index d840dad..adddb64 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -27,7 +27,6 @@
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
-#define VIRTIO_PCI_NO_LEGACY
 #include <linux/virtio_pci.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
@@ -129,8 +128,8 @@
 int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
 void virtio_pci_release_dev(struct device *);
 
-#ifdef CONFIG_PM_SLEEP
-extern const struct dev_pm_ops virtio_pci_pm_ops;
-#endif
+int virtio_pci_legacy_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id);
+void virtio_pci_legacy_remove(struct pci_dev *pci_dev);
 
 #endif
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 2588252..6c76f0f 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -19,14 +19,6 @@
 
 #include "virtio_pci_common.h"
 
-/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
-static const struct pci_device_id virtio_pci_id_table[] = {
-	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
-
 /* virtio config->get_features() implementation */
 static u64 vp_get_features(struct virtio_device *vdev)
 {
@@ -220,7 +212,7 @@
 };
 
 /* the PCI probing function */
-static int virtio_pci_probe(struct pci_dev *pci_dev,
+int virtio_pci_legacy_probe(struct pci_dev *pci_dev,
 			    const struct pci_device_id *id)
 {
 	struct virtio_pci_device *vp_dev;
@@ -300,7 +292,7 @@
 	return err;
 }
 
-static void virtio_pci_remove(struct pci_dev *pci_dev)
+void virtio_pci_legacy_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 
@@ -312,15 +304,3 @@
 	pci_disable_device(pci_dev);
 	kfree(vp_dev);
 }
-
-static struct pci_driver virtio_pci_driver = {
-	.name		= "virtio-pci",
-	.id_table	= virtio_pci_id_table,
-	.probe		= virtio_pci_probe,
-	.remove		= virtio_pci_remove,
-#ifdef CONFIG_PM_SLEEP
-	.driver.pm	= &virtio_pci_pm_ops,
-#endif
-};
-
-module_pci_driver(virtio_pci_driver);
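
Note: moving the PCI driver skeleton into virtio_pci_common.c leaves
virtio_pci_probe()/virtio_pci_remove() as thin wrappers around the legacy
entry points, so a virtio 1.0 "modern" backend can later slot in beside
them. A plausible shape for that dispatch (virtio_pci_modern_probe() is
hypothetical; only the legacy path exists in this diff):

	static int virtio_pci_probe(struct pci_dev *pci_dev,
				    const struct pci_device_id *id)
	{
		int rc;

		/* Try the 1.0 interface first, fall back to legacy. */
		rc = virtio_pci_modern_probe(pci_dev, id); /* hypothetical */
		if (rc == -ENODEV)
			rc = virtio_pci_legacy_probe(pci_dev, id);
		return rc;
	}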
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c04ef1d..97aff28 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -254,6 +254,7 @@
 				return NULL;
 		}
 	}
+	s[-1] = '\0';
 	return s;
 }
 
@@ -378,8 +379,7 @@
 		p = scanarg(p, del);
 		if (!p)
 			goto einval;
-		p[-1] = '\0';
-		if (p == e->magic)
+		if (!e->magic[0])
 			goto einval;
 		if (USE_DEBUG)
 			print_hex_dump_bytes(
@@ -391,8 +391,7 @@
 		p = scanarg(p, del);
 		if (!p)
 			goto einval;
-		p[-1] = '\0';
-		if (p == e->mask) {
+		if (!e->mask[0]) {
 			e->mask = NULL;
 			pr_debug("register:  mask[raw]: none\n");
 		} else if (USE_DEBUG)
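
Note: with scanarg() writing the terminating NUL itself, the callers above
test for an empty field via its first byte instead of comparing the
returned pointer. A userspace model of the reworked helper (same shape as
the kernel function; the hunk only shows the added terminator):

	#include <ctype.h>
	#include <stddef.h>

	static char *scanarg(char *s, char del)
	{
		char c;

		while ((c = *s++) != del) {
			/* Skip \xNN escapes so an escaped delimiter
			 * does not end the field early. */
			if (c == '\\' && *s == 'x') {
				s++;
				if (!isxdigit(*s++))
					return NULL;
				if (!isxdigit(*s++))
					return NULL;
			}
		}
		s[-1] = '\0';	/* overwrite the delimiter */
		return s;
	}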
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6fbbd7..7e60741 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3481,8 +3481,8 @@
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
 				   u64 start, u64 end);
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes, u64 *actual_bytes);
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+			 u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type);
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3096512..8c63419 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4121,12 +4121,6 @@
 		if (ret)
 			break;
 
-		/* opt_discard */
-		if (btrfs_test_opt(root, DISCARD))
-			ret = btrfs_error_discard_extent(root, start,
-							 end + 1 - start,
-							 NULL);
-
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		btrfs_error_unpin_extent_range(root, start, end);
 		cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 222d6ae..a80b971 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1889,8 +1889,8 @@
 	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
 }
 
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-				u64 num_bytes, u64 *actual_bytes)
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+			 u64 num_bytes, u64 *actual_bytes)
 {
 	int ret;
 	u64 discarded_bytes = 0;
@@ -5727,7 +5727,8 @@
 	update_global_block_rsv(fs_info);
 }
 
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+			      const bool return_free_space)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_block_group_cache *cache = NULL;
@@ -5751,7 +5752,8 @@
 
 		if (start < cache->last_byte_to_unpin) {
 			len = min(len, cache->last_byte_to_unpin - start);
-			btrfs_add_free_space(cache, start, len);
+			if (return_free_space)
+				btrfs_add_free_space(cache, start, len);
 		}
 
 		start += len;
@@ -5815,7 +5817,7 @@
 						   end + 1 - start, NULL);
 
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
-		unpin_extent_range(root, start, end);
+		unpin_extent_range(root, start, end, true);
 		cond_resched();
 	}
 
@@ -8872,6 +8874,7 @@
 				       cache_node);
 		rb_erase(&block_group->cache_node,
 			 &info->block_group_cache_tree);
+		RB_CLEAR_NODE(&block_group->cache_node);
 		spin_unlock(&info->block_group_cache_lock);
 
 		down_write(&block_group->space_info->groups_sem);
@@ -9130,6 +9133,7 @@
 			spin_lock(&info->block_group_cache_lock);
 			rb_erase(&cache->cache_node,
 				 &info->block_group_cache_tree);
+			RB_CLEAR_NODE(&cache->cache_node);
 			spin_unlock(&info->block_group_cache_lock);
 			btrfs_put_block_group(cache);
 			goto error;
@@ -9271,6 +9275,7 @@
 		spin_lock(&root->fs_info->block_group_cache_lock);
 		rb_erase(&cache->cache_node,
 			 &root->fs_info->block_group_cache_tree);
+		RB_CLEAR_NODE(&cache->cache_node);
 		spin_unlock(&root->fs_info->block_group_cache_lock);
 		btrfs_put_block_group(cache);
 		return ret;
@@ -9690,13 +9695,7 @@
 
 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-	return unpin_extent_range(root, start, end);
-}
-
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes, u64 *actual_bytes)
-{
-	return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+	return unpin_extent_range(root, start, end, false);
 }
 
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
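
Note: the new return_free_space flag splits the two callers of
unpin_extent_range() — paraphrased call sites:

	/* btrfs_finish_extent_commit(): unpinned ranges go back to the
	 * in-memory free-space cache. */
	unpin_extent_range(root, start, end, true);

	/* btrfs_error_unpin_extent_range(), used during error/unmount
	 * cleanup: don't re-add free space while state is torn down. */
	unpin_extent_range(root, start, end, false);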
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 030847b..d6c03f7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2966,8 +2966,8 @@
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 
-	ret = btrfs_error_discard_extent(fs_info->extent_root,
-					 start, bytes, &trimmed);
+	ret = btrfs_discard_extent(fs_info->extent_root,
+				   start, bytes, &trimmed);
 	if (!ret)
 		*total_trimmed += trimmed;
 
@@ -3185,16 +3185,18 @@
 
 		spin_unlock(&block_group->lock);
 
+		lock_chunks(block_group->fs_info->chunk_root);
 		em_tree = &block_group->fs_info->mapping_tree.map_tree;
 		write_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, block_group->key.objectid,
 					   1);
 		BUG_ON(!em); /* logic error, can't happen */
+		/*
+		 * remove_extent_mapping() will delete us from the pinned_chunks
+		 * list, which is protected by the chunk mutex.
+		 */
 		remove_extent_mapping(em_tree, em);
 		write_unlock(&em_tree->lock);
-
-		lock_chunks(block_group->fs_info->chunk_root);
-		list_del_init(&em->list);
 		unlock_chunks(block_group->fs_info->chunk_root);
 
 		/* once for us and once for the tree */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790..50c5a87 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1485,7 +1485,7 @@
 	struct file *filp;
 
 	filp = filp_open(path_name, O_RDWR, 0);
-	if (!filp)
+	if (IS_ERR(filp))
 		return;
 	file_update_time(filp);
 	filp_close(filp, NULL);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c2d6604..719e1ce 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1917,7 +1917,6 @@
 			break;
 		case 2:
 			dst[dst_byte_offset++] |= (src_byte);
-			dst[dst_byte_offset] = 0;
 			current_bit_offset = 0;
 			break;
 		}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 80154ec..6f4e659 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -190,23 +190,11 @@
 {
 	int rc = 0;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct dentry *ecryptfs_dentry = file->f_path.dentry;
 	/* Private value of ecryptfs_dentry allocated in
 	 * ecryptfs_lookup() */
 	struct ecryptfs_file_info *file_info;
 
-	mount_crypt_stat = &ecryptfs_superblock_to_private(
-		ecryptfs_dentry->d_sb)->mount_crypt_stat;
-	if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
-	    && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
-		|| (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
-		|| (file->f_flags & O_APPEND))) {
-		printk(KERN_WARNING "Mount has encrypted view enabled; "
-		       "files may only be read\n");
-		rc = -EPERM;
-		goto out;
-	}
 	/* Released in ecryptfs_release or end of function if failure */
 	file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e1..917bd5c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -100,12 +100,12 @@
 	(*size) = 0;
 	if (data[0] < 192) {
 		/* One-byte length */
-		(*size) = (unsigned char)data[0];
+		(*size) = data[0];
 		(*length_size) = 1;
 	} else if (data[0] < 224) {
 		/* Two-byte length */
-		(*size) = (((unsigned char)(data[0]) - 192) * 256);
-		(*size) += ((unsigned char)(data[1]) + 192);
+		(*size) = (data[0] - 192) * 256;
+		(*size) += data[1] + 192;
 		(*length_size) = 2;
 	} else if (data[0] == 255) {
 		/* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd..d9eb84b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -493,6 +493,7 @@
 {
 	struct super_block *s;
 	struct ecryptfs_sb_info *sbi;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct ecryptfs_dentry_info *root_info;
 	const char *err = "Getting sb failed";
 	struct inode *inode;
@@ -511,6 +512,7 @@
 		err = "Error parsing options";
 		goto out;
 	}
+	mount_crypt_stat = &sbi->mount_crypt_stat;
 
 	s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 	if (IS_ERR(s)) {
@@ -557,11 +559,19 @@
 
 	/**
 	 * Set the POSIX ACL flag based on whether they're enabled in the lower
-	 * mount. Force a read-only eCryptfs mount if the lower mount is ro.
-	 * Allow a ro eCryptfs mount even when the lower mount is rw.
+	 * mount.
 	 */
 	s->s_flags = flags & ~MS_POSIXACL;
-	s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+	s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+	/**
+	 * Force a read-only eCryptfs mount when:
+	 *   1) The lower mount is ro
+	 *   2) The ecryptfs_encrypted_view mount option is specified
+	 */
+	if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+	    mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+		s->s_flags |= MS_RDONLY;
 
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 503ea15..370420b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -267,7 +267,6 @@
 	handle_t *handle;
 	ext4_lblk_t orig_blk_offset, donor_blk_offset;
 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
-	unsigned int w_flags = 0;
 	unsigned int tmp_data_size, data_size, replaced_size;
 	int err2, jblocks, retries = 0;
 	int replaced_count = 0;
@@ -288,9 +287,6 @@
 		return 0;
 	}
 
-	if (segment_eq(get_fs(), KERNEL_DS))
-		w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
 	orig_blk_offset = orig_page_offset * blocks_per_page +
 		data_offset_in_page;
 
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 697390e..ddc9f96 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -448,27 +448,6 @@
 	return pol;
 }
 
-static int kernfs_vma_migrate(struct vm_area_struct *vma,
-			      const nodemask_t *from, const nodemask_t *to,
-			      unsigned long flags)
-{
-	struct file *file = vma->vm_file;
-	struct kernfs_open_file *of = kernfs_of(file);
-	int ret;
-
-	if (!of->vm_ops)
-		return 0;
-
-	if (!kernfs_get_active(of->kn))
-		return 0;
-
-	ret = 0;
-	if (of->vm_ops->migrate)
-		ret = of->vm_ops->migrate(vma, from, to, flags);
-
-	kernfs_put_active(of->kn);
-	return ret;
-}
 #endif
 
 static const struct vm_operations_struct kernfs_vm_ops = {
@@ -479,7 +458,6 @@
 #ifdef CONFIG_NUMA
 	.set_policy	= kernfs_vma_set_policy,
 	.get_policy	= kernfs_vma_get_policy,
-	.migrate	= kernfs_vma_migrate,
 #endif
 };
 
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf2d03f..510413eb 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -159,7 +159,7 @@
 
 	/* sum again ? it could be updated? */
 	for_each_irq_nr(j)
-		seq_put_decimal_ull(p, ' ', kstat_irqs(j));
+		seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
 
 	seq_printf(p,
 		"\nctxt %llu\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 73ca174..0f96f71 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -91,6 +91,7 @@
 
 static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 {
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = real_mount(mnt);
 	int err = 0;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -104,7 +105,10 @@
 		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
 	}
 	seq_putc(m, ' ');
-	seq_path(m, &mnt_path, " \t\n\\");
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+	if (err)
+		goto out;
 	seq_putc(m, ' ');
 	show_type(m, sb);
 	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
@@ -125,7 +129,6 @@
 	struct mount *r = real_mount(mnt);
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-	struct path root = p->root;
 	int err = 0;
 
 	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
@@ -139,7 +142,7 @@
 	seq_putc(m, ' ');
 
 	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
-	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");