Merge git://github.com/rustyrussell/linux

* git://github.com/rustyrussell/linux:
  lguest: move process freezing before pending signals check
  lguest: don't allow KVM-detection cpuid.
  lguest: Allow running under paravirt-enabled KVM.
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index c1eb41c..2b5d561 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -206,3 +206,16 @@
 		when a discarded area is read the discard_zeroes_data
 		parameter will be set to one. Otherwise it will be 0 and
 		the result of reading a discarded area is undefined.
+What:		/sys/block/<disk>/alias
+Date:		Aug 2011
+Contact:	Nao Nishijima <nao.nishijima.xt@hitachi.com>
+Description:
+		A raw device name of a disk does not always point a same disk
+		each boot-up time. Therefore, users have to use persistent
+		device names, which udev creates when the kernel finds a disk,
+		instead of raw device name. However, kernel doesn't show those
+		persistent names on its messages (e.g. dmesg).
+		This file can store an alias of the disk and it would be
+		appeared in kernel messages if it is set. A disk can have an
+		alias which length is up to 255bytes. Users can use alphabets,
+		numbers, "-" and "_" in alias name. This file is writeonce.
diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom
new file mode 100644
index 0000000..82d4df1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-wacom
@@ -0,0 +1,72 @@
+What:		/sys/class/hidraw/hidraw*/device/speed
+Date:		April 2010
+Kernel Version:	2.6.35
+Contact:	linux-bluetooth@vger.kernel.org
+Description:
+		The /sys/class/hidraw/hidraw*/device/speed file controls
+		reporting speed of Wacom bluetooth tablet. Reading from
+		this file returns 1 if tablet reports in high speed mode
+		or 0 otherwise. Writing to this file one of these values
+		switches reporting speed.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/led
+Date:		August 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		Attribute group for control of the status LEDs and the OLEDs.
+		This attribute group is only available for Intuos 4 M, L,
+		and XL (with LEDs and OLEDs) and Cintiq 21UX2 (LEDs only).
+		Therefore its presence implicitly signifies the presence of
+		said LEDs and OLEDs on the tablet device.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status0_luminance
+Date:		August 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		Writing to this file sets the status LED luminance (1..127)
+		when the stylus does not touch the tablet surface, and no
+		button is pressed on the stylus. This luminance level is
+		normally lower than the level when a button is pressed.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status1_luminance
+Date:		August 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		Writing to this file sets the status LED luminance (1..127)
+		when the stylus touches the tablet surface, or any button is
+		pressed on the stylus.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status_led0_select
+Date:		August 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		Writing to this file sets which one of the four (for Intuos 4)
+		or of the right four (for Cintiq 21UX2) status LEDs is active (0..3).
+		The other three LEDs on the same side are always inactive.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status_led1_select
+Date:		September 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		Writing to this file sets which one of the left four (for Cintiq 21UX2)
+		status LEDs is active (0..3). The other three LEDs on the left are always
+		inactive.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/buttons_luminance
+Date:		August 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		Writing to this file sets the overall luminance level (0..15)
+		of all eight button OLED displays.
+
+What:		/sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/button<n>_rawimg
+Date:		August 2011
+Contact:	linux-input@vger.kernel.org
+Description:
+		When writing a 1024 byte raw image in Wacom Intuos 4
+		interleaving format to the file, the image shows up on Button N
+		of the device. The image is a 64x32 pixel 4-bit gray image. The
+		1024 byte binary is split up into 16x 64 byte chunks. Each 64
+		byte chunk encodes the image data for two consecutive lines on
+		the display. The low nibble of each byte contains the first
+		line, and the high nibble contains the second line.
diff --git a/Documentation/ABI/testing/sysfs-wacom b/Documentation/ABI/testing/sysfs-wacom
deleted file mode 100644
index 1517976..0000000
--- a/Documentation/ABI/testing/sysfs-wacom
+++ /dev/null
@@ -1,10 +0,0 @@
-What:		/sys/class/hidraw/hidraw*/device/speed
-Date:		April 2010
-Kernel Version:	2.6.35
-Contact:	linux-bluetooth@vger.kernel.org
-Description:
-		The /sys/class/hidraw/hidraw*/device/speed file controls
-		reporting speed of wacom bluetooth tablet. Reading from
-		this file returns 1 if tablet reports in high speed mode
-		or 0 otherwise. Writing to this file one of these values
-		switches reporting speed.
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl
index 598c22f..5de23c0 100644
--- a/Documentation/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl
@@ -4288,7 +4288,7 @@
 <![CDATA[
   struct snd_rawmidi *rmidi;
   snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, port, info_flags,
-                      irq, irq_flags, &rmidi);
+                      irq, &rmidi);
 ]]>
           </programlisting>
         </informalexample>
@@ -4343,6 +4343,13 @@
 	by itself to start processing the output stream in the irq handler.
 	</para>
 
+	<para>
+	If the MPU-401 interface shares its interrupt with the other logical
+	devices on the card, set <constant>MPU401_INFO_IRQ_HOOK</constant>
+	(see <link linkend="midi-interface-interrupt-handler"><citetitle>
+	below</citetitle></link>).
+	</para>
+
       <para>
         Usually, the port address corresponds to the command port and
         port + 1 corresponds to the data port. If not, you may change
@@ -4375,14 +4382,12 @@
       </para>
 
       <para>
-        The 6th argument specifies the irq number for UART. If the irq
-      is already allocated, pass 0 to the 7th argument
-      (<parameter>irq_flags</parameter>). Otherwise, pass the flags
-      for irq allocation 
-      (<constant>SA_XXX</constant> bits) to it, and the irq will be
-      reserved by the mpu401-uart layer. If the card doesn't generate
-      UART interrupts, pass -1 as the irq number. Then a timer
-      interrupt will be invoked for polling. 
+	The 6th argument specifies the ISA irq number that will be
+	allocated.  If no interrupt is to be allocated (because your
+	code is already allocating a shared interrupt, or because the
+	device does not use interrupts), pass -1 instead.
+	For a MPU-401 device without an interrupt, a polling timer
+	will be used instead.
       </para>
     </section>
 
@@ -4390,12 +4395,13 @@
       <title>Interrupt Handler</title>
       <para>
         When the interrupt is allocated in
-      <function>snd_mpu401_uart_new()</function>, the private
-      interrupt handler is used, hence you don't have anything else to do
-      than creating the mpu401 stuff. Otherwise, you have to call
-      <function>snd_mpu401_uart_interrupt()</function> explicitly when
-      a UART interrupt is invoked and checked in your own interrupt
-      handler.  
+      <function>snd_mpu401_uart_new()</function>, an exclusive ISA
+      interrupt handler is automatically used, hence you don't have
+      anything else to do than creating the mpu401 stuff.  Otherwise, you
+      have to set <constant>MPU401_INFO_IRQ_HOOK</constant>, and call
+      <function>snd_mpu401_uart_interrupt()</function> explicitly from your
+      own interrupt handler when it has determined that a UART interrupt
+      has occurred.
       </para>
 
       <para>
diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt
index 1d5d7a8..951ca46 100644
--- a/Documentation/devicetree/bindings/arm/primecell.txt
+++ b/Documentation/devicetree/bindings/arm/primecell.txt
@@ -6,7 +6,9 @@
 
 Required properties:
 
-- compatible : should be a specific value for peripheral and "arm,primecell"
+- compatible : should be a specific name for the peripheral and
+               "arm,primecell".  The specific name will match the ARM
+               engineering name for the logic block in the form: "arm,pl???"
 
 Optional properties:
 
diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.txt b/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
new file mode 100644
index 0000000..a2c416b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
@@ -0,0 +1,10 @@
+ARM PL061 GPIO controller
+
+Required properties:
+- compatible : "arm,pl061", "arm,primecell"
+- #gpio-cells : Should be two. The first cell is the pin number and the
+  second cell is used to specify optional parameters:
+  - bit 0 specifies polarity (0 for normal, 1 for inverted)
+- gpio-controller : Marks the device node as a GPIO controller.
+- interrupts : Interrupt mapping for GPIO IRQ.
+
diff --git a/Documentation/devicetree/bindings/mmc/nvidia-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia-sdhci.txt
new file mode 100644
index 0000000..7e51154
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/nvidia-sdhci.txt
@@ -0,0 +1,27 @@
+* NVIDIA Tegra Secure Digital Host Controller
+
+This controller on Tegra family SoCs provides an interface for MMC, SD,
+and SDIO types of memory cards.
+
+Required properties:
+- compatible : Should be "nvidia,<chip>-sdhci"
+- reg : Should contain SD/MMC registers location and length
+- interrupts : Should contain SD/MMC interrupt
+
+Optional properties:
+- cd-gpios : Specify GPIOs for card detection
+- wp-gpios : Specify GPIOs for write protection
+- power-gpios : Specify GPIOs for power control
+- support-8bit : Boolean, indicates if 8-bit mode should be used.
+
+Example:
+
+sdhci@c8000200 {
+	compatible = "nvidia,tegra20-sdhci";
+	reg = <0xc8000200 0x200>;
+	interrupts = <47>;
+	cd-gpios = <&gpio 69 0>; /* gpio PI5 */
+	wp-gpios = <&gpio 57 0>; /* gpio PH1 */
+	power-gpios = <&gpio 155 0>; /* gpio PT3 */
+	support-8bit;
+};
diff --git a/Documentation/devicetree/bindings/sound/soc/codecs/fsl-sgtl5000.txt b/Documentation/devicetree/bindings/sound/soc/codecs/fsl-sgtl5000.txt
new file mode 100644
index 0000000..2c3cd41
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/soc/codecs/fsl-sgtl5000.txt
@@ -0,0 +1,11 @@
+* Freescale SGTL5000 Stereo Codec
+
+Required properties:
+- compatible : "fsl,sgtl5000".
+
+Example:
+
+codec: sgtl5000@0a {
+	compatible = "fsl,sgtl5000";
+	reg = <0x0a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8510.txt b/Documentation/devicetree/bindings/sound/wm8510.txt
new file mode 100644
index 0000000..fa1a32b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8510.txt
@@ -0,0 +1,18 @@
+WM8510 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8510"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8510@1a {
+	compatible = "wlf,wm8510";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8523.txt b/Documentation/devicetree/bindings/sound/wm8523.txt
new file mode 100644
index 0000000..0474618
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8523.txt
@@ -0,0 +1,16 @@
+WM8523 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+  - compatible : "wlf,wm8523"
+
+  - reg : the I2C address of the device.
+
+Example:
+
+codec: wm8523@1a {
+	compatible = "wlf,wm8523";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8580.txt b/Documentation/devicetree/bindings/sound/wm8580.txt
new file mode 100644
index 0000000..7d9821f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8580.txt
@@ -0,0 +1,16 @@
+WM8580 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+  - compatible : "wlf,wm8580"
+
+  - reg : the I2C address of the device.
+
+Example:
+
+codec: wm8580@1a {
+	compatible = "wlf,wm8580";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8711.txt b/Documentation/devicetree/bindings/sound/wm8711.txt
new file mode 100644
index 0000000..8ed9998
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8711.txt
@@ -0,0 +1,18 @@
+WM8711 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8711"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8711@1a {
+	compatible = "wlf,wm8711";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8728.txt b/Documentation/devicetree/bindings/sound/wm8728.txt
new file mode 100644
index 0000000..a8b5c36
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8728.txt
@@ -0,0 +1,18 @@
+WM8728 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8728"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8728@1a {
+	compatible = "wlf,wm8728";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8731.txt b/Documentation/devicetree/bindings/sound/wm8731.txt
new file mode 100644
index 0000000..15f7004
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8731.txt
@@ -0,0 +1,18 @@
+WM8731 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8731"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8731@1a {
+	compatible = "wlf,wm8731";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8737.txt b/Documentation/devicetree/bindings/sound/wm8737.txt
new file mode 100644
index 0000000..4bc2cea
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8737.txt
@@ -0,0 +1,18 @@
+WM8737 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8737"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8737@1a {
+	compatible = "wlf,wm8737";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8741.txt b/Documentation/devicetree/bindings/sound/wm8741.txt
new file mode 100644
index 0000000..74bda58
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8741.txt
@@ -0,0 +1,18 @@
+WM8741 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8741"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8741@1a {
+	compatible = "wlf,wm8741";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8750.txt b/Documentation/devicetree/bindings/sound/wm8750.txt
new file mode 100644
index 0000000..8db239f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8750.txt
@@ -0,0 +1,18 @@
+WM8750 and WM8987 audio CODECs
+
+These devices support both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8750" or "wlf,wm8987"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8750@1a {
+	compatible = "wlf,wm8750";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8753.txt b/Documentation/devicetree/bindings/sound/wm8753.txt
new file mode 100644
index 0000000..e65277a
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8753.txt
@@ -0,0 +1,18 @@
+WM8753 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8753"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8737@1a {
+	compatible = "wlf,wm8753";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8770.txt b/Documentation/devicetree/bindings/sound/wm8770.txt
new file mode 100644
index 0000000..866e00c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8770.txt
@@ -0,0 +1,16 @@
+WM8770 audio CODEC
+
+This device supports SPI.
+
+Required properties:
+
+  - compatible : "wlf,wm8770"
+
+  - reg : the chip select number.
+
+Example:
+
+codec: wm8770@1 {
+	compatible = "wlf,wm8770";
+	reg = <1>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8776.txt b/Documentation/devicetree/bindings/sound/wm8776.txt
new file mode 100644
index 0000000..3b9ca49
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8776.txt
@@ -0,0 +1,18 @@
+WM8776 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8776"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8776@1a {
+	compatible = "wlf,wm8776";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8804.txt b/Documentation/devicetree/bindings/sound/wm8804.txt
new file mode 100644
index 0000000..4d3a56f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8804.txt
@@ -0,0 +1,18 @@
+WM8804 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+  - compatible : "wlf,wm8804"
+
+  - reg : the I2C address of the device for I2C, the chip select
+          number for SPI.
+
+Example:
+
+codec: wm8804@1a {
+	compatible = "wlf,wm8804";
+	reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt
new file mode 100644
index 0000000..306ec3f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt
@@ -0,0 +1,12 @@
+ARM PL022 SPI controller
+
+Required properties:
+- compatible : "arm,pl022", "arm,primecell"
+- reg : Offset and length of the register set for the device
+- interrupts : Should contain SPI controller interrupt
+
+Optional properties:
+- cs-gpios : should specify GPIOs used for chipselects.
+  The gpios will be referred to as reg = <index> in the SPI child nodes.
+  If unspecified, a single SPI device without a chip select can be used.
+
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
new file mode 100644
index 0000000..e855278
--- /dev/null
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -0,0 +1,40 @@
+Device tree binding vendor prefix registry.  Keep list in alphabetical order.
+
+This isn't an exhaustive list, but you should add new prefixes to it before
+using them to avoid name-space collisions.
+
+adi	Analog Devices, Inc.
+amcc	Applied Micro Circuits Corporation (APM, formally AMCC)
+apm	Applied Micro Circuits Corporation (APM)
+arm	ARM Ltd.
+atmel	Atmel Corporation
+chrp	Common Hardware Reference Platform
+dallas	Maxim Integrated Products (formerly Dallas Semiconductor)
+denx	Denx Software Engineering
+epson	Seiko Epson Corp.
+est	ESTeem Wireless Modems
+fsl	Freescale Semiconductor
+GEFanuc	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+gef	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+hp	Hewlett Packard
+ibm	International Business Machines (IBM)
+idt	Integrated Device Technologies, Inc.
+intercontrol	Inter Control Group
+linux	Linux-specific binding
+marvell	Marvell Technology Group Ltd.
+maxim	Maxim Integrated Products
+mosaixtech	Mosaix Technologies, Inc.
+national	National Semiconductor
+nintendo	Nintendo
+nvidia	NVIDIA
+nxp	NXP Semiconductors
+powervr	Imagination Technologies
+qcom	Qualcomm, Inc.
+ramtron	Ramtron International
+samsung	Samsung Semiconductor
+schindler	Schindler
+simtek
+sirf	SiRF Technology, Inc.
+stericsson	ST-Ericsson
+ti	Texas Instruments
+xlnx	Xilinx
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 82a5d25..ba4be8b 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -21,6 +21,11 @@
   /sys/block/<device>/make-it-fail or
   /sys/block/<device>/<partition>/make-it-fail. (generic_make_request())
 
+o fail_mmc_request
+
+  injects MMC data errors on devices permitted by setting
+  debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
+
 Configure fault-injection capabilities behavior
 -----------------------------------------------
 
@@ -115,7 +120,8 @@
 
 	failslab=
 	fail_page_alloc=
-	fail_make_request=<interval>,<probability>,<space>,<times>
+	fail_make_request=
+	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
 How to add new fault injection capability
 -----------------------------------------
diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt
index db798af..5602eb7 100644
--- a/Documentation/input/elantech.txt
+++ b/Documentation/input/elantech.txt
@@ -16,15 +16,28 @@
 
  1. Introduction
  2. Extra knobs
- 3. Hardware version 1
-    3.1 Registers
-    3.2 Native relative mode 4 byte packet format
-    3.3 Native absolute mode 4 byte packet format
- 4. Hardware version 2
+ 3. Differentiating hardware versions
+ 4. Hardware version 1
     4.1 Registers
-    4.2 Native absolute mode 6 byte packet format
-        4.2.1 One finger touch
-        4.2.2 Two finger touch
+    4.2 Native relative mode 4 byte packet format
+    4.3 Native absolute mode 4 byte packet format
+ 5. Hardware version 2
+    5.1 Registers
+    5.2 Native absolute mode 6 byte packet format
+        5.2.1 Parity checking and packet re-synchronization
+        5.2.2 One/Three finger touch
+        5.2.3 Two finger touch
+ 6. Hardware version 3
+    6.1 Registers
+    6.2 Native absolute mode 6 byte packet format
+        6.2.1 One/Three finger touch
+        6.2.2 Two finger touch
+ 7. Hardware version 4
+    7.1 Registers
+    7.2 Native absolute mode 6 byte packet format
+        7.2.1 Status packet
+        7.2.2 Head packet
+        7.2.3 Motion packet
 
 
 
@@ -375,7 +388,7 @@
 
 In case an error is detected, all the packets are shifted by one (and packet[0] is discarded).
 
-5.2.1 One/Three finger touch
+5.2.2 One/Three finger touch
       ~~~~~~~~~~~~~~~~
 
 byte 0:
@@ -384,19 +397,19 @@
 	 n1  n0  w3  w2   .   .   R   L
 
          L, R = 1 when Left, Right mouse button pressed
-         n1..n0 = numbers of fingers on touchpad
+         n1..n0 = number of fingers on touchpad
 
 byte 1:
 
    bit   7   6   5   4   3   2   1   0
-	 p7  p6  p5  p4  .  x10 x9  x8
+	 p7  p6  p5  p4 x11 x10 x9  x8
 
 byte 2:
 
    bit   7   6   5   4   3   2   1   0
 	 x7  x6  x5  x4  x3  x2  x1  x0
 
-         x10..x0 = absolute x value (horizontal)
+         x11..x0 = absolute x value (horizontal)
 
 byte 3:
 
@@ -420,7 +433,7 @@
 byte 4:
 
    bit   7   6   5   4   3   2   1   0
-        p3  p1  p2  p0   .   .  y9  y8
+        p3  p1  p2  p0  y11 y10 y9  y8
 
 	 p7..p0 = pressure (not EF113)
 
@@ -429,10 +442,10 @@
    bit   7   6   5   4   3   2   1   0
         y7  y6  y5  y4  y3  y2  y1  y0
 
-         y9..y0 = absolute y value (vertical)
+         y11..y0 = absolute y value (vertical)
 
 
-4.2.2 Two finger touch
+5.2.3 Two finger touch
       ~~~~~~~~~~~~~~~~
 
 Note that the two pairs of coordinates are not exactly the coordinates of the
@@ -446,7 +459,7 @@
         n1  n0  ay8 ax8  .   .   R   L
 
          L, R = 1 when Left, Right mouse button pressed
-         n1..n0 = numbers of fingers on touchpad
+         n1..n0 = number of fingers on touchpad
 
 byte 1:
 
@@ -480,3 +493,253 @@
         by7 by8 by5 by4 by3 by2 by1 by0
 
          by8..by0 = upper-right finger absolute y value
+
+/////////////////////////////////////////////////////////////////////////////
+
+6. Hardware version 3
+   ==================
+
+6.1 Registers
+    ~~~~~~~~~
+* reg_10
+
+   bit   7   6   5   4   3   2   1   0
+         0   0   0   0   0   0   0   A
+
+         A: 1 = enable absolute tracking
+
+6.2 Native absolute mode 6 byte packet format
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+1 and 3 finger touch shares the same 6-byte packet format, except that
+3 finger touch only reports the position of the center of all three fingers.
+
+Firmware would send 12 bytes of data for 2 finger touch.
+
+Note on debounce:
+In case the box has unstable power supply or other electricity issues, or
+when number of finger changes, F/W would send "debounce packet" to inform
+driver that the hardware is in debounce status.
+The debouce packet has the following signature:
+    byte 0: 0xc4
+    byte 1: 0xff
+    byte 2: 0xff
+    byte 3: 0x02
+    byte 4: 0xff
+    byte 5: 0xff
+When we encounter this kind of packet, we just ignore it.
+
+6.2.1 One/Three finger touch
+      ~~~~~~~~~~~~~~~~~~~~~~
+
+byte 0:
+
+   bit   7   6   5   4   3   2   1   0
+        n1  n0  w3  w2   0   1   R   L
+
+        L, R = 1 when Left, Right mouse button pressed
+        n1..n0 = number of fingers on touchpad
+
+byte 1:
+
+   bit   7   6   5   4   3   2   1   0
+        p7  p6  p5  p4 x11 x10  x9  x8
+
+byte 2:
+
+   bit   7   6   5   4   3   2   1   0
+        x7  x6  x5  x4  x3  x2  x1  x0
+
+        x11..x0 = absolute x value (horizontal)
+
+byte 3:
+
+   bit   7   6   5   4   3   2   1   0
+         0   0  w1  w0   0   0   1   0
+
+         w3..w0 = width of the finger touch
+
+byte 4:
+
+   bit   7   6   5   4   3   2   1   0
+        p3  p1  p2  p0  y11 y10 y9  y8
+
+        p7..p0 = pressure
+
+byte 5:
+
+   bit   7   6   5   4   3   2   1   0
+        y7  y6  y5  y4  y3  y2  y1  y0
+
+        y11..y0 = absolute y value (vertical)
+
+6.2.2 Two finger touch
+      ~~~~~~~~~~~~~~~~
+
+The packet format is exactly the same for two finger touch, except the hardware
+sends two 6 byte packets. The first packet contains data for the first finger,
+the second packet has data for the second finger. So for two finger touch a
+total of 12 bytes are sent.
+
+/////////////////////////////////////////////////////////////////////////////
+
+7. Hardware version 4
+   ==================
+
+7.1 Registers
+    ~~~~~~~~~
+* reg_07
+
+   bit   7   6   5   4   3   2   1   0
+         0   0   0   0   0   0   0   A
+
+         A: 1 = enable absolute tracking
+
+7.2 Native absolute mode 6 byte packet format
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+v4 hardware is a true multitouch touchpad, capable of tracking up to 5 fingers.
+Unfortunately, due to PS/2's limited bandwidth, its packet format is rather
+complex.
+
+Whenever the numbers or identities of the fingers changes, the hardware sends a
+status packet to indicate how many and which fingers is on touchpad, followed by
+head packets or motion packets. A head packet contains data of finger id, finger
+position (absolute x, y values), width, and pressure. A motion packet contains
+two fingers' position delta.
+
+For example, when status packet tells there are 2 fingers on touchpad, then we
+can expect two following head packets. If the finger status doesn't change,
+the following packets would be motion packets, only sending delta of finger
+position, until we receive a status packet.
+
+One exception is one finger touch. when a status packet tells us there is only
+one finger, the hardware would just send head packets afterwards.
+
+7.2.1 Status packet
+      ~~~~~~~~~~~~~
+
+byte 0:
+
+   bit   7   6   5   4   3   2   1   0
+         .   .   .   .   0   1   R   L
+
+         L, R = 1 when Left, Right mouse button pressed
+
+byte 1:
+
+   bit   7   6   5   4   3   2   1   0
+         .   .   . ft4 ft3 ft2 ft1 ft0
+
+         ft4 ft3 ft2 ft1 ft0 ftn = 1 when finger n is on touchpad
+
+byte 2: not used
+
+byte 3:
+
+   bit   7   6   5   4   3   2   1   0
+         .   .   .   1   0   0   0   0
+
+         constant bits
+
+byte 4:
+
+   bit   7   6   5   4   3   2   1   0
+         p   .   .   .   .   .   .   .
+
+         p = 1 for palm
+
+byte 5: not used
+
+7.2.2 Head packet
+      ~~~~~~~~~~~
+
+byte 0:
+
+   bit   7   6   5   4   3   2   1   0
+        w3  w2  w1  w0   0   1   R   L
+
+        L, R = 1 when Left, Right mouse button pressed
+        w3..w0 = finger width (spans how many trace lines)
+
+byte 1:
+
+   bit   7   6   5   4   3   2   1   0
+        p7  p6  p5  p4 x11 x10  x9  x8
+
+byte 2:
+
+   bit   7   6   5   4   3   2   1   0
+        x7  x6  x5  x4  x3  x2  x1  x0
+
+        x11..x0 = absolute x value (horizontal)
+
+byte 3:
+
+   bit   7   6   5   4   3   2   1   0
+       id2 id1 id0   1   0   0   0   1
+
+       id2..id0 = finger id
+
+byte 4:
+
+   bit   7   6   5   4   3   2   1   0
+        p3  p1  p2  p0  y11 y10 y9  y8
+
+        p7..p0 = pressure
+
+byte 5:
+
+   bit   7   6   5   4   3   2   1   0
+        y7  y6  y5  y4  y3  y2  y1  y0
+
+        y11..y0 = absolute y value (vertical)
+
+7.2.3 Motion packet
+      ~~~~~~~~~~~~~
+
+byte 0:
+
+   bit   7   6   5   4   3   2   1   0
+       id2 id1 id0   w   0   1   R   L
+
+       L, R = 1 when Left, Right mouse button pressed
+       id2..id0 = finger id
+       w = 1 when delta overflows (> 127 or < -128), in this case
+       firmware sends us (delta x / 5) and (delta y  / 5)
+
+byte 1:
+
+   bit   7   6   5   4   3   2   1   0
+        x7  x6  x5  x4  x3  x2  x1  x0
+
+        x7..x0 = delta x (two's complement)
+
+byte 2:
+
+   bit   7   6   5   4   3   2   1   0
+        y7  y6  y5  y4  y3  y2  y1  y0
+
+        y7..y0 = delta y (two's complement)
+
+byte 3:
+
+   bit   7   6   5   4   3   2   1   0
+       id2 id1 id0   1   0   0   1   0
+
+       id2..id0 = finger id
+
+byte 4:
+
+   bit   7   6   5   4   3   2   1   0
+        x7  x6  x5  x4  x3  x2  x1  x0
+
+        x7..x0 = delta x (two's complement)
+
+byte 5:
+
+   bit   7   6   5   4   3   2   1   0
+        y7  y6  y5  y4  y3  y2  y1  y0
+
+        y7..y0 = delta y (two's complement)
+
+        byte 0 ~ 2 for one finger
+        byte 3 ~ 5 for another
diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
index 71536e7..543101c 100644
--- a/Documentation/input/multi-touch-protocol.txt
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -65,6 +65,20 @@
 end.  Upon receiving an MT event, one simply updates the appropriate
 attribute of the current slot.
 
+Some devices identify and/or track more contacts than they can report to the
+driver.  A driver for such a device should associate one type B slot with each
+contact that is reported by the hardware.  Whenever the identity of the
+contact associated with a slot changes, the driver should invalidate that
+slot by changing its ABS_MT_TRACKING_ID.  If the hardware signals that it is
+tracking more contacts than it is currently reporting, the driver should use
+a BTN_TOOL_*TAP event to inform userspace of the total number of contacts
+being tracked by the hardware at that moment.  The driver should do this by
+explicitly sending the corresponding BTN_TOOL_*TAP event and setting
+use_count to false when calling input_mt_report_pointer_emulation().
+The driver should only advertise as many slots as the hardware can report.
+Userspace can detect that a driver can report more total contacts than slots
+by noting that the largest supported BTN_TOOL_*TAP event is larger than the
+total number of type B slots reported in the absinfo for the ABS_MT_SLOT axis.
 
 Protocol Example A
 ------------------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a8ba119..93413ce 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -307,6 +307,19 @@
 			behaviour to be specified.  Bit 0 enables warnings,
 			bit 1 enables fixups, and bit 2 sends a segfault.
 
+	align_va_addr=	[X86-64]
+			Align virtual addresses by clearing slice [14:12] when
+			allocating a VMA at process creation time. This option
+			gives you up to 3% performance improvement on AMD F15h
+			machines (where it is enabled by default) for a
+			CPU-intensive style benchmark, and it can vary highly in
+			a microbenchmark depending on workload and compiler.
+
+			1: only for 32-bit processes
+			2: only for 64-bit processes
+			on: enable for both 32- and 64-bit processes
+			off: disable for both 32- and 64-bit processes
+
 	amd_iommu=	[HW,X86-84]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
@@ -1784,6 +1797,11 @@
 
 	noresidual	[PPC] Don't use residual data on PReP machines.
 
+	nordrand	[X86] Disable the direct use of the RDRAND
+			instruction even if it is supported by the
+			processor.  RDRAND is still available to user
+			space applications.
+
 	noresume	[SWSUSP] Disables resume and restores original swap
 			space.
 
diff --git a/Documentation/scsi/00-INDEX b/Documentation/scsi/00-INDEX
index c2e18e1..b48ded5 100644
--- a/Documentation/scsi/00-INDEX
+++ b/Documentation/scsi/00-INDEX
@@ -28,6 +28,8 @@
 	- Licence of the Flashpoint driver
 LICENSE.qla2xxx
 	- License for QLogic Linux Fibre Channel HBA Driver firmware.
+LICENSE.qla4xxx
+	- License for QLogic Linux iSCSI HBA Driver.
 Mylex.txt
 	- info on driver for Mylex adapters
 NinjaSCSI.txt
diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
index 1b6e27d..64adb98 100644
--- a/Documentation/scsi/ChangeLog.megaraid_sas
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -1,3 +1,18 @@
+Release Date    : Wed. Oct 5, 2011 17:00:00 PST 2010 -
+			(emaild-id:megaraidlinux@lsi.com)
+			Adam Radford
+Current Version : 00.00.06.12-rc1
+Old Version     : 00.00.05.40-rc1
+    1. Continue booting immediately if FW in FAULT at driver load time.
+    2. Increase default cmds per lun to 256.
+    3. Fix mismatch in megasas_reset_fusion() mutex lock-unlock.
+    4. Remove some un-necessary code.
+    5. Clear state change interrupts for Fusion/Invader.
+    6. Clear FUSION_IN_RESET before enabling interrupts.
+    7. Add support for MegaRAID 9360/9380 12GB/s controllers.
+    8. Add multiple MSI-X vector/multiple reply queue support.
+    9. Add driver workaround for PERC5/1068 kdump kernel panic.
+-------------------------------------------------------------------------------
 Release Date    : Tue. Jul 26, 2011 17:00:00 PST 2010 -
 			(emaild-id:megaraidlinux@lsi.com)
 			Adam Radford
diff --git a/Documentation/scsi/LICENSE.qla4xxx b/Documentation/scsi/LICENSE.qla4xxx
new file mode 100644
index 0000000..494980e
--- /dev/null
+++ b/Documentation/scsi/LICENSE.qla4xxx
@@ -0,0 +1,310 @@
+Copyright (c) 2003-2011 QLogic Corporation
+QLogic Linux iSCSI HBA Driver
+
+This program includes a device driver for Linux 3.x.
+You may modify and redistribute the device driver code under the
+GNU General Public License (a copy of which is attached hereto as
+Exhibit A) published by the Free Software Foundation (version 2).
+
+REGARDLESS OF WHAT LICENSING MECHANISM IS USED OR APPLICABLE,
+THIS PROGRAM IS PROVIDED BY QLOGIC CORPORATION "AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+USER ACKNOWLEDGES AND AGREES THAT USE OF THIS PROGRAM WILL NOT
+CREATE OR GIVE GROUNDS FOR A LICENSE BY IMPLICATION, ESTOPPEL, OR
+OTHERWISE IN ANY INTELLECTUAL PROPERTY RIGHTS (PATENT, COPYRIGHT,
+TRADE SECRET, MASK WORK, OR OTHER PROPRIETARY RIGHT) EMBODIED IN
+ANY OTHER QLOGIC HARDWARE OR SOFTWARE EITHER SOLELY OR IN
+COMBINATION WITH THIS PROGRAM.
+
+
+EXHIBIT A
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/Documentation/scsi/bnx2fc.txt b/Documentation/scsi/bnx2fc.txt
new file mode 100644
index 0000000..8082355
--- /dev/null
+++ b/Documentation/scsi/bnx2fc.txt
@@ -0,0 +1,75 @@
+Operating FCoE using bnx2fc
+===========================
+Broadcom FCoE offload through bnx2fc is full stateful hardware offload that
+cooperates with all interfaces provided by the Linux ecosystem for FC/FCoE and
+SCSI controllers.  As such, FCoE functionality, once enabled is largely
+transparent. Devices discovered on the SAN will be registered and unregistered
+automatically with the upper storage layers.
+
+Despite the fact that the Broadcom's FCoE offload is fully offloaded, it does
+depend on the state of the network interfaces to operate. As such, the network
+interface (e.g. eth0) associated with the FCoE offload initiator must be 'up'.
+It is recommended that the network interfaces be configured to be brought up
+automatically at boot time.
+
+Furthermore, the Broadcom FCoE offload solution creates VLAN interfaces to
+support the VLANs that have been discovered for FCoE operation (e.g.
+eth0.1001-fcoe).  Do not delete or disable these interfaces or FCoE operation
+will be disrupted.
+
+Driver Usage Model:
+===================
+
+1. Ensure that fcoe-utils package is installed.
+
+2. Configure the interfaces on which bnx2fc driver has to operate on.
+Here are the steps to configure:
+	a. cd /etc/fcoe
+	b. copy cfg-ethx to cfg-eth5 if FCoE has to be enabled on eth5.
+	c. Repeat this for all the interfaces where FCoE has to be enabled.
+	d. Edit all the cfg-eth files to set "no" for DCB_REQUIRED** field, and
+	   "yes" for AUTO_VLAN.
+	e. Other configuration parameters should be left as default
+
+3. Ensure that "bnx2fc" is in SUPPORTED_DRIVERS list in /etc/fcoe/config.
+
+4. Start fcoe service. (service fcoe start). If Broadcom devices are present in
+the system, bnx2fc driver would automatically claim the interfaces, starts vlan
+discovery and log into the targets.
+
+5. "Symbolic Name" in 'fcoeadm -i' output would display if bnx2fc has claimed
+the interface.
+Eg:
+[root@bh2 ~]# fcoeadm -i
+    Description:      NetXtreme II BCM57712 10 Gigabit Ethernet
+    Revision:         01
+    Manufacturer:     Broadcom Corporation
+    Serial Number:    0010186FD558
+    Driver:           bnx2x 1.70.00-0
+    Number of Ports:  2
+
+        Symbolic Name:     bnx2fc v1.0.5 over eth5.4
+        OS Device Name:    host11
+        Node Name:         0x10000010186FD559
+        Port Name:         0x20000010186FD559
+        FabricName:        0x2001000DECB3B681
+        Speed:             10 Gbit
+        Supported Speed:   10 Gbit
+        MaxFrameSize:      2048
+        FC-ID (Port ID):   0x0F0377
+        State:             Online
+
+6. Verify the vlan discovery is performed by running ifconfig and notice
+<INTERFACE>.<VLAN>-fcoe interfaces are automatically created.
+
+Refer to fcoeadm manpage for more information on fcoeadm operations to
+create/destroy interfaces or to display lun/target information.
+
+NOTE:
+====
+** Broadcom FCoE capable devices implement a DCBX/LLDP client on-chip. Only one
+LLDP client is allowed per interface. For proper operation all host software
+based DCBX/LLDP clients (e.g. lldpad) must be disabled. To disable lldpad on a
+given interface, run the following command:
+
+lldptool set-lldp -i <interface_name> adminStatus=disabled
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 8975701..936699e 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -886,6 +886,12 @@
 		disable)
     power_save_controller - Reset HD-audio controller in power-saving mode
 		(default = on)
+    align_buffer_size - Force rounding of buffer/period sizes to multiples
+    		      of 128 bytes. This is more efficient in terms of memory
+		      access but isn't required by the HDA spec and prevents
+		      users from specifying exact period/buffer sizes.
+		      (default = on)
+    snoop	- Enable/disable snooping (default = on)
 
     This module supports multiple cards and autoprobe.
     
diff --git a/Documentation/sound/alsa/HD-Audio-Controls.txt b/Documentation/sound/alsa/HD-Audio-Controls.txt
index 1482035..e9621e3 100644
--- a/Documentation/sound/alsa/HD-Audio-Controls.txt
+++ b/Documentation/sound/alsa/HD-Audio-Controls.txt
@@ -98,3 +98,19 @@
 
 * Auto-Mute Mode
   See Reatek codecs.
+
+
+Analog codecs
+--------------
+
+* Channel Mode
+  This is an enum control to change the surround-channel setup,
+  appears only when the surround channels are available.
+  It gives the number of channels to be used, "2ch", "4ch" and "6ch".
+  According to the configuration, this also controls the
+  jack-retasking of multi-I/O jacks.
+
+* Independent HP
+  When this enum control is enabled, the headphone output is routed
+  from an individual stream (the third PCM such as hw:0,2) instead of
+  the primary stream.
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index d70c93b..4f34432 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -29,9 +29,6 @@
 
 ALC260
 ======
-  hp		HP machines
-  hp-3013	HP machines (3013-variant)
-  hp-dc7600	HP DC7600
   fujitsu	Fujitsu S7020
   acer		Acer TravelMate
   will		Will laptops (PB V7900)
@@ -46,15 +43,10 @@
 ALC262
 ======
   fujitsu	Fujitsu Laptop
-  hp-bpc	HP xw4400/6400/8400/9400 laptops
-  hp-bpc-d7000	HP BPC D7000
-  hp-tc-t5735	HP Thin Client T5735
-  hp-rp5700	HP RP5700
   benq		Benq ED8
   benq-t31	Benq T31
   hippo		Hippo (ATI) with jack detection, Sony UX-90s
   hippo_1	Hippo (Benq) with jack detection
-  sony-assamd	Sony ASSAMD
   toshiba-s06	Toshiba S06
   toshiba-rx1	Toshiba RX1
   tyan		Tyan Thunder n6650W (S2915-E)
@@ -66,43 +58,15 @@
 
 ALC267/268
 ==========
-  quanta-il1	Quanta IL1 mini-notebook
-  3stack	3-stack model
-  toshiba	Toshiba A205
-  acer		Acer laptops
-  acer-dmic	Acer laptops with digital-mic
-  acer-aspire	Acer Aspire One
-  dell		Dell OEM laptops (Vostro 1200)
-  zepto		Zepto laptops
-  test		for testing/debugging purpose, almost all controls can
-		adjusted.  Appearing only when compiled with
-		$CONFIG_SND_DEBUG=y
-  auto		auto-config reading BIOS (default)
+  N/A
 
 ALC269
 ======
-  basic		Basic preset
-  quanta	Quanta FL1
   laptop-amic	Laptops with analog-mic input
   laptop-dmic	Laptops with digital-mic input
-  fujitsu	FSC Amilo
-  lifebook	Fujitsu Lifebook S6420
-  auto		auto-config reading BIOS (default)
 
 ALC662/663/272
 ==============
-  3stack-dig	3-stack (2-channel) with SPDIF
-  3stack-6ch	 3-stack (6-channel)
-  3stack-6ch-dig 3-stack (6-channel) with SPDIF
-  5stack-dig	 5-stack with SPDIF
-  lenovo-101e	 Lenovo laptop
-  eeepc-p701	ASUS Eeepc P701
-  eeepc-ep20	ASUS Eeepc EP20
-  ecs		ECS/Foxconn mobo
-  m51va		ASUS M51VA
-  g71v		ASUS G71V
-  h13		ASUS H13
-  g50v		ASUS G50V
   asus-mode1	ASUS
   asus-mode2	ASUS
   asus-mode3	ASUS
@@ -111,15 +75,10 @@
   asus-mode6	ASUS
   asus-mode7	ASUS
   asus-mode8	ASUS
-  dell		Dell with ALC272
-  dell-zm1	Dell ZM1 with ALC272
-  samsung-nc10	Samsung NC10 mini notebook
-  auto		auto-config reading BIOS (default)
 
 ALC680
 ======
-  base		Base model (ASUS NX90)
-  auto		auto-config reading BIOS (default)
+  N/A
 
 ALC882/883/885/888/889
 ======================
@@ -175,28 +134,11 @@
 
 ALC861/660
 ==========
-  3stack	3-jack
-  3stack-dig	3-jack with SPDIF I/O
-  6stack-dig	6-jack with SPDIF I/O
-  3stack-660	3-jack (for ALC660)
-  uniwill-m31	Uniwill M31 laptop
-  toshiba	Toshiba laptop support
-  asus		Asus laptop support
-  asus-laptop	ASUS F2/F3 laptops
-  auto		auto-config reading BIOS (default)
+  N/A
 
 ALC861VD/660VD
 ==============
-  3stack	3-jack
-  3stack-dig	3-jack with SPDIF OUT
-  6stack-dig	6-jack with SPDIF OUT
-  3stack-660	3-jack (for ALC660VD)
-  3stack-660-digout 3-jack with SPDIF OUT (for ALC660VD)
-  lenovo	Lenovo 3000 C200
-  dallas	Dallas laptops
-  hp		HP TX1000
-  asus-v1s	ASUS V1Sn
-  auto		auto-config reading BIOS (default)
+  N/A
 
 CMI9880
 =======
@@ -289,7 +231,6 @@
   hp-dv6736	HP dv6736
   hp-f700	HP Compaq Presario F700
   ideapad	Lenovo IdeaPad laptop
-  lenovo-x200	Lenovo X200 laptop
   toshiba	Toshiba Satellite M300
 
 Conexant 5066
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index c82beb0..03e2771 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -447,7 +447,10 @@
 three numbers indicating the codec vendor-id (0x12345678 in the
 example), the codec subsystem-id (0xabcd1234) and the address (2) of
 the codec.  The rest patch entries are applied to this specified codec
-until another codec entry is given.
+until another codec entry is given.  Passing 0 or a negative number to
+the first or the second value will make the check of the corresponding
+field be skipped.  It'll be useful for really broken devices that don't
+initialize SSID properly.
 
 The `[model]` line allows to change the model name of the each codec.
 In the example above, it will be changed to model=auto.
@@ -491,7 +494,7 @@
 The hd-audio driver reads the file via request_firmware().  Thus,
 a patch file has to be located on the appropriate firmware path,
 typically, /lib/firmware.  For example, when you pass the option
-`patch=hda-init.fw`, the file /lib/firmware/hda-init-fw must be
+`patch=hda-init.fw`, the file /lib/firmware/hda-init.fw must be
 present.
 
 The patch module option is specific to each card instance, and you
@@ -524,6 +527,54 @@
 check the current value.  If it's non-zero, the feature is turned on.
 
 
+Tracepoints
+~~~~~~~~~~~
+The hd-audio driver gives a few basic tracepoints.
+`hda:hda_send_cmd` traces each CORB write while `hda:hda_get_response`
+traces the response from RIRB (only when read from the codec driver).
+`hda:hda_bus_reset` traces the bus-reset due to fatal error, etc,
+`hda:hda_unsol_event` traces the unsolicited events, and
+`hda:hda_power_down` and `hda:hda_power_up` trace the power down/up
+via power-saving behavior.
+
+Enabling all tracepoints can be done like
+------------------------------------------------------------------------
+  # echo 1 > /sys/kernel/debug/tracing/events/hda/enable
+------------------------------------------------------------------------
+then after some commands, you can traces from
+/sys/kernel/debug/tracing/trace file.  For example, when you want to
+trace what codec command is sent, enable the tracepoint like:
+------------------------------------------------------------------------
+  # cat /sys/kernel/debug/tracing/trace
+  # tracer: nop
+  #
+  #       TASK-PID    CPU#    TIMESTAMP  FUNCTION
+  #          | |       |          |         |
+         <...>-7807  [002] 105147.774889: hda_send_cmd: [0:0] val=e3a019
+         <...>-7807  [002] 105147.774893: hda_send_cmd: [0:0] val=e39019
+         <...>-7807  [002] 105147.999542: hda_send_cmd: [0:0] val=e3a01a
+         <...>-7807  [002] 105147.999543: hda_send_cmd: [0:0] val=e3901a
+         <...>-26764 [001] 349222.837143: hda_send_cmd: [0:0] val=e3a019
+         <...>-26764 [001] 349222.837148: hda_send_cmd: [0:0] val=e39019
+         <...>-26764 [001] 349223.058539: hda_send_cmd: [0:0] val=e3a01a
+         <...>-26764 [001] 349223.058541: hda_send_cmd: [0:0] val=e3901a
+------------------------------------------------------------------------
+Here `[0:0]` indicates the card number and the codec address, and
+`val` shows the value sent to the codec, respectively.  The value is
+a packed value, and you can decode it via hda-decode-verb program
+included in hda-emu package below.  For example, the value e3a019 is
+to set the left output-amp value to 25.
+------------------------------------------------------------------------
+  % hda-decode-verb 0xe3a019
+  raw value = 0x00e3a019
+  cid = 0, nid = 0x0e, verb = 0x3a0, parm = 0x19
+  raw value: verb = 0x3a0, parm = 0x19
+  verbname = set_amp_gain_mute
+  amp raw val = 0xa019
+  output, left, idx=0, mute=0, val=25
+------------------------------------------------------------------------
+
+
 Development Tree
 ~~~~~~~~~~~~~~~~
 The latest development codes for HD-audio are found on sound git tree:
diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt
index 7869f14..bc7226e 100644
--- a/Documentation/x86/entry_64.txt
+++ b/Documentation/x86/entry_64.txt
@@ -27,9 +27,6 @@
    magically-generated functions that make their way to do_IRQ with
    the interrupt number as a parameter.
 
- - emulate_vsyscall: int 0xcc, a special non-ABI entry used by
-   vsyscall emulation.
-
  - APIC interrupts: Various special-purpose interrupts for things
    like TLB shootdown.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 27af9c9..07e5dbd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -529,6 +529,7 @@
 F:	drivers/infiniband/hw/amso1100/
 
 ANALOG DEVICES INC ASOC CODEC DRIVERS
+M:	Lars-Peter Clausen <lars@metafoo.de>
 L:	device-drivers-devel@blackfin.uclinux.org
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 W:	http://wiki.analog.com/
@@ -4760,10 +4761,12 @@
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Grant Likely <grant.likely@secretlab.ca>
+M:	Rob Herring <rob.herring@calxeda.com>
 L:	devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
 W:	http://fdt.secretlab.ca
 T:	git git://git.secretlab.ca/git/linux-2.6.git
 S:	Maintained
+F:	Documentation/devicetree
 F:	drivers/of
 F:	include/linux/of*.h
 K:	of_get_property
@@ -6036,7 +6039,7 @@
 M:	Takashi Iwai <tiwai@suse.de>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 W:	http://www.alsa-project.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
 T:	git git://git.alsa-project.org/alsa-kernel.git
 S:	Maintained
 F:	Documentation/sound/
@@ -7257,6 +7260,7 @@
 W:	http://opensource.wolfsonmicro.com/content/linux-drivers-wolfson-devices
 S:	Supported
 F:	Documentation/hwmon/wm83??
+F:	arch/arm/mach-s3c64xx/mach-crag6410*
 F:	drivers/leds/leds-wm83*.c
 F:	drivers/input/misc/wm831x-on.c
 F:	drivers/input/touchscreen/wm831x-ts.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8090cad..5ca86e7 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -29,6 +29,7 @@
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
 	select GENERIC_IRQ_SHOW
+	select CPU_PM if (SUSPEND || CPU_IDLE)
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -211,6 +212,19 @@
 	  this feature (eg, building a kernel for a single machine) and
 	  you need to shrink the kernel to the minimal size.
 
+config NEED_MACH_MEMORY_H
+	bool
+	help
+	  Select this when mach/memory.h is required to provide special
+	  definitions for this platform.  The need for mach/memory.h should
+	  be avoided when possible.
+
+config PHYS_OFFSET
+	hex "Physical address of main memory"
+	depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
+	help
+	  Please provide the physical address corresponding to the
+	  location of main memory in your system.
 
 config GENERIC_BUG
 	def_bool y
@@ -247,6 +261,7 @@
 	select GENERIC_CLOCKEVENTS
 	select PLAT_VERSATILE
 	select PLAT_VERSATILE_FPGA_IRQ
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for ARM's Integrator platform.
 
@@ -262,6 +277,7 @@
 	select PLAT_VERSATILE_CLCD
 	select ARM_TIMER_SP804
 	select GPIO_PL061 if GPIOLIB
+	select NEED_MACH_MEMORY_H
 	help
 	  This enables support for ARM Ltd RealView boards.
 
@@ -322,6 +338,7 @@
 	bool "Cirrus Logic CLPS711x/EP721x-based"
 	select CPU_ARM720T
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Cirrus Logic 711x/721x based boards.
 
@@ -361,6 +378,7 @@
 	select ISA
 	select NO_IOPORT
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  This is an evaluation board for the StrongARM processor available
 	  from Digital. It has limited hardware on-board, including an
@@ -376,6 +394,7 @@
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MEMORY_H
 	help
 	  This enables support for the Cirrus EP93xx series of CPUs.
 
@@ -385,6 +404,7 @@
 	select FOOTBRIDGE
 	select GENERIC_CLOCKEVENTS
 	select HAVE_IDE
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for systems based on the DC21285 companion chip
 	  ("FootBridge"), such as the Simtec CATS and the Rebel NetWinder.
@@ -434,6 +454,7 @@
 	select PCI
 	select ARCH_SUPPORTS_MSI
 	select VMSPLIT_1G
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Intel's IOP13XX (XScale) family of processors.
 
@@ -464,6 +485,7 @@
 	select CPU_XSC3
  	select PCI
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Intel's IXP23xx (XScale) family of processors.
 
@@ -473,6 +495,7 @@
 	select CPU_XSCALE
 	select PCI
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Intel's IXP2400/2800 (XScale) family of processors.
 
@@ -565,6 +588,7 @@
 	select CPU_ARM922T
 	select ARCH_REQUIRE_GPIOLIB
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based
 	  System-on-Chip devices.
@@ -657,6 +681,7 @@
 	select SPARSE_IRQ
 	select MULTI_IRQ_HANDLER
 	select PM_GENERIC_DOMAINS if PM
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for Renesas's SH-Mobile and R-Mobile ARM platforms.
 
@@ -672,6 +697,7 @@
 	select ARCH_SPARSEMEM_ENABLE
 	select ARCH_USES_GETTIMEOFFSET
 	select HAVE_IDE
+	select NEED_MACH_MEMORY_H
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
 	  CD-ROM interface, serial and parallel port, and the floppy drive.
@@ -691,6 +717,7 @@
 	select TICK_ONESHOT
 	select ARCH_REQUIRE_GPIOLIB
 	select HAVE_IDE
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for StrongARM 11x0 based boards.
 
@@ -782,6 +809,7 @@
 	select HAVE_S3C2410_I2C if I2C
 	select HAVE_S3C_RTC if RTC_CLASS
 	select HAVE_S3C2410_WATCHDOG if WATCHDOG
+	select NEED_MACH_MEMORY_H
 	help
 	  Samsung S5PV210/S5PC110 series based systems
 
@@ -798,6 +826,7 @@
 	select HAVE_S3C_RTC if RTC_CLASS
 	select HAVE_S3C2410_I2C if I2C
 	select HAVE_S3C2410_WATCHDOG if WATCHDOG
+	select NEED_MACH_MEMORY_H
 	help
 	  Samsung EXYNOS4 series based systems
 
@@ -809,6 +838,7 @@
 	select ZONE_DMA
 	select PCI
 	select ARCH_USES_GETTIMEOFFSET
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for the StrongARM based Digital DNARD machine, also known
 	  as "Shark" (<http://www.shark-linux.de/shark.html>).
@@ -836,6 +866,8 @@
 	select CLKDEV_LOOKUP
 	select HAVE_MACH_CLKDEV
 	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
+	select NEED_MACH_MEMORY_H
 	help
 	  Support for ST-Ericsson U300 series mobile platforms.
 
@@ -1834,6 +1866,38 @@
 
 endchoice
 
+config ARM_APPENDED_DTB
+	bool "Use appended device tree blob to zImage (EXPERIMENTAL)"
+	depends on OF && !ZBOOT_ROM && EXPERIMENTAL
+	help
+	  With this option, the boot code will look for a device tree binary
+	  (DTB) appended to zImage
+	  (e.g. cat zImage <filename>.dtb > zImage_w_dtb).
+
+	  This is meant as a backward compatibility convenience for those
+	  systems with a bootloader that can't be upgraded to accommodate
+	  the documented boot protocol using a device tree.
+
+	  Beware that there is very little in terms of protection against
+	  this option being confused by leftover garbage in memory that might
+	  look like a DTB header after a reboot if no actual DTB is appended
+	  to zImage.  Do not leave this option active in a production kernel
+	  if you don't intend to always append a DTB.  Proper passing of the
+	  location into r2 of a bootloader provided DTB is always preferable
+	  to this option.
+
+config ARM_ATAG_DTB_COMPAT
+	bool "Supplement the appended DTB with traditional ATAG information"
+	depends on ARM_APPENDED_DTB
+	help
+	  Some old bootloaders can't be updated to a DTB capable one, yet
+	  they provide ATAGs with memory configuration, the ramdisk address,
+	  the kernel cmdline string, etc.  Such information is dynamically
+	  provided by the bootloader and can't always be stored in a static
+	  DTB.  To allow a device tree enabled kernel to be used with such
+	  bootloaders, this option allows zImage to extract the information
+	  from the ATAG list and store it at run time into the appended DTB.
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index df3eb3c..f283938 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -158,4 +158,10 @@
 	  The uncompressor code port configuration is now handled
 	  by CONFIG_S3C_LOWLEVEL_UART_PORT.
 
+config ARM_KPROBES_TEST
+	tristate "Kprobes test module"
+	depends on KPROBES && MODULES
+	help
+	  Perform tests of kprobes API and instruction set simulation.
+
 endmenu
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index c602896..e0936a1 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -5,3 +5,12 @@
 piggy.lzma
 vmlinux
 vmlinux.lds
+
+# borrowed libfdt files
+fdt.c
+fdt.h
+fdt_ro.c
+fdt_rw.c
+fdt_wip.c
+libfdt.h
+libfdt_internal.h
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index a6b30b3..21f56ff 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -26,6 +26,10 @@
 OBJS	+= misc.o decompress.o
 FONTC	= $(srctree)/drivers/video/console/font_acorn_8x8.c
 
+# string library code (-Os is enforced to keep it much smaller)
+OBJS		+= string.o
+CFLAGS_string.o	:= -Os
+
 #
 # Architecture dependencies
 #
@@ -89,21 +93,41 @@
 suffix_$(CONFIG_KERNEL_LZO)  = lzo
 suffix_$(CONFIG_KERNEL_LZMA) = lzma
 
+# Borrowed libfdt files for the ATAG compatibility mode
+
+libfdt		:= fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
+libfdt_hdrs	:= fdt.h libfdt.h libfdt_internal.h
+
+libfdt_objs	:= $(addsuffix .o, $(basename $(libfdt)))
+
+$(addprefix $(obj)/,$(libfdt) $(libfdt_hdrs)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/%
+	$(call cmd,shipped)
+
+$(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \
+	$(addprefix $(obj)/,$(libfdt_hdrs))
+
+ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
+OBJS	+= $(libfdt_objs) atags_to_fdt.o
+endif
+
 targets       := vmlinux vmlinux.lds \
 		 piggy.$(suffix_y) piggy.$(suffix_y).o \
-		 font.o font.c head.o misc.o $(OBJS)
+		 lib1funcs.o lib1funcs.S font.o font.c head.o misc.o $(OBJS)
 
 # Make sure files are removed during clean
-extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S
+extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S $(libfdt) $(libfdt_hdrs)
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
-ccflags-y := -fpic -fno-builtin
+ccflags-y := -fpic -fno-builtin -I$(obj)
 asflags-y := -Wa,-march=all
 
+# Supply kernel BSS size to the decompressor via a linker symbol.
+KBSS_SZ = $(shell size $(obj)/../../../../vmlinux | awk 'END{print $$3}')
+LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
 LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
@@ -123,7 +147,7 @@
 # For __aeabi_uidivmod
 lib1funcs = $(obj)/lib1funcs.o
 
-$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE
+$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S
 	$(call cmd,shipped)
 
 # We need to prevent any GOTOFF relocs being used with references
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
new file mode 100644
index 0000000..6ce11c4
--- /dev/null
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -0,0 +1,97 @@
+#include <asm/setup.h>
+#include <libfdt.h>
+
+static int node_offset(void *fdt, const char *node_path)
+{
+	int offset = fdt_path_offset(fdt, node_path);
+	if (offset == -FDT_ERR_NOTFOUND)
+		offset = fdt_add_subnode(fdt, 0, node_path);
+	return offset;
+}
+
+static int setprop(void *fdt, const char *node_path, const char *property,
+		   uint32_t *val_array, int size)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop(fdt, offset, property, val_array, size);
+}
+
+static int setprop_string(void *fdt, const char *node_path,
+			  const char *property, const char *string)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_string(fdt, offset, property, string);
+}
+
+static int setprop_cell(void *fdt, const char *node_path,
+			const char *property, uint32_t val)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_cell(fdt, offset, property, val);
+}
+
+/*
+ * Convert and fold provided ATAGs into the provided FDT.
+ *
+ * REturn values:
+ *    = 0 -> pretend success
+ *    = 1 -> bad ATAG (may retry with another possible ATAG pointer)
+ *    < 0 -> error from libfdt
+ */
+int atags_to_fdt(void *atag_list, void *fdt, int total_space)
+{
+	struct tag *atag = atag_list;
+	uint32_t mem_reg_property[2 * NR_BANKS];
+	int memcount = 0;
+	int ret;
+
+	/* make sure we've got an aligned pointer */
+	if ((u32)atag_list & 0x3)
+		return 1;
+
+	/* if we get a DTB here we're done already */
+	if (*(u32 *)atag_list == fdt32_to_cpu(FDT_MAGIC))
+	       return 0;
+
+	/* validate the ATAG */
+	if (atag->hdr.tag != ATAG_CORE ||
+	    (atag->hdr.size != tag_size(tag_core) &&
+	     atag->hdr.size != 2))
+		return 1;
+
+	/* let's give it all the room it could need */
+	ret = fdt_open_into(fdt, fdt, total_space);
+	if (ret < 0)
+		return ret;
+
+	for_each_tag(atag, atag_list) {
+		if (atag->hdr.tag == ATAG_CMDLINE) {
+			setprop_string(fdt, "/chosen", "bootargs",
+					atag->u.cmdline.cmdline);
+		} else if (atag->hdr.tag == ATAG_MEM) {
+			if (memcount >= sizeof(mem_reg_property)/4)
+				continue;
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+		} else if (atag->hdr.tag == ATAG_INITRD2) {
+			uint32_t initrd_start, initrd_size;
+			initrd_start = atag->u.initrd.start;
+			initrd_size = atag->u.initrd.size;
+			setprop_cell(fdt, "/chosen", "linux,initrd-start",
+					initrd_start);
+			setprop_cell(fdt, "/chosen", "linux,initrd-end",
+					initrd_start + initrd_size);
+		}
+	}
+
+	if (memcount)
+		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+
+	return fdt_pack(fdt);
+}
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index e95a598..c2effc9 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -216,6 +216,104 @@
 		mov	r10, r6
 #endif
 
+		mov	r5, #0			@ init dtb size to 0
+#ifdef CONFIG_ARM_APPENDED_DTB
+/*
+ *   r0  = delta
+ *   r2  = BSS start
+ *   r3  = BSS end
+ *   r4  = final kernel address
+ *   r5  = appended dtb size (still unknown)
+ *   r6  = _edata
+ *   r7  = architecture ID
+ *   r8  = atags/device tree pointer
+ *   r9  = size of decompressed image
+ *   r10 = end of this image, including  bss/stack/malloc space if non XIP
+ *   r11 = GOT start
+ *   r12 = GOT end
+ *   sp  = stack pointer
+ *
+ * if there are device trees (dtb) appended to zImage, advance r10 so that the
+ * dtb data will get relocated along with the kernel if necessary.
+ */
+
+		ldr	lr, [r6, #0]
+#ifndef __ARMEB__
+		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
+#else
+		ldr	r1, =0xd00dfeed
+#endif
+		cmp	lr, r1
+		bne	dtb_check_done		@ not found
+
+#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
+		/*
+		 * OK... Let's do some funky business here.
+		 * If we do have a DTB appended to zImage, and we do have
+		 * an ATAG list around, we want the later to be translated
+		 * and folded into the former here.  To be on the safe side,
+		 * let's temporarily move  the stack away into the malloc
+		 * area.  No GOT fixup has occurred yet, but none of the
+		 * code we're about to call uses any global variable.
+		*/
+		add	sp, sp, #0x10000
+		stmfd	sp!, {r0-r3, ip, lr}
+		mov	r0, r8
+		mov	r1, r6
+		sub	r2, sp, r6
+		bl	atags_to_fdt
+
+		/*
+		 * If returned value is 1, there is no ATAG at the location
+		 * pointed by r8.  Try the typical 0x100 offset from start
+		 * of RAM and hope for the best.
+		 */
+		cmp	r0, #1
+		sub	r0, r4, #TEXT_OFFSET
+		add	r0, r0, #0x100
+		mov	r1, r6
+		sub	r2, sp, r6
+		blne	atags_to_fdt
+
+		ldmfd	sp!, {r0-r3, ip, lr}
+		sub	sp, sp, #0x10000
+#endif
+
+		mov	r8, r6			@ use the appended device tree
+
+		/*
+		 * Make sure that the DTB doesn't end up in the final
+		 * kernel's .bss area. To do so, we adjust the decompressed
+		 * kernel size to compensate if that .bss size is larger
+		 * than the relocated code.
+		 */
+		ldr	r5, =_kernel_bss_size
+		adr	r1, wont_overwrite
+		sub	r1, r6, r1
+		subs	r1, r5, r1
+		addhi	r9, r9, r1
+
+		/* Get the dtb's size */
+		ldr	r5, [r6, #4]
+#ifndef __ARMEB__
+		/* convert r5 (dtb size) to little endian */
+		eor	r1, r5, r5, ror #16
+		bic	r1, r1, #0x00ff0000
+		mov	r5, r5, ror #8
+		eor	r5, r5, r1, lsr #8
+#endif
+
+		/* preserve 64-bit alignment */
+		add	r5, r5, #7
+		bic	r5, r5, #7
+
+		/* relocate some pointers past the appended dtb */
+		add	r6, r6, r5
+		add	r10, r10, r5
+		add	sp, sp, r5
+dtb_check_done:
+#endif
+
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
@@ -223,15 +321,14 @@
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 - 16k page directory >= r10 -> OK
- *   r4 + image length <= current position (pc) -> OK
+ *   r4 + image length <= address of wont_overwrite -> OK
  */
 		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-   ARM(		cmp	r10, pc		)
- THUMB(		mov	lr, pc		)
- THUMB(		cmp	r10, lr		)
+		adr	r9, wont_overwrite
+		cmp	r10, r9
 		bls	wont_overwrite
 
 /*
@@ -285,14 +382,16 @@
  *   r2  = BSS start
  *   r3  = BSS end
  *   r4  = kernel execution address
+ *   r5  = appended dtb size (0 if not present)
  *   r7  = architecture ID
  *   r8  = atags pointer
  *   r11 = GOT start
  *   r12 = GOT end
  *   sp  = stack pointer
  */
-		teq	r0, #0
+		orrs	r1, r0, r5
 		beq	not_relocated
+
 		add	r11, r11, r0
 		add	r12, r12, r0
 
@@ -307,12 +406,21 @@
 
 		/*
 		 * Relocate all entries in the GOT table.
+		 * Bump bss entries to _edata + dtb size
 		 */
 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
-		add	r1, r1, r0		@ table.  This fixes up the
-		str	r1, [r11], #4		@ C references.
+		add	r1, r1, r0		@ This fixes up C references
+		cmp	r1, r2			@ if entry >= bss_start &&
+		cmphs	r3, r1			@       bss_end > entry
+		addhi	r1, r1, r5		@    entry += dtb size
+		str	r1, [r11], #4		@ next entry
 		cmp	r11, r12
 		blo	1b
+
+		/* bump our bss pointers too */
+		add	r2, r2, r5
+		add	r3, r3, r5
+
 #else
 
 		/*
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
new file mode 100644
index 0000000..1f4e718
--- /dev/null
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -0,0 +1,15 @@
+#ifndef _ARM_LIBFDT_ENV_H
+#define _ARM_LIBFDT_ENV_H
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
+
+#endif
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 832d372..8e2a8fc 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -18,14 +18,9 @@
 
 unsigned int __machine_arch_type;
 
-#define _LINUX_STRING_H_
-
 #include <linux/compiler.h>	/* for inline */
-#include <linux/types.h>	/* for size_t */
-#include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 #include <linux/linkage.h>
-#include <asm/string.h>
-
 
 static void putstr(const char *ptr);
 extern void error(char *x);
@@ -101,41 +96,6 @@
 	flush();
 }
 
-
-void *memcpy(void *__dest, __const void *__src, size_t __n)
-{
-	int i = 0;
-	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
-
-	for (i = __n >> 3; i > 0; i--) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 2) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 1) {
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1)
-		*d++ = *s++;
-
-	return __dest;
-}
-
 /*
  * gzip declarations
  */
diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c
new file mode 100644
index 0000000..36e53ef
--- /dev/null
+++ b/arch/arm/boot/compressed/string.c
@@ -0,0 +1,127 @@
+/*
+ * arch/arm/boot/compressed/string.c
+ *
+ * Small subset of simple string routines
+ */
+
+#include <linux/string.h>
+
+void *memcpy(void *__dest, __const void *__src, size_t __n)
+{
+	int i = 0;
+	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
+
+	for (i = __n >> 3; i > 0; i--) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 2) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1)
+		*d++ = *s++;
+
+	return __dest;
+}
+
+void *memmove(void *__dest, __const void *__src, size_t count)
+{
+	unsigned char *d = __dest;
+	const unsigned char *s = __src;
+
+	if (__dest == __src)
+		return __dest;
+
+	if (__dest < __src)
+		return memcpy(__dest, __src, count);
+
+	while (count--)
+		d[count] = s[count];
+	return __dest;
+}
+
+size_t strlen(const char *s)
+{
+	const char *sc = s;
+
+	while (*sc != '\0')
+		sc++;
+	return sc - s;
+}
+
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *su1 = cs, *su2 = ct, *end = su1 + count;
+	int res = 0;
+
+	while (su1 < end) {
+		res = *su1++ - *su2++;
+		if (res)
+			break;
+	}
+	return res;
+}
+
+int strcmp(const char *cs, const char *ct)
+{
+	unsigned char c1, c2;
+	int res = 0;
+
+	do {
+		c1 = *cs++;
+		c2 = *ct++;
+		res = c1 - c2;
+		if (res)
+			break;
+	} while (c1);
+	return res;
+}
+
+void *memchr(const void *s, int c, size_t count)
+{
+	const unsigned char *p = s;
+
+	while (count--)
+		if ((unsigned char)c == *p++)
+			return (void *)(p - 1);
+	return NULL;
+}
+
+char *strchr(const char *s, int c)
+{
+	while (*s != (char)c)
+		if (*s++ == '\0')
+			return NULL;
+	return (char *)s;
+}
+
+#undef memset
+
+void *memset(void *s, int c, size_t count)
+{
+	char *xs = s;
+	while (count--)
+		*xs++ = c;
+	return s;
+}
+
+void __memzero(void *s, size_t count)
+{
+	memset(s, 0, count);
+}
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 4e72883..4919f2a 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -51,6 +51,10 @@
   _got_start = .;
   .got			: { *(.got) }
   _got_end = .;
+
+  /* ensure the zImage file size is always a multiple of 64 bits */
+  /* (without a dummy byte, ld just ignores the empty section) */
+  .pad			: { BYTE(0); . = ALIGN(8); }
   _edata = .;
 
   . = BSS_START;
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index bdbb3f7..a8fc6b2 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -26,8 +26,12 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/smp.h>
+#include <linux/cpu_pm.h>
 #include <linux/cpumask.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
 
 #include <asm/irq.h>
 #include <asm/mach/irq.h>
@@ -262,6 +266,7 @@
 	u32 cpumask;
 	void __iomem *base = gic->dist_base;
 	u32 cpu = 0;
+	u32 nrppis = 0, ppi_base = 0;
 
 #ifdef CONFIG_SMP
 	cpu = cpu_logical_map(smp_processor_id());
@@ -282,6 +287,25 @@
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
 
+	gic->gic_irqs = gic_irqs;
+
+	/*
+	 * Nobody would be insane enough to use PPIs on a secondary
+	 * GIC, right?
+	 */
+	if (gic == &gic_data[0]) {
+		nrppis = (32 - irq_start) & 31;
+
+		/* The GIC only supports up to 16 PPIs. */
+		if (nrppis > 16)
+			BUG();
+
+		ppi_base = gic->irq_offset + 32 - nrppis;
+	}
+
+	pr_info("Configuring GIC with %d sources (%d PPIs)\n",
+		gic_irqs, (gic == &gic_data[0]) ? nrppis : 0);
+
 	/*
 	 * Set all global interrupts to be level triggered, active low.
 	 */
@@ -317,7 +341,17 @@
 	/*
 	 * Setup the Linux IRQ subsystem.
 	 */
-	for (i = irq_start; i < irq_limit; i++) {
+	for (i = 0; i < nrppis; i++) {
+		int ppi = i + ppi_base;
+
+		irq_set_percpu_devid(ppi);
+		irq_set_chip_and_handler(ppi, &gic_chip,
+					 handle_percpu_devid_irq);
+		irq_set_chip_data(ppi, gic);
+		set_irq_flags(ppi, IRQF_VALID | IRQF_NOAUTOEN);
+	}
+
+	for (i = irq_start + nrppis; i < irq_limit; i++) {
 		irq_set_chip_and_handler(i, &gic_chip, handle_fasteoi_irq);
 		irq_set_chip_data(i, gic);
 		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
@@ -349,6 +383,189 @@
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+#ifdef CONFIG_CPU_PM
+/*
+ * Saves the GIC distributor registers during suspend or idle.  Must be called
+ * with interrupts disabled but before powering down the GIC.  After calling
+ * this function, no interrupts will be delivered by the GIC, and another
+ * platform-specific wakeup source must be enabled.
+ */
+static void gic_dist_save(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	void __iomem *dist_base;
+	int i;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		gic_data[gic_nr].saved_spi_conf[i] =
+			readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		gic_data[gic_nr].saved_spi_target[i] =
+			readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		gic_data[gic_nr].saved_spi_enable[i] =
+			readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+}
+
+/*
+ * Restores the GIC distributor registers during resume or when coming out of
+ * idle.  Must be called before enabling interrupts.  If a level interrupt
+ * that occured while the GIC was suspended is still present, it will be
+ * handled normally, but any edge interrupts that occured will not be seen by
+ * the GIC and need to be handled by the platform-specific wakeup source.
+ */
+static void gic_dist_restore(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	unsigned int i;
+	void __iomem *dist_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	writel_relaxed(0, dist_base + GIC_DIST_CTRL);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_conf[i],
+			dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(0xa0a0a0a0,
+			dist_base + GIC_DIST_PRI + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_target[i],
+			dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
+			dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	writel_relaxed(1, dist_base + GIC_DIST_CTRL);
+}
+
+static void gic_cpu_save(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+}
+
+static void gic_cpu_restore(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(32, 4); i++)
+		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
+
+	writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
+	writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
+}
+
+static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
+{
+	int i;
+
+	for (i = 0; i < MAX_GIC_NR; i++) {
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			gic_cpu_save(i);
+			break;
+		case CPU_PM_ENTER_FAILED:
+		case CPU_PM_EXIT:
+			gic_cpu_restore(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER:
+			gic_dist_save(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER_FAILED:
+		case CPU_CLUSTER_PM_EXIT:
+			gic_dist_restore(i);
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block gic_notifier_block = {
+	.notifier_call = gic_notifier,
+};
+
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+	gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_enable);
+
+	gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_conf);
+
+	cpu_pm_register_notifier(&gic_notifier_block);
+}
+#else
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+}
+#endif
+
 void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	void __iomem *dist_base, void __iomem *cpu_base)
 {
@@ -364,8 +581,10 @@
 	if (gic_nr == 0)
 		gic_cpu_base_addr = cpu_base;
 
+	gic_chip.flags |= gic_arch_extn.flags;
 	gic_dist_init(gic, irq_start);
 	gic_cpu_init(gic);
+	gic_pm_init(gic);
 }
 
 void __cpuinit gic_secondary_init(unsigned int gic_nr)
@@ -375,16 +594,6 @@
 	gic_cpu_init(&gic_data[gic_nr]);
 }
 
-void __cpuinit gic_enable_ppi(unsigned int irq)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	irq_set_status_flags(irq, IRQ_NOPROBE);
-	gic_unmask_irq(irq_get_irq_data(irq));
-	local_irq_restore(flags);
-}
-
 #ifdef CONFIG_SMP
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 0569de6..61691cd 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -718,6 +718,10 @@
 		goto err_free;
 	}
 
+	ret = clk_prepare(sachip->clk);
+	if (ret)
+		goto err_clkput;
+
 	spin_lock_init(&sachip->lock);
 
 	sachip->dev = me;
@@ -733,7 +737,7 @@
 	sachip->base = ioremap(mem->start, PAGE_SIZE * 2);
 	if (!sachip->base) {
 		ret = -ENOMEM;
-		goto err_clkput;
+		goto err_clk_unprep;
 	}
 
 	/*
@@ -809,6 +813,8 @@
 
  err_unmap:
 	iounmap(sachip->base);
+ err_clk_unprep:
+	clk_unprepare(sachip->clk);
  err_clkput:
 	clk_put(sachip->clk);
  err_free:
@@ -835,6 +841,7 @@
 	sa1111_writel(0, irqbase + SA1111_WAKEEN1);
 
 	clk_disable(sachip->clk);
+	clk_unprepare(sachip->clk);
 
 	if (sachip->irq != NO_IRQ) {
 		irq_set_chained_handler(sachip->irq, NULL);
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index a07b0e7..1cde34a 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -12,11 +12,11 @@
  */
 
 #include <linux/device.h>
+#include <linux/gpio.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <asm/gpio.h>
 #include <asm/hardware/scoop.h>
 
 /* PCMCIA to Scoop linkage
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index 41df478..2393b5b 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -41,9 +41,17 @@
 		return PTR_ERR(clk);
 	}
 
+	err = clk_prepare(clk);
+	if (err) {
+		pr_err("sp804: %s clock failed to prepare: %d\n", name, err);
+		clk_put(clk);
+		return err;
+	}
+
 	err = clk_enable(clk);
 	if (err) {
 		pr_err("sp804: %s clock failed to enable: %d\n", name, err);
+		clk_unprepare(clk);
 		clk_put(clk);
 		return err;
 	}
@@ -52,6 +60,7 @@
 	if (rate < 0) {
 		pr_err("sp804: %s clock failed to get rate: %ld\n", name, rate);
 		clk_disable(clk);
+		clk_unprepare(clk);
 		clk_put(clk);
 	}
 
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 28b7ee8..cb3b7c9 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -205,6 +205,13 @@
 int dma_mmap_writecombine(struct device *, struct vm_area_struct *,
 		void *, dma_addr_t, size_t);
 
+/*
+ * This can be called during boot to increase the size of the consistent
+ * DMA region above it's default value of 2MB. It must be called before the
+ * memory allocator is initialised, i.e. before any core_initcall.
+ */
+extern void __init init_consistent_dma_size(unsigned long size);
+
 
 #ifdef CONFIG_DMABOUNCE
 /*
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 2f1e209..88d6181 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -25,13 +25,6 @@
 	movne	r1, sp
 	adrne	lr, BSYM(1b)
 	bne	do_IPI
-
-#ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r2, r6, lr
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_local_timer
-#endif
 #endif
 9997:
 	.endm
diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h
index 166a7a3..11ad0bf 100644
--- a/arch/arm/include/asm/gpio.h
+++ b/arch/arm/include/asm/gpio.h
@@ -4,4 +4,23 @@
 /* not all ARM platforms necessarily support this API ... */
 #include <mach/gpio.h>
 
+#ifndef __ARM_GPIOLIB_COMPLEX
+/* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */
+#include <asm-generic/gpio.h>
+
+/* The trivial gpiolib dispatchers */
+#define gpio_get_value  __gpio_get_value
+#define gpio_set_value  __gpio_set_value
+#define gpio_cansleep   __gpio_cansleep
+#endif
+
+/*
+ * Provide a default gpio_to_irq() which should satisfy every case.
+ * However, some platforms want to do this differently, so allow them
+ * to override it.
+ */
+#ifndef gpio_to_irq
+#define gpio_to_irq	__gpio_to_irq
+#endif
+
 #endif /* _ARCH_ARM_GPIO_H */
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 89ad180..ddf07a9 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -9,9 +9,6 @@
 
 typedef struct {
 	unsigned int __softirq_pending;
-#ifdef CONFIG_LOCAL_TIMERS
-	unsigned int local_timer_irqs;
-#endif
 #ifdef CONFIG_SMP
 	unsigned int ipi_irqs[NR_IPI];
 #endif
diff --git a/arch/arm/include/asm/hardware/entry-macro-gic.S b/arch/arm/include/asm/hardware/entry-macro-gic.S
index c115b82..74ebc80 100644
--- a/arch/arm/include/asm/hardware/entry-macro-gic.S
+++ b/arch/arm/include/asm/hardware/entry-macro-gic.S
@@ -22,15 +22,11 @@
  * interrupt controller spec.  To wit:
  *
  * Interrupts 0-15 are IPI
- * 16-28 are reserved
- * 29-31 are local.  We allow 30 to be used for the watchdog.
+ * 16-31 are local.  We allow 30 to be used for the watchdog.
  * 32-1020 are global
  * 1021-1022 are reserved
  * 1023 is "spurious" (no interrupt)
  *
- * For now, we ignore all local interrupts so only return an interrupt if it's
- * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
- *
  * A simple read from the controller will tell us the number of the highest
  * priority enabled interrupt.  We then just need to check whether it is in the
  * valid range for an IRQ (30-1020 inclusive).
@@ -43,7 +39,7 @@
 
 	ldr	\tmp, =1021
 	bic     \irqnr, \irqstat, #0x1c00
-	cmp     \irqnr, #29
+	cmp     \irqnr, #15
 	cmpcc	\irqnr, \irqnr
 	cmpne	\irqnr, \tmp
 	cmpcs	\irqnr, \irqnr
@@ -62,14 +58,3 @@
 	strcc	\irqstat, [\base, #GIC_CPU_EOI]
 	cmpcs	\irqnr, \irqnr
 	.endm
-
-/* As above, this assumes that irqstat and base are preserved.. */
-
-	.macro test_for_ltirq, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	mov 	\tmp, #0
-	cmp	\irqnr, #29
-	moveq	\tmp, #1
-	streq	\irqstat, [\base, #GIC_CPU_EOI]
-	cmp	\tmp, #0
-	.endm
diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
index 435d3f8..14867e1 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -40,12 +40,19 @@
 void gic_secondary_init(unsigned int);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq);
-void gic_enable_ppi(unsigned int);
 
 struct gic_chip_data {
 	unsigned int irq_offset;
 	void __iomem *dist_base;
 	void __iomem *cpu_base;
+#ifdef CONFIG_CPU_PM
+	u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
+	u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
+	u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
+	u32 __percpu *saved_ppi_enable;
+	u32 __percpu *saved_ppi_conf;
+#endif
+	unsigned int gic_irqs;
 };
 #endif
 
diff --git a/arch/arm/include/asm/hardware/iop3xx-gpio.h b/arch/arm/include/asm/hardware/iop3xx-gpio.h
index b69d972..9eda7dc 100644
--- a/arch/arm/include/asm/hardware/iop3xx-gpio.h
+++ b/arch/arm/include/asm/hardware/iop3xx-gpio.h
@@ -28,6 +28,8 @@
 #include <mach/hardware.h>
 #include <asm-generic/gpio.h>
 
+#define __ARM_GPIOLIB_COMPLEX
+
 #define IOP3XX_N_GPIOS	8
 
 static inline int gpio_get_value(unsigned gpio)
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index f389b27..c190bc9 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -50,6 +50,7 @@
 #define ARM_DEBUG_ARCH_V6_1	2
 #define ARM_DEBUG_ARCH_V7_ECP14	3
 #define ARM_DEBUG_ARCH_V7_MM	4
+#define ARM_DEBUG_ARCH_V7_1	5
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
@@ -57,6 +58,7 @@
 /* Watchpoints */
 #define ARM_BREAKPOINT_LOAD	1
 #define ARM_BREAKPOINT_STORE	2
+#define ARM_FSR_ACCESS_MASK	(1 << 11)
 
 /* Privilege Levels */
 #define ARM_BREAKPOINT_PRIV	1
diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h
index 6fd955d..c6a1842 100644
--- a/arch/arm/include/asm/localtimer.h
+++ b/arch/arm/include/asm/localtimer.h
@@ -11,6 +11,7 @@
 #define __ASM_ARM_LOCALTIMER_H
 
 #include <linux/errno.h>
+#include <linux/interrupt.h>
 
 struct clock_event_device;
 
@@ -19,31 +20,20 @@
  */
 void percpu_timer_setup(void);
 
-/*
- * Called from assembly, this is the local timer IRQ handler
- */
-asmlinkage void do_local_timer(struct pt_regs *);
-
-/*
- * Called from C code
- */
-void handle_local_timer(struct pt_regs *);
-
 #ifdef CONFIG_LOCAL_TIMERS
 
 #ifdef CONFIG_HAVE_ARM_TWD
 
 #include "smp_twd.h"
 
-#define local_timer_ack()	twd_timer_ack()
+#define local_timer_stop(c)	twd_timer_stop((c))
 
 #else
 
 /*
- * Platform provides this to acknowledge a local timer IRQ.
- * Returns true if the local timer IRQ is to be processed.
+ * Stop the local timer
  */
-int local_timer_ack(void);
+void local_timer_stop(struct clock_event_device *);
 
 #endif
 
@@ -58,6 +48,10 @@
 {
 	return -ENXIO;
 }
+
+static inline void local_timer_stop(struct clock_event_device *evt)
+{
+}
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index c569998..7d19425 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -17,7 +17,7 @@
 struct machine_desc {
 	unsigned int		nr;		/* architecture number	*/
 	const char		*name;		/* architecture name	*/
-	unsigned long		boot_params;	/* tagged list		*/
+	unsigned long		atag_offset;	/* tagged list (relative) */
 	const char		**dt_compat;	/* array of device tree
 						 * 'compatible' strings	*/
 
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index d2fedb5..b36f365 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -29,6 +29,7 @@
 #define MT_MEMORY_NONCACHED	11
 #define MT_MEMORY_DTCM		12
 #define MT_MEMORY_ITCM		13
+#define MT_MEMORY_SO		14
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 441fc4f..a8997d7 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -16,9 +16,12 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
-#include <mach/memory.h>
 #include <asm/sizes.h>
 
+#ifdef CONFIG_NEED_MACH_MEMORY_H
+#include <mach/memory.h>
+#endif
+
 /*
  * Allow for constants defined here to be used from assembly code
  * by prepending the UL suffix only with actual C code compilation.
@@ -77,16 +80,7 @@
  */
 #define IOREMAP_MAX_ORDER	24
 
-/*
- * Size of DMA-consistent memory region.  Must be multiple of 2M,
- * between 2MB and 14MB inclusive.
- */
-#ifndef CONSISTENT_DMA_SIZE
-#define CONSISTENT_DMA_SIZE 	SZ_2M
-#endif
-
 #define CONSISTENT_END		(0xffe00000UL)
-#define CONSISTENT_BASE		(CONSISTENT_END - CONSISTENT_DMA_SIZE)
 
 #else /* CONFIG_MMU */
 
@@ -193,7 +187,11 @@
 #endif
 
 #ifndef PHYS_OFFSET
+#ifdef PLAT_PHYS_OFFSET
 #define PHYS_OFFSET	PLAT_PHYS_OFFSET
+#else
+#define PHYS_OFFSET	UL(CONFIG_PHYS_OFFSET)
+#endif
 #endif
 
 /*
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 8ade184..9451dce 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -101,6 +101,9 @@
 #define pgprot_writecombine(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
 
+#define pgprot_stronglyordered(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index b7e82c4..71d99b8 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -13,7 +13,12 @@
 #define __ARM_PMU_H__
 
 #include <linux/interrupt.h>
+#include <linux/perf_event.h>
 
+/*
+ * Types of PMUs that can be accessed directly and require mutual
+ * exclusion between profiling tools.
+ */
 enum arm_pmu_type {
 	ARM_PMU_DEVICE_CPU	= 0,
 	ARM_NUM_PMU_DEVICES,
@@ -37,21 +42,17 @@
  * reserve_pmu() - reserve the hardware performance counters
  *
  * Reserve the hardware performance counters in the system for exclusive use.
- * The platform_device for the system is returned on success, ERR_PTR()
- * encoded error on failure.
+ * Returns 0 on success or -EBUSY if the lock is already held.
  */
-extern struct platform_device *
+extern int
 reserve_pmu(enum arm_pmu_type type);
 
 /**
  * release_pmu() - Relinquish control of the performance counters
  *
  * Release the performance counters and allow someone else to use them.
- * Callers must have disabled the counters and released IRQs before calling
- * this. The platform_device returned from reserve_pmu() must be passed as
- * a cookie.
  */
-extern int
+extern void
 release_pmu(enum arm_pmu_type type);
 
 /**
@@ -68,24 +69,78 @@
 
 #include <linux/err.h>
 
-static inline struct platform_device *
+static inline int
 reserve_pmu(enum arm_pmu_type type)
 {
-	return ERR_PTR(-ENODEV);
-}
-
-static inline int
-release_pmu(enum arm_pmu_type type)
-{
 	return -ENODEV;
 }
 
-static inline int
-init_pmu(enum arm_pmu_type type)
-{
-	return -ENODEV;
-}
+static inline void
+release_pmu(enum arm_pmu_type type)	{ }
 
 #endif /* CONFIG_CPU_HAS_PMU */
 
+#ifdef CONFIG_HW_PERF_EVENTS
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event	**events;
+
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long           *used_mask;
+
+	/*
+	 * Hardware lock to serialize accesses to PMU registers. Needed for the
+	 * read/modify/write sequences.
+	 */
+	raw_spinlock_t		pmu_lock;
+};
+
+struct arm_pmu {
+	struct pmu	pmu;
+	enum arm_perf_pmu_ids id;
+	enum arm_pmu_type type;
+	cpumask_t	active_irqs;
+	const char	*name;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
+					 struct hw_perf_event *hwc);
+	int		(*set_event_filter)(struct hw_perf_event *evt,
+					    struct perf_event_attr *attr);
+	u32		(*read_counter)(int idx);
+	void		(*write_counter)(int idx, u32 val);
+	void		(*start)(void);
+	void		(*stop)(void);
+	void		(*reset)(void *);
+	int		(*map_event)(struct perf_event *event);
+	int		num_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+	u64		max_period;
+	struct platform_device	*plat_device;
+	struct pmu_hw_events	*(*get_hw_events)(void);
+};
+
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
+
+u64 armpmu_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc,
+			int idx, int overflow);
+
+int armpmu_event_set_period(struct perf_event *event,
+			    struct hw_perf_event *hwc,
+			    int idx);
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 #endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 633d1cb..9e92cb2 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -81,6 +81,10 @@
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+
+/* These three are private to arch/arm/kernel/suspend.c */
+extern void cpu_do_suspend(void *);
+extern void cpu_do_resume(void *);
 #else
 #define cpu_proc_init			processor._proc_init
 #define cpu_proc_fin			processor._proc_fin
@@ -89,6 +93,10 @@
 #define cpu_dcache_clean_area		processor.dcache_clean_area
 #define cpu_set_pte_ext			processor.set_pte_ext
 #define cpu_do_switch_mm		processor.switch_mm
+
+/* These three are private to arch/arm/kernel/suspend.c */
+#define cpu_do_suspend			processor.do_suspend
+#define cpu_do_resume			processor.do_resume
 #endif
 
 extern void cpu_resume(void);
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 0a17b62..1e5717a 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -99,9 +99,4 @@
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
-/*
- * show local interrupt info
- */
-extern void show_local_irqs(struct seq_file *, int);
-
 #endif /* ifndef __ASM_ARM_SMP_H */
diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h
index fed9981..ef9ffba9 100644
--- a/arch/arm/include/asm/smp_twd.h
+++ b/arch/arm/include/asm/smp_twd.h
@@ -22,7 +22,7 @@
 
 extern void __iomem *twd_base;
 
-int twd_timer_ack(void);
 void twd_timer_setup(struct clock_event_device *);
+void twd_timer_stop(struct clock_event_device *);
 
 #endif
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index b0e4e1a..1c0a551 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -1,22 +1,7 @@
 #ifndef __ASM_ARM_SUSPEND_H
 #define __ASM_ARM_SUSPEND_H
 
-#include <asm/memory.h>
-#include <asm/tlbflush.h>
-
 extern void cpu_resume(void);
-
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-static inline int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
-	extern int __cpu_suspend(int, long, unsigned long,
-				 int (*)(unsigned long));
-	int ret = __cpu_suspend(0, PHYS_OFFSET - PAGE_OFFSET, arg, fn);
-	flush_tlb_all();
-	return ret;
-}
+extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
 #endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 68036ee..16eed6a 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,7 +29,7 @@
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o
+obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
@@ -43,6 +43,13 @@
 else
 obj-$(CONFIG_KPROBES)		+= kprobes-arm.o
 endif
+obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
+test-kprobes-objs		:= kprobes-test.o
+ifdef CONFIG_THUMB2_KERNEL
+test-kprobes-objs		+= kprobes-test-thumb.o
+else
+test-kprobes-objs		+= kprobes-test-arm.o
+endif
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 0f852d0..204e216 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -22,7 +22,7 @@
 #if defined(CONFIG_DEBUG_ICEDCC)
 		@@ debug using ARM EmbeddedICE DCC channel
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		.endm
 
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
@@ -106,7 +106,7 @@
 
 #ifdef CONFIG_MMU
 		.macro	addruart_current, rx, tmp1, tmp2
-		addruart	\tmp1, \tmp2
+		addruart	\tmp1, \tmp2, \rx
 		mrc		p15, 0, \rx, c1, c0
 		tst		\rx, #1
 		moveq		\rx, \tmp1
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 239703d..566c54c 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -99,7 +99,7 @@
 	sub	r4, r3, r4			@ (PHYS_OFFSET - PAGE_OFFSET)
 	add	r8, r8, r4			@ PHYS_OFFSET
 #else
-	ldr	r8, =PLAT_PHYS_OFFSET
+	ldr	r8, =PHYS_OFFSET		@ always constant in this case
 #endif
 
 	/*
@@ -238,7 +238,7 @@
 	 * This allows debug messages to be output
 	 * via a serial console before paging_init.
 	 */
-	addruart r7, r3
+	addruart r7, r3, r0
 
 	mov	r3, r3, lsr #SECTION_SHIFT
 	mov	r3, r3, lsl #PMD_ORDER
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index a927ca1..814a52a9 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -45,7 +45,6 @@
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
-static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -137,10 +136,11 @@
 	u32 didr;
 
 	/* Do we implement the extended CPUID interface? */
-	if (WARN_ONCE((((read_cpuid_id() >> 16) & 0xf) != 0xf),
-	    "CPUID feature registers not supported. "
-	    "Assuming v6 debug is present.\n"))
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+			   "Assuming v6 debug is present.\n");
 		return ARM_DEBUG_ARCH_V6;
+	}
 
 	ARM_DBG_READ(c0, 0, didr);
 	return (didr >> 16) & 0xf;
@@ -154,10 +154,21 @@
 static int debug_arch_supported(void)
 {
 	u8 arch = get_debug_arch();
-	return arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14;
+
+	/* We don't support the memory-mapped interface. */
+	return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) ||
+		arch >= ARM_DEBUG_ARCH_V7_1;
 }
 
-/* Determine number of BRP register available. */
+/* Determine number of WRP registers available. */
+static int get_num_wrp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 28) & 0xf) + 1;
+}
+
+/* Determine number of BRP registers available. */
 static int get_num_brp_resources(void)
 {
 	u32 didr;
@@ -176,9 +187,10 @@
 static int get_num_wrps(void)
 {
 	/*
-	 * FIXME: When a watchpoint fires, the only way to work out which
-	 * watchpoint it was is by disassembling the faulting instruction
-	 * and working out the address of the memory access.
+	 * On debug architectures prior to 7.1, when a watchpoint fires, the
+	 * only way to work out which watchpoint it was is by disassembling
+	 * the faulting instruction and working out the address of the memory
+	 * access.
 	 *
 	 * Furthermore, we can only do this if the watchpoint was precise
 	 * since imprecise watchpoints prevent us from calculating register
@@ -192,36 +204,17 @@
 	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
 	 * that it is set on some implementations].
 	 */
+	if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1)
+		return 1;
 
-#if 0
-	int wrps;
-	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
-	wrps = ((didr >> 28) & 0xf) + 1;
-#endif
-	int wrps = 1;
-
-	if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
-		wrps = get_num_brp_resources() - 1;
-
-	return wrps;
-}
-
-/* We reserve one breakpoint for each watchpoint. */
-static int get_num_reserved_brps(void)
-{
-	if (core_has_mismatch_brps())
-		return get_num_wrps();
-	return 0;
+	return get_num_wrp_resources();
 }
 
 /* Determine number of usable BRPs available. */
 static int get_num_brps(void)
 {
 	int brps = get_num_brp_resources();
-	if (core_has_mismatch_brps())
-		brps -= get_num_reserved_brps();
-	return brps;
+	return core_has_mismatch_brps() ? brps - 1 : brps;
 }
 
 /*
@@ -239,7 +232,7 @@
 
 	/* Ensure that halting mode is disabled. */
 	if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN,
-			"halting debug mode enabled. Unable to access hardware resources.\n")) {
+		"halting debug mode enabled. Unable to access hardware resources.\n")) {
 		ret = -EPERM;
 		goto out;
 	}
@@ -255,6 +248,7 @@
 		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
+	case ARM_DEBUG_ARCH_V7_1:
 		ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	default:
@@ -346,24 +340,10 @@
 		val_base = ARM_BASE_BVR;
 		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
 		max_slots = core_num_brps;
-		if (info->step_ctrl.enabled) {
-			/* Override the breakpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		}
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled) {
-			/* Install into the reserved breakpoint region. */
-			ctrl_base = ARM_BASE_BCR + core_num_brps;
-			val_base = ARM_BASE_BVR + core_num_brps;
-			/* Override the watchpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		} else {
-			ctrl_base = ARM_BASE_WCR;
-			val_base = ARM_BASE_WVR;
-		}
+		ctrl_base = ARM_BASE_WCR;
+		val_base = ARM_BASE_WVR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -382,6 +362,17 @@
 		goto out;
 	}
 
+	/* Override the breakpoint data with the step data. */
+	if (info->step_ctrl.enabled) {
+		addr = info->trigger & ~0x3;
+		ctrl = encode_ctrl_reg(info->step_ctrl);
+		if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) {
+			i = 0;
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+		}
+	}
+
 	/* Setup the address register. */
 	write_wb_reg(val_base + i, addr);
 
@@ -405,10 +396,7 @@
 		max_slots = core_num_brps;
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled)
-			base = ARM_BASE_BCR + core_num_brps;
-		else
-			base = ARM_BASE_WCR;
+		base = ARM_BASE_WCR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -426,6 +414,13 @@
 	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n"))
 		return;
 
+	/* Ensure that we disable the mismatch breakpoint. */
+	if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE &&
+	    info->step_ctrl.enabled) {
+		i = 0;
+		base = ARM_BASE_BCR + core_num_brps;
+	}
+
 	/* Reset the control register. */
 	write_wb_reg(base + i, 0);
 }
@@ -632,10 +627,9 @@
 	 * we can use the mismatch feature as a poor-man's hardware
 	 * single-step, but this only works for per-task breakpoints.
 	 */
-	if (WARN_ONCE(!bp->overflow_handler &&
-		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
-		 || !bp->hw.bp_target),
-			"overflow handler required but none found\n")) {
+	if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) ||
+	    !core_has_mismatch_brps() || !bp->hw.bp_target)) {
+		pr_warning("overflow handler required but none found\n");
 		ret = -EINVAL;
 	}
 out:
@@ -666,34 +660,62 @@
 	arch_install_hw_breakpoint(bp);
 }
 
-static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
+static void watchpoint_handler(unsigned long addr, unsigned int fsr,
+			       struct pt_regs *regs)
 {
-	int i;
+	int i, access;
+	u32 val, ctrl_reg, alignment_mask;
 	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	/* Without a disassembler, we can only handle 1 watchpoint. */
-	BUG_ON(core_num_wrps > 1);
-
 	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
 
-		if (wp == NULL) {
-			rcu_read_unlock();
-			continue;
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		/*
+		 * The DFAR is an unknown value on debug architectures prior
+		 * to 7.1. Since we only allow a single watchpoint on these
+		 * older CPUs, we can set the trigger to the lowest possible
+		 * faulting address.
+		 */
+		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
+			BUG_ON(i > 0);
+			info->trigger = wp->attr.bp_addr;
+		} else {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+
+			/* Check if the watchpoint value matches. */
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			if (val != (addr & ~alignment_mask))
+				goto unlock;
+
+			/* Possible match, check the byte address select. */
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			if (!((1 << (addr & alignment_mask)) & ctrl.len))
+				goto unlock;
+
+			/* Check that the access type matches. */
+			access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+				 HW_BREAKPOINT_R;
+			if (!(access & hw_breakpoint_type(wp)))
+				goto unlock;
+
+			/* We have a winner. */
+			info->trigger = addr;
 		}
 
-		/*
-		 * The DFAR is an unknown value. Since we only allow a
-		 * single watchpoint, we can set the trigger to the lowest
-		 * possible faulting address.
-		 */
-		info = counter_arch_bp(wp);
-		info->trigger = wp->attr.bp_addr;
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
 		perf_bp_event(wp, regs);
 
@@ -705,6 +727,7 @@
 		if (!wp->overflow_handler)
 			enable_single_step(wp, instruction_pointer(regs));
 
+unlock:
 		rcu_read_unlock();
 	}
 }
@@ -717,7 +740,7 @@
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	for (i = 0; i < core_num_reserved_brps; ++i) {
+	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
@@ -820,7 +843,7 @@
 	case ARM_ENTRY_ASYNC_WATCHPOINT:
 		WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n");
 	case ARM_ENTRY_SYNC_WATCHPOINT:
-		watchpoint_handler(addr, regs);
+		watchpoint_handler(addr, fsr, regs);
 		break;
 	default:
 		ret = 1; /* Unhandled fault. */
@@ -834,11 +857,31 @@
 /*
  * One-time initialisation.
  */
-static void reset_ctrl_regs(void *info)
+static cpumask_t debug_err_mask;
+
+static int debug_reg_trap(struct pt_regs *regs, unsigned int instr)
 {
-	int i, cpu = smp_processor_id();
+	int cpu = smp_processor_id();
+
+	pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+		   instr, cpu);
+
+	/* Set the error flag for this CPU and skip the faulting instruction. */
+	cpumask_set_cpu(cpu, &debug_err_mask);
+	instruction_pointer(regs) += 4;
+	return 0;
+}
+
+static struct undef_hook debug_reg_hook = {
+	.instr_mask	= 0x0fe80f10,
+	.instr_val	= 0x0e000e10,
+	.fn		= debug_reg_trap,
+};
+
+static void reset_ctrl_regs(void *unused)
+{
+	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
 	u32 dbg_power;
-	cpumask_t *cpumask = info;
 
 	/*
 	 * v7 debug contains save and restore registers so that debug state
@@ -848,38 +891,57 @@
 	 * Access Register to avoid taking undefined instruction exceptions
 	 * later on.
 	 */
-	if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+	switch (debug_arch) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		/* ARMv6 cores just need to reset the registers. */
+		goto reset_regs;
+	case ARM_DEBUG_ARCH_V7_ECP14:
 		/*
 		 * Ensure sticky power-down is clear (i.e. debug logic is
 		 * powered up).
 		 */
 		asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
-		if ((dbg_power & 0x1) == 0) {
-			pr_warning("CPU %d debug is powered down!\n", cpu);
-			cpumask_or(cpumask, cpumask, cpumask_of(cpu));
-			return;
-		}
-
+		if ((dbg_power & 0x1) == 0)
+			err = -EPERM;
+		break;
+	case ARM_DEBUG_ARCH_V7_1:
 		/*
-		 * Unconditionally clear the lock by writing a value
-		 * other than 0xC5ACCE55 to the access register.
+		 * Ensure the OS double lock is clear.
 		 */
-		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
-		isb();
-
-		/*
-		 * Clear any configured vector-catch events before
-		 * enabling monitor mode.
-		 */
-		asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
-		isb();
+		asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power));
+		if ((dbg_power & 0x1) == 1)
+			err = -EPERM;
+		break;
 	}
 
+	if (err) {
+		pr_warning("CPU %d debug is powered down!\n", cpu);
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
+		return;
+	}
+
+	/*
+	 * Unconditionally clear the lock by writing a value
+	 * other than 0xC5ACCE55 to the access register.
+	 */
+	asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+	isb();
+
+	/*
+	 * Clear any configured vector-catch events before
+	 * enabling monitor mode.
+	 */
+	asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
+	isb();
+
+reset_regs:
 	if (enable_monitor_mode())
 		return;
 
 	/* We must also reset any reserved registers. */
-	for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
+	raw_num_brps = get_num_brp_resources();
+	for (i = 0; i < raw_num_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
 		write_wb_reg(ARM_BASE_BVR + i, 0UL);
 	}
@@ -895,6 +957,7 @@
 {
 	if (action == CPU_ONLINE)
 		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+
 	return NOTIFY_OK;
 }
 
@@ -905,7 +968,6 @@
 static int __init arch_hw_breakpoint_init(void)
 {
 	u32 dscr;
-	cpumask_t cpumask = { CPU_BITS_NONE };
 
 	debug_arch = get_debug_arch();
 
@@ -916,28 +978,31 @@
 
 	/* Determine how many BRPs/WRPs are available. */
 	core_num_brps = get_num_brps();
-	core_num_reserved_brps = get_num_reserved_brps();
 	core_num_wrps = get_num_wrps();
 
-	pr_info("found %d breakpoint and %d watchpoint registers.\n",
-		core_num_brps + core_num_reserved_brps, core_num_wrps);
-
-	if (core_num_reserved_brps)
-		pr_info("%d breakpoint(s) reserved for watchpoint "
-				"single-step.\n", core_num_reserved_brps);
+	/*
+	 * We need to tread carefully here because DBGSWENABLE may be
+	 * driven low on this core and there isn't an architected way to
+	 * determine that.
+	 */
+	register_undef_hook(&debug_reg_hook);
 
 	/*
 	 * Reset the breakpoint resources. We assume that a halting
 	 * debugger will leave the world in a nice state for us.
 	 */
-	on_each_cpu(reset_ctrl_regs, &cpumask, 1);
-	if (!cpumask_empty(&cpumask)) {
+	on_each_cpu(reset_ctrl_regs, NULL, 1);
+	unregister_undef_hook(&debug_reg_hook);
+	if (!cpumask_empty(&debug_err_mask)) {
 		core_num_brps = 0;
-		core_num_reserved_brps = 0;
 		core_num_wrps = 0;
 		return 0;
 	}
 
+	pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n",
+		core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " :
+		"", core_num_wrps);
+
 	ARM_DBG_READ(c1, 0, dscr);
 	if (dscr & ARM_DSCR_HDBGEN) {
 		max_watchpoint_len = 4;
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 53919b2..7cb2926 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -59,9 +59,6 @@
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-#ifdef CONFIG_LOCAL_TIMERS
-	show_local_irqs(p, prec);
-#endif
 	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
 	return 0;
 }
diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/kernel/kprobes-arm.c
index 79203ee..9fe8910 100644
--- a/arch/arm/kernel/kprobes-arm.c
+++ b/arch/arm/kernel/kprobes-arm.c
@@ -60,6 +60,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -971,6 +972,9 @@
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_arm_table);
+#endif
 
 static void __kprobes arm_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c
new file mode 100644
index 0000000..fc82de8
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test-arm.c
@@ -0,0 +1,1323 @@
+/*
+ * arch/arm/kernel/kprobes-test-arm.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "32"
+
+#define TEST_ARM_TO_THUMB_INTERWORK_R(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_REG(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+#define TEST_ARM_TO_THUMB_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_MEM(15, 3f+1)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+void kprobe_arm_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Data-processing (register), (register-shifted register), (immediate)")
+
+#define _DATA_PROCESSING_DNM(op,s,val)						\
+	TEST_RR(  op "eq" s "	r0,  r",1, VAL1,", r",2, val, "")		\
+	TEST_RR(  op "ne" s "	r1,  r",1, VAL1,", r",2, val, ", lsl #3")	\
+	TEST_RR(  op "cs" s "	r2,  r",3, VAL1,", r",2, val, ", lsr #4")	\
+	TEST_RR(  op "cc" s "	r3,  r",3, VAL1,", r",2, val, ", asr #5")	\
+	TEST_RR(  op "mi" s "	r4,  r",5, VAL1,", r",2, N(val),", asr #6")	\
+	TEST_RR(  op "pl" s "	r5,  r",5, VAL1,", r",2, val, ", ror #7")	\
+	TEST_RR(  op "vs" s "	r6,  r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", pc, lsl #3")			\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vc" s "	r6,  pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vc" s "	r6,  sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "hi" s "	r8,  r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\
+	TEST_RRR( op "ls" s "	r9,  r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\
+	TEST_RRR( op "ge" s "	r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\
+	TEST_RRR( op "lt" s "	r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\
+	TEST_RR(  op "gt" s "	r12, r13"       ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op "le" s "	r14, r",0, val, ", r13"       ", lsl r",14,8,"")\
+	TEST_RR(  op s "	r12, pc"        ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op s "	r14, r",0, val, ", pc"        ", lsl r",14,8,"")\
+	TEST_R(   op "eq" s "	r0,  r",11,VAL1,", #0xf5")			\
+	TEST_R(   op "ne" s "	r11, r",0, VAL1,", #0xf5000000")		\
+	TEST_R(   op s "	r7,  r",8, VAL2,", #0x000af000")		\
+	TEST(     op s "	r4,  pc"        ", #0x00005a00")
+
+#define DATA_PROCESSING_DNM(op,val)		\
+	_DATA_PROCESSING_DNM(op,"",val)		\
+	_DATA_PROCESSING_DNM(op,"s",val)
+
+#define DATA_PROCESSING_NM(op,val)						\
+	TEST_RR(  op "ne	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(  op "eq	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(  op "cc	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(  op "cs	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(  op "pl	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(  op "mi	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(  op "vc	r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R (  op "vs	r",7, VAL1,", pc, lsl #3")			\
+	TEST_R (  op "vs	r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vs	pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vs	sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "ls	r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")	\
+	TEST_RRR( op "hi	r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")	\
+	TEST_RRR( op "lt	r",11,VAL1,", r",14,val, ", asr r",7, 5,"")	\
+	TEST_RRR( op "ge	r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "le	r13"       ", r",14,val, ", ror r",14,7,"")	\
+	TEST_RR(  op "gt	r",0, val, ", r13"       ", lsl r",14,8,"")	\
+	TEST_RR(  op "	pc"        ", r",14,val, ", ror r",14,7,"")		\
+	TEST_RR(  op "	r",0, val, ", pc"        ", lsl r",14,8,"")		\
+	TEST_R(   op "eq	r",11,VAL1,", #0xf5")				\
+	TEST_R(   op "ne	r",0, VAL1,", #0xf5000000")			\
+	TEST_R(   op "	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING_DM(op,s,val)					\
+	TEST_R(   op "eq" s "	r0,  r",1, val, "")			\
+	TEST_R(   op "ne" s "	r1,  r",1, val, ", lsl #3")		\
+	TEST_R(   op "cs" s "	r2,  r",3, val, ", lsr #4")		\
+	TEST_R(   op "cc" s "	r3,  r",3, val, ", asr #5")		\
+	TEST_R(   op "mi" s "	r4,  r",5, N(val),", asr #6")		\
+	TEST_R(   op "pl" s "	r5,  r",5, val, ", ror #7")		\
+	TEST_R(   op "vs" s "	r6,  r",10,val, ", rrx")		\
+	TEST(     op "vs" s "	r7,  pc, lsl #3")			\
+	TEST(     op "vs" s "	r7,  sp, lsr #4")			\
+	TEST_RR(  op "vc" s "	r8,  r",7, val, ", lsl r",0, 3,"")	\
+	TEST_RR(  op "hi" s "	r9,  r",9, val, ", lsr r",7, 4,"")	\
+	TEST_RR(  op "ls" s "	r10, r",9, val, ", asr r",7, 5,"")	\
+	TEST_RR(  op "ge" s "	r11, r",11,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "lt" s "	r12, r",11,val, ", ror r",14,7,"")	\
+	TEST_R(   op "gt" s "	r14, r13"       ", lsl r",14,8,"")	\
+	TEST_R(   op "le" s "	r14, pc"        ", lsl r",14,8,"")	\
+	TEST(     op "eq" s "	r0,  #0xf5")				\
+	TEST(     op "ne" s "	r11, #0xf5000000")			\
+	TEST(     op s "	r7,  #0x000af000")			\
+	TEST(     op s "	r4,  #0x00005a00")
+
+#define DATA_PROCESSING_DM(op,val)		\
+	_DATA_PROCESSING_DM(op,"",val)		\
+	_DATA_PROCESSING_DM(op,"s",val)
+
+	DATA_PROCESSING_DNM("and",0xf00f00ff)
+	DATA_PROCESSING_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING_DNM("sub",VAL2)
+	DATA_PROCESSING_DNM("rsb",VAL2)
+	DATA_PROCESSING_DNM("add",VAL2)
+	DATA_PROCESSING_DNM("adc",VAL2)
+	DATA_PROCESSING_DNM("sbc",VAL2)
+	DATA_PROCESSING_DNM("rsc",VAL2)
+	DATA_PROCESSING_NM("tst",0xf00f00ff)
+	DATA_PROCESSING_NM("teq",0xf00f00ff)
+	DATA_PROCESSING_NM("cmp",VAL2)
+	DATA_PROCESSING_NM("cmn",VAL2)
+	DATA_PROCESSING_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING_DM("mov",VAL2)
+	DATA_PROCESSING_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING_DM("mvn",VAL2)
+
+	TEST("mov	ip, sp") /* This has special case emulation code */
+
+	TEST_SUPPORTED("mov	pc, #0x1000");
+	TEST_SUPPORTED("mov	sp, #0x1000");
+	TEST_SUPPORTED("cmp	pc, #0x1000");
+	TEST_SUPPORTED("cmp	sp, #0x1000");
+
+	/* Data-processing with PC as shift*/
+	TEST_UNSUPPORTED(".word 0xe15c0f1e	@ cmp	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe1a0cf1e	@ mov	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe08caf1e	@ add	r10, r12, r14, asl pc")
+
+	/* Data-processing with PC as shift*/
+	TEST_UNSUPPORTED("movs	pc, r1")
+	TEST_UNSUPPORTED("movs	pc, r1, lsl r2")
+	TEST_UNSUPPORTED("movs	pc, #0x10000")
+	TEST_UNSUPPORTED("adds	pc, lr, r1")
+	TEST_UNSUPPORTED("adds	pc, lr, r1, lsl r2")
+	TEST_UNSUPPORTED("adds	pc, lr, #4")
+
+	/* Data-processing with SP as target */
+	TEST("add	sp, sp, #16")
+	TEST("sub	sp, sp, #8")
+	TEST("bic	sp, sp, #0x20")
+	TEST("orr	sp, sp, #0x20")
+	TEST_PR( "add	sp, r",10,0,", r",11,4,"")
+	TEST_PRR("add	sp, r",10,0,", r",11,4,", asl r",12,1,"")
+	TEST_P(  "mov	sp, r",10,0,"")
+	TEST_PR( "mov	sp, r",10,0,", asl r",12,0,"")
+
+	/* Data-processing with PC as target */
+	TEST_BF(   "add	pc, pc, #2f-1b-8")
+	TEST_BF_R ("add	pc, pc, r",14,2f-1f-8,"")
+	TEST_BF_R ("add	pc, r",14,2f-1f-8,", pc")
+	TEST_BF_R ("mov	pc, r",0,2f,"")
+	TEST_BF_RR("mov	pc, r",0,2f,", asl r",1,0,"")
+	TEST_BB(   "sub	pc, pc, #1b-2b+8")
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_BB(   "sub	pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before ARMv6 */
+#endif
+	TEST_BB_R( "sub	pc, pc, r",14, 1f-2f+8,"")
+	TEST_BB_R( "rsb	pc, r",14,1f-2f+8,", pc")
+	TEST_RR(   "add	pc, pc, r",10,-2,", asl r",11,1,"")
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_R("add	pc, pc, r",0,3f-1f-8+1,"")
+	TEST_ARM_TO_THUMB_INTERWORK_R("sub	pc, r",0,3f+8+1,", #8")
+#endif
+	TEST_GROUP("Miscellaneous instructions")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrspl	r7, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".word 0xe10ff000	@ mrs r15, cpsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, lr")
+	TEST_UNSUPPORTED("msr	spsr, r0")
+
+	TEST_BF_R("bx	r",0,2f,"")
+	TEST_BB_R("bx	r",7,2f,"")
+	TEST_BF_R("bxeq	r",14,2f,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clzeq	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+	TEST(  "clz	r4, sp")
+	TEST_UNSUPPORTED(".word 0x016fff10	@ clz pc, r0")
+	TEST_UNSUPPORTED(".word 0x016f0f1f	@ clz r0, pc")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("bxj	r0")
+#endif
+
+	TEST_BF_R("blx	r",0,2f,"")
+	TEST_BB_R("blx	r",7,2f,"")
+	TEST_BF_R("blxeq	r",14,2f,"")
+	TEST_UNSUPPORTED(".word 0x0120003f	@ blx pc")
+
+	TEST_RR(   "qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qsub	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdsub	lr, r",9, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe101f050	@ qadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe121f050	@ qsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe141f050	@ qdadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe161f050	@ qdsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe16f2050	@ qdsub r2, r0, pc")
+	TEST_UNSUPPORTED(".word 0xe161205f	@ qdsub r2, pc, r1")
+
+	TEST_UNSUPPORTED("bkpt	0xffff")
+	TEST_UNSUPPORTED("bkpt	0x0000")
+
+	TEST_UNSUPPORTED(".word 0xe1600070 @ smc #0")
+
+	TEST_GROUP("Halfword multiply and multiply-accumulate")
+
+	TEST_RRR(    "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f3281 @ smlabb pc, r1, r2, r3")
+	TEST_RRR(    "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlatbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32a1 @ smlatb pc, r1, r2, r3")
+	TEST_RRR(    "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32c1 @ smlabt pc, r1, r2, r3")
+	TEST_RRR(    "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlattge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32e1 @ smlatt pc, r1, r2, r3")
+
+	TEST_RRR(    "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f3281 @ smlawb pc, r1, r2, r3")
+	TEST_RRR(    "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f32c1 @ smlawt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe12032cf @ smlawt r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe1203fc1 @ smlawt r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe120f2c1 @ smlawt r0, r1, r2, pc")
+
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02a1 @ smulwb pc, r1, r2")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02e1 @ smulwt pc, r1, r2")
+
+	TEST_RRRR(  "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f1382 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f382 @ smlalbb r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlaltble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13a2 @ smlaltb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3a2 @ smlaltb r1, pc, r2, r3")
+	TEST_RRRR(  "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbtle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13c2 @ smlalbt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3c2 @ smlalbt r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalttle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13e2 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe140f3e2 @ smlalbb r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe14013ef @ smlalbb r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe1401fe2 @ smlalbb r0, r1, r2, pc")
+
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f0281 @ smulbb pc, r1, r2")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02a1 @ smultb pc, r1, r2")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02c1 @ smultb pc, r1, r2")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulttge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02e1 @ smultt pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe16002ef @ smultt r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe1600fe1 @ smultt r0, r1, pc")
+
+	TEST_GROUP("Multiply and multiply-accumulate")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mulls	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "mul	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe00f0291 @ mul pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe000029f @ mul r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe0000f91 @ mul r0, r1, pc")
+	TEST_RR(    "muls	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mullss	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "muls	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe01f0291 @ muls pc, r1, r2")
+
+	TEST_RRR(    "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mla	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe02f3291 @ mla pc, r1, r2, r3")
+	TEST_RRR(    "mlas	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahis	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mlas	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe03f3291 @ mlas pc, r1, r2, r3")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_RR(  "umaal	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umaalls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umaal	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe041f392 @ umaal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe04f0392 @ umaal r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0500090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe05fff9f @ undef")
+
+	TEST_RRR(  "mls		r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(  "mlshi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(   "mls		lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe06f3291 @ mls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe060329f @ mls r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0603f91 @ mls r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe060f291 @ mls r0, r1, r2, pc")
+#endif
+
+	TEST_UNSUPPORTED(".word 0xe0700090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe07fff9f @ undef")
+
+	TEST_RR(  "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe081f392 @ umull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe08f1392 @ umull r1, pc, r2, r3")
+	TEST_RR(  "umulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe091f392 @ umulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe09f1392 @ umulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0af1392 @ umlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0a1f392 @ umlal r1, pc, r2, r3")
+	TEST_RRRR(  "umlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0bf1392 @ umlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0b1f392 @ umlals r1, pc, r2, r3")
+
+	TEST_RR(  "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0c1f392 @ smull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0cf1392 @ smull r1, pc, r2, r3")
+	TEST_RR(  "smulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0d1f392 @ smulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0df1392 @ smulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ef1392 @ smlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0e1f392 @ smlal r1, pc, r2, r3")
+	TEST_RRRR(  "smlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ff1392 @ smlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0f392 @ smlals r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0139f @ smlals r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe0f01f92 @ smlals r0, r1, r2, pc")
+
+	TEST_GROUP("Synchronization primitives")
+
+	/*
+	 * Use hard coded constants for SWP instructions to avoid warnings
+	 * about deprecated instructions.
+	 */
+	TEST_RP( ".word 0xe108e097 @ swp	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x610d0091 @ swpvs	r0, r",1,VAL1,", [sp]")
+	TEST_RP( ".word 0xe10cd09e @ swp	sp, r",14,VAL2,", [r",12,13*4,"]")
+	TEST_UNSUPPORTED(".word 0xe102f091 @ swp pc, r1, [r2]")
+	TEST_UNSUPPORTED(".word 0xe102009f @ swp r0, pc, [r2]")
+	TEST_UNSUPPORTED(".word 0xe10f0091 @ swp r0, r1, [pc]")
+	TEST_RP( ".word 0xe148e097 @ swpb	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x614d0091 @ swpvsb	r0, r",1,VAL1,", [sp]")
+	TEST_UNSUPPORTED(".word 0xe142f091 @ swpb pc, r1, [r2]")
+
+	TEST_UNSUPPORTED(".word	0xe1100090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1200090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1300090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1500090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1600090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1700090") /* Unallocated space */
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("ldrex	r2, [sp]")
+	TEST_UNSUPPORTED("strexd	r0, r2, r3, [sp]")
+	TEST_UNSUPPORTED("ldrexd	r2, r3, [sp]")
+	TEST_UNSUPPORTED("strexb	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexb	r2, [sp]")
+	TEST_UNSUPPORTED("strexh	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexh	r2, [sp]")
+#endif
+	TEST_GROUP("Extra load/store instructions")
+
+	TEST_RPR(  "strh	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")
+	TEST_RPR(  "streqh	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")
+	TEST_RPR(  "strh	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")
+	TEST_RPR(  "strneh	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strh	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")
+	TEST_RPR(  "strh	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0ba	@ strh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0bb	@ strh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089a0bf	@ strh r10, [r9], pc")
+
+	TEST_PR(   "ldrh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrcsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrcch	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0ba	@ ldrh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0bb	@ ldrh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe099a0bf	@ ldrh r10, [r9], pc")
+
+	TEST_RP(   "strh	r",0, VAL1,", [r",1, 24,", #-2]")
+	TEST_RP(   "strmih	r",14,VAL2,", [r",13,0, ", #2]")
+	TEST_RP(   "strh	r",1, VAL1,", [r",2, 24,", #4]!")
+	TEST_RP(   "strplh	r",12,VAL2,", [r",11,24,", #-4]!")
+	TEST_RP(   "strh	r",2, VAL1,", [r",3, 24,"], #48")
+	TEST_RP(   "strh	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3b0	@ strh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3b0	@ strh pc, [r9], #48")
+
+	TEST_P(	   "ldrh	r0, [r",0,  24,", #-2]")
+	TEST_P(	   "ldrvsh	r14, [r",13,0, ", #2]")
+	TEST_P(	   "ldrh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrvch	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3b0	@ ldrh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3b0	@ ldrh pc, [r9], #48")
+
+	TEST_PR(   "ldrsb	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrhisb	r14, [r",13,0,", r",12,  48,"]")
+	TEST_PR(   "ldrsb	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlssb	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsb	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsb	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0da	@ ldrsb r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0db	@ ldrsb pc, [r9], r11")
+
+	TEST_P(	   "ldrsb	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldrgesb	r14, [r",13,0, ", #1]")
+	TEST_P(	   "ldrsb	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrltsb	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsb	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsb	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsb	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3d0	@ ldrsb r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3d0	@ ldrsb pc, [r9], #48")
+
+	TEST_PR(   "ldrsh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrgtsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrsh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlesh	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0fa	@ ldrsh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0fb	@ ldrsh pc, [r9], r11")
+
+	TEST_P(	   "ldrsh	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldreqsh	r14, [r",13,0 ,", #1]")
+	TEST_P(	   "ldrsh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrnesh	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3f0	@ ldrsh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3f0	@ ldrsh pc, [r9], #48")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("strht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], r3")
+	TEST_UNSUPPORTED("strht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], #48")
+#endif
+
+	TEST_RPR(  "strd	r",0, VAL1,", [r",1, 48,", -r",2,24,"]")
+	TEST_RPR(  "strccd	r",8, VAL2,", [r",13,0, ", r",12,48,"]")
+	TEST_RPR(  "strd	r",4, VAL1,", [r",2, 24,", r",3, 48,"]!")
+	TEST_RPR(  "strcsd	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strd	r",2, VAL1,", [r",3, 24,"], r",4,48,"")
+	TEST_RPR(  "strd	r",10,VAL2,", [r",9, 48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0fa	@ strd r12, [pc, r10]!")
+
+	TEST_PR(   "ldrd	r0, [r",0, 48,", -r",2,24,"]")
+	TEST_PR(   "ldrmid	r8, [r",13,0, ", r",12,48,"]")
+	TEST_PR(   "ldrd	r4, [r",2, 24,", r",3, 48,"]!")
+	TEST_PR(   "ldrpld	r6, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrd	r2, [r",5, 24,"], r",4,48,"")
+	TEST_PR(   "ldrd	r10, [r",9,48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0da	@ ldrd r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0db	@ ldrd pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089e0db	@ ldrd lr, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089c0df	@ ldrd r12, [r9], pc")
+
+	TEST_RP(   "strd	r",0, VAL1,", [r",1, 24,", #-8]")
+	TEST_RP(   "strvsd	r",8, VAL2,", [r",13,0, ", #8]")
+	TEST_RP(   "strd	r",4, VAL1,", [r",2, 24,", #16]!")
+	TEST_RP(   "strvcd	r",12,VAL2,", [r",11,24,", #-16]!")
+	TEST_RP(   "strd	r",2, VAL1,", [r",4, 24,"], #48")
+	TEST_RP(   "strd	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3f0	@ strd r12, [pc, #48]!")
+
+	TEST_P(	   "ldrd	r0, [r",0, 24,", #-8]")
+	TEST_P(	   "ldrhid	r8, [r",13,0, ", #8]")
+	TEST_P(	   "ldrd	r4, [r",2, 24,", #16]!")
+	TEST_P(	   "ldrlsd	r6, [r",11,24,", #-16]!")
+	TEST_P(	   "ldrd	r2, [r",5, 24,"], #48")
+	TEST_P(	   "ldrd	r10, [r",9,6,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3d0	@ ldrd r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3d0	@ ldrd pc, [r9], #48")
+	TEST_UNSUPPORTED(".word 0xe0c9e3d0	@ ldrd lr, [r9], #48")
+
+	TEST_GROUP("Miscellaneous")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".word 0xe300f000	@ movw pc, #0")
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".word 0xe340f000	@ movt pc, #0")
+#endif
+
+	TEST_UNSUPPORTED("msr	cpsr, 0x13")
+	TEST_UNSUPPORTED("msr	cpsr_f, 0xf0000000")
+	TEST_UNSUPPORTED("msr	spsr, 0x13")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED("dbg #0")
+#endif
+
+	TEST_GROUP("Load/store word and unsigned byte")
+
+#define LOAD_STORE(byte)							\
+	TEST_RP( "str"byte"	r",0, VAL1,", [r",1, 24,", #-2]")		\
+	TEST_RP( "str"byte"	r",14,VAL2,", [r",13,0, ", #2]")		\
+	TEST_RP( "str"byte"	r",1, VAL1,", [r",2, 24,", #4]!")		\
+	TEST_RP( "str"byte"	r",12,VAL2,", [r",11,24,", #-4]!")		\
+	TEST_RP( "str"byte"	r",2, VAL1,", [r",3, 24,"], #48")		\
+	TEST_RP( "str"byte"	r",10,VAL2,", [r",9, 64,"], #-48")		\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")	\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")	\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")	\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")	\
+	TEST_RPR("str"byte"	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")	\
+	TEST_RPR("str"byte"	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")	\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 24,", r",2,  32,", asl #1]")\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  32,", asr #3]!")\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\
+	TEST_P(  "ldr"byte"	r0, [r",0,  24,", #-2]")			\
+	TEST_P(  "ldr"byte"	r14, [r",13,0, ", #2]")				\
+	TEST_P(  "ldr"byte"	r1, [r",2,  24,", #4]!")			\
+	TEST_P(  "ldr"byte"	r12, [r",11,24,", #-4]!")			\
+	TEST_P(  "ldr"byte"	r2, [r",3,  24,"], #48")			\
+	TEST_P(  "ldr"byte"	r10, [r",9, 64,"], #-48")			\
+	TEST_PR( "ldr"byte"	r0, [r",0,  48,", -r",2, 24,"]")		\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 48,"]")		\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3, 48,"]!")		\
+	TEST_PR( "ldr"byte"	r12, [r",11,48,", -r",10,24,"]!")		\
+	TEST_PR( "ldr"byte"	r2, [r",3,  24,"], r",4, 48,"")			\
+	TEST_PR( "ldr"byte"	r10, [r",9, 48,"], -r",11,24,"")		\
+	TEST_PR( "ldr"byte"	r0, [r",0,  24,", r",2,  32,", asl #1]")	\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 32,", lsr #2]")	\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3,  32,", asr #3]!")	\
+	TEST_PR( "ldr"byte"	r12, [r",11,24,", r",10, 4,", ror #31]!")	\
+	TEST(    "ldr"byte"	r0, [pc, #0]")					\
+	TEST_R(  "ldr"byte"	r12, [pc, r",14,0,"]")
+
+	LOAD_STORE("")
+	TEST_P(   "str	pc, [r",0,0,", #15*4]")
+	TEST_R(   "str	pc, [sp, r",2,15*4,"]")
+	TEST_BF(  "ldr	pc, [sp, #15*4]")
+	TEST_BF_R("ldr	pc, [sp, r",2,15*4,"]")
+
+	TEST_P(   "str	sp, [r",0,0,", #13*4]")
+	TEST_R(   "str	sp, [sp, r",2,13*4,"]")
+	TEST_BF(  "ldr	sp, [sp, #13*4]")
+	TEST_BF_R("ldr	sp, [sp, r",2,13*4,"]")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+#endif
+	TEST_UNSUPPORTED(".word 0xe5af6008	@ str r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7af6008	@ str r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5bf6008	@ ldr r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7bf6008	@ ldr r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe788600f	@ str r6, [r8, pc]")
+	TEST_UNSUPPORTED(".word 0xe798600f	@ ldr r6, [r8, pc]")
+
+	LOAD_STORE("b")
+	TEST_UNSUPPORTED(".word 0xe5f7f008	@ ldrb pc, [r7, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7f7f008	@ ldrb pc, [r7, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5ef6008	@ strb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ef6008	@ strb r6, [pc, r3]!")
+	TEST_UNSUPPORTED(".word 0xe5ff6008	@ ldrb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ff6008	@ ldrb r6, [pc, r3]!")
+
+	TEST_UNSUPPORTED("ldrt	r0, [r1], #4")
+	TEST_UNSUPPORTED("ldrt	r1, [r2], r3")
+	TEST_UNSUPPORTED("strt	r2, [r3], #4")
+	TEST_UNSUPPORTED("strt	r3, [r4], r5")
+	TEST_UNSUPPORTED("ldrbt	r4, [r5], #4")
+	TEST_UNSUPPORTED("ldrbt	r5, [r6], r7")
+	TEST_UNSUPPORTED("strbt	r6, [r7], #4")
+	TEST_UNSUPPORTED("strbt	r7, [r8], r9")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	TEST_UNSUPPORTED(".word 0xe6000010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe60fffff") /* Unallocated space */
+
+	TEST_RR(    "sadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff1a	@ sadd16	pc, r12, r10")
+	TEST_RR(    "sasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff3a	@ sasx	pc, r12, r10")
+	TEST_RR(    "ssax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff5a	@ ssax	pc, r12, r10")
+	TEST_RR(    "ssub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff7a	@ ssub16	pc, r12, r10")
+	TEST_RR(    "sadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff9a	@ sadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe61000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffdf") /* Unallocated space */
+	TEST_RR(    "ssub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cfffa	@ ssub8	pc, r12, r10")
+
+	TEST_RR(    "qadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff1a	@ qadd16	pc, r12, r10")
+	TEST_RR(    "qasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff3a	@ qasx	pc, r12, r10")
+	TEST_RR(    "qsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff5a	@ qsax	pc, r12, r10")
+	TEST_RR(    "qsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff7a	@ qsub16	pc, r12, r10")
+	TEST_RR(    "qadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff9a	@ qadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe62000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffdf") /* Unallocated space */
+	TEST_RR(    "qsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cfffa	@ qsub8	pc, r12, r10")
+
+	TEST_RR(    "shadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff1a	@ shadd16	pc, r12, r10")
+	TEST_RR(    "shasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff3a	@ shasx	pc, r12, r10")
+	TEST_RR(    "shsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff5a	@ shsax	pc, r12, r10")
+	TEST_RR(    "shsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff7a	@ shsub16	pc, r12, r10")
+	TEST_RR(    "shadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff9a	@ shadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe63000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffdf") /* Unallocated space */
+	TEST_RR(    "shsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cfffa	@ shsub8	pc, r12, r10")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	TEST_UNSUPPORTED(".word 0xe6400010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe64fffff") /* Unallocated space */
+
+	TEST_RR(    "uadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff1a	@ uadd16	pc, r12, r10")
+	TEST_RR(    "uasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff3a	@ uasx	pc, r12, r10")
+	TEST_RR(    "usax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff5a	@ usax	pc, r12, r10")
+	TEST_RR(    "usub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff7a	@ usub16	pc, r12, r10")
+	TEST_RR(    "uadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff9a	@ uadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe65000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffdf") /* Unallocated space */
+	TEST_RR(    "usub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cfffa	@ usub8	pc, r12, r10")
+
+	TEST_RR(    "uqadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff1a	@ uqadd16	pc, r12, r10")
+	TEST_RR(    "uqasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff3a	@ uqasx	pc, r12, r10")
+	TEST_RR(    "uqsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff5a	@ uqsax	pc, r12, r10")
+	TEST_RR(    "uqsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff7a	@ uqsub16	pc, r12, r10")
+	TEST_RR(    "uqadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff9a	@ uqadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe66000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffdf") /* Unallocated space */
+	TEST_RR(    "uqsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cfffa	@ uqsub8	pc, r12, r10")
+
+	TEST_RR(    "uhadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff1a	@ uhadd16	pc, r12, r10")
+	TEST_RR(    "uhasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff3a	@ uhasx	pc, r12, r10")
+	TEST_RR(    "uhsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff5a	@ uhsax	pc, r12, r10")
+	TEST_RR(    "uhsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff7a	@ uhsub16	pc, r12, r10")
+	TEST_RR(    "uhadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff9a	@ uhadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffdf") /* Unallocated space */
+	TEST_RR(    "uhsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cfffa	@ uhsub8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67feffa	@ uhsub8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe67cefff	@ uhsub8	r14, r12, pc")
+#endif /* __LINUX_ARM_ARCH__ >= 7 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Packing, unpacking, saturation, and reversal")
+
+	TEST_RR(    "pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_UNSUPPORTED(".word 0xe68cf11a	@ pkhbt	pc, r12, r10, lsl #2")
+	TEST_RR(    "pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+	TEST_UNSUPPORTED(".word 0xe68cf15a	@ pkhtb	pc, r12, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68fe15a	@ pkhtb	r14, pc, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68ce15f	@ pkhtb	r14, r12, pc, asr #2")
+	TEST_UNSUPPORTED(".word 0xe6900010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fffdf") /* Unallocated space */
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6b7f01c	@ ssat	pc, #24, r12")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6f7f01c	@ usat	pc, #24, r12")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe68cf47a	@ sxtab16	pc,r12, r10, ror #8")
+
+	TEST_RR(    "sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "sel	r14, r",12,VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe68cffba	@ sel	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe68fefba	@ sel	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe68cefbf	@ sel	r14, r12, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6abff3c	@ ssat16	pc, #12, r12")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6acf47a	@ sxtab	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev	r0, r",0,   VAL1,"")
+	TEST_R(     "rev	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfff3c	@ rev	pc, r12")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6bcf47a	@ sxtah	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev16	r0, r",0,   VAL1,"")
+	TEST_R(     "rev16	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfffbc	@ rev16	pc, r12")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ccf47a	@ uxtab16	pc,r12, r10, ror #8")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6ecff3c	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".word 0xe6ecef3f	@ usat16	r14, #12, pc")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ecf47a	@ uxtab	pc,r12, r10, ror #8")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_R(     "rbit	r0, r",0,   VAL1,"")
+	TEST_R(     "rbit	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ rbit	pc, r12")
+#endif
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6fff077	@ uxth	pc, r7")
+	TEST_UNSUPPORTED(".word 0xe6ff807f	@ uxth	r8, pc")
+	TEST_UNSUPPORTED(".word 0xe6fcf47a	@ uxtah	pc, r12, r10, ror #8")
+	TEST_UNSUPPORTED(".word 0xe6fce47f	@ uxtah	r14, r12, pc, ror #8")
+
+	TEST_R(     "revsh	r0, r",0,   VAL1,"")
+	TEST_R(     "revsh	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ revsh	pc, r12")
+	TEST_UNSUPPORTED(".word 0xe6ffef3f	@ revsh	r14, pc")
+
+	TEST_UNSUPPORTED(".word 0xe6900070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fff7f") /* Unallocated space */
+
+	TEST_UNSUPPORTED(".word 0xe6d00070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe6dfff7f") /* Unallocated space */
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Signed multiplies")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a1c	@ smlad	pc, r12, r10, r8")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a3c	@ smladx	pc, r12, r10, r8")
+
+	TEST_RR(   "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa1c	@ smuad	pc, r12, r10")
+	TEST_RR(   "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa3c	@ smuadx	pc, r12, r10")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a5c	@ smlsd	pc, r12, r10, r8")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a7c	@ smlsdx	pc, r12, r10, r8")
+
+	TEST_RR(   "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa5c	@ smusd	pc, r12, r10")
+	TEST_RR(   "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa7c	@ smusdx	pc, r12, r10")
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af819	@ smlald	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb819	@ smlald	r11, pc, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74ab81f	@ smlald	r11, r10, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe74abf19	@ smlald	r11, r10, r9, pc")
+
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af839	@ smlaldx	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb839	@ smlaldx	r11, pc, r9, r8")
+
+	TEST_RRR(  "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a1c	@ smmla	pc, r12, r10, r8")
+	TEST_RRR(  "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a3c	@ smmlar	pc, r12, r10, r8")
+
+	TEST_RR(   "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ smmul	pc, r12, r10")
+	TEST_RR(   "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa3c	@ smmulr	pc, r12, r10")
+
+	TEST_RRR(  "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8adc	@ smmls	pc, r12, r10, r8")
+	TEST_RRR(  "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8afc	@ smmlsr	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8aff	@ smmlsr	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8ffc	@ smmlsr	r14, r12, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe75efafc	@ smmlsr	r14, r12, r10, pc")
+
+	TEST_RR(   "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ usad8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe75efa1f	@ usad8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe75eff1c	@ usad8	r14, r12, pc")
+
+	TEST_RRR(  "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(  "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_UNSUPPORTED(".word 0xe78f8a1c	@ usada8	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8a1f	@ usada8	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8f1c	@ usada8	r14, r12, pc, r8")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Bit Field")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfxeq	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7aff45c	@ sbfx	pc, r12, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfxcs	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7eff45c	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".word 0xe7efc45f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfcvs	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".word 0xe7def01f	@ bfc	pc, #0, #31");
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfipl	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".word 0xe7d7f21e	@ bfi	pc, r14, #4, #20")
+
+	TEST_UNSUPPORTED(".word 0x07f000f0")  /* Permanently UNDEFINED */
+	TEST_UNSUPPORTED(".word 0x07ffffff")  /* Permanently UNDEFINED */
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+	TEST_GROUP("Branch, branch with link, and block data transfer")
+
+	TEST_P(   "stmda	r",0, 16*4,", {r0}")
+	TEST_P(   "stmeqda	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmneda	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmda	r",13,0,   "!, {pc}")
+
+	TEST_P(   "ldmda	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmcsda	r",4, 15*4,", {r0-r15}")
+	TEST_BF_P("ldmccda	r",7, 15*4,"!, {r8-r15}")
+	TEST_P(   "ldmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmda	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0}")
+	TEST_P(   "stmmiia	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmplia	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmia	r",14,0,   "!, {pc}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmvsia	r",4, 0,   ", {r0-r15}")
+	TEST_BF_P("ldmvcia	r",7, 8*4, "!, {r8-r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0}")
+	TEST_P(   "stmhidb	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmlsdb	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmdb	r",13,4,   "!, {pc}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmgedb	r",4, 16*4,", {r0-r15}")
+	TEST_BF_P("ldmltdb	r",7, 16*4,"!, {r8-r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {pc}")
+
+	TEST_P(   "stmib	r",0, 16*4,", {r0}")
+	TEST_P(   "stmgtib	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmleib	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmib	r",13,-4,  "!, {pc}")
+
+	TEST_P(   "ldmib	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmeqib	r",4, -4,", {r0-r15}")
+	TEST_BF_P("ldmneib	r",7, 7*4,"!, {r8-r15}")
+	TEST_P(   "ldmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmib	r",14,14*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmeqdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmnedb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmccia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmcsia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmplia	r",0,15*4,", {pc}")
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmmiia	r",13,0,", {r0-r15}")
+#endif
+	TEST_BF("b	2f")
+	TEST_BF("bl	2f")
+	TEST_BB("b	2b")
+	TEST_BB("bl	2b")
+
+	TEST_BF("beq	2f")
+	TEST_BF("bleq	2f")
+	TEST_BB("bne	2b")
+	TEST_BB("blne	2b")
+
+	TEST_BF("bgt	2f")
+	TEST_BF("blgt	2f")
+	TEST_BB("blt	2b")
+	TEST_BB("bllt	2b")
+
+	TEST_GROUP("Supervisor Call, and coprocessor instructions")
+
+	/*
+	 * We can't really test these by executing them, so all
+	 * we can do is check that probes are, or are not allowed.
+	 * At the moment none are allowed...
+	 */
+#define TEST_COPROCESSOR(code) TEST_UNSUPPORTED(code)
+
+#define COPROCESSOR_INSTRUCTIONS_ST_LD(two,cc)					\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], {1}")			\
+										\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"daf0001	@ stc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d2f0001	@ stc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"caf0001	@ stc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c2f0001	@ stc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"def0001	@ stc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d6f0001	@ stc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cef0001	@ stc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c6f0001	@ stc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dbf0001	@ ldc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d3f0001	@ ldc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cbf0001	@ ldc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c3f0001	@ ldc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dff0001	@ ldc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d7f0001	@ ldc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cff0001	@ ldc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c7f0001	@ ldc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15], {1}")
+
+#define COPROCESSOR_INSTRUCTIONS_MC_MR(two,cc)					\
+										\
+	TEST_COPROCESSOR( "mcrr"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mcrr"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c4f00f0	@ mcrr"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c40ff0f	@ mcrr"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "mrrc"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mrrc"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c5f00f0	@ mrrc"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c50ff0f	@ mrrc"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "cdp"two"	15, 15, cr15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "cdp"two"	0, 0, cr0, cr0, cr0, 0")		\
+	TEST_COPROCESSOR( "mcr"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mcr"two"	0, 0, r0, cr0, cr0, 0")			\
+	TEST_COPROCESSOR( "mrc"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mrc"two"	0, 0, r0, cr0, cr0, 0")
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("","e")
+	COPROCESSOR_INSTRUCTIONS_MC_MR("","e")
+	TEST_UNSUPPORTED("svc	0")
+	TEST_UNSUPPORTED("svc	0xffffff")
+
+	TEST_UNSUPPORTED("svc	0")
+
+	TEST_GROUP("Unconditional instruction")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("srsda	sp, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp, 0x13")
+	TEST_UNSUPPORTED("srsia	sp, 0x13")
+	TEST_UNSUPPORTED("srsib	sp, 0x13")
+	TEST_UNSUPPORTED("srsda	sp!, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp!, 0x13")
+	TEST_UNSUPPORTED("srsia	sp!, 0x13")
+	TEST_UNSUPPORTED("srsib	sp!, 0x13")
+
+	TEST_UNSUPPORTED("rfeda	sp")
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfeib	sp")
+	TEST_UNSUPPORTED("rfeda	sp!")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+	TEST_UNSUPPORTED("rfeib	sp!")
+	TEST_UNSUPPORTED(".word 0xf81d0a00	@ rfeda	pc")
+	TEST_UNSUPPORTED(".word 0xf91d0a00	@ rfedb	pc")
+	TEST_UNSUPPORTED(".word 0xf89d0a00	@ rfeia	pc")
+	TEST_UNSUPPORTED(".word 0xf99d0a00	@ rfeib	pc")
+	TEST_UNSUPPORTED(".word 0xf83d0a00	@ rfeda	pc!")
+	TEST_UNSUPPORTED(".word 0xf93d0a00	@ rfedb	pc!")
+	TEST_UNSUPPORTED(".word 0xf8bd0a00	@ rfeia	pc!")
+	TEST_UNSUPPORTED(".word 0xf9bd0a00	@ rfeib	pc!")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_X(	"blx	__dummy_thumb_subroutine_even",
+		".thumb				\n\t"
+		".space 4			\n\t"
+		".type __dummy_thumb_subroutine_even, %%function \n\t"
+		"__dummy_thumb_subroutine_even:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_even")
+
+	TEST_X(	"blx	__dummy_thumb_subroutine_odd",
+		".thumb				\n\t"
+		".space 2			\n\t"
+		".type __dummy_thumb_subroutine_odd, %%function	\n\t"
+		"__dummy_thumb_subroutine_odd:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_odd")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("2","f")
+#if __LINUX_ARM_ARCH__ >= 6
+	COPROCESSOR_INSTRUCTIONS_MC_MR("2","f")
+#endif
+
+	TEST_GROUP("Miscellaneous instructions, memory hints, and Advanced SIMD instructions")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("cps	0x13")
+	TEST_UNSUPPORTED("cpsie	i")
+	TEST_UNSUPPORTED("cpsid	i")
+	TEST_UNSUPPORTED("cpsie	i,0x13")
+	TEST_UNSUPPORTED("cpsid	i,0x13")
+	TEST_UNSUPPORTED("setend	le")
+	TEST_UNSUPPORTED("setend	be")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_P("pli	[r",0,0b,", #16]")
+	TEST(  "pli	[pc, #0]")
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 5
+	TEST_P("pld	[r",0,32,", #-16]")
+	TEST(  "pld	[pc, #0]")
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED(  ".word 0xf590f000	@ pldw [r0, #0]")
+	TEST_SUPPORTED(  ".word 0xf797f000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(  ".word 0xf798f18c	@ pldw	[r8, r12, lsl #3]");
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+#endif
+
+	verbose("\n");
+}
+
diff --git a/arch/arm/kernel/kprobes-test-thumb.c b/arch/arm/kernel/kprobes-test-thumb.c
new file mode 100644
index 0000000..5e726c3
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test-thumb.c
@@ -0,0 +1,1187 @@
+/*
+ * arch/arm/kernel/kprobes-test-thumb.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "16"
+
+#define DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_flags |= TEST_FLAG_NO_ITBLOCK;	\
+	tests						\
+	kprobe_test_flags &= ~TEST_FLAG_NO_ITBLOCK;
+
+#define CONDITION_INSTRUCTIONS(cc_pos, tests)		\
+	kprobe_test_cc_position = cc_pos;		\
+	DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_cc_position = 0;
+
+#define TEST_ITBLOCK(code)				\
+	kprobe_test_flags |= TEST_FLAG_FULL_ITBLOCK;	\
+	TESTCASE_START(code)				\
+	TEST_ARG_END("")				\
+	"50:	nop			\n\t"		\
+	"1:	"code"			\n\t"		\
+	"	mov r1, #0x11		\n\t"		\
+	"	mov r2, #0x22		\n\t"		\
+	"	mov r3, #0x33		\n\t"		\
+	"2:	nop			\n\t"		\
+	TESTCASE_END					\
+	kprobe_test_flags &= ~TEST_FLAG_FULL_ITBLOCK;
+
+#define TEST_THUMB_TO_ARM_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f+1)					\
+	TEST_ARG_MEM(15, 3f)					\
+	TEST_ARG_END("")					\
+	"	nop			\n\t" /* To align 1f */	\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"3:	adr	lr, 2f+1	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+void kprobe_thumb16_test_cases(void)
+{
+	kprobe_test_flags = TEST_FLAG_NARROW_INSTR;
+
+	TEST_GROUP("Shift (immediate), add, subtract, move, and compare")
+
+	TEST_R(    "lsls	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsls	r0, r",7,VAL2,", #11")
+	TEST_R(    "lsrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsrs	r0, r",7,VAL2,", #11")
+	TEST_R(    "asrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "asrs	r0, r",7,VAL2,", #11")
+	TEST_RR(   "adds	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "adds	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_RR(   "subs	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "subs	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_R(    "adds	r7, r",0,VAL1,", #5")
+	TEST_R(    "adds	r0, r",7,VAL2,", #2")
+	TEST_R(    "subs	r7, r",0,VAL1,", #5")
+	TEST_R(    "subs	r0, r",7,VAL2,", #2")
+	TEST(      "movs.n	r0, #0x5f")
+	TEST(      "movs.n	r7, #0xa0")
+	TEST_R(    "cmp.n	r",0,0x5e, ", #0x5f")
+	TEST_R(    "cmp.n	r",5,0x15f,", #0x5f")
+	TEST_R(    "cmp.n	r",7,0xa0, ", #0xa0")
+	TEST_R(    "adds.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "adds.n	r",7,VAL2,", #0xa0")
+	TEST_R(    "subs.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "subs.n	r",7,VAL2,", #0xa0")
+
+	TEST_GROUP("16-bit Thumb data-processing instructions")
+
+#define DATA_PROCESSING16(op,val)			\
+	TEST_RR(   op"	r",0,VAL1,", r",7,val,"")	\
+	TEST_RR(   op"	r",7,VAL2,", r",0,val,"")
+
+	DATA_PROCESSING16("ands",0xf00f00ff)
+	DATA_PROCESSING16("eors",0xf00f00ff)
+	DATA_PROCESSING16("lsls",11)
+	DATA_PROCESSING16("lsrs",11)
+	DATA_PROCESSING16("asrs",11)
+	DATA_PROCESSING16("adcs",VAL2)
+	DATA_PROCESSING16("sbcs",VAL2)
+	DATA_PROCESSING16("rors",11)
+	DATA_PROCESSING16("tst",0xf00f00ff)
+	TEST_R("rsbs	r",0,VAL1,", #0")
+	TEST_R("rsbs	r",7,VAL2,", #0")
+	DATA_PROCESSING16("cmp",0xf00f00ff)
+	DATA_PROCESSING16("cmn",0xf00f00ff)
+	DATA_PROCESSING16("orrs",0xf00f00ff)
+	DATA_PROCESSING16("muls",VAL2)
+	DATA_PROCESSING16("bics",0xf00f00ff)
+	DATA_PROCESSING16("mvns",VAL2)
+
+	TEST_GROUP("Special data instructions and branch and exchange")
+
+	TEST_RR(  "add	r",0, VAL1,", r",7,VAL2,"")
+	TEST_RR(  "add	r",3, VAL2,", r",8,VAL3,"")
+	TEST_RR(  "add	r",8, VAL3,", r",0,VAL1,"")
+	TEST_R(   "add	sp"        ", r",8,-8,  "")
+	TEST_R(   "add	r",14,VAL1,", pc")
+	TEST_BF_R("add	pc"        ", r",0,2f-1f-8,"")
+	TEST_UNSUPPORTED(".short 0x44ff	@ add pc, pc")
+
+	TEST_RR(  "cmp	r",3,VAL1,", r",8,VAL2,"")
+	TEST_RR(  "cmp	r",8,VAL2,", r",0,VAL1,"")
+	TEST_R(   "cmp	sp"       ", r",8,-8,  "")
+
+	TEST_R(   "mov	r0, r",7,VAL2,"")
+	TEST_R(   "mov	r3, r",8,VAL3,"")
+	TEST_R(   "mov	r8, r",0,VAL1,"")
+	TEST_P(   "mov	sp, r",8,-8,  "")
+	TEST(     "mov	lr, pc")
+	TEST_BF_R("mov	pc, r",0,2f,  "")
+
+	TEST_BF_R("bx	r",0, 2f+1,"")
+	TEST_BF_R("bx	r",14,2f+1,"")
+	TESTCASE_START("bx	pc")
+		TEST_ARG_REG(14, 99f+1)
+		TEST_ARG_END("")
+		"	nop			\n\t" /* To align the bx pc*/
+		"50:	nop			\n\t"
+		"1:	bx	pc		\n\t"
+		"	bx	lr		\n\t"
+		".arm				\n\t"
+		"	adr	lr, 2f+1	\n\t"
+		"	bx	lr		\n\t"
+		".thumb				\n\t"
+		"2:	nop			\n\t"
+	TESTCASE_END
+
+	TEST_BF_R("blx	r",0, 2f+1,"")
+	TEST_BB_R("blx	r",14,2f+1,"")
+	TEST_UNSUPPORTED(".short 0x47f8	@ blx pc")
+
+	TEST_GROUP("Load from Literal Pool")
+
+	TEST_X( "ldr	r0, 3f",
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL1))
+	TEST_X( "ldr	r7, 3f",
+		".space 128				\n\t"
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL2))
+
+	TEST_GROUP("16-bit Thumb Load/store instructions")
+
+	TEST_RPR("str	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("str	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strh	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strh	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strb	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strb	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrsb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldr	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldr	r7, [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrh	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrsh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsh	r7, [r",6, 24,", r",5,  50,"]")
+
+	TEST_RP("str	r",0, VAL1,", [r",1, 24,", #120]")
+	TEST_RP("str	r",7, VAL2,", [r",6, 24,", #120]")
+	TEST_P( "ldr	r0, [r",1, 24,", #120]")
+	TEST_P( "ldr	r7, [r",6, 24,", #120]")
+	TEST_RP("strb	r",0, VAL1,", [r",1, 24,", #30]")
+	TEST_RP("strb	r",7, VAL2,", [r",6, 24,", #30]")
+	TEST_P( "ldrb	r0, [r",1, 24,", #30]")
+	TEST_P( "ldrb	r7, [r",6, 24,", #30]")
+	TEST_RP("strh	r",0, VAL1,", [r",1, 24,", #60]")
+	TEST_RP("strh	r",7, VAL2,", [r",6, 24,", #60]")
+	TEST_P( "ldrh	r0, [r",1, 24,", #60]")
+	TEST_P( "ldrh	r7, [r",6, 24,", #60]")
+
+	TEST_R( "str	r",0, VAL1,", [sp, #0]")
+	TEST_R( "str	r",7, VAL2,", [sp, #160]")
+	TEST(   "ldr	r0, [sp, #0]")
+	TEST(   "ldr	r7, [sp, #160]")
+
+	TEST_RP("str	r",0, VAL1,", [r",0, 24,"]")
+	TEST_P( "ldr	r0, [r",0, 24,"]")
+
+	TEST_GROUP("Generate PC-/SP-relative address")
+
+	TEST("add	r0, pc, #4")
+	TEST("add	r7, pc, #1020")
+	TEST("add	r0, sp, #4")
+	TEST("add	r7, sp, #1020")
+
+	TEST_GROUP("Miscellaneous 16-bit instructions")
+
+	TEST_UNSUPPORTED( "cpsie	i")
+	TEST_UNSUPPORTED( "cpsid	i")
+	TEST_UNSUPPORTED( "setend	le")
+	TEST_UNSUPPORTED( "setend	be")
+
+	TEST("add	sp, #"__stringify(TEST_MEMORY_SIZE)) /* Assumes TEST_MEMORY_SIZE < 0x400 */
+	TEST("sub	sp, #0x7f*4")
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_BF_R(  "cbnz	r",0,0, ", 2f")
+	TEST_BF_R(  "cbz	r",2,-1,", 2f")
+	TEST_BF_RX( "cbnz	r",4,1, ", 2f",0x20)
+	TEST_BF_RX( "cbz	r",7,0, ", 2f",0x40)
+)
+	TEST_R("sxth	r0, r",7, HH1,"")
+	TEST_R("sxth	r7, r",0, HH2,"")
+	TEST_R("sxtb	r0, r",7, HH1,"")
+	TEST_R("sxtb	r7, r",0, HH2,"")
+	TEST_R("uxth	r0, r",7, HH1,"")
+	TEST_R("uxth	r7, r",0, HH2,"")
+	TEST_R("uxtb	r0, r",7, HH1,"")
+	TEST_R("uxtb	r7, r",0, HH2,"")
+	TEST_R("rev	r0, r",7, VAL1,"")
+	TEST_R("rev	r7, r",0, VAL2,"")
+	TEST_R("rev16	r0, r",7, VAL1,"")
+	TEST_R("rev16	r7, r",0, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xba80")
+	TEST_UNSUPPORTED(".short 0xbabf")
+	TEST_R("revsh	r0, r",7, VAL1,"")
+	TEST_R("revsh	r7, r",0, VAL2,"")
+
+#define TEST_POPPC(code, offset)	\
+	TESTCASE_START(code)		\
+	TEST_ARG_PTR(13, offset)	\
+	TEST_ARG_END("")		\
+	TEST_BRANCH_F(code,0)		\
+	TESTCASE_END
+
+	TEST("push	{r0}")
+	TEST("push	{r7}")
+	TEST("push	{r14}")
+	TEST("push	{r0-r7,r14}")
+	TEST("push	{r0,r2,r4,r6,r14}")
+	TEST("push	{r1,r3,r5,r7}")
+	TEST("pop	{r0}")
+	TEST("pop	{r7}")
+	TEST("pop	{r0,r2,r4,r6}")
+	TEST_POPPC("pop	{pc}",15*4)
+	TEST_POPPC("pop	{r0-r7,pc}",7*4)
+	TEST_POPPC("pop	{r1,r3,r5,r7,pc}",11*4)
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{pc}	@ ",13,15*4,"")
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{r0-r7,pc}	@ ",13,7*4,"")
+
+	TEST_UNSUPPORTED("bkpt.n	0")
+	TEST_UNSUPPORTED("bkpt.n	255")
+
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED(".short 0xbf50") /* Unassigned hints */
+	TEST_UNSUPPORTED(".short 0xbff0") /* Unassigned hints */
+
+#define TEST_IT(code, code2)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	"50:	nop			\n\t"	\
+	"1:	"code"			\n\t"	\
+	"	"code2"			\n\t"	\
+	"2:	nop			\n\t"	\
+	TESTCASE_END
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_IT("it	eq","moveq r0,#0")
+	TEST_IT("it	vc","movvc r0,#0")
+	TEST_IT("it	le","movle r0,#0")
+	TEST_IT("ite	eq","moveq r0,#0\n\t  movne r1,#1")
+	TEST_IT("itet	vc","movvc r0,#0\n\t  movvs r1,#1\n\t  movvc r2,#2")
+	TEST_IT("itete	le","movle r0,#0\n\t  movgt r1,#1\n\t  movle r2,#2\n\t  movgt r3,#3")
+	TEST_IT("itttt	le","movle r0,#0\n\t  movle r1,#1\n\t  movle r2,#2\n\t  movle r3,#3")
+	TEST_IT("iteee	le","movle r0,#0\n\t  movgt r1,#1\n\t  movgt r2,#2\n\t  movgt r3,#3")
+)
+
+	TEST_GROUP("Load and store multiple")
+
+	TEST_P("ldmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("ldmia	r",7, 16*4,"!, {r0-r6}")
+	TEST_P("stmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("stmia	r",0, 16*4,"!, {r0-r7}")
+
+	TEST_GROUP("Conditional branch and Supervisor Call instructions")
+
+CONDITION_INSTRUCTIONS(8,
+	TEST_BF("beq	2f")
+	TEST_BB("bne	2b")
+	TEST_BF("bgt	2f")
+	TEST_BB("blt	2b")
+)
+	TEST_UNSUPPORTED(".short 0xde00")
+	TEST_UNSUPPORTED(".short 0xdeff")
+	TEST_UNSUPPORTED("svc	#0x00")
+	TEST_UNSUPPORTED("svc	#0xff")
+
+	TEST_GROUP("Unconditional branch")
+
+	TEST_BF(  "b	2f")
+	TEST_BB(  "b	2b")
+	TEST_BF_X("b	2f", 0x400)
+	TEST_BB_X("b	2b", 0x400)
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("subs.n r0, r0")
+
+	verbose("\n");
+}
+
+
+void kprobe_thumb32_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Load/store multiple")
+
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmia	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmia	r",7, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmia	r",4, 0,   ", {r0-r12,r14}")
+	TEST_BF_P("ldmia	r",5, 8*4, "!, {r6-r12,r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,14*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmdb	r",5, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_BF_P("ldmdb	r",5, 16*4,"!, {r6-r12,r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmdb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",0,14*4,", {r12,pc}")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",13,2*4,", {r0-r12,pc}")
+
+	TEST_UNSUPPORTED(".short 0xe88f,0x0101	@ stmia	pc, {r0,r8}")
+	TEST_UNSUPPORTED(".short 0xe92f,0x5f00	@ stmdb	pc!, {r8-r12,r14}")
+	TEST_UNSUPPORTED(".short 0xe8bd,0xc000	@ ldmia	r13!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0xc000	@ ldmdb	r14!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x3f00	@ stmia	r7!, {r8-r12,sp}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x9f00	@ stmia	r7!, {r8-r12,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0x2010	@ ldmdb	r14!, {r4,sp}")
+
+	TEST_GROUP("Load/store double or exclusive, table branch")
+
+	TEST_P(  "ldrd	r0, r1, [r",1, 24,", #-16]")
+	TEST(    "ldrd	r12, r14, [sp, #16]")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,", #-16]!")
+	TEST(    "ldrd	r14, r12, [sp, #16]!")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,"], #16")
+	TEST(    "ldrd	r7, r8, [sp], #-16")
+
+	TEST_X( "ldrd	r12, r14, 3f",
+		".align 3				\n\t"
+		"3:	.word	"__stringify(VAL1)"	\n\t"
+		"	.word	"__stringify(VAL2))
+
+	TEST_UNSUPPORTED(".short 0xe9ff,0xec04	@ ldrd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ff,0xec04	@ ldrd	r14, r12, [pc], #16")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xd800	@ ldrd	sp, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xf800	@ ldrd	pc, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7d00	@ ldrd	r7, sp, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7f00	@ ldrd	r7, pc, [r4]")
+
+	TEST_RRP("strd	r",0, VAL1,", r",1, VAL2,", [r",1, 24,", #-16]")
+	TEST_RR( "strd	r",12,VAL2,", r",14,VAL1,", [sp, #16]")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,", #-16]!")
+	TEST_RR( "strd	r",14,VAL2,", r",12,VAL1,", [sp, #16]!")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16")
+	TEST_RR( "strd	r",7, VAL2,", r",8, VAL1,", [sp], #-16")
+	TEST_UNSUPPORTED(".short 0xe9ef,0xec04	@ strd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ef,0xec04	@ strd	r14, r12, [pc], #16")
+
+	TEST_RX("tbb	[pc, r",0, (9f-(1f+4)),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbb	[pc, r",4, (9f-(1f+4)+1),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbb	[r",1,9f,", r",2,0,"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",7, (9f-(1f+4))>>1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",12, ((9f-(1f+4))>>1)+1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbh	[r",1,9f, ", r",14,1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01f	@ tbh [r1, pc]")
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01d	@ tbh [r1, sp]")
+	TEST_UNSUPPORTED(".short 0xe8dd,0xf012	@ tbh [sp, r2]")
+
+	TEST_UNSUPPORTED("strexb	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexh	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexd	r0, r1, [r2]")
+	TEST_UNSUPPORTED("ldrexb	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexh	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexd	r0, [r1]")
+
+	TEST_GROUP("Data-processing (shifted register) and (modified immediate)")
+
+#define _DATA_PROCESSING32_DNM(op,s,val)					\
+	TEST_RR(op s".w	r0,  r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op s"	r1,  r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op s"	r2,  r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op s"	r3,  r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op s"	r4,  r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op s"	r5,  r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op s"	r8,  r",9, VAL1,", r",10,val, ", rrx")			\
+	TEST_R( op s"	r0,  r",11,VAL1,", #0x00010001")			\
+	TEST_R( op s"	r11, r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op s"	r7,  r",8, VAL2,", #0x000af000")
+
+#define DATA_PROCESSING32_DNM(op,val)		\
+	_DATA_PROCESSING32_DNM(op,"",val)	\
+	_DATA_PROCESSING32_DNM(op,"s",val)
+
+#define DATA_PROCESSING32_NM(op,val)					\
+	TEST_RR(op".w	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op"	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op"	r",9, VAL1,", r",10,val, ", rrx")		\
+	TEST_R( op"	r",11,VAL1,", #0x00010001")			\
+	TEST_R( op"	r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op"	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING32_DM(op,s,val)				\
+	TEST_R( op s".w	r0,  r",14, val, "")			\
+	TEST_R( op s"	r1,  r",12, val, ", lsl #3")		\
+	TEST_R( op s"	r2,  r",11, val, ", lsr #4")		\
+	TEST_R( op s"	r3,  r",10, val, ", asr #5")		\
+	TEST_R( op s"	r4,  r",9, N(val),", asr #6")		\
+	TEST_R( op s"	r5,  r",8, val, ", ror #7")		\
+	TEST_R( op s"	r8,  r",7,val, ", rrx")			\
+	TEST(   op s"	r0,  #0x00010001")			\
+	TEST(   op s"	r11, #0xf5000000")			\
+	TEST(   op s"	r7,  #0x000af000")			\
+	TEST(   op s"	r4,  #0x00005a00")
+
+#define DATA_PROCESSING32_DM(op,val)		\
+	_DATA_PROCESSING32_DM(op,"",val)	\
+	_DATA_PROCESSING32_DM(op,"s",val)
+
+	DATA_PROCESSING32_DNM("and",0xf00f00ff)
+	DATA_PROCESSING32_NM("tst",0xf00f00ff)
+	DATA_PROCESSING32_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING32_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING32_DM("mov",VAL2)
+	DATA_PROCESSING32_DNM("orn",0xf00f00ff)
+	DATA_PROCESSING32_DM("mvn",VAL2)
+	DATA_PROCESSING32_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING32_NM("teq",0xf00f00ff)
+	DATA_PROCESSING32_DNM("add",VAL2)
+	DATA_PROCESSING32_NM("cmn",VAL2)
+	DATA_PROCESSING32_DNM("adc",VAL2)
+	DATA_PROCESSING32_DNM("sbc",VAL2)
+	DATA_PROCESSING32_DNM("sub",VAL2)
+	DATA_PROCESSING32_NM("cmp",VAL2)
+	DATA_PROCESSING32_DNM("rsb",VAL2)
+
+	TEST_RR("pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_RR("pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0d	@ tst.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0f	@ tst.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea1d,0x0f07	@ tst.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea1f,0x0f07	@ tst.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf01d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf01f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0d	@ teq.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0f	@ teq.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea9d,0x0f07	@ teq.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea9f,0x0f07	@ teq.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf09d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf09f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0d	@ cmn.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0f	@ cmn.w r7, pc")
+	TEST_P("cmn.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xeb1f,0x0f07	@ cmn.w pc, r7")
+	TEST(  "cmn	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf11f,0x1f08	@ cmn pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0d	@ cmp.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0f	@ cmp.w r7, pc")
+	TEST_P("cmp.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xebbf,0x0f07	@ cmp.w pc, r7")
+	TEST(  "cmp	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf1bf,0x1f08	@ cmp pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea5f,0x070d	@ movs.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea5f,0x070f	@ movs.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea5f,0x0d07	@ movs.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea4f,0x0f07	@ mov.w  pc, r7")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1d08	@ mov sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1f08	@ mov pc, #0x00080008")
+
+	TEST_R("add.w	r0, sp, r",1, 4,"")
+	TEST_R("adds	r0, sp, r",1, 4,", asl #3")
+	TEST_R("add	r0, sp, r",1, 4,", asl #4")
+	TEST_R("add	r0, sp, r",1, 16,", ror #1")
+	TEST_R("add.w	sp, sp, r",1, 4,"")
+	TEST_R("add	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x1d01	@ add sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d71	@ add sp, sp, r1, ror #1")
+	TEST(  "add.w	r0, sp, #24")
+	TEST(  "add.w	sp, sp, #24")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0f01	@ add pc, sp, r1")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000f	@ add r0, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000d	@ add r0, sp, sp")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0f	@ add sp, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0d	@ add sp, sp, sp")
+
+	TEST_R("sub.w	r0, sp, r",1, 4,"")
+	TEST_R("subs	r0, sp, r",1, 4,", asl #3")
+	TEST_R("sub	r0, sp, r",1, 4,", asl #4")
+	TEST_R("sub	r0, sp, r",1, 16,", ror #1")
+	TEST_R("sub.w	sp, sp, r",1, 4,"")
+	TEST_R("sub	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xebad,0x1d01	@ sub sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xebad,0x0d71	@ sub sp, sp, r1, ror #1")
+	TEST_UNSUPPORTED(".short 0xebad,0x0f01	@ sub pc, sp, r1")
+	TEST(  "sub.w	r0, sp, #24")
+	TEST(  "sub.w	sp, sp, #24")
+
+	TEST_UNSUPPORTED(".short 0xea02,0x010f	@ and r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xea0f,0x0103	@ and r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0f03	@ and pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x010d	@ and r1, r2, sp")
+	TEST_UNSUPPORTED(".short 0xea0d,0x0103	@ and r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0d03	@ and sp, r2, r3")
+	TEST_UNSUPPORTED(".short 0xf00d,0x1108	@ and r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf00f,0x1108	@ and r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1d08	@ and sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1f08	@ and pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb02,0x010f	@ add r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xeb0f,0x0103	@ add r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0f03	@ add pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x010d	@ add r1, r2, sp")
+	TEST_SUPPORTED(  ".short 0xeb0d,0x0103	@ add r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0d03	@ add sp, r2, r3")
+	TEST_SUPPORTED(  ".short 0xf10d,0x1108	@ add r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10d,0x1f08	@ add pc, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10f,0x1108	@ add r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1d08	@ add sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1f08	@ add pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeaa0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeaf0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb20,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb80,0x0000")
+	TEST_UNSUPPORTED(".short 0xebe0,0x0000")
+
+	TEST_UNSUPPORTED(".short 0xf0a0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0c0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0f0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf120,0x0000")
+	TEST_UNSUPPORTED(".short 0xf180,0x0000")
+	TEST_UNSUPPORTED(".short 0xf1e0,0x0000")
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xec00,0x0000")
+	TEST_UNSUPPORTED(".short 0xeff0,0x0000")
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xfff0,0x0000")
+
+	TEST_GROUP("Data-processing (plain binary immediate)")
+
+	TEST_R("addw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "addw	r14, sp, #0xf5a")
+	TEST(  "addw	sp, sp, #0x20")
+	TEST(  "addw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1f20	@ addw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20d,0x1f20	@ addw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1d20	@ addw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf200,0x1d20	@ addw sp, r0, #0x120")
+
+	TEST_R("subw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "subw	r14, sp, #0xf5a")
+	TEST(  "subw	sp, sp, #0x20")
+	TEST(  "subw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1f20	@ subw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2ad,0x1f20	@ subw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1d20	@ subw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2a0,0x1d20	@ subw sp, r0, #0x120")
+
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".short 0xf240,0x0d00	@ movw sp, #0")
+	TEST_UNSUPPORTED(".short 0xf240,0x0f00	@ movw pc, #0")
+
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0d00	@ movt sp, #0")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0f00	@ movt pc, #0")
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0d17	@ ssat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0f17	@ ssat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30d,0x0c17	@ ssat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf30f,0x0c17	@ ssat	r12, #24, pc")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0d17	@ usat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0f17	@ usat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38d,0x0c17	@ usat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf38f,0x0c17	@ usat	r12, #24, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0d0b	@ ssat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0f0b	@ ssat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32d,0x0c0b	@ ssat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf32f,0x0c0b	@ ssat16	r12, #12, pc")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0d0b	@ usat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0f0b	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ad,0x0c0b	@ usat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf3af,0x0c0b	@ usat16	r12, #12, pc")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2d0f	@ sbfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2f0f	@ sbfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34d,0x2c0f	@ sbfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34f,0x2c0f	@ sbfx	r12, pc, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2d0f	@ ubfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2f0f	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cd,0x2c0f	@ ubfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cf,0x2c0f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfc	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0d1e	@ bfc	sp, #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0f1e	@ bfc	pc, #0, #31")
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfi	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1d17	@ bfi	sp, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1f17	@ bfi	pc, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36d,0x1e17	@ bfi	r14, sp, #4, #20")
+
+	TEST_GROUP("Branches and miscellaneous control")
+
+CONDITION_INSTRUCTIONS(22,
+	TEST_BF("beq.w	2f")
+	TEST_BB("bne.w	2b")
+	TEST_BF("bgt.w	2f")
+	TEST_BB("blt.w	2b")
+	TEST_BF_X("bpl.w	2f",0x1000)
+)
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, r1")
+	TEST_UNSUPPORTED("msr	spsr, r2")
+
+	TEST_UNSUPPORTED("cpsie.w	i")
+	TEST_UNSUPPORTED("cpsid.w	i")
+	TEST_UNSUPPORTED("cps	0x13")
+
+	TEST_SUPPORTED("yield.w")
+	TEST("sev.w")
+	TEST("nop.w")
+	TEST("wfi.w")
+	TEST_SUPPORTED("wfe.w")
+	TEST_UNSUPPORTED("dbg.w	#0")
+
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+
+	TEST_UNSUPPORTED("bxj	r0")
+
+	TEST_UNSUPPORTED("subs	pc, lr, #4")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8d00	@ mrs	sp, spsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8f00	@ mrs	pc, spsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0x8000 @ smc #0")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0xa000 @ undefeined")
+
+	TEST_BF(  "b.w	2f")
+	TEST_BB(  "b.w	2b")
+	TEST_BF_X("b.w	2f", 0x1000)
+
+	TEST_BF(  "bl.w	2f")
+	TEST_BB(  "bl.w	2b")
+	TEST_BB_X("bl.w	2b", 0x1000)
+
+	TEST_X(	"blx	__dummy_arm_subroutine",
+		".arm				\n\t"
+		".align				\n\t"
+		".type __dummy_arm_subroutine, %%function \n\t"
+		"__dummy_arm_subroutine:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".thumb				\n\t"
+	)
+	TEST(	"blx	__dummy_arm_subroutine")
+
+	TEST_GROUP("Store single data item")
+
+#define SINGLE_STORE(size)							\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,-1024,", #1024]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, -1024,", #1080]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #-120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #-128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,   ", #120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,   ", #128]!")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]!")		\
+	TEST_RPR("str"size".w	r",0, VAL1,", [r",1, 0,", r",2, 4,"]")		\
+	TEST_RPR("str"size"	r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_R(  "str"size".w	r",7, VAL1,", [sp, #24]")			\
+	TEST_RP( "str"size".w	r",0, VAL2,", [r",0,0, "]")			\
+	TEST_UNSUPPORTED("str"size"t	r0, [r1, #4]")
+
+	SINGLE_STORE("b")
+	SINGLE_STORE("h")
+	SINGLE_STORE("")
+
+	TEST("str	sp, [sp]")
+	TEST_UNSUPPORTED(".short 0xf8cf,0xe000	@ str	r14, [pc]")
+	TEST_UNSUPPORTED(".short 0xf8ce,0xf000	@ str	pc, [r14]")
+
+	TEST_GROUP("Advanced SIMD element or structure load/store instructions")
+
+	TEST_UNSUPPORTED(".short 0xf900,0x0000")
+	TEST_UNSUPPORTED(".short 0xf92f,0xffff")
+	TEST_UNSUPPORTED(".short 0xf980,0x0000")
+	TEST_UNSUPPORTED(".short 0xf9ef,0xffff")
+
+	TEST_GROUP("Load single data item and memory hints")
+
+#define SINGLE_LOAD(size)						\
+	TEST_P( "ldr"size"	r0, [r",11,-1024, ", #1024]")		\
+	TEST_P( "ldr"size"	r14, [r",1, -1024,", #1080]")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #120")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,  "], #128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #-120")		\
+	TEST_P( "ldr"size"	r14, [r",1,24,   "], #-128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,    ", #120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,   ", #128]!")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]!")		\
+	TEST_PR("ldr"size".w	r0, [r",1, 0,", r",2, 4,"]")		\
+	TEST_PR("ldr"size"	r14, [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_X( "ldr"size".w	r0, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL1))			\
+	TEST_X( "ldr"size".w	r14, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL2))			\
+	TEST(   "ldr"size".w	r7, 3b")				\
+	TEST(   "ldr"size".w	r7, [sp, #24]")				\
+	TEST_P( "ldr"size".w	r0, [r",0,0, "]")			\
+	TEST_UNSUPPORTED("ldr"size"t	r0, [r1, #4]")
+
+	SINGLE_LOAD("b")
+	SINGLE_LOAD("sb")
+	SINGLE_LOAD("h")
+	SINGLE_LOAD("sh")
+	SINGLE_LOAD("")
+
+	TEST_BF_P("ldr	pc, [r",14, 15*4,"]")
+	TEST_P(   "ldr	sp, [r",14, 13*4,"]")
+	TEST_BF_R("ldr	pc, [sp, r",14, 15*4,"]")
+	TEST_R(   "ldr	sp, [sp, r",14, 13*4,"]")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+	TEST_SUPPORTED("ldr	sp, 99f")
+	TEST_SUPPORTED("ldr	pc, 99f")
+
+	TEST_UNSUPPORTED(".short 0xf854,0x700d	@ ldr	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf854,0x700f	@ ldr	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700d	@ ldrb	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700f	@ ldrb	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf89f,0xd004	@ ldrb	sp, 99f")
+	TEST_UNSUPPORTED(".short 0xf814,0xd008	@ ldrb	sp, [r4, r8]")
+	TEST_UNSUPPORTED(".short 0xf894,0xd000	@ ldrb	sp, [r4]")
+
+	TEST_UNSUPPORTED(".short 0xf860,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf9ff,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf950,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf95f,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf800,0x0800") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf97f,0xfaff") /* Unallocated space */
+
+	TEST(   "pli	[pc, #4]")
+	TEST(   "pli	[pc, #-4]")
+	TEST(   "pld	[pc, #4]")
+	TEST(   "pld	[pc, #-4]")
+
+	TEST_P( "pld	[r",0,-1024,", #1024]")
+	TEST(   ".short 0xf8b0,0xf400	@ pldw	[r0, #1024]")
+	TEST_P( "pli	[r",4, 0b,", #1024]")
+	TEST_P( "pld	[r",7, 120,", #-120]")
+	TEST(   ".short 0xf837,0xfc78	@ pldw	[r7, #-120]")
+	TEST_P( "pli	[r",11,120,", #-120]")
+	TEST(   "pld	[sp, #0]")
+
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", r",12,16,", lsl #3]")
+	TEST_SUPPORTED(".short 0xf837,0xf000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(".short 0xf838,0xf03c	@ pldw	[r8, r12, lsl #3]");
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", r",12,16,", lsl #3]")
+	TEST_R( "pld	[sp, r",1, 16,"]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00d  @pld	[r7, sp]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00f  @pld	[r7, pc]")
+
+	TEST_GROUP("Data-processing (register)")
+
+#define SHIFTS32(op)					\
+	TEST_RR(op"	r0,  r",1, VAL1,", r",2, 3, "")	\
+	TEST_RR(op"	r14, r",12,VAL2,", r",11,10,"")
+
+	SHIFTS32("lsl")
+	SHIFTS32("lsls")
+	SHIFTS32("lsr")
+	SHIFTS32("lsrs")
+	SHIFTS32("asr")
+	SHIFTS32("asrs")
+	SHIFTS32("ror")
+	SHIFTS32("rors")
+
+	TEST_UNSUPPORTED(".short 0xfa01,0xff02	@ lsl	pc, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xfd02	@ lsl	sp, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf002	@ lsl	r0, pc, r2")
+	TEST_UNSUPPORTED(".short 0xfa0d,0xf002	@ lsl	r0, sp, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00f	@ lsl	r0, r1, pc")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00d	@ lsl	r0, r1, sp")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa0f,0xff87	@ sxth	pc, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xfd87	@ sxth	sp, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88f	@ sxth	r8, pc");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88d	@ sxth	r8, sp");
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa60,0x00f0")
+	TEST_UNSUPPORTED(".short 0xfa7f,0xffff")
+
+#define PARALLEL_ADD_SUB(op)					\
+	TEST_RR(  op"add16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"asx	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"asx	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sax	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sax	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"add8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add8	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub8	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	PARALLEL_ADD_SUB("s")
+	PARALLEL_ADD_SUB("q")
+	PARALLEL_ADD_SUB("sh")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	PARALLEL_ADD_SUB("u")
+	PARALLEL_ADD_SUB("uq")
+	PARALLEL_ADD_SUB("uh")
+
+	TEST_GROUP("Miscellaneous operations")
+
+	TEST_RR("qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qsub	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdsub	lr, r",9, VAL2,", r",8, VAL1,"")
+
+	TEST_R("rev.w	r0, r",0,   VAL1,"")
+	TEST_R("rev	r14, r",12, VAL2,"")
+	TEST_R("rev16.w	r0, r",0,   VAL1,"")
+	TEST_R("rev16	r14, r",12, VAL2,"")
+	TEST_R("rbit	r0, r",0,   VAL1,"")
+	TEST_R("rbit	r14, r",12, VAL2,"")
+	TEST_R("revsh.w	r0, r",0,   VAL1,"")
+	TEST_R("revsh	r14, r",12, VAL2,"")
+
+	TEST_UNSUPPORTED(".short 0xfa9c,0xff8c	@ rev	pc, r12");
+	TEST_UNSUPPORTED(".short 0xfa9c,0xfd8c	@ rev	sp, r12");
+	TEST_UNSUPPORTED(".short 0xfa9f,0xfe8f	@ rev	r14, pc");
+	TEST_UNSUPPORTED(".short 0xfa9d,0xfe8d	@ rev	r14, sp");
+
+	TEST_RR("sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR("sel	r14, r",12,VAL1,", r",10, VAL2,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clz	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+
+	TEST_UNSUPPORTED(".short 0xfa80,0xf030") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfab0,0xf000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+
+	TEST_GROUP("Multiply, multiply accumulate, and absolute difference operations")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mul	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xff09	@ mul	pc, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xfd09	@ mul	sp, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb0f,0xf709	@ mul	r7, pc, r9")
+	TEST_UNSUPPORTED(".short 0xfb0d,0xf709	@ mul	r7, sp, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70f	@ mul	r7, r8, pc")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70d	@ mul	r7, r8, sp")
+
+	TEST_RRR(   "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mla	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xaf09	@ mla	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xad09	@ mla	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0f,0xa709	@ mla	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0d,0xa709	@ mla	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70f	@ mla	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70d	@ mla	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xd709	@ mla	r7, r8, r9, sp");
+
+	TEST_RRR(   "mls	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mls	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+
+	TEST_RRR(   "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbt	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RR(    "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_RR(    "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_RRR(   "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+
+	TEST_RRR(   "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(   "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_RR(    "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_UNSUPPORTED(".short 0xfb00,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb0f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0x0010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+
+	TEST_GROUP("Long multiply, long multiply accumulate, and divide")
+
+	TEST_RR(   "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "smull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_UNSUPPORTED(".short 0xfb89,0xf80a	@ smull	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0xd80a	@ smull	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7f0a	@ smull	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7d0a	@ smull	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb8f,0x780a	@ smull	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb8d,0x780a	@ smull	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780f	@ smull	r7, r8, r9, pc");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780d	@ smull	r7, r8, r9, sp");
+
+	TEST_RR(   "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "umull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+
+	TEST_RRRR( "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "smlsld	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsld	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlsldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "umaal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umaal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xffff,0xffff")
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("sub.w	r0, r0")
+
+	verbose("\n");
+}
+
diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c
new file mode 100644
index 0000000..e17cdd6
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test.c
@@ -0,0 +1,1748 @@
+/*
+ * arch/arm/kernel/kprobes-test.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This file contains test code for ARM kprobes.
+ *
+ * The top level function run_all_tests() executes tests for all of the
+ * supported instruction sets: ARM, 16-bit Thumb, and 32-bit Thumb. These tests
+ * fall into two categories; run_api_tests() checks basic functionality of the
+ * kprobes API, and run_test_cases() is a comprehensive test for kprobes
+ * instruction decoding and simulation.
+ *
+ * run_test_cases() first checks the kprobes decoding table for self consistency
+ * (using table_test()) then executes a series of test cases for each of the CPU
+ * instruction forms. coverage_start() and coverage_end() are used to verify
+ * that these test cases cover all of the possible combinations of instructions
+ * described by the kprobes decoding tables.
+ *
+ * The individual test cases are in kprobes-test-arm.c and kprobes-test-thumb.c
+ * which use the macros defined in kprobes-test.h. The rest of this
+ * documentation will describe the operation of the framework used by these
+ * test cases.
+ */
+
+/*
+ * TESTING METHODOLOGY
+ * -------------------
+ *
+ * The methodology used to test an ARM instruction 'test_insn' is to use
+ * inline assembler like:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *
+ * When the test case is run a kprobe is placed of each nop. The
+ * post-handler of the test_before probe is used to modify the saved CPU
+ * register context to that which we require for the test case. The
+ * pre-handler of the of the test_after probe saves a copy of the CPU
+ * register context. In this way we can execute test_insn with a specific
+ * register context and see the results afterwards.
+ *
+ * To actually test the kprobes instruction emulation we perform the above
+ * step a second time but with an additional kprobe on the test_case
+ * instruction itself. If the emulation is accurate then the results seen
+ * by the test_after probe will be identical to the first run which didn't
+ * have a probe on test_case.
+ *
+ * Each test case is run several times with a variety of variations in the
+ * flags value of stored in CPSR, and for Thumb code, different ITState.
+ *
+ * For instructions which can modify PC, a second test_after probe is used
+ * like this:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *		b test_done
+ * test_after2: nop
+ * test_done:
+ *
+ * The test case is constructed such that test_insn branches to
+ * test_after2, or, if testing a conditional instruction, it may just
+ * continue to test_after. The probes inserted at both locations let us
+ * determine which happened. A similar approach is used for testing
+ * backwards branches...
+ *
+ *		b test_before
+ *		b test_done  @ helps to cope with off by 1 branches
+ * test_after2: nop
+ *		b test_done
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ * test_done:
+ *
+ * The macros used to generate the assembler instructions describe above
+ * are TEST_INSTRUCTION, TEST_BRANCH_F (branch forwards) and TEST_BRANCH_B
+ * (branch backwards). In these, the local variables numbered 1, 50, 2 and
+ * 99 represent: test_before, test_case, test_after2 and test_done.
+ *
+ * FRAMEWORK
+ * ---------
+ *
+ * Each test case is wrapped between the pair of macros TESTCASE_START and
+ * TESTCASE_END. As well as performing the inline assembler boilerplate,
+ * these call out to the kprobes_test_case_start() and
+ * kprobes_test_case_end() functions which drive the execution of the test
+ * case. The specific arguments to use for each test case are stored as
+ * inline data constructed using the various TEST_ARG_* macros. Putting
+ * this all together, a simple test case may look like:
+ *
+ *	TESTCASE_START("Testing mov r0, r7")
+ *	TEST_ARG_REG(7, 0x12345678) // Set r7=0x12345678
+ *	TEST_ARG_END("")
+ *	TEST_INSTRUCTION("mov r0, r7")
+ *	TESTCASE_END
+ *
+ * Note, in practice the single convenience macro TEST_R would be used for this
+ * instead.
+ *
+ * The above would expand to assembler looking something like:
+ *
+ *	@ TESTCASE_START
+ *	bl	__kprobes_test_case_start
+ *	@ start of inline data...
+ *	.ascii "mov r0, r7"	@ text title for test case
+ *	.byte	0
+ *	.align	2
+ *
+ *	@ TEST_ARG_REG
+ *	.byte	ARG_TYPE_REG
+ *	.byte	7
+ *	.short	0
+ *	.word	0x1234567
+ *
+ *	@ TEST_ARG_END
+ *	.byte	ARG_TYPE_END
+ *	.byte	TEST_ISA	@ flags, including ISA being tested
+ *	.short	50f-0f		@ offset of 'test_before'
+ *	.short	2f-0f		@ offset of 'test_after2' (if relevent)
+ *	.short	99f-0f		@ offset of 'test_done'
+ *	@ start of test case code...
+ *	0:
+ *	.code	TEST_ISA	@ switch to ISA being tested
+ *
+ *	@ TEST_INSTRUCTION
+ *	50:	nop		@ location for 'test_before' probe
+ *	1:	mov r0, r7	@ the test case instruction 'test_insn'
+ *		nop		@ location for 'test_after' probe
+ *
+ *	// TESTCASE_END
+ *	2:
+ *	99:	bl __kprobes_test_case_end_##TEST_ISA
+ *	.code	NONMAL_ISA
+ *
+ * When the above is execute the following happens...
+ *
+ * __kprobes_test_case_start() is an assembler wrapper which sets up space
+ * for a stack buffer and calls the C function kprobes_test_case_start().
+ * This C function will do some initial processing of the inline data and
+ * setup some global state. It then inserts the test_before and test_after
+ * kprobes and returns a value which causes the assembler wrapper to jump
+ * to the start of the test case code, (local label '0').
+ *
+ * When the test case code executes, the test_before probe will be hit and
+ * test_before_post_handler will call setup_test_context(). This fills the
+ * stack buffer and CPU registers with a test pattern and then processes
+ * the test case arguments. In our example there is one TEST_ARG_REG which
+ * indicates that R7 should be loaded with the value 0x12345678.
+ *
+ * When the test_before probe ends, the test case continues and executes
+ * the "mov r0, r7" instruction. It then hits the test_after probe and the
+ * pre-handler for this (test_after_pre_handler) will save a copy of the
+ * CPU register context. This should now have R0 holding the same value as
+ * R7.
+ *
+ * Finally we get to the call to __kprobes_test_case_end_{32,16}. This is
+ * an assembler wrapper which switches back to the ISA used by the test
+ * code and calls the C function kprobes_test_case_end().
+ *
+ * For each run through the test case, test_case_run_count is incremented
+ * by one. For even runs, kprobes_test_case_end() saves a copy of the
+ * register and stack buffer contents from the test case just run. It then
+ * inserts a kprobe on the test case instruction 'test_insn' and returns a
+ * value to cause the test case code to be re-run.
+ *
+ * For odd numbered runs, kprobes_test_case_end() compares the register and
+ * stack buffer contents to those that were saved on the previous even
+ * numbered run (the one without the kprobe on test_insn). These should be
+ * the same if the kprobe instruction simulation routine is correct.
+ *
+ * The pair of test case runs is repeated with different combinations of
+ * flag values in CPSR and, for Thumb, different ITState. This is
+ * controlled by test_context_cpsr().
+ *
+ * BUILDING TEST CASES
+ * -------------------
+ *
+ *
+ * As an aid to building test cases, the stack buffer is initialised with
+ * some special values:
+ *
+ *   [SP+13*4]	Contains SP+120. This can be used to test instructions
+ *		which load a value into SP.
+ *
+ *   [SP+15*4]	When testing branching instructions using TEST_BRANCH_{F,B},
+ *		this holds the target address of the branch, 'test_after2'.
+ *		This can be used to test instructions which load a PC value
+ *		from memory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+#include "kprobes-test.h"
+
+
+#define BENCHMARKING	1
+
+
+/*
+ * Test basic API
+ */
+
+static bool test_regs_ok;
+static int test_func_instance;
+static int pre_handler_called;
+static int post_handler_called;
+static int jprobe_func_called;
+static int kretprobe_handler_called;
+
+#define FUNC_ARG1 0x12345678
+#define FUNC_ARG2 0xabcdef
+
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+long arm_func(long r0, long r1);
+
+static void __used __naked __arm_kprobes_test_func(void)
+{
+	__asm__ __volatile__ (
+		".arm					\n\t"
+		".type arm_func, %%function		\n\t"
+		"arm_func:				\n\t"
+		"adds	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+		".code "NORMAL_ISA	 /* Back to Thumb if necessary */
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+long thumb16_func(long r0, long r1);
+long thumb32even_func(long r0, long r1);
+long thumb32odd_func(long r0, long r1);
+
+static void __used __naked __thumb_kprobes_test_funcs(void)
+{
+	__asm__ __volatile__ (
+		".type thumb16_func, %%function		\n\t"
+		"thumb16_func:				\n\t"
+		"adds.n	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		".type thumb32even_func, %%function	\n\t"
+		"thumb32even_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		"nop.n					\n\t"
+		".type thumb32odd_func, %%function	\n\t"
+		"thumb32odd_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#endif /* CONFIG_THUMB2_KERNEL */
+
+
+static int call_test_func(long (*func)(long, long), bool check_test_regs)
+{
+	long ret;
+
+	++test_func_instance;
+	test_regs_ok = false;
+
+	ret = (*func)(FUNC_ARG1, FUNC_ARG2);
+	if (ret != FUNC_ARG1 + FUNC_ARG2) {
+		pr_err("FAIL: call_test_func: func returned %lx\n", ret);
+		return false;
+	}
+
+	if (check_test_regs && !test_regs_ok) {
+		pr_err("FAIL: test regs not OK\n");
+		return false;
+	}
+
+	return true;
+}
+
+static int __kprobes pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	pre_handler_called = test_func_instance;
+	if (regs->ARM_r0 == FUNC_ARG1 && regs->ARM_r1 == FUNC_ARG2)
+		test_regs_ok = true;
+	return 0;
+}
+
+static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs,
+				unsigned long flags)
+{
+	post_handler_called = test_func_instance;
+	if (regs->ARM_r0 != FUNC_ARG1 + FUNC_ARG2 || regs->ARM_r1 != FUNC_ARG2)
+		test_regs_ok = false;
+}
+
+static struct kprobe the_kprobe = {
+	.addr		= 0,
+	.pre_handler	= pre_handler,
+	.post_handler	= post_handler
+};
+
+static int test_kprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kprobe.addr = (kprobe_opcode_t *)func;
+	ret = register_kprobe(&the_kprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_kprobe(&the_kprobe);
+	the_kprobe.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (pre_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe pre_handler not called\n");
+		return -EINVAL;
+	}
+	if (post_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe post_handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (pre_handler_called == test_func_instance ||
+				post_handler_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void __kprobes jprobe_func(long r0, long r1)
+{
+	jprobe_func_called = test_func_instance;
+	if (r0 == FUNC_ARG1 && r1 == FUNC_ARG2)
+		test_regs_ok = true;
+	jprobe_return();
+}
+
+static struct jprobe the_jprobe = {
+	.entry		= jprobe_func,
+};
+
+static int test_jprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_jprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_jprobe(&the_jprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_jprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_jprobe(&the_jprobe);
+	the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (jprobe_func_called != test_func_instance) {
+		pr_err("FAIL: jprobe handler function not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (jprobe_func_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __kprobes
+kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	kretprobe_handler_called = test_func_instance;
+	if (regs_return_value(regs) == FUNC_ARG1 + FUNC_ARG2)
+		test_regs_ok = true;
+	return 0;
+}
+
+static struct kretprobe the_kretprobe = {
+	.handler	= kretprobe_handler,
+};
+
+static int test_kretprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kretprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_kretprobe(&the_kretprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kretprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_kretprobe(&the_kretprobe);
+	the_kretprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (kretprobe_handler_called != test_func_instance) {
+		pr_err("FAIL: kretprobe handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (jprobe_func_called == test_func_instance) {
+		pr_err("FAIL: kretprobe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int run_api_tests(long (*func)(long, long))
+{
+	int ret;
+
+	pr_info("    kprobe\n");
+	ret = test_kprobe(func);
+	if (ret < 0)
+		return ret;
+
+	pr_info("    jprobe\n");
+	ret = test_jprobe(func);
+	if (ret < 0)
+		return ret;
+
+	pr_info("    kretprobe\n");
+	ret = test_kretprobe(func);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+
+/*
+ * Benchmarking
+ */
+
+#if BENCHMARKING
+
+static void __naked benchmark_nop(void)
+{
+	__asm__ __volatile__ (
+		"nop		\n\t"
+		"bx	lr"
+	);
+}
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define wide ".w"
+#else
+#define wide
+#endif
+
+static void __naked benchmark_pushpop1(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r3-r11,lr}  \n\t"
+		"ldmia"wide"	sp!, {r3-r11,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop2(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0-r8,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0-r8,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop3(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r4,lr}  \n\t"
+		"ldmia"wide"	sp!, {r4,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop4(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0,pc}"
+	);
+}
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+
+static void __naked benchmark_pushpop_thumb(void)
+{
+	__asm__ __volatile__ (
+		"push.n	{r0-r7,lr}  \n\t"
+		"pop.n	{r0-r7,pc}"
+	);
+}
+
+#endif
+
+static int __kprobes
+benchmark_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	return 0;
+}
+
+static int benchmark(void(*fn)(void))
+{
+	unsigned n, i, t, t0;
+
+	for (n = 1000; ; n *= 2) {
+		t0 = sched_clock();
+		for (i = n; i > 0; --i)
+			fn();
+		t = sched_clock() - t0;
+		if (t >= 250000000)
+			break; /* Stop once we took more than 0.25 seconds */
+	}
+	return t / n; /* Time for one iteration in nanoseconds */
+};
+
+static int kprobe_benchmark(void(*fn)(void), unsigned offset)
+{
+	struct kprobe k = {
+		.addr		= (kprobe_opcode_t *)((uintptr_t)fn + offset),
+		.pre_handler	= benchmark_pre_handler,
+	};
+
+	int ret = register_kprobe(&k);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = benchmark(fn);
+
+	unregister_kprobe(&k);
+	return ret;
+};
+
+struct benchmarks {
+	void		(*fn)(void);
+	unsigned	offset;
+	const char	*title;
+};
+
+static int run_benchmarks(void)
+{
+	int ret;
+	struct benchmarks list[] = {
+		{&benchmark_nop, 0, "nop"},
+		/*
+		 * benchmark_pushpop{1,3} will have the optimised
+		 * instruction emulation, whilst benchmark_pushpop{2,4} will
+		 * be the equivalent unoptimised instructions.
+		 */
+		{&benchmark_pushpop1, 0, "stmdb	sp!, {r3-r11,lr}"},
+		{&benchmark_pushpop1, 4, "ldmia	sp!, {r3-r11,pc}"},
+		{&benchmark_pushpop2, 0, "stmdb	sp!, {r0-r8,lr}"},
+		{&benchmark_pushpop2, 4, "ldmia	sp!, {r0-r8,pc}"},
+		{&benchmark_pushpop3, 0, "stmdb	sp!, {r4,lr}"},
+		{&benchmark_pushpop3, 4, "ldmia	sp!, {r4,pc}"},
+		{&benchmark_pushpop4, 0, "stmdb	sp!, {r0,lr}"},
+		{&benchmark_pushpop4, 4, "ldmia	sp!, {r0,pc}"},
+#ifdef CONFIG_THUMB2_KERNEL
+		{&benchmark_pushpop_thumb, 0, "push.n	{r0-r7,lr}"},
+		{&benchmark_pushpop_thumb, 2, "pop.n	{r0-r7,pc}"},
+#endif
+		{0}
+	};
+
+	struct benchmarks *b;
+	for (b = list; b->fn; ++b) {
+		ret = kprobe_benchmark(b->fn, b->offset);
+		if (ret < 0)
+			return ret;
+		pr_info("    %dns for kprobe %s\n", ret, b->title);
+	}
+
+	pr_info("\n");
+	return 0;
+}
+
+#endif /* BENCHMARKING */
+
+
+/*
+ * Decoding table self-consistency tests
+ */
+
+static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
+	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
+	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
+	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
+	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
+	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
+	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
+};
+
+static int table_iter(const union decode_item *table,
+			int (*fn)(const struct decode_header *, void *),
+			void *args)
+{
+	const struct decode_header *h = (struct decode_header *)table;
+	int result;
+
+	for (;;) {
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (type == DECODE_TYPE_END)
+			return 0;
+
+		result = fn(h, args);
+		if (result)
+			return result;
+
+		h = (struct decode_header *)
+			((uintptr_t)h + decode_struct_sizes[type]);
+
+	}
+}
+
+static int table_test_fail(const struct decode_header *h, const char* message)
+{
+
+	pr_err("FAIL: kprobes test failure \"%s\" (mask %08x, value %08x)\n",
+					message, h->mask.bits, h->value.bits);
+	return -EINVAL;
+}
+
+struct table_test_args {
+	const union decode_item *root_table;
+	u32			parent_mask;
+	u32			parent_value;
+};
+
+static int table_test_fn(const struct decode_header *h, void *args)
+{
+	struct table_test_args *a = (struct table_test_args *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+	if (h->value.bits & ~h->mask.bits)
+		return table_test_fail(h, "Match value has bits not in mask");
+
+	if ((h->mask.bits & a->parent_mask) != a->parent_mask)
+		return table_test_fail(h, "Mask has bits not in parent mask");
+
+	if ((h->value.bits ^ a->parent_value) & a->parent_mask)
+		return table_test_fail(h, "Value is inconsistent with parent");
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		struct table_test_args args2 = *a;
+		args2.parent_mask = h->mask.bits;
+		args2.parent_value = h->value.bits;
+		return table_iter(d->table.table, table_test_fn, &args2);
+	}
+
+	return 0;
+}
+
+static int table_test(const union decode_item *table)
+{
+	struct table_test_args args = {
+		.root_table	= table,
+		.parent_mask	= 0,
+		.parent_value	= 0
+	};
+	return table_iter(args.root_table, table_test_fn, &args);
+}
+
+
+/*
+ * Decoding table test coverage analysis
+ *
+ * coverage_start() builds a coverage_table which contains a list of
+ * coverage_entry's to match each entry in the specified kprobes instruction
+ * decoding table.
+ *
+ * When test cases are run, coverage_add() is called to process each case.
+ * This looks up the corresponding entry in the coverage_table and sets it as
+ * being matched, as well as clearing the regs flag appropriate for the test.
+ *
+ * After all test cases have been run, coverage_end() is called to check that
+ * all entries in coverage_table have been matched and that all regs flags are
+ * cleared. I.e. that all possible combinations of instructions described by
+ * the kprobes decoding tables have had a test case executed for them.
+ */
+
+bool coverage_fail;
+
+#define MAX_COVERAGE_ENTRIES 256
+
+struct coverage_entry {
+	const struct decode_header	*header;
+	unsigned			regs;
+	unsigned			nesting;
+	char				matched;
+};
+
+struct coverage_table {
+	struct coverage_entry	*base;
+	unsigned		num_entries;
+	unsigned		nesting;
+};
+
+struct coverage_table coverage;
+
+#define COVERAGE_ANY_REG	(1<<0)
+#define COVERAGE_SP		(1<<1)
+#define COVERAGE_PC		(1<<2)
+#define COVERAGE_PCWB		(1<<3)
+
+static const char coverage_register_lookup[16] = {
+	[REG_TYPE_ANY]		= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_SAMEAS16]	= COVERAGE_ANY_REG,
+	[REG_TYPE_SP]		= COVERAGE_SP,
+	[REG_TYPE_PC]		= COVERAGE_PC,
+	[REG_TYPE_NOSP]		= COVERAGE_ANY_REG | COVERAGE_SP,
+	[REG_TYPE_NOSPPC]	= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_NOPC]		= COVERAGE_ANY_REG | COVERAGE_PC,
+	[REG_TYPE_NOPCWB]	= COVERAGE_ANY_REG | COVERAGE_PC | COVERAGE_PCWB,
+	[REG_TYPE_NOPCX]	= COVERAGE_ANY_REG,
+	[REG_TYPE_NOSPPCX]	= COVERAGE_ANY_REG | COVERAGE_SP,
+};
+
+unsigned coverage_start_registers(const struct decode_header *h)
+{
+	unsigned regs = 0;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		int r = (h->type_regs.bits >> (DECODE_TYPE_BITS + i)) & 0xf;
+		regs |= coverage_register_lookup[r] << i;
+	}
+	return regs;
+}
+
+static int coverage_start_fn(const struct decode_header *h, void *args)
+{
+	struct coverage_table *coverage = (struct coverage_table *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+	struct coverage_entry *entry = coverage->base + coverage->num_entries;
+
+	if (coverage->num_entries == MAX_COVERAGE_ENTRIES - 1) {
+		pr_err("FAIL: Out of space for test coverage data");
+		return -ENOMEM;
+	}
+
+	++coverage->num_entries;
+
+	entry->header = h;
+	entry->regs = coverage_start_registers(h);
+	entry->nesting = coverage->nesting;
+	entry->matched = false;
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		int ret;
+		++coverage->nesting;
+		ret = table_iter(d->table.table, coverage_start_fn, coverage);
+		--coverage->nesting;
+		return ret;
+	}
+
+	return 0;
+}
+
+static int coverage_start(const union decode_item *table)
+{
+	coverage.base = kmalloc(MAX_COVERAGE_ENTRIES *
+				sizeof(struct coverage_entry), GFP_KERNEL);
+	coverage.num_entries = 0;
+	coverage.nesting = 0;
+	return table_iter(table, coverage_start_fn, &coverage);
+}
+
+static void
+coverage_add_registers(struct coverage_entry *entry, kprobe_opcode_t insn)
+{
+	int regs = entry->header->type_regs.bits >> DECODE_TYPE_BITS;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		enum decode_reg_type reg_type = (regs >> i) & 0xf;
+		int reg = (insn >> i) & 0xf;
+		int flag;
+
+		if (!reg_type)
+			continue;
+
+		if (reg == 13)
+			flag = COVERAGE_SP;
+		else if (reg == 15)
+			flag = COVERAGE_PC;
+		else
+			flag = COVERAGE_ANY_REG;
+		entry->regs &= ~(flag << i);
+
+		switch (reg_type) {
+
+		case REG_TYPE_NONE:
+		case REG_TYPE_ANY:
+		case REG_TYPE_SAMEAS16:
+			break;
+
+		case REG_TYPE_SP:
+			if (reg != 13)
+				return;
+			break;
+
+		case REG_TYPE_PC:
+			if (reg != 15)
+				return;
+			break;
+
+		case REG_TYPE_NOSP:
+			if (reg == 13)
+				return;
+			break;
+
+		case REG_TYPE_NOSPPC:
+		case REG_TYPE_NOSPPCX:
+			if (reg == 13 || reg == 15)
+				return;
+			break;
+
+		case REG_TYPE_NOPCWB:
+			if (!is_writeback(insn))
+				break;
+			if (reg == 15) {
+				entry->regs &= ~(COVERAGE_PCWB << i);
+				return;
+			}
+			break;
+
+		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX:
+			if (reg == 15)
+				return;
+			break;
+		}
+
+	}
+}
+
+static void coverage_add(kprobe_opcode_t insn)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+	bool matched = false;
+	unsigned nesting = 0;
+
+	for (; entry < end; ++entry) {
+		const struct decode_header *h = entry->header;
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (entry->nesting > nesting)
+			continue; /* Skip sub-table we didn't match */
+
+		if (entry->nesting < nesting)
+			break; /* End of sub-table we were scanning */
+
+		if (!matched) {
+			if ((insn & h->mask.bits) != h->value.bits)
+				continue;
+			entry->matched = true;
+		}
+
+		switch (type) {
+
+		case DECODE_TYPE_TABLE:
+			++nesting;
+			break;
+
+		case DECODE_TYPE_CUSTOM:
+		case DECODE_TYPE_SIMULATE:
+		case DECODE_TYPE_EMULATE:
+			coverage_add_registers(entry, insn);
+			return;
+
+		case DECODE_TYPE_OR:
+			matched = true;
+			break;
+
+		case DECODE_TYPE_REJECT:
+		default:
+			return;
+		}
+
+	}
+}
+
+static void coverage_end(void)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+
+	for (; entry < end; ++entry) {
+		u32 mask = entry->header->mask.bits;
+		u32 value = entry->header->value.bits;
+
+		if (entry->regs) {
+			pr_err("FAIL: Register test coverage missing for %08x %08x (%05x)\n",
+				mask, value, entry->regs);
+			coverage_fail = true;
+		}
+		if (!entry->matched) {
+			pr_err("FAIL: Test coverage entry missing for %08x %08x\n",
+				mask, value);
+			coverage_fail = true;
+		}
+	}
+
+	kfree(coverage.base);
+}
+
+
+/*
+ * Framework for instruction set test cases
+ */
+
+void __naked __kprobes_test_case_start(void)
+{
+	__asm__ __volatile__ (
+		"stmdb	sp!, {r4-r11}				\n\t"
+		"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"bic	r0, lr, #1  @ r0 = inline title string	\n\t"
+		"mov	r1, sp					\n\t"
+		"bl	kprobes_test_case_start			\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"movne	pc, r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"mov	pc, r0					\n\t"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+void __naked __kprobes_test_case_end_16(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"bxne	r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		".arm						\n\t"
+		"orr	lr, lr, #1  @ will return to Thumb code	\n\t"
+		"ldr	pc, 1f					\n\t"
+		"1:						\n\t"
+		".word	__kprobes_test_case_end_16		\n\t"
+	);
+}
+
+#endif
+
+
+int kprobe_test_flags;
+int kprobe_test_cc_position;
+
+static int test_try_count;
+static int test_pass_count;
+static int test_fail_count;
+
+static struct pt_regs initial_regs;
+static struct pt_regs expected_regs;
+static struct pt_regs result_regs;
+
+static u32 expected_memory[TEST_MEMORY_SIZE/sizeof(u32)];
+
+static const char *current_title;
+static struct test_arg *current_args;
+static u32 *current_stack;
+static uintptr_t current_branch_target;
+
+static uintptr_t current_code_start;
+static kprobe_opcode_t current_instruction;
+
+
+#define TEST_CASE_PASSED -1
+#define TEST_CASE_FAILED -2
+
+static int test_case_run_count;
+static bool test_case_is_thumb;
+static int test_instance;
+
+/*
+ * We ignore the state of the imprecise abort disable flag (CPSR.A) because this
+ * can change randomly as the kernel doesn't take care to preserve or initialise
+ * this across context switches. Also, with Security Extentions, the flag may
+ * not be under control of the kernel; for this reason we ignore the state of
+ * the FIQ disable flag CPSR.F as well.
+ */
+#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT)
+
+static unsigned long test_check_cc(int cc, unsigned long cpsr)
+{
+	unsigned long temp;
+
+	switch (cc) {
+	case 0x0: /* eq */
+		return cpsr & PSR_Z_BIT;
+
+	case 0x1: /* ne */
+		return (~cpsr) & PSR_Z_BIT;
+
+	case 0x2: /* cs */
+		return cpsr & PSR_C_BIT;
+
+	case 0x3: /* cc */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0x4: /* mi */
+		return cpsr & PSR_N_BIT;
+
+	case 0x5: /* pl */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0x6: /* vs */
+		return cpsr & PSR_V_BIT;
+
+	case 0x7: /* vc */
+		return (~cpsr) & PSR_V_BIT;
+
+	case 0x8: /* hi */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return cpsr & PSR_C_BIT;
+
+	case 0x9: /* ls */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0xa: /* ge */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0xb: /* lt */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return cpsr & PSR_N_BIT;
+
+	case 0xc: /* gt */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return (~temp) & PSR_N_BIT;
+
+	case 0xd: /* le */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return temp & PSR_N_BIT;
+
+	case 0xe: /* al */
+	case 0xf: /* unconditional */
+		return true;
+	}
+	BUG();
+	return false;
+}
+
+static int is_last_scenario;
+static int probe_should_run; /* 0 = no, 1 = yes, -1 = unknown */
+static int memory_needs_checking;
+
+static unsigned long test_context_cpsr(int scenario)
+{
+	unsigned long cpsr;
+
+	probe_should_run = 1;
+
+	/* Default case is that we cycle through 16 combinations of flags */
+	cpsr  = (scenario & 0xf) << 28; /* N,Z,C,V flags */
+	cpsr |= (scenario & 0xf) << 16; /* GE flags */
+	cpsr |= (scenario & 0x1) << 27; /* Toggle Q flag */
+
+	if (!test_case_is_thumb) {
+		/* Testing ARM code */
+		probe_should_run = test_check_cc(current_instruction >> 28, cpsr) != 0;
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_NO_ITBLOCK) {
+		/* Testing Thumb code without setting ITSTATE */
+		if (kprobe_test_cc_position) {
+			int cc = (current_instruction >> kprobe_test_cc_position) & 0xf;
+			probe_should_run = test_check_cc(cc, cpsr) != 0;
+		}
+
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_FULL_ITBLOCK) {
+		/* Testing Thumb code with all combinations of ITSTATE */
+		unsigned x = (scenario >> 4);
+		unsigned cond_base = x % 7; /* ITSTATE<7:5> */
+		unsigned mask = x / 7 + 2;  /* ITSTATE<4:0>, bits reversed */
+
+		if (mask > 0x1f) {
+			/* Finish by testing state from instruction 'itt al' */
+			cond_base = 7;
+			mask = 0x4;
+			if ((scenario & 0xf) == 0xf)
+				is_last_scenario = true;
+		}
+
+		cpsr |= cond_base << 13;	/* ITSTATE<7:5> */
+		cpsr |= (mask & 0x1) << 12;	/* ITSTATE<4> */
+		cpsr |= (mask & 0x2) << 10;	/* ITSTATE<3> */
+		cpsr |= (mask & 0x4) << 8;	/* ITSTATE<2> */
+		cpsr |= (mask & 0x8) << 23;	/* ITSTATE<1> */
+		cpsr |= (mask & 0x10) << 21;	/* ITSTATE<0> */
+
+		probe_should_run = test_check_cc((cpsr >> 12) & 0xf, cpsr) != 0;
+
+	} else {
+		/* Testing Thumb code with several combinations of ITSTATE */
+		switch (scenario) {
+		case 16: /* Clear NZCV flags and 'it eq' state (false as Z=0) */
+			cpsr = 0x00000800;
+			probe_should_run = 0;
+			break;
+		case 17: /* Set NZCV flags and 'it vc' state (false as V=1) */
+			cpsr = 0xf0007800;
+			probe_should_run = 0;
+			break;
+		case 18: /* Clear NZCV flags and 'it ls' state (true as C=0) */
+			cpsr = 0x00009800;
+			break;
+		case 19: /* Set NZCV flags and 'it cs' state (true as C=1) */
+			cpsr = 0xf0002800;
+			is_last_scenario = true;
+			break;
+		}
+	}
+
+	return cpsr;
+}
+
+static void setup_test_context(struct pt_regs *regs)
+{
+	int scenario = test_case_run_count>>1;
+	unsigned long val;
+	struct test_arg *args;
+	int i;
+
+	is_last_scenario = false;
+	memory_needs_checking = false;
+
+	/* Initialise test memory on stack */
+	val = (scenario & 1) ? VALM : ~VALM;
+	for (i = 0; i < TEST_MEMORY_SIZE / sizeof(current_stack[0]); ++i)
+		current_stack[i] = val + (i << 8);
+	/* Put target of branch on stack for tests which load PC from memory */
+	if (current_branch_target)
+		current_stack[15] = current_branch_target;
+	/* Put a value for SP on stack for tests which load SP from memory */
+	current_stack[13] = (u32)current_stack + 120;
+
+	/* Initialise register values to their default state */
+	val = (scenario & 2) ? VALR : ~VALR;
+	for (i = 0; i < 13; ++i)
+		regs->uregs[i] = val ^ (i << 8);
+	regs->ARM_lr = val ^ (14 << 8);
+	regs->ARM_cpsr &= ~(APSR_MASK | PSR_IT_MASK);
+	regs->ARM_cpsr |= test_context_cpsr(scenario);
+
+	/* Perform testcase specific register setup  */
+	args = current_args;
+	for (; args[0].type != ARG_TYPE_END; ++args)
+		switch (args[0].type) {
+		case ARG_TYPE_REG: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] = arg->val;
+			break;
+		}
+		case ARG_TYPE_PTR: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] =
+				(unsigned long)current_stack + arg->val;
+			memory_needs_checking = true;
+			break;
+		}
+		case ARG_TYPE_MEM: {
+			struct test_arg_mem *arg = (struct test_arg_mem *)args;
+			current_stack[arg->index] = arg->val;
+			break;
+		}
+		default:
+			break;
+		}
+}
+
+struct test_probe {
+	struct kprobe	kprobe;
+	bool		registered;
+	int		hit;
+};
+
+static void unregister_test_probe(struct test_probe *probe)
+{
+	if (probe->registered) {
+		unregister_kprobe(&probe->kprobe);
+		probe->kprobe.flags = 0; /* Clear disable flag to allow reuse */
+	}
+	probe->registered = false;
+}
+
+static int register_test_probe(struct test_probe *probe)
+{
+	int ret;
+
+	if (probe->registered)
+		BUG();
+
+	ret = register_kprobe(&probe->kprobe);
+	if (ret >= 0) {
+		probe->registered = true;
+		probe->hit = -1;
+	}
+	return ret;
+}
+
+static int __kprobes
+test_before_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static void __kprobes
+test_before_post_handler(struct kprobe *p, struct pt_regs *regs,
+							unsigned long flags)
+{
+	setup_test_context(regs);
+	initial_regs = *regs;
+	initial_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+}
+
+static int __kprobes
+test_case_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static int __kprobes
+test_after_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	if (container_of(p, struct test_probe, kprobe)->hit == test_instance)
+		return 0; /* Already run for this test instance */
+
+	result_regs = *regs;
+	result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+
+	/* Undo any changes done to SP by the test case */
+	regs->ARM_sp = (unsigned long)current_stack;
+
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static struct test_probe test_before_probe = {
+	.kprobe.pre_handler	= test_before_pre_handler,
+	.kprobe.post_handler	= test_before_post_handler,
+};
+
+static struct test_probe test_case_probe = {
+	.kprobe.pre_handler	= test_case_pre_handler,
+};
+
+static struct test_probe test_after_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+static struct test_probe test_after2_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+static void test_case_cleanup(void)
+{
+	unregister_test_probe(&test_before_probe);
+	unregister_test_probe(&test_case_probe);
+	unregister_test_probe(&test_after_probe);
+	unregister_test_probe(&test_after2_probe);
+}
+
+static void print_registers(struct pt_regs *regs)
+{
+	pr_err("r0  %08lx | r1  %08lx | r2  %08lx | r3  %08lx\n",
+		regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+	pr_err("r4  %08lx | r5  %08lx | r6  %08lx | r7  %08lx\n",
+		regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7);
+	pr_err("r8  %08lx | r9  %08lx | r10 %08lx | r11 %08lx\n",
+		regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp);
+	pr_err("r12 %08lx | sp  %08lx | lr  %08lx | pc  %08lx\n",
+		regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc);
+	pr_err("cpsr %08lx\n", regs->ARM_cpsr);
+}
+
+static void print_memory(u32 *mem, size_t size)
+{
+	int i;
+	for (i = 0; i < size / sizeof(u32); i += 4)
+		pr_err("%08x %08x %08x %08x\n", mem[i], mem[i+1],
+						mem[i+2], mem[i+3]);
+}
+
+static size_t expected_memory_size(u32 *sp)
+{
+	size_t size = sizeof(expected_memory);
+	int offset = (uintptr_t)sp - (uintptr_t)current_stack;
+	if (offset > 0)
+		size -= offset;
+	return size;
+}
+
+static void test_case_failed(const char *message)
+{
+	test_case_cleanup();
+
+	pr_err("FAIL: %s\n", message);
+	pr_err("FAIL: Test %s\n", current_title);
+	pr_err("FAIL: Scenario %d\n", test_case_run_count >> 1);
+}
+
+static unsigned long next_instruction(unsigned long pc)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if ((pc & 1) && !is_wide_instruction(*(u16 *)(pc - 1)))
+		return pc + 2;
+	else
+#endif
+	return pc + 4;
+}
+
+static uintptr_t __used kprobes_test_case_start(const char *title, void *stack)
+{
+	struct test_arg *args;
+	struct test_arg_end *end_arg;
+	unsigned long test_code;
+
+	args = (struct test_arg *)PTR_ALIGN(title + strlen(title) + 1, 4);
+
+	current_title = title;
+	current_args = args;
+	current_stack = stack;
+
+	++test_try_count;
+
+	while (args->type != ARG_TYPE_END)
+		++args;
+	end_arg = (struct test_arg_end *)args;
+
+	test_code = (unsigned long)(args + 1); /* Code starts after args */
+
+	test_case_is_thumb = end_arg->flags & ARG_FLAG_THUMB;
+	if (test_case_is_thumb)
+		test_code |= 1;
+
+	current_code_start = test_code;
+
+	current_branch_target = 0;
+	if (end_arg->branch_offset != end_arg->end_offset)
+		current_branch_target = test_code + end_arg->branch_offset;
+
+	test_code += end_arg->code_offset;
+	test_before_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	test_code = next_instruction(test_code);
+	test_case_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (test_case_is_thumb) {
+		u16 *p = (u16 *)(test_code & ~1);
+		current_instruction = p[0];
+		if (is_wide_instruction(current_instruction)) {
+			current_instruction <<= 16;
+			current_instruction |= p[1];
+		}
+	} else {
+		current_instruction = *(u32 *)test_code;
+	}
+
+	if (current_title[0] == '.')
+		verbose("%s\n", current_title);
+	else
+		verbose("%s\t@ %0*x\n", current_title,
+					test_case_is_thumb ? 4 : 8,
+					current_instruction);
+
+	test_code = next_instruction(test_code);
+	test_after_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (kprobe_test_flags & TEST_FLAG_NARROW_INSTR) {
+		if (!test_case_is_thumb ||
+			is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 16-bit instruction");
+				goto fail;
+		}
+	} else {
+		if (test_case_is_thumb &&
+			!is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 32-bit instruction");
+				goto fail;
+		}
+	}
+
+	coverage_add(current_instruction);
+
+	if (end_arg->flags & ARG_FLAG_UNSUPPORTED) {
+		if (register_test_probe(&test_case_probe) < 0)
+			goto pass;
+		test_case_failed("registered probe for unsupported instruction");
+		goto fail;
+	}
+
+	if (end_arg->flags & ARG_FLAG_SUPPORTED) {
+		if (register_test_probe(&test_case_probe) >= 0)
+			goto pass;
+		test_case_failed("couldn't register probe for supported instruction");
+		goto fail;
+	}
+
+	if (register_test_probe(&test_before_probe) < 0) {
+		test_case_failed("register test_before_probe failed");
+		goto fail;
+	}
+	if (register_test_probe(&test_after_probe) < 0) {
+		test_case_failed("register test_after_probe failed");
+		goto fail;
+	}
+	if (current_branch_target) {
+		test_after2_probe.kprobe.addr =
+				(kprobe_opcode_t *)current_branch_target;
+		if (register_test_probe(&test_after2_probe) < 0) {
+			test_case_failed("register test_after2_probe failed");
+			goto fail;
+		}
+	}
+
+	/* Start first run of test case */
+	test_case_run_count = 0;
+	++test_instance;
+	return current_code_start;
+pass:
+	test_case_run_count = TEST_CASE_PASSED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+fail:
+	test_case_run_count = TEST_CASE_FAILED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+}
+
+static bool check_test_results(void)
+{
+	size_t mem_size = 0;
+	u32 *mem = 0;
+
+	if (memcmp(&expected_regs, &result_regs, sizeof(expected_regs))) {
+		test_case_failed("registers differ");
+		goto fail;
+	}
+
+	if (memory_needs_checking) {
+		mem = (u32 *)result_regs.ARM_sp;
+		mem_size = expected_memory_size(mem);
+		if (memcmp(expected_memory, mem, mem_size)) {
+			test_case_failed("test memory differs");
+			goto fail;
+		}
+	}
+
+	return true;
+
+fail:
+	pr_err("initial_regs:\n");
+	print_registers(&initial_regs);
+	pr_err("expected_regs:\n");
+	print_registers(&expected_regs);
+	pr_err("result_regs:\n");
+	print_registers(&result_regs);
+
+	if (mem) {
+		pr_err("current_stack=%p\n", current_stack);
+		pr_err("expected_memory:\n");
+		print_memory(expected_memory, mem_size);
+		pr_err("result_memory:\n");
+		print_memory(mem, mem_size);
+	}
+
+	return false;
+}
+
+static uintptr_t __used kprobes_test_case_end(void)
+{
+	if (test_case_run_count < 0) {
+		if (test_case_run_count == TEST_CASE_PASSED)
+			/* kprobes_test_case_start did all the needed testing */
+			goto pass;
+		else
+			/* kprobes_test_case_start failed */
+			goto fail;
+	}
+
+	if (test_before_probe.hit != test_instance) {
+		test_case_failed("test_before_handler not run");
+		goto fail;
+	}
+
+	if (test_after_probe.hit != test_instance &&
+				test_after2_probe.hit != test_instance) {
+		test_case_failed("test_after_handler not run");
+		goto fail;
+	}
+
+	/*
+	 * Even numbered test runs ran without a probe on the test case so
+	 * we can gather reference results. The subsequent odd numbered run
+	 * will have the probe inserted.
+	*/
+	if ((test_case_run_count & 1) == 0) {
+		/* Save results from run without probe */
+		u32 *mem = (u32 *)result_regs.ARM_sp;
+		expected_regs = result_regs;
+		memcpy(expected_memory, mem, expected_memory_size(mem));
+
+		/* Insert probe onto test case instruction */
+		if (register_test_probe(&test_case_probe) < 0) {
+			test_case_failed("register test_case_probe failed");
+			goto fail;
+		}
+	} else {
+		/* Check probe ran as expected */
+		if (probe_should_run == 1) {
+			if (test_case_probe.hit != test_instance) {
+				test_case_failed("test_case_handler not run");
+				goto fail;
+			}
+		} else if (probe_should_run == 0) {
+			if (test_case_probe.hit == test_instance) {
+				test_case_failed("test_case_handler ran");
+				goto fail;
+			}
+		}
+
+		/* Remove probe for any subsequent reference run */
+		unregister_test_probe(&test_case_probe);
+
+		if (!check_test_results())
+			goto fail;
+
+		if (is_last_scenario)
+			goto pass;
+	}
+
+	/* Do next test run */
+	++test_case_run_count;
+	++test_instance;
+	return current_code_start;
+fail:
+	++test_fail_count;
+	goto end;
+pass:
+	++test_pass_count;
+end:
+	test_case_cleanup();
+	return 0;
+}
+
+
+/*
+ * Top level test functions
+ */
+
+static int run_test_cases(void (*tests)(void), const union decode_item *table)
+{
+	int ret;
+
+	pr_info("    Check decoding tables\n");
+	ret = table_test(table);
+	if (ret)
+		return ret;
+
+	pr_info("    Run test cases\n");
+	ret = coverage_start(table);
+	if (ret)
+		return ret;
+
+	tests();
+
+	coverage_end();
+	return 0;
+}
+
+
+static int __init run_all_tests(void)
+{
+	int ret = 0;
+
+	pr_info("Begining kprobe tests...\n");
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+	pr_info("Probe ARM code\n");
+	ret = run_api_tests(arm_func);
+	if (ret)
+		goto out;
+
+	pr_info("ARM instruction simulation\n");
+	ret = run_test_cases(kprobe_arm_test_cases, kprobe_decode_arm_table);
+	if (ret)
+		goto out;
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+	pr_info("Probe 16-bit Thumb code\n");
+	ret = run_api_tests(thumb16_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, even halfword\n");
+	ret = run_api_tests(thumb32even_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, odd halfword\n");
+	ret = run_api_tests(thumb32odd_func);
+	if (ret)
+		goto out;
+
+	pr_info("16-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb16_test_cases,
+				kprobe_decode_thumb16_table);
+	if (ret)
+		goto out;
+
+	pr_info("32-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb32_test_cases,
+				kprobe_decode_thumb32_table);
+	if (ret)
+		goto out;
+#endif
+
+	pr_info("Total instruction simulation tests=%d, pass=%d fail=%d\n",
+		test_try_count, test_pass_count, test_fail_count);
+	if (test_fail_count) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+#if BENCHMARKING
+	pr_info("Benchmarks\n");
+	ret = run_benchmarks();
+	if (ret)
+		goto out;
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	/* We are able to run all test cases so coverage should be complete */
+	if (coverage_fail) {
+		pr_err("FAIL: Test coverage checks failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+#endif
+
+out:
+	if (ret == 0)
+		pr_info("Finished kprobe tests OK\n");
+	else
+		pr_err("kprobe tests failed\n");
+
+	return ret;
+}
+
+
+/*
+ * Module setup
+ */
+
+#ifdef MODULE
+
+static void __exit kprobe_test_exit(void)
+{
+}
+
+module_init(run_all_tests)
+module_exit(kprobe_test_exit)
+MODULE_LICENSE("GPL");
+
+#else /* !MODULE */
+
+late_initcall(run_all_tests);
+
+#endif
diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/kernel/kprobes-test.h
new file mode 100644
index 0000000..0dc5d77
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test.h
@@ -0,0 +1,392 @@
+/*
+ * arch/arm/kernel/kprobes-test.h
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define VERBOSE 0 /* Set to '1' for more logging of test cases */
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define NORMAL_ISA "16"
+#else
+#define NORMAL_ISA "32"
+#endif
+
+
+/* Flags used in kprobe_test_flags */
+#define TEST_FLAG_NO_ITBLOCK	(1<<0)
+#define TEST_FLAG_FULL_ITBLOCK	(1<<1)
+#define TEST_FLAG_NARROW_INSTR	(1<<2)
+
+extern int kprobe_test_flags;
+extern int kprobe_test_cc_position;
+
+
+#define TEST_MEMORY_SIZE 256
+
+
+/*
+ * Test case structures.
+ *
+ * The arguments given to test cases can be one of three types.
+ *
+ *   ARG_TYPE_REG
+ *	Load a register with the given value.
+ *
+ *   ARG_TYPE_PTR
+ *	Load a register with a pointer into the stack buffer (SP + given value).
+ *
+ *   ARG_TYPE_MEM
+ *	Store the given value into the stack buffer at [SP+index].
+ *
+ */
+
+#define	ARG_TYPE_END	0
+#define	ARG_TYPE_REG	1
+#define	ARG_TYPE_PTR	2
+#define	ARG_TYPE_MEM	3
+
+#define ARG_FLAG_UNSUPPORTED	0x01
+#define ARG_FLAG_SUPPORTED	0x02
+#define ARG_FLAG_THUMB		0x10	/* Must be 16 so TEST_ISA can be used */
+#define ARG_FLAG_ARM		0x20	/* Must be 32 so TEST_ISA can be used */
+
+struct test_arg {
+	u8	type;		/* ARG_TYPE_x */
+	u8	_padding[7];
+};
+
+struct test_arg_regptr {
+	u8	type;		/* ARG_TYPE_REG or ARG_TYPE_PTR */
+	u8	reg;
+	u8	_padding[2];
+	u32	val;
+};
+
+struct test_arg_mem {
+	u8	type;		/* ARG_TYPE_MEM */
+	u8	index;
+	u8	_padding[2];
+	u32	val;
+};
+
+struct test_arg_end {
+	u8	type;		/* ARG_TYPE_END */
+	u8	flags;		/* ARG_FLAG_x */
+	u16	code_offset;
+	u16	branch_offset;
+	u16	end_offset;
+};
+
+
+/*
+ * Building blocks for test cases.
+ *
+ * Each test case is wrapped between TESTCASE_START and TESTCASE_END.
+ *
+ * To specify arguments for a test case the TEST_ARG_{REG,PTR,MEM} macros are
+ * used followed by a terminating TEST_ARG_END.
+ *
+ * After this, the instruction to be tested is defined with TEST_INSTRUCTION.
+ * Or for branches, TEST_BRANCH_B and TEST_BRANCH_F (branch forwards/backwards).
+ *
+ * Some specific test cases may make use of other custom constructs.
+ */
+
+#if VERBOSE
+#define verbose(fmt, ...) pr_info(fmt, ##__VA_ARGS__)
+#else
+#define verbose(fmt, ...)
+#endif
+
+#define TEST_GROUP(title)					\
+	verbose("\n");						\
+	verbose(title"\n");					\
+	verbose("---------------------------------------------------------\n");
+
+#define TESTCASE_START(title)					\
+	__asm__ __volatile__ (					\
+	"bl	__kprobes_test_case_start		\n\t"	\
+	/* don't use .asciz here as 'title' may be */		\
+	/* multiple strings to be concatenated.  */		\
+	".ascii "#title"				\n\t"	\
+	".byte	0					\n\t"	\
+	".align	2					\n\t"
+
+#define	TEST_ARG_REG(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_REG)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_PTR(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_PTR)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_MEM(index, val)				\
+	".byte	"__stringify(ARG_TYPE_MEM)"		\n\t"	\
+	".byte	"#index"				\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_END(flags)					\
+	".byte	"__stringify(ARG_TYPE_END)"		\n\t"	\
+	".byte	"TEST_ISA flags"			\n\t"	\
+	".short	50f-0f					\n\t"	\
+	".short	2f-0f					\n\t"	\
+	".short	99f-0f					\n\t"	\
+	".code "TEST_ISA"				\n\t"	\
+	"0:						\n\t"
+
+#define TEST_INSTRUCTION(instruction)				\
+	"50:	nop					\n\t"	\
+	"1:	"instruction"				\n\t"	\
+	"	nop					\n\t"
+
+#define TEST_BRANCH_F(instruction, xtra_dist)			\
+	TEST_INSTRUCTION(instruction)				\
+	".if "#xtra_dist"				\n\t"	\
+	"	b	99f				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"
+
+#define TEST_BRANCH_B(instruction, xtra_dist)			\
+	"	b	50f				\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"	\
+	"	b	99f				\n\t"	\
+	".if "#xtra_dist"				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	TEST_INSTRUCTION(instruction)
+
+#define TESTCASE_END						\
+	"2:						\n\t"	\
+	"99:						\n\t"	\
+	"	bl __kprobes_test_case_end_"TEST_ISA"	\n\t"	\
+	".code "NORMAL_ISA"				\n\t"	\
+	: :							\
+	: "r0", "r1", "r2", "r3", "ip", "lr", "memory", "cc"	\
+	);
+
+
+/*
+ * Macros to define test cases.
+ *
+ * Those of the form TEST_{R,P,M}* can be used to define test cases
+ * which take combinations of the three basic types of arguments. E.g.
+ *
+ *   TEST_R	One register argument
+ *   TEST_RR	Two register arguments
+ *   TEST_RPR	A register, a pointer, then a register argument
+ *
+ * For testing instructions which may branch, there are macros TEST_BF_*
+ * and TEST_BB_* for branching forwards and backwards.
+ *
+ * TEST_SUPPORTED and TEST_UNSUPPORTED don't cause the code to be executed,
+ * the just verify that a kprobe is or is not allowed on the given instruction.
+ */
+
+#define TEST(code)				\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	TESTCASE_END
+
+#define TEST_UNSUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_UNSUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_SUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_SUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_R(code1, reg, val, code2)			\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 #reg code2)		\
+	TESTCASE_END
+
+#define TEST_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4, reg4, val4)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_REG(reg4, val4)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)	\
+	TESTCASE_END
+
+#define TEST_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code1 #reg1 code2)	\
+	TESTCASE_END
+
+#define TEST_PR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_PTR(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RP(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_PTR(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_PRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_PTR(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RPR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_PTR(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRP(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_PTR(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_BF_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code1 #reg1 code2, 0)	\
+	TESTCASE_END
+
+#define TEST_BF_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BB_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_B(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BF_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_F(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BB_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_B(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BF(code)	TEST_BF_X(code, 0)
+#define TEST_BB(code)	TEST_BB_X(code, 0)
+
+#define TEST_BF_R(code1, reg, val, code2) TEST_BF_RX(code1, reg, val, code2, 0)
+#define TEST_BB_R(code1, reg, val, code2) TEST_BB_RX(code1, reg, val, code2, 0)
+
+#define TEST_BF_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_BRANCH_F(code1 #reg1 code2 #reg2 code3, 0)		\
+	TESTCASE_END
+
+#define TEST_X(code, codex)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	"	b	99f		\n\t"	\
+	"	"codex"			\n\t"	\
+	TESTCASE_END
+
+#define TEST_RX(code1, reg, val, code2, codex)		\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 __stringify(reg) code2)	\
+	"	b	99f		\n\t"		\
+	"	"codex"			\n\t"		\
+	TESTCASE_END
+
+#define TEST_RRX(code1, reg1, val1, code2, reg2, val2, code3, codex)		\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)				\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 __stringify(reg1) code2 __stringify(reg2) code3)	\
+	"	b	99f		\n\t"					\
+	"	"codex"			\n\t"					\
+	TESTCASE_END
+
+
+/* Various values used in test cases... */
+#define N(val)	(val ^ 0xffffffff)
+#define VAL1	0x12345678
+#define VAL2	N(VAL1)
+#define VAL3	0xa5f801
+#define VAL4	N(VAL3)
+#define VALM	0x456789ab
+#define VALR	0xdeaddead
+#define HH1	0x0123fecb
+#define HH2	0xa9874567
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+void kprobe_thumb16_test_cases(void);
+void kprobe_thumb32_test_cases(void);
+#else
+void kprobe_arm_test_cases(void);
+#endif
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c
index 902ca59..8f96ec7 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/kernel/kprobes-thumb.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -943,6 +944,9 @@
 	 */
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb32_table);
+#endif
 
 static void __kprobes
 t16_simulate_bxblx(struct kprobe *p, struct pt_regs *regs)
@@ -1423,6 +1427,9 @@
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb16_table);
+#endif
 
 static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
 {
diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/kernel/kprobes.h
index a6aeda0..38945f7 100644
--- a/arch/arm/kernel/kprobes.h
+++ b/arch/arm/kernel/kprobes.h
@@ -413,6 +413,14 @@
 	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
 
 
+#ifdef CONFIG_THUMB2_KERNEL
+extern const union decode_item kprobe_decode_thumb16_table[];
+extern const union decode_item kprobe_decode_thumb32_table[];
+#else
+extern const union decode_item kprobe_decode_arm_table[];
+#endif
+
+
 int kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
 			const union decode_item *table, bool thumb16);
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 53c9c26..e6e5d7c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -26,16 +27,8 @@
 #include <asm/pmu.h>
 #include <asm/stacktrace.h>
 
-static struct platform_device *pmu_device;
-
 /*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
-static DEFINE_RAW_SPINLOCK(pmu_lock);
-
-/*
- * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
+ * ARMv6 supports a maximum of 3 events, starting from index 0. If we add
  * another platform that supports more, we need to increase this to be the
  * largest of all platforms.
  *
@@ -43,62 +36,24 @@
  *  cycle counter CCNT + 31 events counters CNT0..30.
  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
  */
-#define ARMPMU_MAX_HWEVENTS		33
+#define ARMPMU_MAX_HWEVENTS		32
 
-/* The events for a given CPU. */
-struct cpu_hw_events {
-	/*
-	 * The events that are active on the CPU for the given index. Index 0
-	 * is reserved.
-	 */
-	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
+static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
+static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is actively being
-	 * used.
-	 */
-	unsigned long		active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
-};
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-struct arm_pmu {
-	enum arm_perf_pmu_ids id;
-	const char	*name;
-	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct hw_perf_event *evt, int idx);
-	void		(*disable)(struct hw_perf_event *evt, int idx);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
-	u32		(*read_counter)(int idx);
-	void		(*write_counter)(int idx, u32 val);
-	void		(*start)(void);
-	void		(*stop)(void);
-	void		(*reset)(void *);
-	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-				    [PERF_COUNT_HW_CACHE_OP_MAX]
-				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
-	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
-	u32		raw_event_mask;
-	int		num_events;
-	u64		max_period;
-};
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
 
 /* Set at runtime when we know what CPU type we are. */
-static const struct arm_pmu *armpmu;
+static struct arm_pmu *cpu_pmu;
 
 enum arm_perf_pmu_ids
 armpmu_get_pmu_id(void)
 {
 	int id = -ENODEV;
 
-	if (armpmu != NULL)
-		id = armpmu->id;
+	if (cpu_pmu != NULL)
+		id = cpu_pmu->id;
 
 	return id;
 }
@@ -109,8 +64,8 @@
 {
 	int max_events = 0;
 
-	if (armpmu != NULL)
-		max_events = armpmu->num_events;
+	if (cpu_pmu != NULL)
+		max_events = cpu_pmu->num_events;
 
 	return max_events;
 }
@@ -130,7 +85,11 @@
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
 static int
-armpmu_map_cache_event(u64 config)
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
 {
 	unsigned int cache_type, cache_op, cache_result, ret;
 
@@ -146,7 +105,7 @@
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
 
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
@@ -155,23 +114,46 @@
 }
 
 static int
-armpmu_map_event(u64 config)
+armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*armpmu->event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+	int mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
 static int
-armpmu_map_raw_event(u64 config)
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
 {
-	return (int)(config & armpmu->raw_event_mask);
+	return (int)(config & raw_event_mask);
 }
 
-static int
+static int map_cpu_event(struct perf_event *event,
+			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+			 const unsigned (*cache_map)
+					[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX],
+			 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int
 armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
@@ -202,11 +184,12 @@
 	return ret;
 }
 
-static u64
+u64
 armpmu_event_update(struct perf_event *event,
 		    struct hw_perf_event *hwc,
 		    int idx, int overflow)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	u64 delta, prev_raw_count, new_raw_count;
 
 again:
@@ -246,11 +229,9 @@
 static void
 armpmu_stop(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -266,11 +247,9 @@
 static void
 armpmu_start(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
 	 * PERF_EF_RELOAD, see the comment below.
@@ -293,16 +272,16 @@
 static void
 armpmu_del(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
 	WARN_ON(idx < 0);
 
-	clear_bit(idx, cpuc->active_mask);
 	armpmu_stop(event, PERF_EF_UPDATE);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
 
 	perf_event_update_userpage(event);
 }
@@ -310,7 +289,8 @@
 static int
 armpmu_add(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
@@ -318,7 +298,7 @@
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(cpuc, hwc);
+	idx = armpmu->get_event_idx(hw_events, hwc);
 	if (idx < 0) {
 		err = idx;
 		goto out;
@@ -330,8 +310,7 @@
 	 */
 	event->hw.idx = idx;
 	armpmu->disable(hwc, idx);
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+	hw_events->events[idx] = event;
 
 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	if (flags & PERF_EF_START)
@@ -345,25 +324,25 @@
 	return err;
 }
 
-static struct pmu pmu;
-
 static int
-validate_event(struct cpu_hw_events *cpuc,
+validate_event(struct pmu_hw_events *hw_events,
 	       struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event fake_event = event->hw;
+	struct pmu *leader_pmu = event->group_leader->pmu;
 
-	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
-	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
+	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
 }
 
 static int
 validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct pmu_hw_events fake_pmu;
 
 	memset(&fake_pmu, 0, sizeof(fake_pmu));
 
@@ -383,110 +362,119 @@
 
 static irqreturn_t armpmu_platform_irq(int irq, void *dev)
 {
-	struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);
+	struct arm_pmu *armpmu = (struct arm_pmu *) dev;
+	struct platform_device *plat_device = armpmu->plat_device;
+	struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);
 
 	return plat->handle_irq(irq, dev, armpmu->handle_irq);
 }
 
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	for (i = 0; i < irqs; ++i) {
+		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+			continue;
+		irq = platform_get_irq(pmu_device, i);
+		if (irq >= 0)
+			free_irq(irq, armpmu);
+	}
+
+	release_pmu(armpmu->type);
+}
+
 static int
-armpmu_reserve_hardware(void)
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
 {
 	struct arm_pmu_platdata *plat;
 	irq_handler_t handle_irq;
-	int i, err = -ENODEV, irq;
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
 
-	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
-	if (IS_ERR(pmu_device)) {
+	err = reserve_pmu(armpmu->type);
+	if (err) {
 		pr_warning("unable to reserve pmu\n");
-		return PTR_ERR(pmu_device);
+		return err;
 	}
 
-	init_pmu(ARM_PMU_DEVICE_CPU);
-
 	plat = dev_get_platdata(&pmu_device->dev);
 	if (plat && plat->handle_irq)
 		handle_irq = armpmu_platform_irq;
 	else
 		handle_irq = armpmu->handle_irq;
 
-	if (pmu_device->num_resources < 1) {
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
 		pr_err("no irqs for PMUs defined\n");
 		return -ENODEV;
 	}
 
-	for (i = 0; i < pmu_device->num_resources; ++i) {
+	for (i = 0; i < irqs; ++i) {
+		err = 0;
 		irq = platform_get_irq(pmu_device, i);
 		if (irq < 0)
 			continue;
 
+		/*
+		 * If we have a single PMU interrupt that we can't shift,
+		 * assume that we're running on a uniprocessor machine and
+		 * continue. Otherwise, continue without this interrupt.
+		 */
+		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				    irq, i);
+			continue;
+		}
+
 		err = request_irq(irq, handle_irq,
 				  IRQF_DISABLED | IRQF_NOBALANCING,
-				  "armpmu", NULL);
+				  "arm-pmu", armpmu);
 		if (err) {
-			pr_warning("unable to request IRQ%d for ARM perf "
-				"counters\n", irq);
-			break;
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			armpmu_release_hardware(armpmu);
+			return err;
 		}
+
+		cpumask_set_cpu(i, &armpmu->active_irqs);
 	}
 
-	if (err) {
-		for (i = i - 1; i >= 0; --i) {
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, NULL);
-		}
-		release_pmu(ARM_PMU_DEVICE_CPU);
-		pmu_device = NULL;
-	}
-
-	return err;
+	return 0;
 }
 
 static void
-armpmu_release_hardware(void)
-{
-	int i, irq;
-
-	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
-		irq = platform_get_irq(pmu_device, i);
-		if (irq >= 0)
-			free_irq(irq, NULL);
-	}
-	armpmu->stop();
-
-	release_pmu(ARM_PMU_DEVICE_CPU);
-	pmu_device = NULL;
-}
-
-static atomic_t active_events = ATOMIC_INIT(0);
-static DEFINE_MUTEX(pmu_reserve_mutex);
-
-static void
 hw_perf_event_destroy(struct perf_event *event)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
-		armpmu_release_hardware();
-		mutex_unlock(&pmu_reserve_mutex);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events	 = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
 	}
 }
 
 static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
+}
+
+static int
 __hw_perf_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping, err;
 
-	/* Decode the generic type into an ARM event identifier. */
-	if (PERF_TYPE_HARDWARE == event->attr.type) {
-		mapping = armpmu_map_event(event->attr.config);
-	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
-		mapping = armpmu_map_cache_event(event->attr.config);
-	} else if (PERF_TYPE_RAW == event->attr.type) {
-		mapping = armpmu_map_raw_event(event->attr.config);
-	} else {
-		pr_debug("event type %x not supported\n", event->attr.type);
-		return -EOPNOTSUPP;
-	}
+	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
 		pr_debug("event %x:%llx not supported\n", event->attr.type,
@@ -495,34 +483,31 @@
 	}
 
 	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 * The ARM performance counters are on all of the time so if someone
-	 * has asked us for some excludes then we have to fail.
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
 	 */
-	if (event->attr.exclude_kernel || event->attr.exclude_user ||
-	    event->attr.exclude_hv || event->attr.exclude_idle) {
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
 		pr_debug("ARM performance counters do not support "
 			 "mode exclusion\n");
 		return -EPERM;
 	}
 
 	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
+	 * Store the event encoding into the config_base field.
 	 */
-	hwc->idx = -1;
-
-	/*
-	 * Store the event encoding into the config_base field. config and
-	 * event_base are unused as the only 2 things we need to know are
-	 * the event mapping and the counter to use. The counter to use is
-	 * also the indx and the config_base is the event type.
-	 */
-	hwc->config_base	    = (unsigned long)mapping;
-	hwc->config		    = 0;
-	hwc->event_base		    = 0;
+	hwc->config_base	    |= (unsigned long)mapping;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
@@ -542,32 +527,23 @@
 
 static int armpmu_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
 
-	switch (event->attr.type) {
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-		break;
-
-	default:
+	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
-	}
-
-	if (!armpmu)
-		return -ENODEV;
 
 	event->destroy = hw_perf_event_destroy;
 
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmu_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			err = armpmu_reserve_hardware();
-		}
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
 
 		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmu_reserve_mutex);
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
 	}
 
 	if (err)
@@ -582,22 +558,9 @@
 
 static void armpmu_enable(struct pmu *pmu)
 {
-	/* Enable all of the perf events on hardware. */
-	int idx, enabled = 0;
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
-	if (!armpmu)
-		return;
-
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-
-		if (!event)
-			continue;
-
-		armpmu->enable(&event->hw, idx);
-		enabled = 1;
-	}
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
 	if (enabled)
 		armpmu->start();
@@ -605,20 +568,32 @@
 
 static void armpmu_disable(struct pmu *pmu)
 {
-	if (armpmu)
-		armpmu->stop();
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	armpmu->stop();
 }
 
-static struct pmu pmu = {
-	.pmu_enable	= armpmu_enable,
-	.pmu_disable	= armpmu_disable,
-	.event_init	= armpmu_event_init,
-	.add		= armpmu_add,
-	.del		= armpmu_del,
-	.start		= armpmu_start,
-	.stop		= armpmu_stop,
-	.read		= armpmu_read,
-};
+static void __init armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+	};
+}
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
+{
+	armpmu_init(armpmu);
+	return perf_pmu_register(&armpmu->pmu, name, type);
+}
 
 /* Include the PMU-specific implementations. */
 #include "perf_event_xscale.c"
@@ -630,14 +605,72 @@
  * This requires SMP to be available, so exists as a separate initcall.
  */
 static int __init
-armpmu_reset(void)
+cpu_pmu_reset(void)
 {
-	if (armpmu && armpmu->reset)
-		return on_each_cpu(armpmu->reset, NULL, 1);
+	if (cpu_pmu && cpu_pmu->reset)
+		return on_each_cpu(cpu_pmu->reset, NULL, 1);
 	return 0;
 }
-arch_initcall(armpmu_reset);
+arch_initcall(cpu_pmu_reset);
 
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static struct of_device_id armpmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a9-pmu"},
+	{.compatible = "arm,cortex-a8-pmu"},
+	{.compatible = "arm,arm1136-pmu"},
+	{.compatible = "arm,arm1176-pmu"},
+	{},
+};
+
+static struct platform_device_id armpmu_plat_device_ids[] = {
+	{.name = "arm-pmu"},
+	{},
+};
+
+static int __devinit armpmu_device_probe(struct platform_device *pdev)
+{
+	cpu_pmu->plat_device = pdev;
+	return 0;
+}
+
+static struct platform_driver armpmu_driver = {
+	.driver		= {
+		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
+	},
+	.probe		= armpmu_device_probe,
+	.id_table	= armpmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&armpmu_driver);
+}
+device_initcall(register_pmu_driver);
+
+static struct pmu_hw_events *armpmu_get_cpu_events(void)
+{
+	return &__get_cpu_var(cpu_hw_events);
+}
+
+static void __init cpu_pmu_init(struct arm_pmu *armpmu)
+{
+	int cpu;
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+		events->events = per_cpu(hw_events, cpu);
+		events->used_mask = per_cpu(used_mask, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+	}
+	armpmu->get_hw_events = armpmu_get_cpu_events;
+	armpmu->type = ARM_PMU_DEVICE_CPU;
+}
+
+/*
+ * CPU PMU identification and registration.
+ */
 static int __init
 init_hw_perf_events(void)
 {
@@ -651,22 +684,22 @@
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			armpmu = armv6pmu_init();
+			cpu_pmu = armv6pmu_init();
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			armpmu = armv6mpcore_pmu_init();
+			cpu_pmu = armv6mpcore_pmu_init();
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			armpmu = armv7_a8_pmu_init();
+			cpu_pmu = armv7_a8_pmu_init();
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			armpmu = armv7_a9_pmu_init();
+			cpu_pmu = armv7_a9_pmu_init();
 			break;
 		case 0xC050:	/* Cortex-A5 */
-			armpmu = armv7_a5_pmu_init();
+			cpu_pmu = armv7_a5_pmu_init();
 			break;
 		case 0xC0F0:	/* Cortex-A15 */
-			armpmu = armv7_a15_pmu_init();
+			cpu_pmu = armv7_a15_pmu_init();
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -674,23 +707,23 @@
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			armpmu = xscale1pmu_init();
+			cpu_pmu = xscale1pmu_init();
 			break;
 		case 2:
-			armpmu = xscale2pmu_init();
+			cpu_pmu = xscale2pmu_init();
 			break;
 		}
 	}
 
-	if (armpmu) {
+	if (cpu_pmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
+			cpu_pmu->name, cpu_pmu->num_events);
+		cpu_pmu_init(cpu_pmu);
+		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
 	} else {
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-
 	return 0;
 }
 early_initcall(init_hw_perf_events);
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index dd7f3b9..e63d811 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -54,7 +54,7 @@
 };
 
 enum armv6_counters {
-	ARMV6_CYCLE_COUNTER = 1,
+	ARMV6_CYCLE_COUNTER = 0,
 	ARMV6_COUNTER0,
 	ARMV6_COUNTER1,
 };
@@ -433,6 +433,7 @@
 		      int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= 0;
@@ -454,12 +455,29 @@
 	 * Mask out the current event and set the counter to count the event
 	 * that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int counter_is_active(unsigned long pmcr, int idx)
+{
+	unsigned long mask = 0;
+	if (idx == ARMV6_CYCLE_COUNTER)
+		mask = ARMV6_PMCR_CCOUNT_IEN;
+	else if (idx == ARMV6_COUNTER0)
+		mask = ARMV6_PMCR_COUNT0_IEN;
+	else if (idx == ARMV6_COUNTER1)
+		mask = ARMV6_PMCR_COUNT1_IEN;
+
+	if (mask)
+		return pmcr & mask;
+
+	WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+	return 0;
 }
 
 static irqreturn_t
@@ -468,7 +486,7 @@
 {
 	unsigned long pmcr = armv6_pmcr_read();
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -487,11 +505,11 @@
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
+		if (!counter_is_active(pmcr, idx))
 			continue;
 
 		/*
@@ -508,7 +526,7 @@
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -527,28 +545,30 @@
 armv6pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val |= ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 armv6pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
 		       struct hw_perf_event *event)
 {
 	/* Always place a cycle counter into the cycle counter. */
@@ -578,6 +598,7 @@
 		       int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -598,12 +619,12 @@
 	 * of ETM bus signal assertion cycles. The external reporting should
 	 * be disabled and so this should never increment.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
@@ -611,6 +632,7 @@
 			      int idx)
 {
 	unsigned long val, mask, flags, evt = 0;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -627,15 +649,21 @@
 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
 	 * simply disable the interrupt reporting.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static const struct arm_pmu armv6pmu = {
+static int armv6_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6_perf_map,
+				&armv6_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu armv6pmu = {
 	.id			= ARM_PERF_PMU_ID_V6,
 	.name			= "v6",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -646,14 +674,12 @@
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6_perf_cache_map,
-	.event_map		= &armv6_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return &armv6pmu;
 }
@@ -665,7 +691,14 @@
  * disable the interrupt reporting and update the event. When unthrottling we
  * reset the period and enable the interrupt reporting.
  */
-static const struct arm_pmu armv6mpcore_pmu = {
+
+static int armv6mpcore_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6mpcore_perf_map,
+				&armv6mpcore_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu armv6mpcore_pmu = {
 	.id			= ARM_PERF_PMU_ID_V6MP,
 	.name			= "v6mpcore",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -676,24 +709,22 @@
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6mpcore_perf_cache_map,
-	.event_map		= &armv6mpcore_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6mpcore_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return &armv6mpcore_pmu;
 }
 #else
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 6be3e2e..1ef6d00 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -17,6 +17,9 @@
  */
 
 #ifdef CONFIG_CPU_V7
+
+static struct arm_pmu armv7pmu;
+
 /*
  * Common ARMv7 event types
  *
@@ -676,25 +679,26 @@
 };
 
 /*
- * Perf Events counters
+ * Perf Events' indices
  */
-enum armv7_counters {
-	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
-	ARMV7_COUNTER0			= 2,	/* First event counter */
-};
+#define	ARMV7_IDX_CYCLE_COUNTER	0
+#define	ARMV7_IDX_COUNTER0	1
+#define	ARMV7_IDX_COUNTER_LAST	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
-/*
- * The cycle counter is ARMV7_CYCLE_COUNTER.
- * The first event counter is ARMV7_COUNTER0.
- * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
- */
-#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
+#define	ARMV7_MAX_COUNTERS	32
+#define	ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
 
 /*
  * ARMv7 low level PMNC access
  */
 
 /*
+ * Perf Event to low level counters mapping
+ */
+#define	ARMV7_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
+
+/*
  * Per-CPU PMNC: config reg
  */
 #define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
@@ -708,103 +712,76 @@
 #define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
 
 /*
- * Available counters
- */
-#define ARMV7_CNT0		0	/* First event counter */
-#define ARMV7_CCNT		31	/* Cycle counter */
-
-/* Perf Event to low level counters mapping */
-#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
-
-/*
- * CNTENS: counters enable reg
- */
-#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * CNTENC: counters disable reg
- */
-#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENS: counters overflow interrupt enable reg
- */
-#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENC: counters overflow interrupt disable reg
- */
-#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * EVTSEL: Event selection reg
- */
-#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
-
-/*
- * SELECT: Counter selection reg
- */
-#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
-
-/*
  * FLAG: counters overflow flag status reg
  */
-#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
 #define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
 #define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
 
-static inline unsigned long armv7_pmnc_read(void)
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define	ARMV7_EVTYPE_MASK	0xc00000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv2
+ */
+#define	ARMV7_EXCLUDE_PL1	(1 << 31)
+#define	ARMV7_EXCLUDE_USER	(1 << 30)
+#define	ARMV7_INCLUDE_HYP	(1 << 27)
+
+static inline u32 armv7_pmnc_read(void)
 {
 	u32 val;
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
 	return val;
 }
 
-static inline void armv7_pmnc_write(unsigned long val)
+static inline void armv7_pmnc_write(u32 val)
 {
 	val &= ARMV7_PMNC_MASK;
 	isb();
 	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
 }
 
-static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
 {
 	return pmnc & ARMV7_OVERFLOWED_MASK;
 }
 
-static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
-					enum armv7_counters counter)
+static inline int armv7_pmnc_counter_valid(int idx)
+{
+	return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST;
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
 {
 	int ret = 0;
+	u32 counter;
 
-	if (counter == ARMV7_CYCLE_COUNTER)
-		ret = pmnc & ARMV7_FLAG_C;
-	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
-		ret = pmnc & ARMV7_FLAG_P(counter);
-	else
+	if (!armv7_pmnc_counter_valid(idx)) {
 		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), counter);
+			smp_processor_id(), idx);
+	} else {
+		counter = ARMV7_IDX_TO_COUNTER(idx);
+		ret = pmnc & BIT(counter);
+	}
 
 	return ret;
 }
 
-static inline int armv7_pmnc_select_counter(unsigned int idx)
+static inline int armv7_pmnc_select_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
-		pr_err("CPU%u selecting wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u selecting wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
 	isb();
 
 	return idx;
@@ -812,124 +789,95 @@
 
 static inline u32 armv7pmu_read_counter(int idx)
 {
-	unsigned long value = 0;
+	u32 value = 0;
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mrc p15, 0, %0, c9, c13, 2"
-				     : "=r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
 
 	return value;
 }
 
 static inline void armv7pmu_write_counter(int idx, u32 value)
 {
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mcr p15, 0, %0, c9, c13, 2"
-				     : : "r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
 }
 
-static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
 {
 	if (armv7_pmnc_select_counter(idx) == idx) {
-		val &= ARMV7_EVTSEL_MASK;
+		val &= ARMV7_EVTYPE_MASK;
 		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
 	}
 }
 
-static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
+static inline int armv7_pmnc_enable_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENS_C;
-	else
-		val = ARMV7_CNTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
+static inline int armv7_pmnc_disable_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENC_C;
-	else
-		val = ARMV7_CNTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
+static inline int armv7_pmnc_enable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENS_C;
-	else
-		val = ARMV7_INTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
+static inline int armv7_pmnc_disable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENC_C;
-	else
-		val = ARMV7_INTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
@@ -973,14 +921,14 @@
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
 	printk(KERN_INFO "CCNT  =0x%08x\n", val);
 
-	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
+	for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) {
 		armv7_pmnc_select_counter(cnt);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 	}
 }
 #endif
@@ -988,12 +936,13 @@
 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1002,9 +951,10 @@
 
 	/*
 	 * Set event (if destined for PMNx counters)
-	 * We don't need to set the event if it's a cycle count
+	 * We only need to set the event for the cycle counter if we
+	 * have the ability to perform event filtering.
 	 */
-	if (idx != ARMV7_CYCLE_COUNTER)
+	if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
 		armv7_pmnc_write_evtsel(idx, hwc->config_base);
 
 	/*
@@ -1017,17 +967,18 @@
 	 */
 	armv7_pmnc_enable_counter(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Disable counter and interrupt
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1039,14 +990,14 @@
 	 */
 	armv7_pmnc_disable_intens(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 {
-	unsigned long pmnc;
+	u32 pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -1069,13 +1020,10 @@
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		/*
 		 * We have a single interrupt for all counters. Check that
 		 * each counter has overflowed before we process it.
@@ -1090,7 +1038,7 @@
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -1108,61 +1056,114 @@
 static void armv7pmu_start(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_stop(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
 				  struct hw_perf_event *event)
 {
 	int idx;
+	unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
-	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
-		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
+	if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
-		return ARMV7_CYCLE_COUNTER;
-	} else {
-		/*
-		 * For anything other than a cycle counter, try and use
-		 * the events counters
-		 */
-		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
-			if (!test_and_set_bit(idx, cpuc->used_mask))
-				return idx;
-		}
-
-		/* The counters are all in use. */
-		return -EAGAIN;
+		return ARMV7_IDX_CYCLE_COUNTER;
 	}
+
+	/*
+	 * For anything other than a cycle counter, try and use
+	 * the events counters
+	 */
+	for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+	if (attr->exclude_user)
+		config_base |= ARMV7_EXCLUDE_USER;
+	if (attr->exclude_kernel)
+		config_base |= ARMV7_EXCLUDE_PL1;
+	if (!attr->exclude_hv)
+		config_base |= ARMV7_INCLUDE_HYP;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
 }
 
 static void armv7pmu_reset(void *info)
 {
-	u32 idx, nb_cnt = armpmu->num_events;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = 1; idx < nb_cnt; ++idx)
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
 		armv7pmu_disable_event(NULL, idx);
 
 	/* Initialize & Reset PMNC: C and P bits */
 	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
 }
 
+static int armv7_a8_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a8_perf_map,
+				&armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a9_perf_map,
+				&armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a5_perf_map,
+				&armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a15_perf_map,
+				&armv7_a15_perf_cache_map, 0xFF);
+}
+
 static struct arm_pmu armv7pmu = {
 	.handle_irq		= armv7pmu_handle_irq,
 	.enable			= armv7pmu_enable_event,
@@ -1173,7 +1174,6 @@
 	.start			= armv7pmu_start,
 	.stop			= armv7pmu_stop,
 	.reset			= armv7pmu_reset,
-	.raw_event_mask		= 0xFF,
 	.max_period		= (1LLU << 32) - 1,
 };
 
@@ -1188,62 +1188,59 @@
 	return nb_cnt + 1;
 }
 
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
-	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a8_perf_map;
+	armv7pmu.map_event	= armv7_a8_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
-	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a9_perf_map;
+	armv7pmu.map_event	= armv7_a9_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
-	armv7pmu.cache_map	= &armv7_a5_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a5_perf_map;
+	armv7pmu.map_event	= armv7_a5_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
-	armv7pmu.cache_map	= &armv7_a15_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a15_perf_map;
+	armv7pmu.map_event	= armv7_a15_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
+	armv7pmu.set_event_filter = armv7pmu_set_event_filter;
 	return &armv7pmu;
 }
 #else
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 3c43974..e0cca10 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -40,7 +40,7 @@
 };
 
 enum xscale_counters {
-	XSCALE_CYCLE_COUNTER	= 1,
+	XSCALE_CYCLE_COUNTER	= 0,
 	XSCALE_COUNTER0,
 	XSCALE_COUNTER1,
 	XSCALE_COUNTER2,
@@ -222,7 +222,7 @@
 {
 	unsigned long pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -249,13 +249,10 @@
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -266,7 +263,7 @@
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -284,6 +281,7 @@
 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -305,18 +303,19 @@
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -336,16 +335,16 @@
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	if (XSCALE_PERFCTR_CCNT == event->config_base) {
@@ -368,24 +367,26 @@
 xscale1pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val |= XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -424,7 +425,13 @@
 	}
 }
 
-static const struct arm_pmu xscale1pmu = {
+static int xscale_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &xscale_perf_map,
+				&xscale_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu xscale1pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE1,
 	.name		= "xscale1",
 	.handle_irq	= xscale1pmu_handle_irq,
@@ -435,14 +442,12 @@
 	.get_event_idx	= xscale1pmu_get_event_idx,
 	.start		= xscale1pmu_start,
 	.stop		= xscale1pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 3,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return &xscale1pmu;
 }
@@ -560,7 +565,7 @@
 {
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -581,13 +586,10 @@
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -598,7 +600,7 @@
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -616,6 +618,7 @@
 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -649,16 +652,17 @@
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -692,14 +696,14 @@
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	int idx = xscale1pmu_get_event_idx(cpuc, event);
@@ -718,24 +722,26 @@
 xscale2pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
 	val |= XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -786,7 +792,7 @@
 	}
 }
 
-static const struct arm_pmu xscale2pmu = {
+static struct arm_pmu xscale2pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE2,
 	.name		= "xscale2",
 	.handle_irq	= xscale2pmu_handle_irq,
@@ -797,24 +803,22 @@
 	.get_event_idx	= xscale2pmu_get_event_idx,
 	.start		= xscale2pmu_start,
 	.stop		= xscale2pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 5,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return &xscale2pmu;
 }
 #else
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
index c53474f..2c3407e 100644
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -10,192 +10,26 @@
  *
  */
 
-#define pr_fmt(fmt) "PMU: " fmt
-
-#include <linux/cpumask.h>
 #include <linux/err.h>
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
 
 #include <asm/pmu.h>
 
-static volatile long pmu_lock;
+/*
+ * PMU locking to ensure mutual exclusion between different subsystems.
+ */
+static unsigned long pmu_lock[BITS_TO_LONGS(ARM_NUM_PMU_DEVICES)];
 
-static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
-
-static int __devinit pmu_register(struct platform_device *pdev,
-					enum arm_pmu_type type)
-{
-	if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
-		pr_warning("received registration request for unknown "
-				"PMU device type %d\n", type);
-		return -EINVAL;
-	}
-
-	if (pmu_devices[type]) {
-		pr_warning("rejecting duplicate registration of PMU device "
-			"type %d.", type);
-		return -ENOSPC;
-	}
-
-	pr_info("registered new PMU device of type %d\n", type);
-	pmu_devices[type] = pdev;
-	return 0;
-}
-
-#define OF_MATCH_PMU(_name, _type) {	\
-	.compatible = _name,		\
-	.data = (void *)_type,		\
-}
-
-#define OF_MATCH_CPU(name)	OF_MATCH_PMU(name, ARM_PMU_DEVICE_CPU)
-
-static struct of_device_id armpmu_of_device_ids[] = {
-	OF_MATCH_CPU("arm,cortex-a9-pmu"),
-	OF_MATCH_CPU("arm,cortex-a8-pmu"),
-	OF_MATCH_CPU("arm,arm1136-pmu"),
-	OF_MATCH_CPU("arm,arm1176-pmu"),
-	{},
-};
-
-#define PLAT_MATCH_PMU(_name, _type) {	\
-	.name		= _name,	\
-	.driver_data	= _type,	\
-}
-
-#define PLAT_MATCH_CPU(_name)	PLAT_MATCH_PMU(_name, ARM_PMU_DEVICE_CPU)
-
-static struct platform_device_id armpmu_plat_device_ids[] = {
-	PLAT_MATCH_CPU("arm-pmu"),
-	{},
-};
-
-enum arm_pmu_type armpmu_device_type(struct platform_device *pdev)
-{
-	const struct of_device_id	*of_id;
-	const struct platform_device_id *pdev_id;
-
-	/* provided by of_device_id table */
-	if (pdev->dev.of_node) {
-		of_id = of_match_device(armpmu_of_device_ids, &pdev->dev);
-		BUG_ON(!of_id);
-		return (enum arm_pmu_type)of_id->data;
-	}
-
-	/* Provided by platform_device_id table */
-	pdev_id = platform_get_device_id(pdev);
-	BUG_ON(!pdev_id);
-	return pdev_id->driver_data;
-}
-
-static int __devinit armpmu_device_probe(struct platform_device *pdev)
-{
-	return pmu_register(pdev, armpmu_device_type(pdev));
-}
-
-static struct platform_driver armpmu_driver = {
-	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = armpmu_of_device_ids,
-	},
-	.probe		= armpmu_device_probe,
-	.id_table	= armpmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
-	return platform_driver_register(&armpmu_driver);
-}
-device_initcall(register_pmu_driver);
-
-struct platform_device *
+int
 reserve_pmu(enum arm_pmu_type type)
 {
-	struct platform_device *pdev;
-
-	if (test_and_set_bit_lock(type, &pmu_lock)) {
-		pdev = ERR_PTR(-EBUSY);
-	} else if (pmu_devices[type] == NULL) {
-		clear_bit_unlock(type, &pmu_lock);
-		pdev = ERR_PTR(-ENODEV);
-	} else {
-		pdev = pmu_devices[type];
-	}
-
-	return pdev;
+	return test_and_set_bit_lock(type, pmu_lock) ? -EBUSY : 0;
 }
 EXPORT_SYMBOL_GPL(reserve_pmu);
 
-int
+void
 release_pmu(enum arm_pmu_type type)
 {
-	if (WARN_ON(!pmu_devices[type]))
-		return -EINVAL;
-	clear_bit_unlock(type, &pmu_lock);
-	return 0;
+	clear_bit_unlock(type, pmu_lock);
 }
-EXPORT_SYMBOL_GPL(release_pmu);
-
-static int
-set_irq_affinity(int irq,
-		 unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	int err = irq_set_affinity(irq, cpumask_of(cpu));
-	if (err)
-		pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-			   irq, cpu);
-	return err;
-#else
-	return -EINVAL;
-#endif
-}
-
-static int
-init_cpu_pmu(void)
-{
-	int i, irqs, err = 0;
-	struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU];
-
-	if (!pdev)
-		return -ENODEV;
-
-	irqs = pdev->num_resources;
-
-	/*
-	 * If we have a single PMU interrupt that we can't shift, assume that
-	 * we're running on a uniprocessor machine and continue.
-	 */
-	if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0)))
-		return 0;
-
-	for (i = 0; i < irqs; ++i) {
-		err = set_irq_affinity(platform_get_irq(pdev, i), i);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-int
-init_pmu(enum arm_pmu_type type)
-{
-	int err = 0;
-
-	switch (type) {
-	case ARM_PMU_DEVICE_CPU:
-		err = init_cpu_pmu();
-		break;
-	default:
-		pr_warning("attempt to initialise PMU of unknown "
-			   "type %d\n", type);
-		err = -EINVAL;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(init_pmu);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 3fe93f7..bda0a21 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -849,25 +849,8 @@
 
 	if (__atags_pointer)
 		tags = phys_to_virt(__atags_pointer);
-	else if (mdesc->boot_params) {
-#ifdef CONFIG_MMU
-		/*
-		 * We still are executing with a minimal MMU mapping created
-		 * with the presumption that the machine default for this
-		 * is located in the first MB of RAM.  Anything else will
-		 * fault and silently hang the kernel at this point.
-		 */
-		if (mdesc->boot_params < PHYS_OFFSET ||
-		    mdesc->boot_params >= PHYS_OFFSET + SZ_1M) {
-			printk(KERN_WARNING
-			       "Default boot params at physical 0x%08lx out of reach\n",
-			       mdesc->boot_params);
-		} else
-#endif
-		{
-			tags = phys_to_virt(mdesc->boot_params);
-		}
-	}
+	else if (mdesc->atag_offset)
+		tags = (void *)(PAGE_OFFSET + mdesc->atag_offset);
 
 #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
 	/*
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index dc902f2..020e99c 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -8,92 +8,61 @@
 	.text
 
 /*
- * Save CPU state for a suspend
- *  r1 = v:p offset
- *  r2 = suspend function arg0
- *  r3 = suspend function
+ * Save CPU state for a suspend.  This saves the CPU general purpose
+ * registers, and allocates space on the kernel stack to save the CPU
+ * specific registers and some other data for resume.
+ *  r0 = suspend function arg0
+ *  r1 = suspend function
  */
 ENTRY(__cpu_suspend)
 	stmfd	sp!, {r4 - r11, lr}
 #ifdef MULTI_CPU
 	ldr	r10, =processor
-	ldr	r5, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
-	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
+	ldr	r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
 #else
-	ldr	r5, =cpu_suspend_size
-	ldr	ip, =cpu_do_resume
+	ldr	r4, =cpu_suspend_size
 #endif
-	mov	r6, sp			@ current virtual SP
-	sub	sp, sp, r5		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer to CPU save block
-	add	ip, ip, r1		@ convert resume fn to phys
-	stmfd	sp!, {r1, r6, ip}	@ save v:p, virt SP, phys resume fn
-	ldr	r5, =sleep_save_sp
-	add	r6, sp, r1		@ convert SP to phys
-	stmfd	sp!, {r2, r3}		@ save suspend func arg and pointer
+	mov	r5, sp			@ current virtual SP
+	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
+	sub	sp, sp, r4		@ allocate CPU state on stack
+	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
+	add	r0, sp, #8		@ save pointer to save block
+	mov	r1, r4			@ size of save block
+	mov	r2, r5			@ virtual SP
+	ldr	r3, =sleep_save_sp
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_UP(mov lr, #0)
 	and	lr, lr, #15
-	str	r6, [r5, lr, lsl #2]	@ save phys SP
-#else
-	str	r6, [r5]		@ save phys SP
+	add	r3, r3, lr, lsl #2
 #endif
-#ifdef MULTI_CPU
-	mov	lr, pc
-	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
-#else
-	bl	cpu_do_suspend
-#endif
-
-	@ flush data cache
-#ifdef MULTI_CACHE
-	ldr	r10, =cpu_cache
-	mov	lr, pc
-	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
-#else
-	bl	__cpuc_flush_kern_all
-#endif
+	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
 ENDPROC(__cpu_suspend)
 	.ltorg
 
 cpu_suspend_abort:
-	ldmia	sp!, {r1 - r3}		@ pop v:p, virt SP, phys resume fn
+	ldmia	sp!, {r1 - r3}		@ pop phys pgd, virt SP, phys resume fn
+	teq	r0, #0
+	moveq	r0, #1			@ force non-zero value
 	mov	sp, r2
 	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_suspend_abort)
 
 /*
  * r0 = control register value
- * r1 = v:p offset (preserved by cpu_do_resume)
- * r2 = phys page table base
- * r3 = L1 section flags
  */
-ENTRY(cpu_resume_mmu)
-	adr	r4, cpu_resume_turn_mmu_on
-	mov	r4, r4, lsr #20
-	orr	r3, r3, r4, lsl #20
-	ldr	r5, [r2, r4, lsl #2]	@ save old mapping
-	str	r3, [r2, r4, lsl #2]	@ setup 1:1 mapping for mmu code
-	sub	r2, r2, r1
-	ldr	r3, =cpu_resume_after_mmu
-	bic	r1, r0, #CR_C		@ ensure D-cache is disabled
-	b	cpu_resume_turn_mmu_on
-ENDPROC(cpu_resume_mmu)
-	.ltorg
 	.align	5
-cpu_resume_turn_mmu_on:
-	mcr	p15, 0, r1, c1, c0, 0	@ turn on MMU, I-cache, etc
-	mrc	p15, 0, r1, c0, c0, 0	@ read id reg
-	mov	r1, r1
-	mov	r1, r1
+ENTRY(cpu_resume_mmu)
+	ldr	r3, =cpu_resume_after_mmu
+	mcr	p15, 0, r0, c1, c0, 0	@ turn on MMU, I-cache, etc
+	mrc	p15, 0, r0, c0, c0, 0	@ read id reg
+	mov	r0, r0
+	mov	r0, r0
 	mov	pc, r3			@ jump to virtual address
-ENDPROC(cpu_resume_turn_mmu_on)
+ENDPROC(cpu_resume_mmu)
 cpu_resume_after_mmu:
-	str	r5, [r2, r4, lsl #2]	@ restore old mapping
-	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
 	bl	cpu_init		@ restore the und/abt/irq banked regs
 	mov	r0, #0			@ return zero on success
 	ldmfd	sp!, {r4 - r11, pc}
@@ -119,7 +88,7 @@
 	ldr	r0, sleep_save_sp	@ stack phys addr
 #endif
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
-	@ load v:p, stack, resume fn
+	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
 THUMB(	ldmia	r0!, {r1, r2, r3}	)
 THUMB(	mov	sp, r2			)
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 94f34a6..ef5640b 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -460,10 +460,6 @@
 	for (i = 0; i < NR_IPI; i++)
 		sum += __get_irq_stat(cpu, ipi_irqs[i]);
 
-#ifdef CONFIG_LOCAL_TIMERS
-	sum += __get_irq_stat(cpu, local_timer_irqs);
-#endif
-
 	return sum;
 }
 
@@ -480,38 +476,6 @@
 	irq_exit();
 }
 
-#ifdef CONFIG_LOCAL_TIMERS
-asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
-{
-	handle_local_timer(regs);
-}
-
-void handle_local_timer(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-	int cpu = smp_processor_id();
-
-	if (local_timer_ack()) {
-		__inc_irq_stat(cpu, local_timer_irqs);
-		ipi_timer();
-	}
-
-	set_irq_regs(old_regs);
-}
-
-void show_local_irqs(struct seq_file *p, int prec)
-{
-	unsigned int cpu;
-
-	seq_printf(p, "%*s: ", prec, "LOC");
-
-	for_each_present_cpu(cpu)
-		seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs));
-
-	seq_printf(p, " Local timer interrupts\n");
-}
-#endif
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static void smp_timer_broadcast(const struct cpumask *mask)
 {
@@ -562,7 +526,7 @@
 	unsigned int cpu = smp_processor_id();
 	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
 
-	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	local_timer_stop(evt);
 }
 #endif
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 01c1862..a8a6682 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -19,6 +19,7 @@
 #include <linux/io.h>
 
 #include <asm/smp_twd.h>
+#include <asm/localtimer.h>
 #include <asm/hardware/gic.h>
 
 /* set up by the platform code */
@@ -26,6 +27,8 @@
 
 static unsigned long twd_timer_rate;
 
+static struct clock_event_device __percpu **twd_evt;
+
 static void twd_set_mode(enum clock_event_mode mode,
 			struct clock_event_device *clk)
 {
@@ -80,6 +83,12 @@
 	return 0;
 }
 
+void twd_timer_stop(struct clock_event_device *clk)
+{
+	twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk);
+	disable_percpu_irq(clk->irq);
+}
+
 static void __cpuinit twd_calibrate_rate(void)
 {
 	unsigned long count;
@@ -119,11 +128,43 @@
 	}
 }
 
+static irqreturn_t twd_handler(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = *(struct clock_event_device **)dev_id;
+
+	if (twd_timer_ack()) {
+		evt->event_handler(evt);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
 /*
  * Setup the local clock events for a CPU.
  */
 void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 {
+	struct clock_event_device **this_cpu_clk;
+
+	if (!twd_evt) {
+		int err;
+
+		twd_evt = alloc_percpu(struct clock_event_device *);
+		if (!twd_evt) {
+			pr_err("twd: can't allocate memory\n");
+			return;
+		}
+
+		err = request_percpu_irq(clk->irq, twd_handler,
+					 "twd", twd_evt);
+		if (err) {
+			pr_err("twd: can't register interrupt %d (%d)\n",
+			       clk->irq, err);
+			return;
+		}
+	}
+
 	twd_calibrate_rate();
 
 	clk->name = "local_timer";
@@ -137,8 +178,10 @@
 	clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
 	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
 
+	this_cpu_clk = __this_cpu_ptr(twd_evt);
+	*this_cpu_clk = clk;
+
 	clockevents_register_device(clk);
 
-	/* Make sure our local interrupt controller has this enabled */
-	gic_enable_ppi(clk->irq);
+	enable_percpu_irq(clk->irq, 0);
 }
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
new file mode 100644
index 0000000..93a22d2
--- /dev/null
+++ b/arch/arm/kernel/suspend.c
@@ -0,0 +1,72 @@
+#include <linux/init.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+static pgd_t *suspend_pgd;
+
+extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
+extern void cpu_resume_mmu(void);
+
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ */
+void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
+{
+	*save_ptr = virt_to_phys(ptr);
+
+	/* This must correspond to the LDM in cpu_resume() assembly */
+	*ptr++ = virt_to_phys(suspend_pgd);
+	*ptr++ = sp;
+	*ptr++ = virt_to_phys(cpu_do_resume);
+
+	cpu_do_suspend(ptr);
+
+	flush_cache_all();
+	outer_clean_range(*save_ptr, *save_ptr + ptrsz);
+	outer_clean_range(virt_to_phys(save_ptr),
+			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
+}
+
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+
+	if (!suspend_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+
+static int __init cpu_suspend_init(void)
+{
+	suspend_pgd = pgd_alloc(&init_mm);
+	if (suspend_pgd) {
+		unsigned long addr = virt_to_phys(cpu_resume_mmu);
+		identity_mapping_add(suspend_pgd, addr, addr + SECTION_SIZE);
+	}
+	return suspend_pgd ? 0 : -ENOMEM;
+}
+core_initcall(cpu_suspend_init);
diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c
index dba0d8d..f87f504 100644
--- a/arch/arm/mach-at91/at91cap9_devices.c
+++ b/arch/arm/mach-at91/at91cap9_devices.c
@@ -16,6 +16,7 @@
 #include <asm/mach/irq.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
@@ -23,7 +24,6 @@
 
 #include <mach/board.h>
 #include <mach/cpu.h>
-#include <mach/gpio.h>
 #include <mach/at91cap9.h>
 #include <mach/at91cap9_matrix.h>
 #include <mach/at91sam9_smc.h>
diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c
index 7227755..978be95 100644
--- a/arch/arm/mach-at91/at91rm9200_devices.c
+++ b/arch/arm/mach-at91/at91rm9200_devices.c
@@ -14,11 +14,11 @@
 #include <asm/mach/map.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91rm9200.h>
 #include <mach/at91rm9200_mc.h>
 
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index 39f81f4..3c2b580 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -13,11 +13,11 @@
 #include <asm/mach/map.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/cpu.h>
 #include <mach/at91sam9260.h>
 #include <mach/at91sam9260_matrix.h>
@@ -319,7 +319,7 @@
 	if (!data)
 		return;
 
-	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+	for (i = 0; i < ATMCI_MAX_NR_SLOTS; i++) {
 		if (data->slot[i].bus_width) {
 			/* input/irq */
 			if (data->slot[i].detect_pin) {
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index 0f91792..4e647b6 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -14,6 +14,7 @@
 #include <asm/mach/map.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
@@ -21,7 +22,6 @@
 #include <video/atmel_lcdc.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9261.h>
 #include <mach/at91sam9261_matrix.h>
 #include <mach/at91sam9_smc.h>
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index a050f41..dd7662b 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -13,6 +13,7 @@
 #include <asm/mach/map.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
@@ -20,7 +21,6 @@
 #include <video/atmel_lcdc.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9263.h>
 #include <mach/at91sam9263_matrix.h>
 #include <mach/at91sam9_smc.h>
diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c
index e04c5fb..1532b50 100644
--- a/arch/arm/mach-at91/at91sam9g45.c
+++ b/arch/arm/mach-at91/at91sam9g45.c
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/pm.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/irq.h>
 #include <asm/mach/arch.h>
@@ -319,6 +320,7 @@
 static void __init at91sam9g45_map_io(void)
 {
 	at91_init_sram(0, AT91SAM9G45_SRAM_BASE, AT91SAM9G45_SRAM_SIZE);
+	init_consistent_dma_size(SZ_4M);
 }
 
 static void __init at91sam9g45_initialize(void)
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 600bffb..c3dfb1b 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -13,6 +13,7 @@
 #include <asm/mach/map.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 #include <linux/atmel-mci.h>
@@ -21,7 +22,6 @@
 #include <video/atmel_lcdc.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9g45.h>
 #include <mach/at91sam9g45_matrix.h>
 #include <mach/at91sam9_smc.h>
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index aacb19d..305a851 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -10,6 +10,7 @@
 #include <asm/mach/map.h>
 
 #include <linux/dma-mapping.h>
+#include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
@@ -17,7 +18,6 @@
 #include <video/atmel_lcdc.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9rl.h>
 #include <mach/at91sam9rl_matrix.h>
 #include <mach/at91sam9_smc.h>
diff --git a/arch/arm/mach-at91/board-1arm.c b/arch/arm/mach-at91/board-1arm.c
index 5aa5885..367d5cd 100644
--- a/arch/arm/mach-at91/board-1arm.c
+++ b/arch/arm/mach-at91/board-1arm.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -34,7 +35,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/cpu.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index b0c796d..0487ea1 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -43,7 +44,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c
index d1abd58..747b2ea 100644
--- a/arch/arm/mach-at91/board-cam60.c
+++ b/arch/arm/mach-at91/board-cam60.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -38,7 +39,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 
 #include "sam9_smc.h"
diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c
index 679b0b7..0626703 100644
--- a/arch/arm/mach-at91/board-cap9adk.c
+++ b/arch/arm/mach-at91/board-cap9adk.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -41,7 +42,6 @@
 #include <asm/mach/map.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91cap9_matrix.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/system_rev.h>
diff --git a/arch/arm/mach-at91/board-carmeva.c b/arch/arm/mach-at91/board-carmeva.c
index c578c5d..774c87f 100644
--- a/arch/arm/mach-at91/board-carmeva.c
+++ b/arch/arm/mach-at91/board-carmeva.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -35,7 +36,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-at91/board-cpu9krea.c b/arch/arm/mach-at91/board-cpu9krea.c
index f4da8a1..fc885a4 100644
--- a/arch/arm/mach-at91/board-cpu9krea.c
+++ b/arch/arm/mach-at91/board-cpu9krea.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -40,7 +41,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91sam9260_matrix.h>
 
diff --git a/arch/arm/mach-at91/board-cpuat91.c b/arch/arm/mach-at91/board-cpuat91.c
index 2d919f5..d35e65b 100644
--- a/arch/arm/mach-at91/board-cpuat91.c
+++ b/arch/arm/mach-at91/board-cpuat91.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -36,7 +37,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91rm9200_mc.h>
 #include <mach/cpu.h>
 
diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c
index 17654d5..c393666 100644
--- a/arch/arm/mach-at91/board-csb337.c
+++ b/arch/arm/mach-at91/board-csb337.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -38,7 +39,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-at91/board-csb637.c b/arch/arm/mach-at91/board-csb637.c
index 72b5567..586100e 100644
--- a/arch/arm/mach-at91/board-csb637.c
+++ b/arch/arm/mach-at91/board-csb637.c
@@ -20,6 +20,7 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/gpio.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -35,7 +36,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-at91/board-eb9200.c b/arch/arm/mach-at91/board-eb9200.c
index 01170a2..45db7a3 100644
--- a/arch/arm/mach-at91/board-eb9200.c
+++ b/arch/arm/mach-at91/board-eb9200.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-at91/board-ecbat91.c b/arch/arm/mach-at91/board-ecbat91.c
index 7c0313c..2f9c16d2 100644
--- a/arch/arm/mach-at91/board-ecbat91.c
+++ b/arch/arm/mach-at91/board-ecbat91.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -37,7 +38,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/cpu.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-at91/board-kafa.c b/arch/arm/mach-at91/board-kafa.c
index 4a17089..3bae73e 100644
--- a/arch/arm/mach-at91/board-kafa.c
+++ b/arch/arm/mach-at91/board-kafa.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -34,7 +35,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/cpu.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-at91/board-kb9202.c b/arch/arm/mach-at91/board-kb9202.c
index 9dc8d49..15a3f1a 100644
--- a/arch/arm/mach-at91/board-kb9202.c
+++ b/arch/arm/mach-at91/board-kb9202.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/cpu.h>
 #include <mach/at91rm9200_mc.h>
 
diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index 9bc6ab3..6094496 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -44,7 +45,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 
 #include "sam9_smc.h"
diff --git a/arch/arm/mach-at91/board-picotux200.c b/arch/arm/mach-at91/board-picotux200.c
index b7b8390..0a8fe6a 100644
--- a/arch/arm/mach-at91/board-picotux200.c
+++ b/arch/arm/mach-at91/board-picotux200.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -37,7 +38,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91rm9200_mc.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c
index 81f9110..938cc39 100644
--- a/arch/arm/mach-at91/board-qil-a9260.c
+++ b/arch/arm/mach-at91/board-qil-a9260.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -40,7 +41,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 
diff --git a/arch/arm/mach-at91/board-rm9200dk.c b/arch/arm/mach-at91/board-rm9200dk.c
index 6f08faa..b4ac30e 100644
--- a/arch/arm/mach-at91/board-rm9200dk.c
+++ b/arch/arm/mach-at91/board-rm9200dk.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -39,7 +40,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91rm9200_mc.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-at91/board-rm9200ek.c b/arch/arm/mach-at91/board-rm9200ek.c
index 85bcccd..99fd7f8 100644
--- a/arch/arm/mach-at91/board-rm9200ek.c
+++ b/arch/arm/mach-at91/board-rm9200ek.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -39,7 +40,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91rm9200_mc.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-at91/board-sam9-l9260.c b/arch/arm/mach-at91/board-sam9-l9260.c
index 4d3a02f..2a21e79 100644
--- a/arch/arm/mach-at91/board-sam9-l9260.c
+++ b/arch/arm/mach-at91/board-sam9-l9260.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -37,7 +38,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 
 #include "sam9_smc.h"
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index 8a50c3e..89c8b57 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -41,7 +42,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 #include <mach/system_rev.h>
diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c
index 5096a0e..3741f43 100644
--- a/arch/arm/mach-at91/board-sam9261ek.c
+++ b/arch/arm/mach-at91/board-sam9261ek.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -45,7 +46,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 #include <mach/system_rev.h>
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index ea8f185..a580dd4 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -44,7 +45,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 #include <mach/system_rev.h>
diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index 817f59d..8d77c2f 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -41,7 +42,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/system_rev.h>
 
diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c
index ad234cc..2d6203a 100644
--- a/arch/arm/mach-at91/board-sam9m10g45ek.c
+++ b/arch/arm/mach-at91/board-sam9m10g45ek.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -38,7 +39,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 #include <mach/system_rev.h>
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index 4f14b54..39a28ef 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -30,7 +31,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 
diff --git a/arch/arm/mach-at91/board-usb-a9260.c b/arch/arm/mach-at91/board-usb-a9260.c
index 8c4c1a0..bac9b65 100644
--- a/arch/arm/mach-at91/board-usb-a9260.c
+++ b/arch/arm/mach-at91/board-usb-a9260.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -40,7 +41,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 
diff --git a/arch/arm/mach-at91/board-usb-a9263.c b/arch/arm/mach-at91/board-usb-a9263.c
index 25e7937..5bd7357 100644
--- a/arch/arm/mach-at91/board-usb-a9263.c
+++ b/arch/arm/mach-at91/board-usb-a9263.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -39,7 +40,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91sam9_smc.h>
 #include <mach/at91_shdwc.h>
 
diff --git a/arch/arm/mach-at91/board-yl-9200.c b/arch/arm/mach-at91/board-yl-9200.c
index 95edcbd..3c288b3 100644
--- a/arch/arm/mach-at91/board-yl-9200.c
+++ b/arch/arm/mach-at91/board-yl-9200.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -43,7 +44,6 @@
 
 #include <mach/hardware.h>
 #include <mach/board.h>
-#include <mach/gpio.h>
 #include <mach/at91rm9200_mc.h>
 #include <mach/cpu.h>
 
diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c
index 4615528..224e9e2 100644
--- a/arch/arm/mach-at91/gpio.c
+++ b/arch/arm/mach-at91/gpio.c
@@ -11,6 +11,7 @@
 
 #include <linux/clk.h>
 #include <linux/errno.h>
+#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/debugfs.h>
@@ -22,9 +23,6 @@
 
 #include <mach/hardware.h>
 #include <mach/at91_pio.h>
-#include <mach/gpio.h>
-
-#include <asm/gpio.h>
 
 #include "generic.h"
 
diff --git a/arch/arm/mach-at91/include/mach/at91sam9g45.h b/arch/arm/mach-at91/include/mach/at91sam9g45.h
index 2c611b9..406bb64 100644
--- a/arch/arm/mach-at91/include/mach/at91sam9g45.h
+++ b/arch/arm/mach-at91/include/mach/at91sam9g45.h
@@ -128,8 +128,6 @@
 #define AT91SAM9G45_EHCI_BASE	0x00800000	/* USB Host controller (EHCI) */
 #define AT91SAM9G45_VDEC_BASE	0x00900000	/* Video Decoder Controller */
 
-#define CONSISTENT_DMA_SIZE	SZ_4M
-
 /*
  * DMA peripheral identifiers
  * for hardware handshaking interface
diff --git a/arch/arm/mach-at91/include/mach/debug-macro.S b/arch/arm/mach-at91/include/mach/debug-macro.S
index bc1e0b2..0ed8648 100644
--- a/arch/arm/mach-at91/include/mach/debug-macro.S
+++ b/arch/arm/mach-at91/include/mach/debug-macro.S
@@ -14,7 +14,7 @@
 #include <mach/hardware.h>
 #include <mach/at91_dbgu.h>
 
-	.macro	addruart, rp, rv
+	.macro	addruart, rp, rv, tmp
 	ldr	\rp, =(AT91_BASE_SYS + AT91_DBGU)	@ System peripherals (phys address)
 	ldr	\rv, =(AT91_VA_BASE_SYS	+ AT91_DBGU)	@ System peripherals (virt address)
 	.endm
diff --git a/arch/arm/mach-at91/include/mach/gpio.h b/arch/arm/mach-at91/include/mach/gpio.h
index 056dc66..2b9a1f5 100644
--- a/arch/arm/mach-at91/include/mach/gpio.h
+++ b/arch/arm/mach-at91/include/mach/gpio.h
@@ -214,11 +214,6 @@
  */
 
 #include <asm/errno.h>
-#include <asm-generic/gpio.h>		/* cansleep wrappers */
-
-#define gpio_get_value	__gpio_get_value
-#define gpio_set_value	__gpio_set_value
-#define gpio_cansleep	__gpio_cansleep
 
 #define gpio_to_irq(gpio) (gpio)
 #define irq_to_gpio(irq)  (irq)
diff --git a/arch/arm/mach-at91/leds.c b/arch/arm/mach-at91/leds.c
index 0415a839..8dfafe7 100644
--- a/arch/arm/mach-at91/leds.c
+++ b/arch/arm/mach-at91/leds.c
@@ -9,13 +9,13 @@
  * 2 of the License, or (at your option) any later version.
 */
 
+#include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 
 #include <mach/board.h>
-#include <mach/gpio.h>
 
 
 /* ------------------------------------------------------------------------- */
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 4159eca..7046158 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -10,6 +10,7 @@
  * (at your option) any later version.
  */
 
+#include <linux/gpio.h>
 #include <linux/suspend.h>
 #include <linux/sched.h>
 #include <linux/proc_fs.h>
@@ -25,7 +26,6 @@
 #include <asm/mach/irq.h>
 
 #include <mach/at91_pmc.h>
-#include <mach/gpio.h>
 #include <mach/cpu.h>
 
 #include "generic.h"
diff --git a/arch/arm/mach-bcmring/include/mach/hardware.h b/arch/arm/mach-bcmring/include/mach/hardware.h
index ed78aab..6ae20a6 100644
--- a/arch/arm/mach-bcmring/include/mach/hardware.h
+++ b/arch/arm/mach-bcmring/include/mach/hardware.h
@@ -22,7 +22,6 @@
 #define __ASM_ARCH_HARDWARE_H
 
 #include <asm/sizes.h>
-#include <mach/memory.h>
 #include <cfg_global.h>
 #include <mach/csp/mm_io.h>
 
@@ -31,7 +30,7 @@
  *  *_SIZE  is the size of the region
  *  *_BASE  is the virtual address
  */
-#define RAM_START               PLAT_PHYS_OFFSET
+#define RAM_START               PHYS_OFFSET
 
 #define RAM_SIZE                (CFG_GLOBAL_RAM_SIZE-CFG_GLOBAL_RAM_SIZE_RESERVED)
 #define RAM_BASE                PAGE_OFFSET
diff --git a/arch/arm/mach-bcmring/include/mach/memory.h b/arch/arm/mach-bcmring/include/mach/memory.h
deleted file mode 100644
index 15162e4..0000000
--- a/arch/arm/mach-bcmring/include/mach/memory.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*****************************************************************************
-* Copyright 2005 - 2008 Broadcom Corporation.  All rights reserved.
-*
-* Unless you and Broadcom execute a separate written software license
-* agreement governing use of this software, this software is licensed to you
-* under the terms of the GNU General Public License version 2, available at
-* http://www.broadcom.com/licenses/GPLv2.php (the "GPL").
-*
-* Notwithstanding the above, under no circumstances may you combine this
-* software in any way with any other Broadcom software provided under a
-* license other than the GPL, without Broadcom's express prior written
-* consent.
-*****************************************************************************/
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#include <cfg_global.h>
-
-/*
- * Physical vs virtual RAM address space conversion.  These are
- * private definitions which should NOT be used outside memory.h
- * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-
-#define PLAT_PHYS_OFFSET CFG_GLOBAL_RAM_BASE
-
-/*
- * Maximum DMA memory allowed is 14M
- */
-#define CONSISTENT_DMA_SIZE (SZ_16M - SZ_2M)
-
-#endif
diff --git a/arch/arm/mach-bcmring/mm.c b/arch/arm/mach-bcmring/mm.c
index 0f1c37e..8616876 100644
--- a/arch/arm/mach-bcmring/mm.c
+++ b/arch/arm/mach-bcmring/mm.c
@@ -13,6 +13,7 @@
 *****************************************************************************/
 
 #include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
 #include <asm/mach/map.h>
 
 #include <mach/hardware.h>
@@ -53,4 +54,6 @@
 {
 
 	iotable_init(bcmring_io_desc, ARRAY_SIZE(bcmring_io_desc));
+	/* Maximum DMA memory allowed is 14M */
+	init_consistent_dma_size(14 << 20);
 }
diff --git a/arch/arm/mach-clps711x/autcpu12.c b/arch/arm/mach-clps711x/autcpu12.c
index 4a74b2c..0276091 100644
--- a/arch/arm/mach-clps711x/autcpu12.c
+++ b/arch/arm/mach-clps711x/autcpu12.c
@@ -64,7 +64,7 @@
 
 MACHINE_START(AUTCPU12, "autronix autcpu12")
 	/* Maintainer: Thomas Gleixner */
-	.boot_params	= 0xc0020000,
+	.atag_offset	= 0x20000,
 	.map_io		= autcpu12_map_io,
 	.init_irq	= clps711x_init_irq,
 	.timer		= &clps711x_timer,
diff --git a/arch/arm/mach-clps711x/cdb89712.c b/arch/arm/mach-clps711x/cdb89712.c
index 5a1689d..25b3bfd 100644
--- a/arch/arm/mach-clps711x/cdb89712.c
+++ b/arch/arm/mach-clps711x/cdb89712.c
@@ -55,7 +55,7 @@
 
 MACHINE_START(CDB89712, "Cirrus-CDB89712")
 	/* Maintainer: Ray Lehtiniemi */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x100,
 	.map_io		= cdb89712_map_io,
 	.init_irq	= clps711x_init_irq,
 	.timer		= &clps711x_timer,
diff --git a/arch/arm/mach-clps711x/ceiva.c b/arch/arm/mach-clps711x/ceiva.c
index 16481cf..1df9ec6 100644
--- a/arch/arm/mach-clps711x/ceiva.c
+++ b/arch/arm/mach-clps711x/ceiva.c
@@ -56,7 +56,7 @@
 
 MACHINE_START(CEIVA, "CEIVA/Polaroid Photo MAX Digital Picture Frame")
 	/* Maintainer: Rob Scott */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x100,
 	.map_io		= ceiva_map_io,
 	.init_irq	= clps711x_init_irq,
 	.timer		= &clps711x_timer,
diff --git a/arch/arm/mach-clps711x/clep7312.c b/arch/arm/mach-clps711x/clep7312.c
index 0a2e74f..80496c0 100644
--- a/arch/arm/mach-clps711x/clep7312.c
+++ b/arch/arm/mach-clps711x/clep7312.c
@@ -36,7 +36,7 @@
 
 MACHINE_START(CLEP7212, "Cirrus Logic 7212/7312")
 	/* Maintainer: Nobody */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x0100,
 	.fixup		= fixup_clep7312,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,
diff --git a/arch/arm/mach-clps711x/edb7211-arch.c b/arch/arm/mach-clps711x/edb7211-arch.c
index 725a7a5..9721f61 100644
--- a/arch/arm/mach-clps711x/edb7211-arch.c
+++ b/arch/arm/mach-clps711x/edb7211-arch.c
@@ -56,7 +56,7 @@
 
 MACHINE_START(EDB7211, "CL-EDB7211 (EP7211 eval board)")
 	/* Maintainer: Jon McClintock */
-	.boot_params	= 0xc0020100,	/* 0xc0000000 - 0xc001ffff can be video RAM */
+	.atag_offset	= 0x20100,	/* 0xc0000000 - 0xc001ffff can be video RAM */
 	.fixup		= fixup_edb7211,
 	.map_io		= edb7211_map_io,
 	.reserve	= edb7211_reserve,
diff --git a/arch/arm/mach-clps711x/fortunet.c b/arch/arm/mach-clps711x/fortunet.c
index 1947b30..d992566 100644
--- a/arch/arm/mach-clps711x/fortunet.c
+++ b/arch/arm/mach-clps711x/fortunet.c
@@ -74,7 +74,6 @@
 
 MACHINE_START(FORTUNET, "ARM-FortuNet")
 	/* Maintainer: FortuNet Inc. */
-	.boot_params	= 0x00000000,
 	.fixup		= fortunet_fixup,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,
diff --git a/arch/arm/mach-clps711x/include/mach/debug-macro.S b/arch/arm/mach-clps711x/include/mach/debug-macro.S
index 507c687..b802e8a 100644
--- a/arch/arm/mach-clps711x/include/mach/debug-macro.S
+++ b/arch/arm/mach-clps711x/include/mach/debug-macro.S
@@ -14,7 +14,7 @@
 #include <mach/hardware.h>
 #include <asm/hardware/clps7111.h>
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 #ifndef CONFIG_DEBUG_CLPS711X_UART2
 		mov	\rp, #0x0000	@ UART1
 #else
diff --git a/arch/arm/mach-clps711x/p720t.c b/arch/arm/mach-clps711x/p720t.c
index 3f796e0..6ecea95 100644
--- a/arch/arm/mach-clps711x/p720t.c
+++ b/arch/arm/mach-clps711x/p720t.c
@@ -88,7 +88,7 @@
 
 MACHINE_START(P720T, "ARM-Prospector720T")
 	/* Maintainer: ARM Ltd/Deep Blue Solutions Ltd */
-	.boot_params	= 0xc0000100,
+	.atag_offset	= 0x100,
 	.fixup		= fixup_p720t,
 	.map_io		= p720t_map_io,
 	.init_irq	= clps711x_init_irq,
diff --git a/arch/arm/mach-cns3xxx/cns3420vb.c b/arch/arm/mach-cns3xxx/cns3420vb.c
index 3e7d149..55f7b4b 100644
--- a/arch/arm/mach-cns3xxx/cns3420vb.c
+++ b/arch/arm/mach-cns3xxx/cns3420vb.c
@@ -197,7 +197,7 @@
 }
 
 MACHINE_START(CNS3420VB, "Cavium Networks CNS3420 Validation Board")
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.map_io		= cns3420_map_io,
 	.init_irq	= cns3xxx_init_irq,
 	.timer		= &cns3xxx_timer,
diff --git a/arch/arm/mach-cns3xxx/include/mach/debug-macro.S b/arch/arm/mach-cns3xxx/include/mach/debug-macro.S
index 56d8286..d04c150 100644
--- a/arch/arm/mach-cns3xxx/include/mach/debug-macro.S
+++ b/arch/arm/mach-cns3xxx/include/mach/debug-macro.S
@@ -10,7 +10,7 @@
  * published by the Free Software Foundation.
  */
 
-		.macro	addruart,rp,rv
+		.macro	addruart,rp,rv,tmp
 		mov	\rp, #0x00009000
 		orr	\rv, \rp, #0xf0000000	@ virtual base
 		orr	\rp, \rp, #0x10000000
diff --git a/arch/arm/mach-cns3xxx/include/mach/memory.h b/arch/arm/mach-cns3xxx/include/mach/memory.h
deleted file mode 100644
index dc16c5c..0000000
--- a/arch/arm/mach-cns3xxx/include/mach/memory.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2003 ARM Limited
- * Copyright 2008 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- */
-
-#ifndef __MACH_MEMORY_H
-#define __MACH_MEMORY_H
-
-/*
- * Physical DRAM offset.
- */
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-
-#define __phys_to_bus(x)	((x) + PHYS_OFFSET)
-#define __bus_to_phys(x)	((x) - PHYS_OFFSET)
-
-#define __virt_to_bus(v)	__phys_to_bus(__virt_to_phys(v))
-#define __bus_to_virt(b)	__phys_to_virt(__bus_to_phys(b))
-#define __pfn_to_bus(p)		__phys_to_bus(__pfn_to_phys(p))
-#define __bus_to_pfn(b)		__phys_to_pfn(__bus_to_phys(b))
-
-#endif
diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 0b87a1c..495e313 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -5,7 +5,7 @@
 
 # Common objects
 obj-y 			:= time.o clock.o serial.o io.o psc.o \
-			   gpio.o dma.o usb.o common.o sram.o aemif.o
+			   dma.o usb.o common.o sram.o aemif.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
 
@@ -17,7 +17,6 @@
 obj-$(CONFIG_ARCH_DAVINCI_DA830)        += da830.o devices-da8xx.o
 obj-$(CONFIG_ARCH_DAVINCI_DA850)        += da850.o devices-da8xx.o
 obj-$(CONFIG_ARCH_DAVINCI_TNETV107X)    += tnetv107x.o devices-tnetv107x.o
-obj-$(CONFIG_ARCH_DAVINCI_TNETV107X)    += gpio-tnetv107x.o
 
 obj-$(CONFIG_AINTC)			+= irq.o
 obj-$(CONFIG_CP_INTC)			+= cp_intc.o
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 84fd786..26d94c0 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -676,7 +676,7 @@
 }
 
 MACHINE_START(DAVINCI_DA830_EVM, "DaVinci DA830/OMAP-L137/AM17x EVM")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= da830_evm_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 008d514..6e41cb5 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -1291,7 +1291,7 @@
 }
 
 MACHINE_START(DAVINCI_DA850_EVM, "DaVinci DA850/OMAP-L138/AM18x EVM")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= da850_evm_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 241a6bd..6556628 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -351,7 +351,7 @@
 }
 
 MACHINE_START(DAVINCI_DM355_EVM, "DaVinci DM355 EVM")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io	      = dm355_evm_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index bee284c..b307470 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -270,7 +270,7 @@
 }
 
 MACHINE_START(DM355_LEOPARD, "DaVinci DM355 leopard")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io	      = dm355_leopard_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 9818f21..04c43ab 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -612,7 +612,7 @@
 }
 
 MACHINE_START(DAVINCI_DM365_EVM, "DaVinci DM365 EVM")
-	.boot_params	= (0x80000100),
+	.atag_offset	= 0x100,
 	.map_io		= dm365_evm_map_io,
 	.init_irq	= davinci_irq_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 95607a1..a005e769 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -712,7 +712,7 @@
 
 MACHINE_START(DAVINCI_EVM, "DaVinci DM644x EVM")
 	/* Maintainer: MontaVista Software <source@mvista.com> */
-	.boot_params  = (DAVINCI_DDR_BASE + 0x100),
+	.atag_offset  = 0x100,
 	.map_io	      = davinci_evm_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 993a314..337c45e 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -792,7 +792,7 @@
 }
 
 MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io       = davinci_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer        = &davinci_timer,
@@ -801,7 +801,7 @@
 MACHINE_END
 
 MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM")
-	.boot_params  = (0x80000100),
+	.atag_offset  = 0x100,
 	.map_io       = davinci_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer        = &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index c278226..6efc84cc 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -566,7 +566,7 @@
 }
 
 MACHINE_START(MITYOMAPL138, "MityDSP-L138/MityARM-1808")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= mityomapl138_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index d60a800..38d6f64 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -272,7 +272,7 @@
 
 MACHINE_START(NEUROS_OSD2, "Neuros OSD2")
 	/* Maintainer: Neuros Technologies <neuros@groups.google.com> */
-	.boot_params	= (DAVINCI_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		 = davinci_ntosd2_map_io,
 	.init_irq	= davinci_irq_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index 237332a..c6701e4 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -338,7 +338,7 @@
 }
 
 MACHINE_START(OMAPL138_HAWKBOARD, "AM18x/OMAP-L138 Hawkboard")
-	.boot_params	= (DA8XX_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= omapl138_hawk_map_io,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index 5f4385c..5dd4da9 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -151,7 +151,7 @@
 
 MACHINE_START(SFFSDR, "Lyrtech SFFSDR")
 	/* Maintainer: Hugo Villeneuve hugo.villeneuve@lyrtech.com */
-	.boot_params  = (DAVINCI_DDR_BASE + 0x100),
+	.atag_offset  = 0x100,
 	.map_io	      = davinci_sffsdr_map_io,
 	.init_irq     = davinci_irq_init,
 	.timer	      = &davinci_timer,
diff --git a/arch/arm/mach-davinci/board-tnetv107x-evm.c b/arch/arm/mach-davinci/board-tnetv107x-evm.c
index 7828920..90ee7b5 100644
--- a/arch/arm/mach-davinci/board-tnetv107x-evm.c
+++ b/arch/arm/mach-davinci/board-tnetv107x-evm.c
@@ -277,7 +277,7 @@
 #endif
 
 MACHINE_START(TNETV107X, "TNETV107X EVM")
-	.boot_params	= (TNETV107X_DDR_BASE + 0x100),
+	.atag_offset	= 0x100,
 	.map_io		= tnetv107x_init,
 	.init_irq	= cp_intc_init,
 	.timer		= &davinci_timer,
diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index 1d25573..865ffe5 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/etherdevice.h>
 #include <linux/davinci_emac.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/tlb.h>
 #include <asm/mach/map.h>
@@ -86,6 +87,8 @@
 		iotable_init(davinci_soc_info.io_desc,
 				davinci_soc_info.io_desc_num);
 
+	init_consistent_dma_size(14 << 20);
+
 	/*
 	 * Normally devicemaps_init() would flush caches and tlb after
 	 * mdesc->map_io(), but we must also do it here because of the CPU
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c
index bd59f31..0b314bf 100644
--- a/arch/arm/mach-davinci/cpuidle.c
+++ b/arch/arm/mach-davinci/cpuidle.c
@@ -19,7 +19,7 @@
 #include <asm/proc-fns.h>
 
 #include <mach/cpuidle.h>
-#include <mach/memory.h>
+#include <mach/ddr2.h>
 
 #define DAVINCI_CPUIDLE_MAX_STATES	2
 
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index 2ed2f82..a6bf5dc 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -8,6 +8,7 @@
  * is licensed "as is" without any warranty of any kind, whether express
  * or implied.
  */
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/clk.h>
 
@@ -19,7 +20,7 @@
 #include <mach/common.h>
 #include <mach/time.h>
 #include <mach/da8xx.h>
-#include <mach/gpio.h>
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 935dbed..4aae015 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -11,6 +11,7 @@
  * is licensed "as is" without any warranty of any kind, whether express
  * or implied.
  */
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/clk.h>
 #include <linux/platform_device.h>
@@ -27,7 +28,7 @@
 #include <mach/da8xx.h>
 #include <mach/cpufreq.h>
 #include <mach/pm.h>
-#include <mach/gpio.h>
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index a3a94e9..c143f43 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -13,7 +13,6 @@
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/gpio.h>
 
 #include <linux/spi/spi.h>
 
@@ -30,6 +29,7 @@
 #include <mach/common.h>
 #include <mach/asp.h>
 #include <mach/spi.h>
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 4604e72..679e168 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -17,7 +17,6 @@
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/gpio.h>
 #include <linux/spi/spi.h>
 
 #include <asm/mach/map.h>
@@ -34,7 +33,7 @@
 #include <mach/asp.h>
 #include <mach/keyscan.h>
 #include <mach/spi.h>
-
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 4c82c27..9a27466 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -12,7 +12,6 @@
 #include <linux/clk.h>
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
-#include <linux/gpio.h>
 
 #include <asm/mach/map.h>
 
@@ -26,6 +25,7 @@
 #include <mach/serial.h>
 #include <mach/common.h>
 #include <mach/asp.h>
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 1802e71..03e5f49 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -13,7 +13,6 @@
 #include <linux/clk.h>
 #include <linux/serial_8250.h>
 #include <linux/platform_device.h>
-#include <linux/gpio.h>
 
 #include <asm/mach/map.h>
 
@@ -27,6 +26,7 @@
 #include <mach/serial.h>
 #include <mach/common.h>
 #include <mach/asp.h>
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-davinci/include/mach/ddr2.h b/arch/arm/mach-davinci/include/mach/ddr2.h
new file mode 100644
index 0000000..c19e047
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/ddr2.h
@@ -0,0 +1,4 @@
+#define DDR2_SDRCR_OFFSET	0xc
+#define DDR2_SRPD_BIT		(1 << 23)
+#define DDR2_MCLKSTOPEN_BIT	(1 << 30)
+#define DDR2_LPMODEN_BIT	(1 << 31)
diff --git a/arch/arm/mach-davinci/include/mach/debug-macro.S b/arch/arm/mach-davinci/include/mach/debug-macro.S
index f8b7ea4..cf94552 100644
--- a/arch/arm/mach-davinci/include/mach/debug-macro.S
+++ b/arch/arm/mach-davinci/include/mach/debug-macro.S
@@ -18,56 +18,50 @@
 
 #include <linux/serial_reg.h>
 
-#include <asm/memory.h>
-
 #include <mach/serial.h>
 
 #define UART_SHIFT	2
 
-#define davinci_uart_v2p(x)	((x) - PAGE_OFFSET + PLAT_PHYS_OFFSET)
-#define davinci_uart_p2v(x)	((x) - PLAT_PHYS_OFFSET + PAGE_OFFSET)
-
 		.pushsection .data
 davinci_uart_phys:	.word	0
 davinci_uart_virt:	.word	0
 		.popsection
 
-		.macro addruart, rp, rv
+		.macro addruart, rp, rv, tmp
 
 		/* Use davinci_uart_phys/virt if already configured */
-10:		mrc	p15, 0, \rp, c1, c0
-		tst	\rp, #1			@ MMU enabled?
-		ldreq	\rp, =davinci_uart_v2p(davinci_uart_phys)
-		ldrne	\rp, =davinci_uart_phys
-		add	\rv, \rp, #4		@ davinci_uart_virt
-		ldr	\rp, [\rp, #0]
-		ldr	\rv, [\rv, #0]
+10:		adr	\rp, 99f		@ get effective addr of 99f
+		ldr	\rv, [\rp]		@ get absolute addr of 99f
+		sub	\rv, \rv, \rp		@ offset between the two
+		ldr	\rp, [\rp, #4]		@ abs addr of omap_uart_phys
+		sub	\tmp, \rp, \rv		@ make it effective
+		ldr	\rp, [\tmp, #0]		@ davinci_uart_phys
+		ldr	\rv, [\tmp, #4]		@ davinci_uart_virt
 		cmp	\rp, #0			@ is port configured?
 		cmpne	\rv, #0
-		bne	99f			@ already configured
+		bne	100f			@ already configured
 
 		/* Check the debug UART address set in uncompress.h */
-		mrc	p15, 0, \rp, c1, c0
-		tst	\rp, #1			@ MMU enabled?
+		and	\rp, pc, #0xff000000
+		ldr	\rv, =DAVINCI_UART_INFO_OFS
+		add	\rp, \rp, \rv
 
 		/* Copy uart phys address from decompressor uart info */
-		ldreq	\rv, =davinci_uart_v2p(davinci_uart_phys)
-		ldrne	\rv, =davinci_uart_phys
-		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
-		ldr	\rp, [\rp, #0]
-		str	\rp, [\rv]
+		ldr	\rv, [\rp, #0]
+		str	\rv, [\tmp, #0]
 
 		/* Copy uart virt address from decompressor uart info */
-		ldreq	\rv, =davinci_uart_v2p(davinci_uart_virt)
-		ldrne	\rv, =davinci_uart_virt
-		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
-		ldr	\rp, [\rp, #4]
-		str	\rp, [\rv]
+		ldr	\rv, [\rp, #4]
+		str	\rv, [\tmp, #4]
 
 		b	10b
-99:
+
+		.align
+99:		.word	.
+		.word	davinci_uart_phys
+		.ltorg
+
+100:
 		.endm
 
 		.macro	senduart,rd,rx
diff --git a/arch/arm/mach-davinci/include/mach/gpio-davinci.h b/arch/arm/mach-davinci/include/mach/gpio-davinci.h
new file mode 100644
index 0000000..1fdd1fd
--- /dev/null
+++ b/arch/arm/mach-davinci/include/mach/gpio-davinci.h
@@ -0,0 +1,91 @@
+/*
+ * TI DaVinci GPIO Support
+ *
+ * Copyright (c) 2006 David Brownell
+ * Copyright (c) 2007, MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef	__DAVINCI_DAVINCI_GPIO_H
+#define	__DAVINCI_DAVINCI_GPIO_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+
+#include <asm-generic/gpio.h>
+
+#include <mach/irqs.h>
+#include <mach/common.h>
+
+#define DAVINCI_GPIO_BASE 0x01C67000
+
+enum davinci_gpio_type {
+	GPIO_TYPE_DAVINCI = 0,
+	GPIO_TYPE_TNETV107X,
+};
+
+/*
+ * basic gpio routines
+ *
+ * board-specific init should be done by arch/.../.../board-XXX.c (maybe
+ * initializing banks together) rather than boot loaders; kexec() won't
+ * go through boot loaders.
+ *
+ * the gpio clock will be turned on when gpios are used, and you may also
+ * need to pay attention to PINMUX registers to be sure those pins are
+ * used as gpios, not with other peripherals.
+ *
+ * On-chip GPIOs are numbered 0..(DAVINCI_N_GPIO-1).  For documentation,
+ * and maybe for later updates, code may write GPIO(N).  These may be
+ * all 1.8V signals, all 3.3V ones, or a mix of the two.  A given chip
+ * may not support all the GPIOs in that range.
+ *
+ * GPIOs can also be on external chips, numbered after the ones built-in
+ * to the DaVinci chip.  For now, they won't be usable as IRQ sources.
+ */
+#define	GPIO(X)		(X)		/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
+
+/* Convert GPIO signal to GPIO pin number */
+#define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
+
+struct davinci_gpio_controller {
+	struct gpio_chip	chip;
+	int			irq_base;
+	spinlock_t		lock;
+	void __iomem		*regs;
+	void __iomem		*set_data;
+	void __iomem		*clr_data;
+	void __iomem		*in_data;
+};
+
+/* The __gpio_to_controller() and __gpio_mask() functions inline to constants
+ * with constant parameters; or in outlined code they execute at runtime.
+ *
+ * You'd access the controller directly when reading or writing more than
+ * one gpio value at a time, and to support wired logic where the value
+ * being driven by the cpu need not match the value read back.
+ *
+ * These are NOT part of the cross-platform GPIO interface
+ */
+static inline struct davinci_gpio_controller *
+__gpio_to_controller(unsigned gpio)
+{
+	struct davinci_gpio_controller *ctlrs = davinci_soc_info.gpio_ctlrs;
+	int index = gpio / 32;
+
+	if (!ctlrs || index >= davinci_soc_info.gpio_ctlrs_num)
+		return NULL;
+
+	return ctlrs + index;
+}
+
+static inline u32 __gpio_mask(unsigned gpio)
+{
+	return 1 << (gpio % 32);
+}
+
+#endif	/* __DAVINCI_DAVINCI_GPIO_H */
diff --git a/arch/arm/mach-davinci/include/mach/gpio.h b/arch/arm/mach-davinci/include/mach/gpio.h
index fbece12..fbaae47 100644
--- a/arch/arm/mach-davinci/include/mach/gpio.h
+++ b/arch/arm/mach-davinci/include/mach/gpio.h
@@ -13,80 +13,10 @@
 #ifndef	__DAVINCI_GPIO_H
 #define	__DAVINCI_GPIO_H
 
-#include <linux/io.h>
-#include <linux/spinlock.h>
-
 #include <asm-generic/gpio.h>
 
-#include <mach/irqs.h>
-#include <mach/common.h>
-
-#define DAVINCI_GPIO_BASE 0x01C67000
-
-enum davinci_gpio_type {
-	GPIO_TYPE_DAVINCI = 0,
-	GPIO_TYPE_TNETV107X,
-};
-
-/*
- * basic gpio routines
- *
- * board-specific init should be done by arch/.../.../board-XXX.c (maybe
- * initializing banks together) rather than boot loaders; kexec() won't
- * go through boot loaders.
- *
- * the gpio clock will be turned on when gpios are used, and you may also
- * need to pay attention to PINMUX registers to be sure those pins are
- * used as gpios, not with other peripherals.
- *
- * On-chip GPIOs are numbered 0..(DAVINCI_N_GPIO-1).  For documentation,
- * and maybe for later updates, code may write GPIO(N).  These may be
- * all 1.8V signals, all 3.3V ones, or a mix of the two.  A given chip
- * may not support all the GPIOs in that range.
- *
- * GPIOs can also be on external chips, numbered after the ones built-in
- * to the DaVinci chip.  For now, they won't be usable as IRQ sources.
- */
-#define	GPIO(X)		(X)		/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
-
-/* Convert GPIO signal to GPIO pin number */
-#define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
-
-struct davinci_gpio_controller {
-	struct gpio_chip	chip;
-	int			irq_base;
-	spinlock_t		lock;
-	void __iomem		*regs;
-	void __iomem		*set_data;
-	void __iomem		*clr_data;
-	void __iomem		*in_data;
-};
-
-/* The __gpio_to_controller() and __gpio_mask() functions inline to constants
- * with constant parameters; or in outlined code they execute at runtime.
- *
- * You'd access the controller directly when reading or writing more than
- * one gpio value at a time, and to support wired logic where the value
- * being driven by the cpu need not match the value read back.
- *
- * These are NOT part of the cross-platform GPIO interface
- */
-static inline struct davinci_gpio_controller *
-__gpio_to_controller(unsigned gpio)
-{
-	struct davinci_gpio_controller *ctlrs = davinci_soc_info.gpio_ctlrs;
-	int index = gpio / 32;
-
-	if (!ctlrs || index >= davinci_soc_info.gpio_ctlrs_num)
-		return NULL;
-
-	return ctlrs + index;
-}
-
-static inline u32 __gpio_mask(unsigned gpio)
-{
-	return 1 << (gpio % 32);
-}
+/* The inline versions use the static inlines in the driver header */
+#include "gpio-davinci.h"
 
 /*
  * The get/set/clear functions will inline when called with constant
@@ -147,11 +77,6 @@
 		return __gpio_cansleep(gpio);
 }
 
-static inline int gpio_to_irq(unsigned gpio)
-{
-	return __gpio_to_irq(gpio);
-}
-
 static inline int irq_to_gpio(unsigned irq)
 {
 	/* don't support the reverse mapping */
diff --git a/arch/arm/mach-davinci/include/mach/memory.h b/arch/arm/mach-davinci/include/mach/memory.h
deleted file mode 100644
index 7873194..0000000
--- a/arch/arm/mach-davinci/include/mach/memory.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * DaVinci memory space definitions
- *
- * Author: Kevin Hilman, MontaVista Software, Inc. <source@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-/**************************************************************************
- * Included Files
- **************************************************************************/
-#include <asm/page.h>
-#include <asm/sizes.h>
-
-/**************************************************************************
- * Definitions
- **************************************************************************/
-#define DAVINCI_DDR_BASE	0x80000000
-#define DA8XX_DDR_BASE		0xc0000000
-
-#if defined(CONFIG_ARCH_DAVINCI_DA8XX) && defined(CONFIG_ARCH_DAVINCI_DMx)
-#error Cannot enable DaVinci and DA8XX platforms concurrently
-#elif defined(CONFIG_ARCH_DAVINCI_DA8XX)
-#define PLAT_PHYS_OFFSET DA8XX_DDR_BASE
-#else
-#define PLAT_PHYS_OFFSET DAVINCI_DDR_BASE
-#endif
-
-#define DDR2_SDRCR_OFFSET	0xc
-#define DDR2_SRPD_BIT		BIT(23)
-#define DDR2_MCLKSTOPEN_BIT	BIT(30)
-#define DDR2_LPMODEN_BIT	BIT(31)
-
-/*
- * Increase size of DMA-consistent memory region
- */
-#define CONSISTENT_DMA_SIZE (14<<20)
-
-#endif /* __ASM_ARCH_MEMORY_H */
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index c9e6ce1..e347d88 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -21,8 +21,9 @@
  * macros in debug-macro.S.
  *
  * This area sits just below the page tables (see arch/arm/kernel/head.S).
+ * We define it as a relative offset from start of usable RAM.
  */
-#define DAVINCI_UART_INFO	(PLAT_PHYS_OFFSET + 0x3ff8)
+#define DAVINCI_UART_INFO_OFS	0x3ff8
 
 #define DAVINCI_UART0_BASE	(IO_PHYS + 0x20000)
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)
diff --git a/arch/arm/mach-davinci/include/mach/uncompress.h b/arch/arm/mach-davinci/include/mach/uncompress.h
index 78d8068..9dc7cf9 100644
--- a/arch/arm/mach-davinci/include/mach/uncompress.h
+++ b/arch/arm/mach-davinci/include/mach/uncompress.h
@@ -43,7 +43,12 @@
 
 static inline void set_uart_info(u32 phys, void * __iomem virt)
 {
-	u32 *uart_info = (u32 *)(DAVINCI_UART_INFO);
+	/*
+	 * Get address of some.bss variable and round it down
+	 * a la CONFIG_AUTO_ZRELADDR.
+	 */
+	u32 ram_start = (u32)&uart & 0xf8000000;
+	u32 *uart_info = (u32 *)(ram_start + DAVINCI_UART_INFO_OFS);
 
 	uart = (u32 *)phys;
 	uart_info[0] = phys;
diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S
index 5f1e045..d4e9316 100644
--- a/arch/arm/mach-davinci/sleep.S
+++ b/arch/arm/mach-davinci/sleep.S
@@ -22,7 +22,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <mach/psc.h>
-#include <mach/memory.h>
+#include <mach/ddr2.h>
 
 #include "clock.h"
 
diff --git a/arch/arm/mach-davinci/tnetv107x.c b/arch/arm/mach-davinci/tnetv107x.c
index 1b28fdd..409bb86 100644
--- a/arch/arm/mach-davinci/tnetv107x.c
+++ b/arch/arm/mach-davinci/tnetv107x.c
@@ -12,6 +12,7 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
+#include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/clk.h>
@@ -27,9 +28,9 @@
 #include <mach/psc.h>
 #include <mach/cp_intc.h>
 #include <mach/irqs.h>
-#include <mach/gpio.h>
 #include <mach/hardware.h>
 #include <mach/tnetv107x.h>
+#include <mach/gpio-davinci.h>
 
 #include "clock.h"
 #include "mux.h"
diff --git a/arch/arm/mach-dove/cm-a510.c b/arch/arm/mach-dove/cm-a510.c
index 03e11f9..c8a406f 100644
--- a/arch/arm/mach-dove/cm-a510.c
+++ b/arch/arm/mach-dove/cm-a510.c
@@ -87,7 +87,7 @@
 }
 
 MACHINE_START(CM_A510, "Compulab CM-A510 Board")
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.init_machine	= cm_a510_init,
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,
diff --git a/arch/arm/mach-dove/dove-db-setup.c b/arch/arm/mach-dove/dove-db-setup.c
index 2ac34ec..11ea34e 100644
--- a/arch/arm/mach-dove/dove-db-setup.c
+++ b/arch/arm/mach-dove/dove-db-setup.c
@@ -94,7 +94,7 @@
 }
 
 MACHINE_START(DOVE_DB, "Marvell DB-MV88AP510-BP Development Board")
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.init_machine	= dove_db_init,
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,
diff --git a/arch/arm/mach-dove/include/mach/debug-macro.S b/arch/arm/mach-dove/include/mach/debug-macro.S
index da8bf2b..5929cbc 100644
--- a/arch/arm/mach-dove/include/mach/debug-macro.S
+++ b/arch/arm/mach-dove/include/mach/debug-macro.S
@@ -8,7 +8,7 @@
 
 #include <mach/bridge-regs.h>
 
-	.macro	addruart, rp, rv
+	.macro	addruart, rp, rv, tmp
 	ldr	\rp, =DOVE_SB_REGS_PHYS_BASE
 	ldr	\rv, =DOVE_SB_REGS_VIRT_BASE
 	orr	\rp, \rp, #0x00012000
diff --git a/arch/arm/mach-dove/include/mach/memory.h b/arch/arm/mach-dove/include/mach/memory.h
deleted file mode 100644
index bbc93fe..0000000
--- a/arch/arm/mach-dove/include/mach/memory.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * arch/arm/mach-dove/include/mach/memory.h
- */
-
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#define PLAT_PHYS_OFFSET		UL(0x00000000)
-
-#endif
diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c
index 087bc77..d0ce8ab 100644
--- a/arch/arm/mach-ebsa110/core.c
+++ b/arch/arm/mach-ebsa110/core.c
@@ -280,7 +280,7 @@
 
 MACHINE_START(EBSA110, "EBSA110")
 	/* Maintainer: Russell King */
-	.boot_params	= 0x00000400,
+	.atag_offset	= 0x400,
 	.reserve_lp0	= 1,
 	.reserve_lp2	= 1,
 	.soft_reboot	= 1,
diff --git a/arch/arm/mach-ebsa110/include/mach/debug-macro.S b/arch/arm/mach-ebsa110/include/mach/debug-macro.S
index 7ef5690..bb02c05 100644
--- a/arch/arm/mach-ebsa110/include/mach/debug-macro.S
+++ b/arch/arm/mach-ebsa110/include/mach/debug-macro.S
@@ -11,7 +11,7 @@
  *
 **/
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		mov	\rp, #0xf0000000
 		orr	\rp, \rp, #0x00000be0
 		mov	\rp, \rv
diff --git a/arch/arm/mach-ep93xx/adssphere.c b/arch/arm/mach-ep93xx/adssphere.c
index 61b98ce..0713448 100644
--- a/arch/arm/mach-ep93xx/adssphere.c
+++ b/arch/arm/mach-ep93xx/adssphere.c
@@ -33,7 +33,7 @@
 
 MACHINE_START(ADSSPHERE, "ADS Sphere board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index c60f081..94c78bc 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -38,6 +38,7 @@
 #include <mach/fb.h>
 #include <mach/ep93xx_keypad.h>
 #include <mach/ep93xx_spi.h>
+#include <mach/gpio-ep93xx.h>
 
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c
index 9969bb1..70ef8c5 100644
--- a/arch/arm/mach-ep93xx/edb93xx.c
+++ b/arch/arm/mach-ep93xx/edb93xx.c
@@ -37,6 +37,7 @@
 #include <mach/hardware.h>
 #include <mach/fb.h>
 #include <mach/ep93xx_spi.h>
+#include <mach/gpio-ep93xx.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -159,6 +160,11 @@
 /*************************************************************************
  * EDB93xx I2S
  *************************************************************************/
+static struct platform_device edb93xx_audio_device = {
+	.name		= "edb93xx-audio",
+	.id		= -1,
+};
+
 static int __init edb93xx_has_audio(void)
 {
 	return (machine_is_edb9301() || machine_is_edb9302() ||
@@ -170,6 +176,7 @@
 {
 	if (edb93xx_has_audio()) {
 		ep93xx_register_i2s();
+		platform_device_register(&edb93xx_audio_device);
 	}
 }
 
@@ -240,7 +247,7 @@
 #ifdef CONFIG_MACH_EDB9301
 MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board")
 	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -251,7 +258,7 @@
 #ifdef CONFIG_MACH_EDB9302
 MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board")
 	/* Maintainer: George Kashperko <george@chas.com.ua> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -262,7 +269,7 @@
 #ifdef CONFIG_MACH_EDB9302A
 MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -273,7 +280,7 @@
 #ifdef CONFIG_MACH_EDB9307
 MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board")
 	/* Maintainer: Herbert Valerio Riedel <hvr@gnu.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -284,7 +291,7 @@
 #ifdef CONFIG_MACH_EDB9307A
 MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board")
 	/* Maintainer: H Hartley Sweeten <hsweeten@visionengravers.com> */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -295,7 +302,7 @@
 #ifdef CONFIG_MACH_EDB9312
 MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board")
 	/* Maintainer: Toufeeq Hussain <toufeeq_hussain@infosys.com> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -306,7 +313,7 @@
 #ifdef CONFIG_MACH_EDB9315
 MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -317,7 +324,7 @@
 #ifdef CONFIG_MACH_EDB9315A
 MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
diff --git a/arch/arm/mach-ep93xx/gesbc9312.c b/arch/arm/mach-ep93xx/gesbc9312.c
index 9bd3152..45ee205 100644
--- a/arch/arm/mach-ep93xx/gesbc9312.c
+++ b/arch/arm/mach-ep93xx/gesbc9312.c
@@ -33,7 +33,7 @@
 
 MACHINE_START(GESBC9312, "Glomation GESBC-9312-sx")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
diff --git a/arch/arm/mach-ep93xx/include/mach/debug-macro.S b/arch/arm/mach-ep93xx/include/mach/debug-macro.S
index b25bc907..af54e43 100644
--- a/arch/arm/mach-ep93xx/include/mach/debug-macro.S
+++ b/arch/arm/mach-ep93xx/include/mach/debug-macro.S
@@ -11,7 +11,7 @@
  */
 #include <mach/ep93xx-regs.h>
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		ldr	\rp, =EP93XX_APB_PHYS_BASE	@ Physical base
 		ldr	\rv, =EP93XX_APB_VIRT_BASE	@ virtual base
 		orr	\rp, \rp, #0x000c0000
diff --git a/arch/arm/mach-ep93xx/include/mach/gpio-ep93xx.h b/arch/arm/mach-ep93xx/include/mach/gpio-ep93xx.h
new file mode 100644
index 0000000..8aff2ea
--- /dev/null
+++ b/arch/arm/mach-ep93xx/include/mach/gpio-ep93xx.h
@@ -0,0 +1,100 @@
+/* Include file for the EP93XX GPIO controller machine specifics */
+
+#ifndef __GPIO_EP93XX_H
+#define __GPIO_EP93XX_H
+
+/* GPIO port A.  */
+#define EP93XX_GPIO_LINE_A(x)		((x) + 0)
+#define EP93XX_GPIO_LINE_EGPIO0		EP93XX_GPIO_LINE_A(0)
+#define EP93XX_GPIO_LINE_EGPIO1		EP93XX_GPIO_LINE_A(1)
+#define EP93XX_GPIO_LINE_EGPIO2		EP93XX_GPIO_LINE_A(2)
+#define EP93XX_GPIO_LINE_EGPIO3		EP93XX_GPIO_LINE_A(3)
+#define EP93XX_GPIO_LINE_EGPIO4		EP93XX_GPIO_LINE_A(4)
+#define EP93XX_GPIO_LINE_EGPIO5		EP93XX_GPIO_LINE_A(5)
+#define EP93XX_GPIO_LINE_EGPIO6		EP93XX_GPIO_LINE_A(6)
+#define EP93XX_GPIO_LINE_EGPIO7		EP93XX_GPIO_LINE_A(7)
+
+/* GPIO port B.  */
+#define EP93XX_GPIO_LINE_B(x)		((x) + 8)
+#define EP93XX_GPIO_LINE_EGPIO8		EP93XX_GPIO_LINE_B(0)
+#define EP93XX_GPIO_LINE_EGPIO9		EP93XX_GPIO_LINE_B(1)
+#define EP93XX_GPIO_LINE_EGPIO10	EP93XX_GPIO_LINE_B(2)
+#define EP93XX_GPIO_LINE_EGPIO11	EP93XX_GPIO_LINE_B(3)
+#define EP93XX_GPIO_LINE_EGPIO12	EP93XX_GPIO_LINE_B(4)
+#define EP93XX_GPIO_LINE_EGPIO13	EP93XX_GPIO_LINE_B(5)
+#define EP93XX_GPIO_LINE_EGPIO14	EP93XX_GPIO_LINE_B(6)
+#define EP93XX_GPIO_LINE_EGPIO15	EP93XX_GPIO_LINE_B(7)
+
+/* GPIO port C.  */
+#define EP93XX_GPIO_LINE_C(x)		((x) + 40)
+#define EP93XX_GPIO_LINE_ROW0		EP93XX_GPIO_LINE_C(0)
+#define EP93XX_GPIO_LINE_ROW1		EP93XX_GPIO_LINE_C(1)
+#define EP93XX_GPIO_LINE_ROW2		EP93XX_GPIO_LINE_C(2)
+#define EP93XX_GPIO_LINE_ROW3		EP93XX_GPIO_LINE_C(3)
+#define EP93XX_GPIO_LINE_ROW4		EP93XX_GPIO_LINE_C(4)
+#define EP93XX_GPIO_LINE_ROW5		EP93XX_GPIO_LINE_C(5)
+#define EP93XX_GPIO_LINE_ROW6		EP93XX_GPIO_LINE_C(6)
+#define EP93XX_GPIO_LINE_ROW7		EP93XX_GPIO_LINE_C(7)
+
+/* GPIO port D.  */
+#define EP93XX_GPIO_LINE_D(x)		((x) + 24)
+#define EP93XX_GPIO_LINE_COL0		EP93XX_GPIO_LINE_D(0)
+#define EP93XX_GPIO_LINE_COL1		EP93XX_GPIO_LINE_D(1)
+#define EP93XX_GPIO_LINE_COL2		EP93XX_GPIO_LINE_D(2)
+#define EP93XX_GPIO_LINE_COL3		EP93XX_GPIO_LINE_D(3)
+#define EP93XX_GPIO_LINE_COL4		EP93XX_GPIO_LINE_D(4)
+#define EP93XX_GPIO_LINE_COL5		EP93XX_GPIO_LINE_D(5)
+#define EP93XX_GPIO_LINE_COL6		EP93XX_GPIO_LINE_D(6)
+#define EP93XX_GPIO_LINE_COL7		EP93XX_GPIO_LINE_D(7)
+
+/* GPIO port E.  */
+#define EP93XX_GPIO_LINE_E(x)		((x) + 32)
+#define EP93XX_GPIO_LINE_GRLED		EP93XX_GPIO_LINE_E(0)
+#define EP93XX_GPIO_LINE_RDLED		EP93XX_GPIO_LINE_E(1)
+#define EP93XX_GPIO_LINE_DIORn		EP93XX_GPIO_LINE_E(2)
+#define EP93XX_GPIO_LINE_IDECS1n	EP93XX_GPIO_LINE_E(3)
+#define EP93XX_GPIO_LINE_IDECS2n	EP93XX_GPIO_LINE_E(4)
+#define EP93XX_GPIO_LINE_IDEDA0		EP93XX_GPIO_LINE_E(5)
+#define EP93XX_GPIO_LINE_IDEDA1		EP93XX_GPIO_LINE_E(6)
+#define EP93XX_GPIO_LINE_IDEDA2		EP93XX_GPIO_LINE_E(7)
+
+/* GPIO port F.  */
+#define EP93XX_GPIO_LINE_F(x)		((x) + 16)
+#define EP93XX_GPIO_LINE_WP		EP93XX_GPIO_LINE_F(0)
+#define EP93XX_GPIO_LINE_MCCD1		EP93XX_GPIO_LINE_F(1)
+#define EP93XX_GPIO_LINE_MCCD2		EP93XX_GPIO_LINE_F(2)
+#define EP93XX_GPIO_LINE_MCBVD1		EP93XX_GPIO_LINE_F(3)
+#define EP93XX_GPIO_LINE_MCBVD2		EP93XX_GPIO_LINE_F(4)
+#define EP93XX_GPIO_LINE_VS1		EP93XX_GPIO_LINE_F(5)
+#define EP93XX_GPIO_LINE_READY		EP93XX_GPIO_LINE_F(6)
+#define EP93XX_GPIO_LINE_VS2		EP93XX_GPIO_LINE_F(7)
+
+/* GPIO port G.  */
+#define EP93XX_GPIO_LINE_G(x)		((x) + 48)
+#define EP93XX_GPIO_LINE_EECLK		EP93XX_GPIO_LINE_G(0)
+#define EP93XX_GPIO_LINE_EEDAT		EP93XX_GPIO_LINE_G(1)
+#define EP93XX_GPIO_LINE_SLA0		EP93XX_GPIO_LINE_G(2)
+#define EP93XX_GPIO_LINE_SLA1		EP93XX_GPIO_LINE_G(3)
+#define EP93XX_GPIO_LINE_DD12		EP93XX_GPIO_LINE_G(4)
+#define EP93XX_GPIO_LINE_DD13		EP93XX_GPIO_LINE_G(5)
+#define EP93XX_GPIO_LINE_DD14		EP93XX_GPIO_LINE_G(6)
+#define EP93XX_GPIO_LINE_DD15		EP93XX_GPIO_LINE_G(7)
+
+/* GPIO port H.  */
+#define EP93XX_GPIO_LINE_H(x)		((x) + 56)
+#define EP93XX_GPIO_LINE_DD0		EP93XX_GPIO_LINE_H(0)
+#define EP93XX_GPIO_LINE_DD1		EP93XX_GPIO_LINE_H(1)
+#define EP93XX_GPIO_LINE_DD2		EP93XX_GPIO_LINE_H(2)
+#define EP93XX_GPIO_LINE_DD3		EP93XX_GPIO_LINE_H(3)
+#define EP93XX_GPIO_LINE_DD4		EP93XX_GPIO_LINE_H(4)
+#define EP93XX_GPIO_LINE_DD5		EP93XX_GPIO_LINE_H(5)
+#define EP93XX_GPIO_LINE_DD6		EP93XX_GPIO_LINE_H(6)
+#define EP93XX_GPIO_LINE_DD7		EP93XX_GPIO_LINE_H(7)
+
+/* maximum value for gpio line identifiers */
+#define EP93XX_GPIO_LINE_MAX		EP93XX_GPIO_LINE_H(7)
+
+/* maximum value for irq capable line identifiers */
+#define EP93XX_GPIO_LINE_MAX_IRQ	EP93XX_GPIO_LINE_F(7)
+
+#endif /* __GPIO_EP93XX_H */
diff --git a/arch/arm/mach-ep93xx/include/mach/gpio.h b/arch/arm/mach-ep93xx/include/mach/gpio.h
index c57152c..40a8c17 100644
--- a/arch/arm/mach-ep93xx/include/mach/gpio.h
+++ b/arch/arm/mach-ep93xx/include/mach/gpio.h
@@ -1,120 +1 @@
-/*
- * arch/arm/mach-ep93xx/include/mach/gpio.h
- */
-
-#ifndef __ASM_ARCH_GPIO_H
-#define __ASM_ARCH_GPIO_H
-
-/* GPIO port A.  */
-#define EP93XX_GPIO_LINE_A(x)		((x) + 0)
-#define EP93XX_GPIO_LINE_EGPIO0		EP93XX_GPIO_LINE_A(0)
-#define EP93XX_GPIO_LINE_EGPIO1		EP93XX_GPIO_LINE_A(1)
-#define EP93XX_GPIO_LINE_EGPIO2		EP93XX_GPIO_LINE_A(2)
-#define EP93XX_GPIO_LINE_EGPIO3		EP93XX_GPIO_LINE_A(3)
-#define EP93XX_GPIO_LINE_EGPIO4		EP93XX_GPIO_LINE_A(4)
-#define EP93XX_GPIO_LINE_EGPIO5		EP93XX_GPIO_LINE_A(5)
-#define EP93XX_GPIO_LINE_EGPIO6		EP93XX_GPIO_LINE_A(6)
-#define EP93XX_GPIO_LINE_EGPIO7		EP93XX_GPIO_LINE_A(7)
-
-/* GPIO port B.  */
-#define EP93XX_GPIO_LINE_B(x)		((x) + 8)
-#define EP93XX_GPIO_LINE_EGPIO8		EP93XX_GPIO_LINE_B(0)
-#define EP93XX_GPIO_LINE_EGPIO9		EP93XX_GPIO_LINE_B(1)
-#define EP93XX_GPIO_LINE_EGPIO10	EP93XX_GPIO_LINE_B(2)
-#define EP93XX_GPIO_LINE_EGPIO11	EP93XX_GPIO_LINE_B(3)
-#define EP93XX_GPIO_LINE_EGPIO12	EP93XX_GPIO_LINE_B(4)
-#define EP93XX_GPIO_LINE_EGPIO13	EP93XX_GPIO_LINE_B(5)
-#define EP93XX_GPIO_LINE_EGPIO14	EP93XX_GPIO_LINE_B(6)
-#define EP93XX_GPIO_LINE_EGPIO15	EP93XX_GPIO_LINE_B(7)
-
-/* GPIO port C.  */
-#define EP93XX_GPIO_LINE_C(x)		((x) + 40)
-#define EP93XX_GPIO_LINE_ROW0		EP93XX_GPIO_LINE_C(0)
-#define EP93XX_GPIO_LINE_ROW1		EP93XX_GPIO_LINE_C(1)
-#define EP93XX_GPIO_LINE_ROW2		EP93XX_GPIO_LINE_C(2)
-#define EP93XX_GPIO_LINE_ROW3		EP93XX_GPIO_LINE_C(3)
-#define EP93XX_GPIO_LINE_ROW4		EP93XX_GPIO_LINE_C(4)
-#define EP93XX_GPIO_LINE_ROW5		EP93XX_GPIO_LINE_C(5)
-#define EP93XX_GPIO_LINE_ROW6		EP93XX_GPIO_LINE_C(6)
-#define EP93XX_GPIO_LINE_ROW7		EP93XX_GPIO_LINE_C(7)
-
-/* GPIO port D.  */
-#define EP93XX_GPIO_LINE_D(x)		((x) + 24)
-#define EP93XX_GPIO_LINE_COL0		EP93XX_GPIO_LINE_D(0)
-#define EP93XX_GPIO_LINE_COL1		EP93XX_GPIO_LINE_D(1)
-#define EP93XX_GPIO_LINE_COL2		EP93XX_GPIO_LINE_D(2)
-#define EP93XX_GPIO_LINE_COL3		EP93XX_GPIO_LINE_D(3)
-#define EP93XX_GPIO_LINE_COL4		EP93XX_GPIO_LINE_D(4)
-#define EP93XX_GPIO_LINE_COL5		EP93XX_GPIO_LINE_D(5)
-#define EP93XX_GPIO_LINE_COL6		EP93XX_GPIO_LINE_D(6)
-#define EP93XX_GPIO_LINE_COL7		EP93XX_GPIO_LINE_D(7)
-
-/* GPIO port E.  */
-#define EP93XX_GPIO_LINE_E(x)		((x) + 32)
-#define EP93XX_GPIO_LINE_GRLED		EP93XX_GPIO_LINE_E(0)
-#define EP93XX_GPIO_LINE_RDLED		EP93XX_GPIO_LINE_E(1)
-#define EP93XX_GPIO_LINE_DIORn		EP93XX_GPIO_LINE_E(2)
-#define EP93XX_GPIO_LINE_IDECS1n	EP93XX_GPIO_LINE_E(3)
-#define EP93XX_GPIO_LINE_IDECS2n	EP93XX_GPIO_LINE_E(4)
-#define EP93XX_GPIO_LINE_IDEDA0		EP93XX_GPIO_LINE_E(5)
-#define EP93XX_GPIO_LINE_IDEDA1		EP93XX_GPIO_LINE_E(6)
-#define EP93XX_GPIO_LINE_IDEDA2		EP93XX_GPIO_LINE_E(7)
-
-/* GPIO port F.  */
-#define EP93XX_GPIO_LINE_F(x)		((x) + 16)
-#define EP93XX_GPIO_LINE_WP		EP93XX_GPIO_LINE_F(0)
-#define EP93XX_GPIO_LINE_MCCD1		EP93XX_GPIO_LINE_F(1)
-#define EP93XX_GPIO_LINE_MCCD2		EP93XX_GPIO_LINE_F(2)
-#define EP93XX_GPIO_LINE_MCBVD1		EP93XX_GPIO_LINE_F(3)
-#define EP93XX_GPIO_LINE_MCBVD2		EP93XX_GPIO_LINE_F(4)
-#define EP93XX_GPIO_LINE_VS1		EP93XX_GPIO_LINE_F(5)
-#define EP93XX_GPIO_LINE_READY		EP93XX_GPIO_LINE_F(6)
-#define EP93XX_GPIO_LINE_VS2		EP93XX_GPIO_LINE_F(7)
-
-/* GPIO port G.  */
-#define EP93XX_GPIO_LINE_G(x)		((x) + 48)
-#define EP93XX_GPIO_LINE_EECLK		EP93XX_GPIO_LINE_G(0)
-#define EP93XX_GPIO_LINE_EEDAT		EP93XX_GPIO_LINE_G(1)
-#define EP93XX_GPIO_LINE_SLA0		EP93XX_GPIO_LINE_G(2)
-#define EP93XX_GPIO_LINE_SLA1		EP93XX_GPIO_LINE_G(3)
-#define EP93XX_GPIO_LINE_DD12		EP93XX_GPIO_LINE_G(4)
-#define EP93XX_GPIO_LINE_DD13		EP93XX_GPIO_LINE_G(5)
-#define EP93XX_GPIO_LINE_DD14		EP93XX_GPIO_LINE_G(6)
-#define EP93XX_GPIO_LINE_DD15		EP93XX_GPIO_LINE_G(7)
-
-/* GPIO port H.  */
-#define EP93XX_GPIO_LINE_H(x)		((x) + 56)
-#define EP93XX_GPIO_LINE_DD0		EP93XX_GPIO_LINE_H(0)
-#define EP93XX_GPIO_LINE_DD1		EP93XX_GPIO_LINE_H(1)
-#define EP93XX_GPIO_LINE_DD2		EP93XX_GPIO_LINE_H(2)
-#define EP93XX_GPIO_LINE_DD3		EP93XX_GPIO_LINE_H(3)
-#define EP93XX_GPIO_LINE_DD4		EP93XX_GPIO_LINE_H(4)
-#define EP93XX_GPIO_LINE_DD5		EP93XX_GPIO_LINE_H(5)
-#define EP93XX_GPIO_LINE_DD6		EP93XX_GPIO_LINE_H(6)
-#define EP93XX_GPIO_LINE_DD7		EP93XX_GPIO_LINE_H(7)
-
-/* maximum value for gpio line identifiers */
-#define EP93XX_GPIO_LINE_MAX		EP93XX_GPIO_LINE_H(7)
-
-/* maximum value for irq capable line identifiers */
-#define EP93XX_GPIO_LINE_MAX_IRQ	EP93XX_GPIO_LINE_F(7)
-
-/* new generic GPIO API - see Documentation/gpio.txt */
-
-#include <asm-generic/gpio.h>
-
-#define gpio_get_value	__gpio_get_value
-#define gpio_set_value	__gpio_set_value
-#define gpio_cansleep	__gpio_cansleep
-
-/*
- * Map GPIO A0..A7  (0..7)  to irq 64..71,
- *          B0..B7  (7..15) to irq 72..79, and
- *          F0..F7 (16..24) to irq 80..87.
- */
-#define gpio_to_irq(gpio)	\
-	(((gpio) <= EP93XX_GPIO_LINE_MAX_IRQ) ? (64 + (gpio)) : -EINVAL)
-
-#define irq_to_gpio(irq)	((irq) - gpio_to_irq(0))
-
-#endif
+/* empty */
diff --git a/arch/arm/mach-ep93xx/micro9.c b/arch/arm/mach-ep93xx/micro9.c
index 7adea62..e72f736 100644
--- a/arch/arm/mach-ep93xx/micro9.c
+++ b/arch/arm/mach-ep93xx/micro9.c
@@ -77,7 +77,7 @@
 #ifdef CONFIG_MACH_MICRO9H
 MACHINE_START(MICRO9, "Contec Micro9-High")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -88,7 +88,7 @@
 #ifdef CONFIG_MACH_MICRO9M
 MACHINE_START(MICRO9M, "Contec Micro9-Mid")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_ASYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -99,7 +99,7 @@
 #ifdef CONFIG_MACH_MICRO9L
 MACHINE_START(MICRO9L, "Contec Micro9-Lite")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
@@ -110,7 +110,7 @@
 #ifdef CONFIG_MACH_MICRO9S
 MACHINE_START(MICRO9S, "Contec Micro9-Slim")
 	/* Maintainer: Hubert Feurstein <hubert.feurstein@contec.at> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_ASYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c
index 8392e95..52e090d 100644
--- a/arch/arm/mach-ep93xx/simone.c
+++ b/arch/arm/mach-ep93xx/simone.c
@@ -18,12 +18,12 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/i2c-gpio.h>
 
 #include <mach/hardware.h>
 #include <mach/fb.h>
+#include <mach/gpio-ep93xx.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -53,6 +53,17 @@
 	},
 };
 
+static struct platform_device simone_audio_device = {
+	.name		= "simone-audio",
+	.id		= -1,
+};
+
+static void __init simone_register_audio(void)
+{
+	ep93xx_register_ac97();
+	platform_device_register(&simone_audio_device);
+}
+
 static void __init simone_init_machine(void)
 {
 	ep93xx_init_devices();
@@ -61,12 +72,12 @@
 	ep93xx_register_fb(&simone_fb_info);
 	ep93xx_register_i2c(&simone_i2c_gpio_data, simone_i2c_board_info,
 			    ARRAY_SIZE(simone_i2c_board_info));
-	ep93xx_register_ac97();
+	simone_register_audio();
 }
 
 MACHINE_START(SIM_ONE, "Simplemachines Sim.One Board")
-/* Maintainer: Ryan Mallon */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	/* Maintainer: Ryan Mallon */
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 2e9c614..8121e3a 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -20,7 +20,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/i2c-gpio.h>
 #include <linux/fb.h>
@@ -30,6 +29,7 @@
 
 #include <mach/hardware.h>
 #include <mach/fb.h>
+#include <mach/gpio-ep93xx.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -150,6 +150,17 @@
 	.bpp			= 16,
 };
 
+static struct platform_device snappercl15_audio_device = {
+	.name		= "snappercl15-audio",
+	.id		= -1,
+};
+
+static void __init snappercl15_register_audio(void)
+{
+	ep93xx_register_i2s();
+	platform_device_register(&snappercl15_audio_device);
+}
+
 static void __init snappercl15_init_machine(void)
 {
 	ep93xx_init_devices();
@@ -157,13 +168,13 @@
 	ep93xx_register_i2c(&snappercl15_i2c_gpio_data, snappercl15_i2c_data,
 			    ARRAY_SIZE(snappercl15_i2c_data));
 	ep93xx_register_fb(&snappercl15_fb_info);
-	ep93xx_register_i2s();
+	snappercl15_register_audio();
 	platform_device_register(&snappercl15_nand_device);
 }
 
 MACHINE_START(SNAPPER_CL15, "Bluewater Systems Snapper CL15")
 	/* Maintainer: Ryan Mallon */
-	.boot_params	= EP93XX_SDCE0_PHYS_BASE + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer 		= &ep93xx_timer,
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index c2d2cf4..1ade3c3 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -257,7 +257,7 @@
 
 MACHINE_START(TS72XX, "Technologic Systems TS-72xx SBC")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.boot_params	= EP93XX_SDCE3_PHYS_BASE_SYNC + 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= ts72xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.timer		= &ep93xx_timer,
diff --git a/arch/arm/mach-exynos4/include/mach/debug-macro.S b/arch/arm/mach-exynos4/include/mach/debug-macro.S
index a442ef8..6cacf16 100644
--- a/arch/arm/mach-exynos4/include/mach/debug-macro.S
+++ b/arch/arm/mach-exynos4/include/mach/debug-macro.S
@@ -20,7 +20,7 @@
 	 * aligned and add in the offset when we load the value here.
 	 */
 
-	.macro addruart, rp, rv
+	.macro addruart, rp, rv, tmp
 		ldr	\rp, = S3C_PA_UART
 		ldr	\rv, = S3C_VA_UART
 #if CONFIG_DEBUG_S3C_UART != 0
diff --git a/arch/arm/mach-exynos4/include/mach/entry-macro.S b/arch/arm/mach-exynos4/include/mach/entry-macro.S
index d7a1e28..006a4f4 100644
--- a/arch/arm/mach-exynos4/include/mach/entry-macro.S
+++ b/arch/arm/mach-exynos4/include/mach/entry-macro.S
@@ -55,7 +55,7 @@
 
 		bic     \irqnr, \irqstat, #0x1c00
 
-		cmp     \irqnr, #29
+		cmp     \irqnr, #15
 		cmpcc	\irqnr, \irqnr
 		cmpne	\irqnr, \tmp
 		cmpcs	\irqnr, \irqnr
@@ -76,8 +76,3 @@
 		strcc	\irqstat, [\base, #GIC_CPU_EOI]
 		cmpcs	\irqnr, \irqnr
 		.endm
-
-		/* As above, this assumes that irqstat and base are preserved.. */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		.endm
diff --git a/arch/arm/mach-exynos4/include/mach/gpio.h b/arch/arm/mach-exynos4/include/mach/gpio.h
index be9266b..80523ca 100644
--- a/arch/arm/mach-exynos4/include/mach/gpio.h
+++ b/arch/arm/mach-exynos4/include/mach/gpio.h
@@ -13,11 +13,6 @@
 #ifndef __ASM_ARCH_GPIO_H
 #define __ASM_ARCH_GPIO_H __FILE__
 
-#define gpio_get_value	__gpio_get_value
-#define gpio_set_value	__gpio_set_value
-#define gpio_cansleep	__gpio_cansleep
-#define gpio_to_irq	__gpio_to_irq
-
 /* Practically, GPIO banks up to GPZ are the configurable gpio banks */
 
 /* GPIO bank sizes */
@@ -151,6 +146,4 @@
 #define ARCH_NR_GPIOS		(EXYNOS4_GPZ(EXYNOS4_GPIO_Z_NR) +	\
 				 CONFIG_SAMSUNG_GPIO_EXTRA + 1)
 
-#include <asm-generic/gpio.h>
-
 #endif /* __ASM_ARCH_GPIO_H */
diff --git a/arch/arm/mach-exynos4/mach-armlex4210.c b/arch/arm/mach-exynos4/mach-armlex4210.c
index b482c62..f0ca6c1 100644
--- a/arch/arm/mach-exynos4/mach-armlex4210.c
+++ b/arch/arm/mach-exynos4/mach-armlex4210.c
@@ -207,7 +207,7 @@
 
 MACHINE_START(ARMLEX4210, "ARMLEX4210")
 	/* Maintainer: Alim Akhtar <alim.akhtar@samsung.com> */
-	.boot_params	= S5P_PA_SDRAM + 0x100,
+	.atag_offset	= 0x100,
 	.init_irq	= exynos4_init_irq,
 	.map_io		= armlex4210_map_io,
 	.init_machine	= armlex4210_machine_init,
diff --git a/arch/arm/mach-exynos4/mach-nuri.c b/arch/arm/mach-exynos4/mach-nuri.c
index 43be71b..6e05368 100644
--- a/arch/arm/mach-exynos4/mach-nuri.c
+++ b/arch/arm/mach-exynos4/mach-nuri.c
@@ -1152,7 +1152,7 @@
 
 MACHINE_START(NURI, "NURI")
 	/* Maintainer: Kyungmin Park <kyungmin.park@samsung.com> */
-	.boot_params	= S5P_PA_SDRAM + 0x100,
+	.atag_offset	= 0x100,
 	.init_irq	= exynos4_init_irq,
 	.map_io		= nuri_map_io,
 	.init_machine	= nuri_machine_init,
diff --git a/arch/arm/mach-exynos4/mach-smdkc210.c b/arch/arm/mach-exynos4/mach-smdkc210.c
index a7c65e0..b24ddd7 100644
--- a/arch/arm/mach-exynos4/mach-smdkc210.c
+++ b/arch/arm/mach-exynos4/mach-smdkc210.c
@@ -301,7 +301,7 @@
 
 MACHINE_START(SMDKC210, "SMDKC210")
 	/* Maintainer: Kukjin Kim <kgene.kim@samsung.com> */
-	.boot_params	= S5P_PA_SDRAM + 0x100,
+	.atag_offset	= 0x100,
 	.init_irq	= exynos4_init_irq,
 	.map_io		= smdkc210_map_io,
 	.init_machine	= smdkc210_machine_init,
diff --git a/arch/arm/mach-exynos4/mach-smdkv310.c b/arch/arm/mach-exynos4/mach-smdkv310.c
index ea41495..d90fcdd 100644
--- a/arch/arm/mach-exynos4/mach-smdkv310.c
+++ b/arch/arm/mach-exynos4/mach-smdkv310.c
@@ -255,7 +255,7 @@
 MACHINE_START(SMDKV310, "SMDKV310")
 	/* Maintainer: Kukjin Kim <kgene.kim@samsung.com> */
 	/* Maintainer: Changhwan Youn <chaos.youn@samsung.com> */
-	.boot_params	= S5P_PA_SDRAM + 0x100,
+	.atag_offset	= 0x100,
 	.init_irq	= exynos4_init_irq,
 	.map_io		= smdkv310_map_io,
 	.init_machine	= smdkv310_machine_init,
diff --git a/arch/arm/mach-exynos4/mach-universal_c210.c b/arch/arm/mach-exynos4/mach-universal_c210.c
index b3b5d89..2aac6f7 100644
--- a/arch/arm/mach-exynos4/mach-universal_c210.c
+++ b/arch/arm/mach-exynos4/mach-universal_c210.c
@@ -762,7 +762,7 @@
 
 MACHINE_START(UNIVERSAL_C210, "UNIVERSAL_C210")
 	/* Maintainer: Kyungmin Park <kyungmin.park@samsung.com> */
-	.boot_params	= S5P_PA_SDRAM + 0x100,
+	.atag_offset	= 0x100,
 	.init_irq	= exynos4_init_irq,
 	.map_io		= universal_map_io,
 	.init_machine	= universal_machine_init,
diff --git a/arch/arm/mach-exynos4/mct.c b/arch/arm/mach-exynos4/mct.c
index ddd8686..582b874 100644
--- a/arch/arm/mach-exynos4/mct.c
+++ b/arch/arm/mach-exynos4/mct.c
@@ -386,9 +386,11 @@
 
 	if (cpu == 0) {
 		mct_tick0_event_irq.dev_id = &mct_tick[cpu];
+		evt->irq = IRQ_MCT_L0;
 		setup_irq(IRQ_MCT_L0, &mct_tick0_event_irq);
 	} else {
 		mct_tick1_event_irq.dev_id = &mct_tick[cpu];
+		evt->irq = IRQ_MCT_L1;
 		setup_irq(IRQ_MCT_L1, &mct_tick1_event_irq);
 		irq_set_affinity(IRQ_MCT_L1, cpumask_of(1));
 	}
@@ -402,9 +404,10 @@
 	return 0;
 }
 
-int local_timer_ack(void)
+void local_timer_stop(struct clock_event_device *evt)
 {
-	return 0;
+	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	disable_irq(evt->irq);
 }
 
 #endif /* CONFIG_LOCAL_TIMERS */
diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c
index 206ff2f..d5f1785 100644
--- a/arch/arm/mach-footbridge/cats-hw.c
+++ b/arch/arm/mach-footbridge/cats-hw.c
@@ -85,7 +85,7 @@
 
 MACHINE_START(CATS, "Chalice-CATS")
 	/* Maintainer: Philip Blundell */
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.soft_reboot	= 1,
 	.fixup		= fixup_cats,
 	.map_io		= footbridge_map_io,
diff --git a/arch/arm/mach-footbridge/ebsa285.c b/arch/arm/mach-footbridge/ebsa285.c
index 2ef69ff..012210c 100644
--- a/arch/arm/mach-footbridge/ebsa285.c
+++ b/arch/arm/mach-footbridge/ebsa285.c
@@ -15,7 +15,7 @@
 
 MACHINE_START(EBSA285, "EBSA285")
 	/* Maintainer: Russell King */
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.video_start	= 0x000a0000,
 	.video_end	= 0x000bffff,
 	.map_io		= footbridge_map_io,
diff --git a/arch/arm/mach-footbridge/include/mach/debug-macro.S b/arch/arm/mach-footbridge/include/mach/debug-macro.S
index 1be2eeb..e5acde2 100644
--- a/arch/arm/mach-footbridge/include/mach/debug-macro.S
+++ b/arch/arm/mach-footbridge/include/mach/debug-macro.S
@@ -15,7 +15,7 @@
 
 #ifndef CONFIG_DEBUG_DC21285_PORT
 	/* For NetWinder debugging */
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		mov	\rp, #0x000003f8
 		orr	\rv, \rp, #0xff000000	@ virtual
 		orr	\rp, \rp, #0x7c000000	@ physical
@@ -31,7 +31,7 @@
 		.equ	dc21285_high, ARMCSR_BASE & 0xff000000
 		.equ	dc21285_low,  ARMCSR_BASE & 0x00ffffff
 
-		.macro	addruart, rp, rv
+		.macro	addruart, rp, rv, tmp
 		.if	dc21285_low
 		mov	\rp, #dc21285_low
 		.else
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c
index 0f7aeff..0d3846f 100644
--- a/arch/arm/mach-footbridge/netwinder-hw.c
+++ b/arch/arm/mach-footbridge/netwinder-hw.c
@@ -647,7 +647,7 @@
 
 MACHINE_START(NETWINDER, "Rebel-NetWinder")
 	/* Maintainer: Russell King/Rebel.com */
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.video_start	= 0x000a0000,
 	.video_end	= 0x000bffff,
 	.reserve_lp0	= 1,
diff --git a/arch/arm/mach-footbridge/personal.c b/arch/arm/mach-footbridge/personal.c
index 3285e91..f41dba3 100644
--- a/arch/arm/mach-footbridge/personal.c
+++ b/arch/arm/mach-footbridge/personal.c
@@ -15,7 +15,7 @@
 
 MACHINE_START(PERSONAL_SERVER, "Compaq-PersonalServer")
 	/* Maintainer: Jamey Hicks / George France */
-	.boot_params	= 0x00000100,
+	.atag_offset	= 0x100,
 	.map_io		= footbridge_map_io,
 	.init_irq	= footbridge_init_irq,
 	.timer		= &footbridge_timer,
diff --git a/arch/arm/mach-gemini/board-nas4220b.c b/arch/arm/mach-gemini/board-nas4220b.c
index 0cf7a07..5927d3c 100644
--- a/arch/arm/mach-gemini/board-nas4220b.c
+++ b/arch/arm/mach-gemini/board-nas4220b.c
@@ -102,7 +102,7 @@
 }
 
 MACHINE_START(NAS4220B, "Raidsonic NAS IB-4220-B")
-	.boot_params	= 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
 	.timer		= &ib4220b_timer,
diff --git a/arch/arm/mach-gemini/board-rut1xx.c b/arch/arm/mach-gemini/board-rut1xx.c
index 4fa09af..cd7437a 100644
--- a/arch/arm/mach-gemini/board-rut1xx.c
+++ b/arch/arm/mach-gemini/board-rut1xx.c
@@ -86,7 +86,7 @@
 }
 
 MACHINE_START(RUT100, "Teltonika RUT100")
-	.boot_params	= 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
 	.timer		= &rut1xx_timer,
diff --git a/arch/arm/mach-gemini/board-wbd111.c b/arch/arm/mach-gemini/board-wbd111.c
index 88cc422..a367880 100644
--- a/arch/arm/mach-gemini/board-wbd111.c
+++ b/arch/arm/mach-gemini/board-wbd111.c
@@ -129,7 +129,7 @@
 }
 
 MACHINE_START(WBD111, "Wiliboard WBD-111")
-	.boot_params	= 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
 	.timer		= &wbd111_timer,
diff --git a/arch/arm/mach-gemini/board-wbd222.c b/arch/arm/mach-gemini/board-wbd222.c
index 3a22034..f382811 100644
--- a/arch/arm/mach-gemini/board-wbd222.c
+++ b/arch/arm/mach-gemini/board-wbd222.c
@@ -129,7 +129,7 @@
 }
 
 MACHINE_START(WBD222, "Wiliboard WBD-222")
-	.boot_params	= 0x100,
+	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
 	.timer		= &wbd222_timer,
diff --git a/arch/arm/mach-gemini/include/mach/debug-macro.S b/arch/arm/mach-gemini/include/mach/debug-macro.S
index f40e006..8376707 100644
--- a/arch/arm/mach-gemini/include/mach/debug-macro.S
+++ b/arch/arm/mach-gemini/include/mach/debug-macro.S
@@ -11,7 +11,7 @@
  */
 #include <mach/hardware.h>
 
-	.macro	addruart, rp, rv
+	.macro	addruart, rp, rv, tmp
 	ldr	\rp, =GEMINI_UART_BASE			@ physical
 	ldr	\rv, =IO_ADDRESS(GEMINI_UART_BASE)	@ virtual
 	.endm
diff --git a/arch/arm/mach-gemini/include/mach/gpio.h b/arch/arm/mach-gemini/include/mach/gpio.h
index 3bc2c70..40a0527 100644
--- a/arch/arm/mach-gemini/include/mach/gpio.h
+++ b/arch/arm/mach-gemini/include/mach/gpio.h
@@ -13,11 +13,6 @@
 #define __MACH_GPIO_H__
 
 #include <mach/irqs.h>
-#include <asm-generic/gpio.h>
-
-#define gpio_get_value	__gpio_get_value
-#define gpio_set_value	__gpio_set_value
-#define gpio_cansleep	__gpio_cansleep
 
 #define gpio_to_irq(x)	((x) + GPIO_IRQ_BASE)
 #define irq_to_gpio(x)	((x) - GPIO_IRQ_BASE)
diff --git a/arch/arm/mach-gemini/include/mach/memory.h b/arch/arm/mach-gemini/include/mach/memory.h
deleted file mode 100644
index a50915f..0000000
--- a/arch/arm/mach-gemini/include/mach/memory.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  Copyright (C) 2001-2006 Storlink, Corp.
- *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-#ifndef __MACH_MEMORY_H
-#define __MACH_MEMORY_H
-
-#ifdef CONFIG_GEMINI_MEM_SWAP
-# define PLAT_PHYS_OFFSET	UL(0x00000000)
-#else
-# define PLAT_PHYS_OFFSET	UL(0x10000000)
-#endif
-
-#endif /* __MACH_MEMORY_H */
diff --git a/arch/arm/mach-h720x/h7201-eval.c b/arch/arm/mach-h720x/h7201-eval.c
index 65f1bea..9886f19 100644
--- a/arch/arm/mach-h720x/h7201-eval.c
+++ b/arch/arm/mach-h720x/h7201-eval.c
@@ -29,7 +29,7 @@
 
 MACHINE_START(H7201, "Hynix GMS30C7201")
 	/* Maintainer: Robert Schwebel, Pengutronix */
-	.boot_params	= 0xc0001000,
+	.atag_offset	= 0x1000,
 	.map_io		= h720x_map_io,
 	.init_irq	= h720x_init_irq,
 	.timer		= &h7201_timer,
diff --git a/arch/arm/mach-h720x/h7202-eval.c b/arch/arm/mach-h720x/h7202-eval.c
index 884584a..284a134 100644
--- a/arch/arm/mach-h720x/h7202-eval.c
+++ b/arch/arm/mach-h720x/h7202-eval.c
@@ -71,7 +71,7 @@
 
 MACHINE_START(H7202, "Hynix HMS30C7202")
 	/* Maintainer: Robert Schwebel, Pengutronix */
-	.boot_params	= 0x40000100,
+	.atag_offset	= 0x100,
 	.map_io		= h720x_map_io,
 	.init_irq	= h7202_init_irq,
 	.timer		= &h7202_timer,
diff --git a/arch/arm/mach-h720x/include/mach/debug-macro.S b/arch/arm/mach-h720x/include/mach/debug-macro.S
index c2093e8..8a46157 100644
--- a/arch/arm/mach-h720x/include/mach/debug-macro.S
+++ b/arch/arm/mach-h720x/include/mach/debug-macro.S
@@ -16,7 +16,7 @@
 		.equ    io_virt, IO_VIRT
 		.equ    io_phys, IO_PHYS
 
-		.macro  addruart, rp, rv
+		.macro  addruart, rp, rv, tmp
 		mov     \rp, #0x00020000	@ UART1
 		add     \rv, \rp, #io_virt	@ virtual address
 		add     \rp, \rp, #io_phys	@ physical base address
diff --git a/arch/arm/mach-h720x/include/mach/memory.h b/arch/arm/mach-h720x/include/mach/memory.h
deleted file mode 100644
index 96dcf50..0000000
--- a/arch/arm/mach-h720x/include/mach/memory.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * arch/arm/mach-h720x/include/mach/memory.h
- *
- * Copyright (c) 2000 Jungjun Kim
- *
- */
-#ifndef __ASM_ARCH_MEMORY_H
-#define __ASM_ARCH_MEMORY_H
-
-#define PLAT_PHYS_OFFSET	UL(0x40000000)
-#endif
diff --git a/arch/arm/mach-imx/iomux-imx31.c b/arch/arm/mach-imx/iomux-imx31.c
index cf8f809..82bd440 100644
--- a/arch/arm/mach-imx/iomux-imx31.c
+++ b/arch/arm/mach-imx/iomux-imx31.c
@@ -17,13 +17,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  * MA 02110-1301, USA.
  */
-
+#include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <mach/hardware.h>
-#include <mach/gpio.h>
 #include <mach/iomux-mx3.h>
 
 /*
diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c
index ede2710..2152590 100644
--- a/arch/arm/mach-imx/mach-armadillo5x0.c
+++ b/arch/arm/mach-imx/mach-armadillo5x0.c
@@ -558,7 +558,7 @@
 
 MACHINE_START(ARMADILLO5X0, "Armadillo-500")
 	/* Maintainer: Alberto Panizzo  */
-	.boot_params = MX3x_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx31_map_io,
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c
index f851fe9..b1ec2cf 100644
--- a/arch/arm/mach-imx/mach-cpuimx27.c
+++ b/arch/arm/mach-imx/mach-cpuimx27.c
@@ -311,7 +311,7 @@
 };
 
 MACHINE_START(EUKREA_CPUIMX27, "EUKREA CPUIMX27")
-	.boot_params = MX27_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c
index 4bd083b..470b654 100644
--- a/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/arch/arm/mach-imx/mach-cpuimx35.c
@@ -194,7 +194,7 @@
 
 MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35")
 	/* Maintainer: Eukrea Electromatique */
-	.boot_params = MX3x_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx35_map_io,
 	.init_early = imx35_init_early,
 	.init_irq = mx35_init_irq,
diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
index 2442d5d..9163318 100644
--- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
+++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
@@ -163,7 +163,7 @@
 
 MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25")
 	/* Maintainer: Eukrea Electromatique */
-	.boot_params = MX25_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx25_map_io,
 	.init_early = imx25_init_early,
 	.init_irq = mx25_init_irq,
diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
index 6778f81..22306ce 100644
--- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
+++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
@@ -275,7 +275,7 @@
 };
 
 MACHINE_START(IMX27_VISSTRIM_M10, "Vista Silicon Visstrim_M10")
-	.boot_params = MX27_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
diff --git a/arch/arm/mach-imx/mach-imx27ipcam.c b/arch/arm/mach-imx/mach-imx27ipcam.c
index 272f793..8da48b3 100644
--- a/arch/arm/mach-imx/mach-imx27ipcam.c
+++ b/arch/arm/mach-imx/mach-imx27ipcam.c
@@ -71,7 +71,7 @@
 
 MACHINE_START(IMX27IPCAM, "Freescale IMX27IPCAM")
 	/* maintainer: Freescale Semiconductor, Inc. */
-	.boot_params = MX27_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
diff --git a/arch/arm/mach-imx/mach-imx27lite.c b/arch/arm/mach-imx/mach-imx27lite.c
index d81a769..21a14a2 100644
--- a/arch/arm/mach-imx/mach-imx27lite.c
+++ b/arch/arm/mach-imx/mach-imx27lite.c
@@ -77,7 +77,7 @@
 };
 
 MACHINE_START(IMX27LITE, "LogicPD i.MX27LITE")
-	.boot_params = MX27_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
diff --git a/arch/arm/mach-imx/mach-kzm_arm11_01.c b/arch/arm/mach-imx/mach-kzm_arm11_01.c
index e472a1d..7c20e9e 100644
--- a/arch/arm/mach-imx/mach-kzm_arm11_01.c
+++ b/arch/arm/mach-imx/mach-kzm_arm11_01.c
@@ -271,7 +271,7 @@
 };
 
 MACHINE_START(KZM_ARM11_01, "Kyoto Microcomputer Co., Ltd. KZM-ARM11-01")
-	.boot_params = MX3x_PHYS_OFFSET + 0x100,
+	.atag_offset = 0x100,
 	.map_io = kzm_map_io,
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
diff --git a/arch/arm/mach-imx/mach-mx1ads.c b/arch/arm/ma