Merge branch 'fixes' of git://git.armlinux.org.uk/~rmk/linux-arm

Pull ARM fixes from Russell King:
 "A few more ARM fixes:

   - the assembly backtrace code suffers problems with the new printk()
     implementation which assumes that kernel messages without KERN_CONT
     should have newlines inserted between them. Fix this.
   - fix a section naming error - ".init.text" rather than ".text.init"
   - preallocate DMA debug memory at core_initcall() time rather than
     fs_initcall(), as we have some core drivers that need to use DMA
     mapping - and that triggers a kernel warning from the DMA debug
     code.
   - fix XIP kernels after the ro_after_init changes made this data
     permanently read-only"

* 'fixes' of git://git.armlinux.org.uk/~rmk/linux-arm:
  ARM: Fix XIP kernels
  ARM: 8628/1: dma-mapping: preallocate DMA-debug hash tables in core_initcall
  ARM: 8624/1: proc-v7m.S: fix init section name
  ARM: fix backtrace
diff --git a/CREDITS b/CREDITS
index 513aaa3..8373676 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1864,10 +1864,11 @@
 
 N: Martin Kepplinger
 E: martink@posteo.de
-E: martin.kepplinger@theobroma-systems.com
+E: martin.kepplinger@ginzinger.com
 W: http://www.martinkepplinger.com
 D: mma8452 accelerators iio driver
-D: Kernel cleanups
+D: pegasus_notetaker input driver
+D: Kernel fixes and cleanups
 S: Garnisonstraße 26
 S: 4020 Linz
 S: Austria
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
index 4ba0a2a..640f65e 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -220,8 +220,11 @@
 Date:           October 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    write only
-                Writing 1 will issue a PERST to card which may cause the card
-                to reload the FPGA depending on load_image_on_perst.
+                Writing 1 will issue a PERST to card provided there are no
+                contexts active on any one of the card AFUs. This may cause
+                the card to reload the FPGA depending on load_image_on_perst.
+                Writing -1 will do a force PERST irrespective of any active
+                contexts on the card AFUs.
 Users:		https://github.com/ibm-capi/libcxl
 
 What:		/sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
index b82deea..470def0 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
+++ b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
@@ -1,4 +1,4 @@
-What:           state
+What:           /sys/devices/system/ibm_rtl/state
 Date:           Sep 2010
 KernelVersion:  2.6.37
 Contact:        Vernon Mauery <vernux@us.ibm.com>
@@ -10,7 +10,7 @@
 Users:          The ibm-prtm userspace daemon uses this interface.
 
 
-What:           version
+What:           /sys/devices/system/ibm_rtl/version
 Date:           Sep 2010
 KernelVersion:  2.6.37
 Contact:        Vernon Mauery <vernux@us.ibm.com>
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index e5b6497..c75b64a 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -309,3 +309,4 @@
 	with a reshape in progress.
 1.9.0   Add support for RAID level takeover/reshape/region size
 	and set size reduction.
+1.9.1   Fix activation of existing RAID 4/10 mapped devices
diff --git a/Documentation/devicetree/bindings/clock/uniphier-clock.txt b/Documentation/devicetree/bindings/clock/uniphier-clock.txt
index c7179d3..8121630 100644
--- a/Documentation/devicetree/bindings/clock/uniphier-clock.txt
+++ b/Documentation/devicetree/bindings/clock/uniphier-clock.txt
@@ -24,7 +24,7 @@
 		reg = <0x61840000 0x4000>;
 
 		clock {
-			compatible = "socionext,uniphier-ld20-clock";
+			compatible = "socionext,uniphier-ld11-clock";
 			#clock-cells = <1>;
 		};
 
@@ -43,8 +43,8 @@
 21: USB3 ch1 PHY1
 
 
-Media I/O (MIO) clock
----------------------
+Media I/O (MIO) clock, SD clock
+-------------------------------
 
 Required properties:
 - compatible: should be one of the following:
@@ -52,10 +52,10 @@
     "socionext,uniphier-ld4-mio-clock"  - for LD4 SoC.
     "socionext,uniphier-pro4-mio-clock" - for Pro4 SoC.
     "socionext,uniphier-sld8-mio-clock" - for sLD8 SoC.
-    "socionext,uniphier-pro5-mio-clock" - for Pro5 SoC.
-    "socionext,uniphier-pxs2-mio-clock" - for PXs2/LD6b SoC.
+    "socionext,uniphier-pro5-sd-clock"  - for Pro5 SoC.
+    "socionext,uniphier-pxs2-sd-clock"  - for PXs2/LD6b SoC.
     "socionext,uniphier-ld11-mio-clock" - for LD11 SoC.
-    "socionext,uniphier-ld20-mio-clock" - for LD20 SoC.
+    "socionext,uniphier-ld20-sd-clock"  - for LD20 SoC.
 - #clock-cells: should be 1.
 
 Example:
@@ -66,7 +66,7 @@
 		reg = <0x59810000 0x800>;
 
 		clock {
-			compatible = "socionext,uniphier-ld20-mio-clock";
+			compatible = "socionext,uniphier-ld11-mio-clock";
 			#clock-cells = <1>;
 		};
 
@@ -112,7 +112,7 @@
 		reg = <0x59820000 0x200>;
 
 		clock {
-			compatible = "socionext,uniphier-ld20-peri-clock";
+			compatible = "socionext,uniphier-ld11-peri-clock";
 			#clock-cells = <1>;
 		};
 
diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt
new file mode 100644
index 0000000..6f28969
--- /dev/null
+++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt
@@ -0,0 +1,23 @@
+* Aspeed BT (Block Transfer) IPMI interface
+
+The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
+(BaseBoard Management Controllers) and the BT interface can be used to
+perform in-band IPMI communication with their host.
+
+Required properties:
+
+- compatible : should be "aspeed,ast2400-ibt-bmc"
+- reg: physical address and size of the registers
+
+Optional properties:
+
+- interrupts: interrupt generated by the BT interface. without an
+  interrupt, the driver will operate in poll mode.
+
+Example:
+
+	ibt@1e789140 {
+		compatible = "aspeed,ast2400-ibt-bmc";
+		reg = <0x1e789140 0x18>;
+		interrupts = <8>;
+	};
diff --git a/Documentation/devicetree/bindings/ipmi.txt b/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt
similarity index 100%
rename from Documentation/devicetree/bindings/ipmi.txt
rename to Documentation/devicetree/bindings/ipmi/ipmi-smic.txt
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
index 4e00e85..bfa461a 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
@@ -43,6 +43,9 @@
   reset signal present internally in some host controller IC designs.
   See Documentation/devicetree/bindings/reset/reset.txt for details.
 
+* reset-names: request name for using "resets" property. Must be "reset".
+	(It will be used together with "resets" property.)
+
 * clocks: from common clock binding: handle to biu and ciu clocks for the
   bus interface unit clock and the card interface unit clock.
 
@@ -103,6 +106,8 @@
 		interrupts = <0 75 0>;
 		#address-cells = <1>;
 		#size-cells = <0>;
+		resets = <&rst 20>;
+		reset-names = "reset";
 	};
 
 [board specific internal DMA resources]
diff --git a/Documentation/devicetree/bindings/net/marvell-orion-net.txt b/Documentation/devicetree/bindings/net/marvell-orion-net.txt
index bce52b2..6fd988c 100644
--- a/Documentation/devicetree/bindings/net/marvell-orion-net.txt
+++ b/Documentation/devicetree/bindings/net/marvell-orion-net.txt
@@ -49,6 +49,7 @@
 and
 
  - phy-handle: See ethernet.txt file in the same directory.
+ - phy-mode: See ethernet.txt file in the same directory.
 
 or
 
diff --git a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt
index ba67b39..71aeda1 100644
--- a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt
@@ -26,13 +26,16 @@
 	- "sys"
 	- "legacy"
 	- "client"
-- resets: Must contain five entries for each entry in reset-names.
+- resets: Must contain seven entries for each entry in reset-names.
 	   See ../reset/reset.txt for details.
 - reset-names: Must include the following names
 	- "core"
 	- "mgmt"
 	- "mgmt-sticky"
 	- "pipe"
+	- "pm"
+	- "aclk"
+	- "pclk"
 - pinctrl-names : The pin control state names
 - pinctrl-0: The "default" pinctrl state
 - #interrupt-cells: specifies the number of cells needed to encode an
@@ -86,8 +89,10 @@
 	reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>;
 	reg-names = "axi-base", "apb-base";
 	resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
-		 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>;
-	reset-names = "core", "mgmt", "mgmt-sticky", "pipe";
+		 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE> ,
+		 <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, <&cru SRST_A_PCIE>;
+	reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
+		      "pm", "pclk", "aclk";
 	phys = <&pcie_phy>;
 	phy-names = "pcie-phy";
 	pinctrl-names = "default";
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
index 5e60ad1..2ad18c4 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
@@ -43,7 +43,9 @@
 
 GPID0 GPID2 GPIE0 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4 I2C5 I2C6 I2C7 I2C8
 I2C9 MAC1LINK MDIO1 MDIO2 OSCCLK PEWAKE PWM0 PWM1 PWM2 PWM3 PWM4 PWM5 PWM6 PWM7
-RGMII1 RGMII2 RMII1 RMII2 SD1 SPI1 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8
+RGMII1 RGMII2 RMII1 RMII2 SD1 SPI1 SPI1DEBUG SPI1PASSTHRU TIMER4 TIMER5 TIMER6
+TIMER7 TIMER8 VGABIOSROM
+
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
index f9753c4..b24583a 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
@@ -14,11 +14,6 @@
  - #size-cells	: The value of this property must be 1
  - ranges	: defines mapping between pin controller node (parent) to
    gpio-bank node (children).
- - interrupt-parent: phandle of the interrupt parent to which the external
-   GPIO interrupts are forwarded to.
- - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node
-   which includes IRQ mux selection register, and the offset of the IRQ mux
-   selection register.
  - pins-are-numbered: Specify the subnodes are using numbered pinmux to
    specify pins.
 
@@ -37,6 +32,11 @@
 
 Optional properties:
  - reset:	  : Reference to the reset controller
+ - interrupt-parent: phandle of the interrupt parent to which the external
+   GPIO interrupts are forwarded to.
+ - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node
+   which includes IRQ mux selection register, and the offset of the IRQ mux
+   selection register.
 
 Example:
 #include <dt-bindings/pinctrl/stm32f429-pinfunc.h>
diff --git a/Documentation/devicetree/bindings/reset/uniphier-reset.txt b/Documentation/devicetree/bindings/reset/uniphier-reset.txt
index e6bbfcc..5020524 100644
--- a/Documentation/devicetree/bindings/reset/uniphier-reset.txt
+++ b/Documentation/devicetree/bindings/reset/uniphier-reset.txt
@@ -6,25 +6,25 @@
 
 Required properties:
 - compatible: should be one of the following:
-    "socionext,uniphier-sld3-reset" - for PH1-sLD3 SoC.
-    "socionext,uniphier-ld4-reset"  - for PH1-LD4 SoC.
-    "socionext,uniphier-pro4-reset" - for PH1-Pro4 SoC.
-    "socionext,uniphier-sld8-reset" - for PH1-sLD8 SoC.
-    "socionext,uniphier-pro5-reset" - for PH1-Pro5 SoC.
-    "socionext,uniphier-pxs2-reset" - for ProXstream2/PH1-LD6b SoC.
-    "socionext,uniphier-ld11-reset" - for PH1-LD11 SoC.
-    "socionext,uniphier-ld20-reset" - for PH1-LD20 SoC.
+    "socionext,uniphier-sld3-reset" - for sLD3 SoC.
+    "socionext,uniphier-ld4-reset"  - for LD4 SoC.
+    "socionext,uniphier-pro4-reset" - for Pro4 SoC.
+    "socionext,uniphier-sld8-reset" - for sLD8 SoC.
+    "socionext,uniphier-pro5-reset" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-reset" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-reset" - for LD11 SoC.
+    "socionext,uniphier-ld20-reset" - for LD20 SoC.
 - #reset-cells: should be 1.
 
 Example:
 
 	sysctrl@61840000 {
-		compatible = "socionext,uniphier-ld20-sysctrl",
+		compatible = "socionext,uniphier-ld11-sysctrl",
 			     "simple-mfd", "syscon";
 		reg = <0x61840000 0x4000>;
 
 		reset {
-			compatible = "socionext,uniphier-ld20-reset";
+			compatible = "socionext,uniphier-ld11-reset";
 			#reset-cells = <1>;
 		};
 
@@ -32,30 +32,30 @@
 	};
 
 
-Media I/O (MIO) reset
----------------------
+Media I/O (MIO) reset, SD reset
+-------------------------------
 
 Required properties:
 - compatible: should be one of the following:
-    "socionext,uniphier-sld3-mio-reset" - for PH1-sLD3 SoC.
-    "socionext,uniphier-ld4-mio-reset"  - for PH1-LD4 SoC.
-    "socionext,uniphier-pro4-mio-reset" - for PH1-Pro4 SoC.
-    "socionext,uniphier-sld8-mio-reset" - for PH1-sLD8 SoC.
-    "socionext,uniphier-pro5-mio-reset" - for PH1-Pro5 SoC.
-    "socionext,uniphier-pxs2-mio-reset" - for ProXstream2/PH1-LD6b SoC.
-    "socionext,uniphier-ld11-mio-reset" - for PH1-LD11 SoC.
-    "socionext,uniphier-ld20-mio-reset" - for PH1-LD20 SoC.
+    "socionext,uniphier-sld3-mio-reset" - for sLD3 SoC.
+    "socionext,uniphier-ld4-mio-reset"  - for LD4 SoC.
+    "socionext,uniphier-pro4-mio-reset" - for Pro4 SoC.
+    "socionext,uniphier-sld8-mio-reset" - for sLD8 SoC.
+    "socionext,uniphier-pro5-sd-reset"  - for Pro5 SoC.
+    "socionext,uniphier-pxs2-sd-reset"  - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-mio-reset" - for LD11 SoC.
+    "socionext,uniphier-ld20-sd-reset"  - for LD20 SoC.
 - #reset-cells: should be 1.
 
 Example:
 
 	mioctrl@59810000 {
-		compatible = "socionext,uniphier-ld20-mioctrl",
+		compatible = "socionext,uniphier-ld11-mioctrl",
 			     "simple-mfd", "syscon";
 		reg = <0x59810000 0x800>;
 
 		reset {
-			compatible = "socionext,uniphier-ld20-mio-reset";
+			compatible = "socionext,uniphier-ld11-mio-reset";
 			#reset-cells = <1>;
 		};
 
@@ -68,24 +68,24 @@
 
 Required properties:
 - compatible: should be one of the following:
-    "socionext,uniphier-ld4-peri-reset"  - for PH1-LD4 SoC.
-    "socionext,uniphier-pro4-peri-reset" - for PH1-Pro4 SoC.
-    "socionext,uniphier-sld8-peri-reset" - for PH1-sLD8 SoC.
-    "socionext,uniphier-pro5-peri-reset" - for PH1-Pro5 SoC.
-    "socionext,uniphier-pxs2-peri-reset" - for ProXstream2/PH1-LD6b SoC.
-    "socionext,uniphier-ld11-peri-reset" - for PH1-LD11 SoC.
-    "socionext,uniphier-ld20-peri-reset" - for PH1-LD20 SoC.
+    "socionext,uniphier-ld4-peri-reset"  - for LD4 SoC.
+    "socionext,uniphier-pro4-peri-reset" - for Pro4 SoC.
+    "socionext,uniphier-sld8-peri-reset" - for sLD8 SoC.
+    "socionext,uniphier-pro5-peri-reset" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-peri-reset" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-peri-reset" - for LD11 SoC.
+    "socionext,uniphier-ld20-peri-reset" - for LD20 SoC.
 - #reset-cells: should be 1.
 
 Example:
 
 	perictrl@59820000 {
-		compatible = "socionext,uniphier-ld20-perictrl",
+		compatible = "socionext,uniphier-ld11-perictrl",
 			     "simple-mfd", "syscon";
 		reg = <0x59820000 0x200>;
 
 		reset {
-			compatible = "socionext,uniphier-ld20-peri-reset";
+			compatible = "socionext,uniphier-ld11-peri-reset";
 			#reset-cells = <1>;
 		};
 
diff --git a/Documentation/devicetree/bindings/serial/cdns,uart.txt b/Documentation/devicetree/bindings/serial/cdns,uart.txt
index a3eb154..227bb77 100644
--- a/Documentation/devicetree/bindings/serial/cdns,uart.txt
+++ b/Documentation/devicetree/bindings/serial/cdns,uart.txt
@@ -1,7 +1,9 @@
 Binding for Cadence UART Controller
 
 Required properties:
-- compatible : should be "cdns,uart-r1p8", or "xlnx,xuartps"
+- compatible :
+  Use "xlnx,xuartps","cdns,uart-r1p8" for Zynq-7xxx SoC.
+  Use "xlnx,zynqmp-uart","cdns,uart-r1p12" for Zynq Ultrascale+ MPSoC.
 - reg: Should contain UART controller registers location and length.
 - interrupts: Should contain UART controller interrupts.
 - clocks: Must contain phandles to the UART clocks
diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
index 1e4000d..8d27d1a 100644
--- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
+++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
@@ -9,6 +9,14 @@
     - "renesas,scifb-r8a73a4" for R8A73A4 (R-Mobile APE6) SCIFB compatible UART.
     - "renesas,scifa-r8a7740" for R8A7740 (R-Mobile A1) SCIFA compatible UART.
     - "renesas,scifb-r8a7740" for R8A7740 (R-Mobile A1) SCIFB compatible UART.
+    - "renesas,scif-r8a7743" for R8A7743 (RZ/G1M) SCIF compatible UART.
+    - "renesas,scifa-r8a7743" for R8A7743 (RZ/G1M) SCIFA compatible UART.
+    - "renesas,scifb-r8a7743" for R8A7743 (RZ/G1M) SCIFB compatible UART.
+    - "renesas,hscif-r8a7743" for R8A7743 (RZ/G1M) HSCIF compatible UART.
+    - "renesas,scif-r8a7745" for R8A7745 (RZ/G1E) SCIF compatible UART.
+    - "renesas,scifa-r8a7745" for R8A7745 (RZ/G1E) SCIFA compatible UART.
+    - "renesas,scifb-r8a7745" for R8A7745 (RZ/G1E) SCIFB compatible UART.
+    - "renesas,hscif-r8a7745" for R8A7745 (RZ/G1E) HSCIF compatible UART.
     - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART.
     - "renesas,scif-r8a7779" for R8A7779 (R-Car H1) SCIF compatible UART.
     - "renesas,scif-r8a7790" for R8A7790 (R-Car H2) SCIF compatible UART.
diff --git a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt
index fd40c85..462b04e8 100644
--- a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt
+++ b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt
@@ -12,7 +12,7 @@
 
 Optional properties:
 - ti,dmic: phandle for the OMAP dmic node if the machine have it connected
-- ti,jack_detection: Need to be present if the board capable to detect jack
+- ti,jack-detection: Need to be present if the board capable to detect jack
   insertion, removal.
 
 Available audio endpoints for the audio-routing table:
diff --git a/Documentation/devicetree/bindings/timer/jcore,pit.txt b/Documentation/devicetree/bindings/timer/jcore,pit.txt
new file mode 100644
index 0000000..af5dd35
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/jcore,pit.txt
@@ -0,0 +1,24 @@
+J-Core Programmable Interval Timer and Clocksource
+
+Required properties:
+
+- compatible: Must be "jcore,pit".
+
+- reg: Memory region(s) for timer/clocksource registers. For SMP,
+  there should be one region per cpu, indexed by the sequential,
+  zero-based hardware cpu number.
+
+- interrupts: An interrupt to assign for the timer. The actual pit
+  core is integrated with the aic and allows the timer interrupt
+  assignment to be programmed by software, but this property is
+  required in order to reserve an interrupt number that doesn't
+  conflict with other devices.
+
+
+Example:
+
+timer@200 {
+	compatible = "jcore,pit";
+	reg = < 0x200 0x30 0x500 0x30 >;
+	interrupts = < 0x48 >;
+};
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index 455f2c3..2c30a54 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -28,10 +28,7 @@
 - g-use-dma: enable dma usage in gadget driver.
 - g-rx-fifo-size: size of rx fifo size in gadget mode.
 - g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode.
-
-Deprecated properties:
-- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0)
-  in gadget mode.
+- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
 
 Example:
 
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 14cdc10..1b5f156 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -447,7 +447,6 @@
 	int (*flush) (struct file *);
 	int (*release) (struct inode *, struct file *);
 	int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
-	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
 	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 219ffd4..74329fd 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -395,32 +395,6 @@
 
  or if empty, the mapping is anonymous.
 
-The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
-of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. Hence, for the example above, the
-task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
-
-08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
-08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
-0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
-a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0
-a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack]
-a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
-a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
-a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
-a800b000-a800e000 rw-p 00000000 00:00 0
-a800e000-a8022000 r-xp 00000000 03:00 14462      /lib/libpthread.so.0
-a8022000-a8023000 r--p 00013000 03:00 14462      /lib/libpthread.so.0
-a8023000-a8024000 rw-p 00014000 03:00 14462      /lib/libpthread.so.0
-a8024000-a8027000 rw-p 00000000 00:00 0
-a8027000-a8043000 r-xp 00000000 03:00 8317       /lib/ld-linux.so.2
-a8043000-a8044000 r--p 0001b000 03:00 8317       /lib/ld-linux.so.2
-a8044000-a8045000 rw-p 0001c000 03:00 8317       /lib/ld-linux.so.2
-aff35000-aff4a000 rw-p 00000000 00:00 0
-ffffe000-fffff000 r-xp 00000000 00:00 0          [vdso]
-
 The /proc/PID/smaps is an extension based on maps, showing the memory
 consumption for each of the process's mappings. For each of mappings there
 is a series of lines such as the following:
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index d619c8d..b5039a0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -828,7 +828,6 @@
 	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
 	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
-	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt
index 40884c4..a0f6189 100644
--- a/Documentation/gpio/board.txt
+++ b/Documentation/gpio/board.txt
@@ -6,7 +6,7 @@
 description of the deprecated integer-based GPIO interface please refer to
 gpio-legacy.txt (actually, there is no real mapping possible with the old
 interface; you just fetch an integer from somewhere and request the
-corresponding GPIO.
+corresponding GPIO).
 
 All platforms can enable the GPIO library, but if the platform strictly
 requires GPIO functionality to be present, it needs to select GPIOLIB from its
@@ -162,6 +162,9 @@
 
 Since the "led" GPIOs are mapped as active-high, this example will switch their
 signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped
-as active-low, its actual signal will be 0 after this code. Contrary to the legacy
-integer GPIO interface, the active-low property is handled during mapping and is
-thus transparent to GPIO consumers.
+as active-low, its actual signal will be 0 after this code. Contrary to the
+legacy integer GPIO interface, the active-low property is handled during
+mapping and is thus transparent to GPIO consumers.
+
+A set of functions such as gpiod_set_value() is available to work with
+the new descriptor-oriented interface.
diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology
index e0aefee..1a014fe 100644
--- a/Documentation/i2c/i2c-topology
+++ b/Documentation/i2c/i2c-topology
@@ -326,7 +326,7 @@
 
 This is a good topology.
 
-                                   .--------.
+                                    .--------.
                    .----------.  .--| dev D1 |
                    |  parent- |--'  '--------'
                 .--|  locked  |     .--------.
@@ -350,7 +350,7 @@
 
 This is a good topology.
 
-                                   .--------.
+                                    .--------.
                    .----------.  .--| dev D1 |
                    |   mux-   |--'  '--------'
                 .--|  locked  |     .--------.
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 6d6c07c..63912ef3 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -67,13 +67,14 @@
 Switch tagging protocols
 ------------------------
 
-DSA currently supports 4 different tagging protocols, and a tag-less mode as
+DSA currently supports 5 different tagging protocols, and a tag-less mode as
 well. The different protocols are implemented in:
 
 net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy)
 net/dsa/tag_dsa.c: Marvell's original DSA tag
 net/dsa/tag_edsa.c: Marvell's enhanced DSA tag
 net/dsa/tag_brcm.c: Broadcom's 4 bytes tag
+net/dsa/tag_qca.c: Qualcomm's 2 bytes tag
 
 The exact format of the tag protocol is vendor specific, but in general, they
 all contain something which:
diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
index 0fe1c6e..a20b2fa 100644
--- a/Documentation/networking/netdev-FAQ.txt
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -29,8 +29,8 @@
    Linus, and net-next is where the new code goes for the future release.
    You can find the trees here:
 
-	http://git.kernel.org/?p=linux/kernel/git/davem/net.git
-	http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git
+        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
 
 Q: How often do changes from these trees make it to the mainline Linus tree?
 
@@ -76,7 +76,7 @@
 
 A: Load the mainline (Linus) page here:
 
-	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
+	https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 
    and note the top of the "tags" section.  If it is rc1, it is early
    in the dev cycle.  If it was tagged rc7 a week ago, then a release
@@ -123,7 +123,7 @@
 
    It contains the patches which Dave has selected, but not yet handed
    off to Greg.  If Greg already has the patch, then it will be here:
-	http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git
+	https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
 
    A quick way to find whether the patch is in this stable-queue is
    to simply clone the repo, and then git grep the mainline commit ID, e.g.
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index 4fb51d3..399e4e8 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -33,24 +33,6 @@
 	If this option is enabled, the connection tracking code will
 	provide userspace with connection tracking events via ctnetlink.
 
-nf_conntrack_events_retry_timeout - INTEGER (seconds)
-	default 15
-
-	This option is only relevant when "reliable connection tracking
-	events" are used.  Normally, ctnetlink is "lossy", that is,
-	events are normally dropped when userspace listeners can't keep up.
-
-	Userspace can request "reliable event mode".  When this mode is
-	active, the conntrack will only be destroyed after the event was
-	delivered.  If event delivery fails, the kernel periodically
-	re-tries to send the event to userspace.
-
-	This is the maximum interval the kernel should use when re-trying
-	to deliver the destroy event.
-
-	A higher number means there will be fewer delivery retries and it
-	will take longer for a backlog to be processed.
-
 nf_conntrack_expect_max - INTEGER
 	Maximum size of expectation table.  Default value is
 	nf_conntrack_buckets / 256. Minimum is 1.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 739db9a..6bbceb9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -777,6 +777,17 @@
 conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.
 
+When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
+set of bits that KVM can return in struct kvm_clock_data's flag member.
+
+The only flag defined now is KVM_CLOCK_TSC_STABLE.  If set, the returned
+value is the exact kvmclock value seen by all VCPUs at the instant
+when KVM_GET_CLOCK was called.  If clear, the returned value is simply
+CLOCK_MONOTONIC plus a constant offset; the offset can be modified
+with KVM_SET_CLOCK.  KVM will try to make all VCPUs follow this clock,
+but the exact value read by each VCPU could differ, because the host
+TSC is not stable.
+
 struct kvm_clock_data {
 	__u64 clock;  /* kvmclock current value */
 	__u32 flags;
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index f2491a8..e5dd9f4 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -4,7 +4,17 @@
 1. Acquisition Orders
 ---------------------
 
-(to be written)
+The acquisition orders for mutexes are as follows:
+
+- kvm->lock is taken outside vcpu->mutex
+
+- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
+
+- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+  them together is quite rare.
+
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
+else is a leaf: no other lock is taken inside the critical sections.
 
 2: Exception
 ------------
diff --git a/MAINTAINERS b/MAINTAINERS
index 1cd38a7..ad9b965 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1442,6 +1442,7 @@
 F:	arch/arm/configs/mvebu_*_defconfig
 
 ARM/Marvell Berlin SoC support
+M:	Jisheng Zhang <jszhang@marvell.com>
 M:	Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
@@ -2551,15 +2552,18 @@
 F:	drivers/net/ethernet/broadcom/genet/
 
 BROADCOM BNX2 GIGABIT ETHERNET DRIVER
-M:	Sony Chacko <sony.chacko@qlogic.com>
-M:	Dept-HSGLinuxNICDev@qlogic.com
+M:	Rasesh Mody <rasesh.mody@cavium.com>
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2.*
 F:	drivers/net/ethernet/broadcom/bnx2_*
 
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:	Ariel Elior <ariel.elior@qlogic.com>
+M:	Yuval Mintz <Yuval.Mintz@cavium.com>
+M:	Ariel Elior <ariel.elior@cavium.com>
+M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2x/
@@ -2766,7 +2770,9 @@
 F:	drivers/scsi/bfa/
 
 BROCADE BNA 10 GIGABIT ETHERNET DRIVER
-M:	Rasesh Mody <rasesh.mody@qlogic.com>
+M:	Rasesh Mody <rasesh.mody@cavium.com>
+M:	Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/brocade/bna/
@@ -4620,8 +4626,9 @@
 
 EXTENSIBLE FIRMWARE INTERFACE (EFI)
 M:	Matt Fleming <matt@codeblueprint.co.uk>
+M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
 L:	linux-efi@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
 S:	Maintained
 F:	Documentation/efi-stub.txt
 F:	arch/ia64/kernel/efi.c
@@ -5286,6 +5293,12 @@
 S:	Maintained
 F:	scripts/get_maintainer.pl
 
+GENWQE (IBM Generic Workqueue Card)
+M:	Frank Haverkamp <haver@linux.vnet.ibm.com>
+M:	Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
+S:	Supported
+F:	drivers/misc/genwqe/
+
 GFS2 FILE SYSTEM
 M:	Steven Whitehouse <swhiteho@redhat.com>
 M:	Bob Peterson <rpeterso@redhat.com>
@@ -7071,6 +7084,7 @@
 LED SUBSYSTEM
 M:	Richard Purdie <rpurdie@rpsys.net>
 M:	Jacek Anaszewski <j.anaszewski@samsung.com>
+M:	Pavel Machek <pavel@ucw.cz>
 L:	linux-leds@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
 S:	Maintained
@@ -7912,6 +7926,10 @@
 MEMORY TECHNOLOGY DEVICES (MTD)
 M:	David Woodhouse <dwmw2@infradead.org>
 M:	Brian Norris <computersforpeace@gmail.com>
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+M:	Marek Vasut <marek.vasut@gmail.com>
+M:	Richard Weinberger <richard@nod.at>
+M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
@@ -8040,6 +8058,7 @@
 F:	include/linux/mlx4/
 
 MELLANOX MLX5 core VPI driver
+M:	Saeed Mahameed <saeedm@mellanox.com>
 M:	Matan Barak <matanb@mellanox.com>
 M:	Leon Romanovsky <leonro@mellanox.com>
 L:	netdev@vger.kernel.org
@@ -8099,6 +8118,7 @@
 F:	drivers/media/dvb-frontends/mn88473*
 
 MODULE SUPPORT
+M:	Jessica Yu <jeyu@redhat.com>
 M:	Rusty Russell <rusty@rustcorp.com.au>
 S:	Maintained
 F:	include/linux/module.h
@@ -8212,7 +8232,7 @@
 MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
 M:	Ulf Hansson <ulf.hansson@linaro.org>
 L:	linux-mmc@vger.kernel.org
-T:	git git://git.linaro.org/people/ulf.hansson/mmc.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/mmc/
 F:	drivers/mmc/
@@ -8508,11 +8528,10 @@
 F:	drivers/net/wireless/
 
 NETXEN (1/10) GbE SUPPORT
-M:	Manish Chopra <manish.chopra@qlogic.com>
-M:	Sony Chacko <sony.chacko@qlogic.com>
-M:	Rajesh Borundia <rajesh.borundia@qlogic.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Rahul Verma <rahul.verma@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
-W:	http://www.qlogic.com
 S:	Supported
 F:	drivers/net/ethernet/qlogic/netxen/
 
@@ -9299,7 +9318,7 @@
 F:	drivers/pci/host/*designware*
 
 PCI DRIVER FOR SYNOPSYS PROTOTYPING DEVICE
-M:	Joao Pinto <jpinto@synopsys.com>
+M:	Jose Abreu <Jose.Abreu@synopsys.com>
 L:	linux-pci@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -9318,7 +9337,7 @@
 M:	Keith Busch <keith.busch@intel.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
-F:	arch/x86/pci/vmd.c
+F:	drivers/pci/host/vmd.c
 
 PCIE DRIVER FOR ST SPEAR13XX
 M:	Pratyush Anand <pratyush.anand@gmail.com>
@@ -9888,33 +9907,32 @@
 F:	drivers/scsi/qla4xxx/
 
 QLOGIC QLA3XXX NETWORK DRIVER
-M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
-M:	Ron Mercer <ron.mercer@qlogic.com>
-M:	linux-driver@qlogic.com
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/LICENSE.qla3xxx
 F:	drivers/net/ethernet/qlogic/qla3xxx.*
 
 QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M:	Dept-GELinuxNICDev@qlogic.com
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qlcnic/
 
 QLOGIC QLGE 10Gb ETHERNET DRIVER
-M:	Harish Patil <harish.patil@qlogic.com>
-M:	Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
-M:	Dept-GELinuxNICDev@qlogic.com
-M:	linux-driver@qlogic.com
+M:	Harish Patil <harish.patil@cavium.com>
+M:	Manish Chopra <manish.chopra@cavium.com>
+M:	Dept-GELinuxNICDev@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qlge/
 
 QLOGIC QL4xxx ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@qlogic.com>
-M:	Ariel Elior <Ariel.Elior@qlogic.com>
-M:	everest-linux-l2@qlogic.com
+M:	Yuval Mintz <Yuval.Mintz@cavium.com>
+M:	Ariel Elior <Ariel.Elior@cavium.com>
+M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qed/
@@ -11392,6 +11410,17 @@
 S:	Maintained
 F:	drivers/clk/spear/
 
+SPI NOR SUBSYSTEM
+M:	Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:	Marek Vasut <marek.vasut@gmail.com>
+L:	linux-mtd@lists.infradead.org
+W:	http://www.linux-mtd.infradead.org/
+Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
+T:	git git://github.com/spi-nor/linux.git
+S:	Maintained
+F:	drivers/mtd/spi-nor/
+F:	include/linux/mtd/spi-nor.h
+
 SPI SUBSYSTEM
 M:	Mark Brown <broonie@kernel.org>
 L:	linux-spi@vger.kernel.org
@@ -12771,6 +12800,7 @@
 
 VIRTIO CORE, NET AND BLOCK DRIVERS
 M:	"Michael S. Tsirkin" <mst@redhat.com>
+M:	Jason Wang <jasowang@redhat.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/virtio/
@@ -12801,6 +12831,7 @@
 
 VIRTIO HOST (VHOST)
 M:	"Michael S. Tsirkin" <mst@redhat.com>
+M:	Jason Wang <jasowang@redhat.com>
 L:	kvm@vger.kernel.org
 L:	virtualization@lists.linux-foundation.org
 L:	netdev@vger.kernel.org
diff --git a/Makefile b/Makefile
index 512e47a..9bc877d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc5
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
@@ -370,7 +370,7 @@
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 LDFLAGS_vmlinux =
-CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
 CFLAGS_KCOV	:= $(call cc-option,-fsanitize-coverage=trace-pc,)
 
 
@@ -399,11 +399,12 @@
 		   -fno-strict-aliasing -fno-common \
 		   -Werror-implicit-function-declaration \
 		   -Wno-format-security \
-		   -std=gnu89
+		   -std=gnu89 $(call cc-option,-fno-PIE)
+
 
 KBUILD_AFLAGS_KERNEL :=
 KBUILD_CFLAGS_KERNEL :=
-KBUILD_AFLAGS   := -D__ASSEMBLY__
+KBUILD_AFLAGS   := -D__ASSEMBLY__ $(call cc-option,-fno-PIE)
 KBUILD_AFLAGS_MODULE  := -DMODULE
 KBUILD_CFLAGS_MODULE  := -DMODULE
 KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
@@ -620,7 +621,6 @@
 include arch/$(SRCARCH)/Makefile
 
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
-KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 
 ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
@@ -629,15 +629,18 @@
 endif
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS	+= -Os
+KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
 else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
-KBUILD_CFLAGS	+= -O2
+KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
 else
 KBUILD_CFLAGS   += -O2
 endif
 endif
 
+KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
+			$(call cc-disable-warning,maybe-uninitialized,))
+
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
 
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index d9ee817..940dfb4 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -157,14 +157,16 @@
 static inline int
 read_int(struct task_struct *task, unsigned long addr, int * data)
 {
-	int copied = access_process_vm(task, addr, data, sizeof(int), 0);
+	int copied = access_process_vm(task, addr, data, sizeof(int),
+			FOLL_FORCE);
 	return (copied == sizeof(int)) ? 0 : -EIO;
 }
 
 static inline int
 write_int(struct task_struct *task, unsigned long addr, int data)
 {
-	int copied = access_process_vm(task, addr, &data, sizeof(int), 1);
+	int copied = access_process_vm(task, addr, &data, sizeof(int),
+			FOLL_FORCE | FOLL_WRITE);
 	return (copied == sizeof(int)) ? 0 : -EIO;
 }
 
@@ -281,7 +283,8 @@
 	/* When I and D space are separate, these will need to be fixed.  */
 	case PTRACE_PEEKTEXT: /* read word at location addr. */
 	case PTRACE_PEEKDATA:
-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp),
+				FOLL_FORCE);
 		ret = -EIO;
 		if (copied != sizeof(tmp))
 			break;
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ecd1237..bd204bf 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -41,6 +41,8 @@
 	select PERF_USE_VMALLOC
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZMA
 
 config MIGHT_HAVE_PCI
 	bool
@@ -186,14 +188,6 @@
 config ARC_HAS_COH_CACHES
 	def_bool n
 
-config ARC_MCIP
-	bool "ARConnect Multicore IP (MCIP) Support "
-	depends on ISA_ARCV2
-	help
-	  This IP block enables SMP in ARC-HS38 cores.
-	  It provides for cross-core interrupts, multi-core debug
-	  hardware semaphores, shared memory,....
-
 config NR_CPUS
 	int "Maximum number of CPUs (2-4096)"
 	range 2 4096
@@ -211,6 +205,15 @@
 
 endif	#SMP
 
+config ARC_MCIP
+	bool "ARConnect Multicore IP (MCIP) Support "
+	depends on ISA_ARCV2
+	default y if SMP
+	help
+	  This IP block enables SMP in ARC-HS38 cores.
+	  It provides for cross-core interrupts, multi-core debug
+	  hardware semaphores, shared memory,....
+
 menuconfig ARC_CACHE
 	bool "Enable Cache Support"
 	default y
@@ -537,14 +540,6 @@
 	bool "Paranoia Checks in Low Level TLB Handlers"
 	default n
 
-config ARC_DBG_TLB_MISS_COUNT
-	bool "Profile TLB Misses"
-	default n
-	select DEBUG_FS
-	help
-	  Counts number of I and D TLB Misses and exports them via Debugfs
-	  The counters can be cleared via Debugfs as well
-
 endif
 
 config ARC_UBOOT_SUPPORT
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index aa82d13..19cce22 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -71,7 +71,9 @@
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
 # Note: No need to add to cflags-y as that happens anyways
-ARCH_CFLAGS += -O3
+#
+# Disable the false maybe-uninitialized warings gcc spits out at -O3
+ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,)
 endif
 
 # small data is default for elf32 tool-chain. If not usable, disable it
diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index e597cb34..f94cf15 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -14,9 +14,15 @@
 
 suffix-y := bin
 suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
 
-targets += uImage uImage.bin uImage.gz
-extra-y += vmlinux.bin vmlinux.bin.gz
+targets += uImage
+targets += uImage.bin
+targets += uImage.gz
+targets += uImage.lzma
+extra-y += vmlinux.bin
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.lzma
 
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
@@ -24,12 +30,18 @@
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,gzip)
 
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzma)
+
 $(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,uimage,none)
 
 $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,uimage,gzip)
 
+$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE
+	$(call if_changed,uimage,lzma)
+
 $(obj)/uImage: $(obj)/uImage.$(suffix-y)
 	@ln -sf $(notdir $<) $@
 	@echo '  Image $@ is ready'
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 6ae2c47..53ce226 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -71,7 +71,7 @@
 			reg-io-width = <4>;
 		};
 
-		arcpmu0: pmu {
+		arcpct0: pct {
 			compatible = "snps,arc700-pct";
 		};
 	};
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
index ce0ccd20..5ee96b0 100644
--- a/arch/arc/boot/dts/nsim_700.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -69,7 +69,7 @@
 			};
 		};
 
-		arcpmu0: pmu {
+		arcpct0: pct {
 			compatible = "snps,arc700-pct";
 		};
 	};
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index bcf6031..3c391ba 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -83,5 +83,9 @@
 			reg = <0xf0003000 0x44>;
 			interrupts = <7>;
 		};
+
+		arcpct0: pct {
+			compatible = "snps,arc700-pct";
+		};
 	};
 };
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 7314f53..b0066a7 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -14,6 +14,7 @@
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index 65ab9fb..ebe9ebb 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -14,6 +14,7 @@
 CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 3b3990c..4bde432 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -12,6 +12,7 @@
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 98cf209..f6fb3d26 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -14,6 +14,7 @@
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index ddf8b96..b9f0fe0 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -14,6 +14,7 @@
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index ceb9074..6da71ba 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -10,6 +10,7 @@
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
@@ -34,7 +35,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -72,7 +72,6 @@
 # CONFIG_HWMON is not set
 CONFIG_DRM=y
 CONFIG_DRM_ARCPGU=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index db25c65..1bd24ec 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -43,12 +43,14 @@
 #define STATUS_AE_BIT		5	/* Exception active */
 #define STATUS_DE_BIT		6	/* PC is in delay slot */
 #define STATUS_U_BIT		7	/* User/Kernel mode */
+#define STATUS_Z_BIT            11
 #define STATUS_L_BIT		12	/* Loop inhibit */
 
 /* These masks correspond to the status word(STATUS_32) bits */
 #define STATUS_AE_MASK		(1<<STATUS_AE_BIT)
 #define STATUS_DE_MASK		(1<<STATUS_DE_BIT)
 #define STATUS_U_MASK		(1<<STATUS_U_BIT)
+#define STATUS_Z_MASK		(1<<STATUS_Z_BIT)
 #define STATUS_L_MASK		(1<<STATUS_L_BIT)
 
 /*
@@ -349,10 +351,11 @@
 	struct cpuinfo_arc_bpu bpu;
 	struct bcr_identity core;
 	struct bcr_isa isa;
+	const char *details, *name;
 	unsigned int vec_base;
 	struct cpuinfo_arc_ccm iccm, dccm;
 	struct {
-		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
+		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
 			     fpu_sp:1, fpu_dp:1, pad2:6,
 			     debug:1, ap:1, smart:1, rtt:1, pad3:4,
 			     timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index fb781e3..b3410ff 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -53,7 +53,7 @@
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
 extern void read_decode_cache_bcr(void);
 
-extern int ioc_exists;
+extern int ioc_enable;
 extern unsigned long perip_base, perip_end;
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index 7096f97..aa2d6da 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -54,7 +54,7 @@
  * the loader.  We need to make sure that it is out of the way of the program
  * that it will "exec", and that there is sufficient room for the brk.
  */
-#define ELF_ET_DYN_BASE		(2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE		(2UL * TASK_SIZE / 3)
 
 /*
  * When the program starts, a1 contains a pointer to a function to be
diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h
index 847e3bb..c8fbe41 100644
--- a/arch/arc/include/asm/mcip.h
+++ b/arch/arc/include/asm/mcip.h
@@ -55,6 +55,22 @@
 #define IDU_M_DISTRI_DEST		0x2
 };
 
+struct mcip_bcr {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int pad3:8,
+			     idu:1, llm:1, num_cores:6,
+			     iocoh:1,  gfrc:1, dbg:1, pad2:1,
+			     msg:1, sem:1, ipi:1, pad:1,
+			     ver:8;
+#else
+		unsigned int ver:8,
+			     pad:1, ipi:1, sem:1, msg:1,
+			     pad2:1, dbg:1, gfrc:1, iocoh:1,
+			     num_cores:6, llm:1, idu:1,
+			     pad3:8;
+#endif
+};
+
 /*
  * MCIP programming model
  *
diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h
index 518222b..6e91d8b 100644
--- a/arch/arc/include/asm/module.h
+++ b/arch/arc/include/asm/module.h
@@ -18,6 +18,7 @@
 struct mod_arch_specific {
 	void *unw_info;
 	int unw_sec_idx;
+	const char *secstr;
 };
 #endif
 
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 48b37c6..cb954cd 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -27,11 +27,6 @@
 	const char *str;
 };
 
-struct cpuinfo_data {
-	struct id_to_str info;
-	int up_range;
-};
-
 extern int root_mountflags, end_mem;
 
 void setup_processor(void);
@@ -43,5 +38,6 @@
 #define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
 #define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
 #define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
+#define IS_AVAIL3(v, v2, s)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
 #endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 89fdd1b..0861007 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -37,9 +37,9 @@
  * API expected BY platform smp code (FROM arch smp code)
  *
  * smp_ipi_irq_setup:
- *	Takes @cpu and @irq to which the arch-common ISR is hooked up
+ *	Takes @cpu and @hwirq to which the arch-common ISR is hooked up
  */
-extern int smp_ipi_irq_setup(int cpu, int irq);
+extern int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq);
 
 /*
  * struct plat_smp_ops	- SMP callbacks provided by platform to ARC SMP
diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h
index e56f9fc..772b67c 100644
--- a/arch/arc/include/asm/syscalls.h
+++ b/arch/arc/include/asm/syscalls.h
@@ -17,6 +17,7 @@
 int sys_cacheflush(uint32_t, uint32_t uint32_t);
 int sys_arc_settls(void *);
 int sys_arc_gettls(void);
+int sys_arc_usr_cmpxchg(int *, int, int);
 
 #include <asm-generic/syscalls.h>
 
diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 41fa2ec..9a34136 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -27,18 +27,19 @@
 
 #define NR_syscalls	__NR_syscalls
 
+/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */
+#define __NR_sysfs		(__NR_arch_specific_syscall + 3)
+
 /* ARC specific syscall */
 #define __NR_cacheflush		(__NR_arch_specific_syscall + 0)
 #define __NR_arc_settls		(__NR_arch_specific_syscall + 1)
 #define __NR_arc_gettls		(__NR_arch_specific_syscall + 2)
+#define __NR_arc_usr_cmpxchg	(__NR_arch_specific_syscall + 4)
 
 __SYSCALL(__NR_cacheflush, sys_cacheflush)
 __SYSCALL(__NR_arc_settls, sys_arc_settls)
 __SYSCALL(__NR_arc_gettls, sys_arc_gettls)
-
-
-/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */
-#define __NR_sysfs		(__NR_arch_specific_syscall + 3)
+__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg)
 __SYSCALL(__NR_sysfs, sys_sysfs)
 
 #undef __SYSCALL
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index f1e07c2..3b67f53 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -31,6 +31,8 @@
 		arc_base_baud = 166666666;	/* Fixed 166.6MHz clk (TB10x) */
 	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
 		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x) */
+	else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
+		arc_base_baud = 800000000;      /* Fixed 800MHz clk (NPS) */
 	else
 		arc_base_baud = 50000000;	/* Fixed default 50MHz */
 }
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 72f9179..f39142a 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -15,11 +15,12 @@
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
-static char smp_cpuinfo_buf[128];
-static int idu_detected;
-
 static DEFINE_RAW_SPINLOCK(mcip_lock);
 
+#ifdef CONFIG_SMP
+
+static char smp_cpuinfo_buf[128];
+
 static void mcip_setup_per_cpu(int cpu)
 {
 	smp_ipi_irq_setup(cpu, IPI_IRQ);
@@ -86,21 +87,7 @@
 
 static void mcip_probe_n_setup(void)
 {
-	struct mcip_bcr {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad3:8,
-			     idu:1, llm:1, num_cores:6,
-			     iocoh:1,  gfrc:1, dbg:1, pad2:1,
-			     msg:1, sem:1, ipi:1, pad:1,
-			     ver:8;
-#else
-		unsigned int ver:8,
-			     pad:1, ipi:1, sem:1, msg:1,
-			     pad2:1, dbg:1, gfrc:1, iocoh:1,
-			     num_cores:6, llm:1, idu:1,
-			     pad3:8;
-#endif
-	} mp;
+	struct mcip_bcr mp;
 
 	READ_BCR(ARC_REG_MCIP_BCR, mp);
 
@@ -114,7 +101,6 @@
 		IS_AVAIL1(mp.gfrc, "GFRC"));
 
 	cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
-	idu_detected = mp.idu;
 
 	if (mp.dbg) {
 		__mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
@@ -130,6 +116,8 @@
 	.ipi_clear	= mcip_ipi_clear,
 };
 
+#endif
+
 /***************************************************************************
  * ARCv2 Interrupt Distribution Unit (IDU)
  *
@@ -193,6 +181,8 @@
 {
 	unsigned long flags;
 	cpumask_t online;
+	unsigned int destination_bits;
+	unsigned int distribution_mode;
 
 	/* errout if no online cpu per @cpumask */
 	if (!cpumask_and(&online, cpumask, cpu_online_mask))
@@ -200,8 +190,15 @@
 
 	raw_spin_lock_irqsave(&mcip_lock, flags);
 
-	idu_set_dest(data->hwirq, cpumask_bits(&online)[0]);
-	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+	destination_bits = cpumask_bits(&online)[0];
+	idu_set_dest(data->hwirq, destination_bits);
+
+	if (ffs(destination_bits) == fls(destination_bits))
+		distribution_mode = IDU_M_DISTRI_DEST;
+	else
+		distribution_mode = IDU_M_DISTRI_RR;
+
+	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode);
 
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
 
@@ -219,16 +216,15 @@
 
 };
 
-static int idu_first_irq;
+static irq_hw_number_t idu_first_hwirq;
 
 static void idu_cascade_isr(struct irq_desc *desc)
 {
-	struct irq_domain *domain = irq_desc_get_handler_data(desc);
-	unsigned int core_irq = irq_desc_get_irq(desc);
-	unsigned int idu_irq;
+	struct irq_domain *idu_domain = irq_desc_get_handler_data(desc);
+	irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc));
+	irq_hw_number_t idu_hwirq = core_hwirq - idu_first_hwirq;
 
-	idu_irq = core_irq - idu_first_irq;
-	generic_handle_irq(irq_find_mapping(domain, idu_irq));
+	generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq));
 }
 
 static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
@@ -294,9 +290,12 @@
 	struct irq_domain *domain;
 	/* Read IDU BCR to confirm nr_irqs */
 	int nr_irqs = of_irq_count(intc);
-	int i, irq;
+	int i, virq;
+	struct mcip_bcr mp;
 
-	if (!idu_detected)
+	READ_BCR(ARC_REG_MCIP_BCR, mp);
+
+	if (!mp.idu)
 		panic("IDU not detected, but DeviceTree using it");
 
 	pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs);
@@ -312,11 +311,11 @@
 		 * however we need it to get the parent virq and set IDU handler
 		 * as first level isr
 		 */
-		irq = irq_of_parse_and_map(intc, i);
+		virq = irq_of_parse_and_map(intc, i);
 		if (!i)
-			idu_first_irq = irq;
+			idu_first_hwirq = irqd_to_hwirq(irq_get_irq_data(virq));
 
-		irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain);
+		irq_set_chained_handler_and_data(virq, idu_cascade_isr, domain);
 	}
 
 	__mcip_cmd(CMD_IDU_ENABLE, 0);
diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c
index 9a28497..42e964d 100644
--- a/arch/arc/kernel/module.c
+++ b/arch/arc/kernel/module.c
@@ -30,17 +30,9 @@
 			      char *secstr, struct module *mod)
 {
 #ifdef CONFIG_ARC_DW2_UNWIND
-	int i;
-
 	mod->arch.unw_sec_idx = 0;
 	mod->arch.unw_info = NULL;
-
-	for (i = 1; i < hdr->e_shnum; i++) {
-		if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) {
-			mod->arch.unw_sec_idx = i;
-			break;
-		}
-	}
+	mod->arch.secstr = secstr;
 #endif
 	return 0;
 }
@@ -59,29 +51,33 @@
 		       unsigned int relsec,	/* sec index for relo sec */
 		       struct module *module)
 {
-	int i, n;
+	int i, n, relo_type;
 	Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr;
 	Elf32_Sym *sym_entry, *sym_sec;
-	Elf32_Addr relocation;
-	Elf32_Addr location;
-	Elf32_Addr sec_to_patch;
-	int relo_type;
+	Elf32_Addr relocation, location, tgt_addr;
+	unsigned int tgtsec;
 
-	sec_to_patch = sechdrs[sechdrs[relsec].sh_info].sh_addr;
+	/*
+	 * @relsec has relocations e.g. .rela.init.text
+	 * @tgtsec is section to patch e.g. .init.text
+	 */
+	tgtsec = sechdrs[relsec].sh_info;
+	tgt_addr = sechdrs[tgtsec].sh_addr;
 	sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr;
 	n = sechdrs[relsec].sh_size / sizeof(*rel_entry);
 
-	pr_debug("\n========== Module Sym reloc ===========================\n");
-	pr_debug("Section to fixup %x\n", sec_to_patch);
+	pr_debug("\nSection to fixup %s @%x\n",
+		 module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr);
 	pr_debug("=========================================================\n");
-	pr_debug("rela->r_off | rela->addend | sym->st_value | ADDR | VALUE\n");
+	pr_debug("r_off\tr_add\tst_value ADDRESS  VALUE\n");
 	pr_debug("=========================================================\n");
 
 	/* Loop thru entries in relocation section */
 	for (i = 0; i < n; i++) {
+		const char *s;
 
 		/* This is where to make the change */
-		location = sec_to_patch + rel_entry[i].r_offset;
+		location = tgt_addr + rel_entry[i].r_offset;
 
 		/* This is the symbol it is referring to.  Note that all
 		   undefined symbols have been resolved.  */
@@ -89,10 +85,15 @@
 
 		relocation = sym_entry->st_value + rel_entry[i].r_addend;
 
-		pr_debug("\t%x\t\t%x\t\t%x  %x %x [%s]\n",
-			rel_entry[i].r_offset, rel_entry[i].r_addend,
-			sym_entry->st_value, location, relocation,
-			strtab + sym_entry->st_name);
+		if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) {
+			s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name;
+		} else {
+			s = strtab + sym_entry->st_name;
+		}
+
+		pr_debug("   %x\t%x\t%x %x %x [%s]\n",
+			 rel_entry[i].r_offset, rel_entry[i].r_addend,
+			 sym_entry->st_value, location, relocation, s);
 
 		/* This assumes modules are built with -mlong-calls
 		 * so any branches/jumps are absolute 32 bit jmps
@@ -111,6 +112,10 @@
 			goto relo_err;
 
 	}
+
+	if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0)
+		module->arch.unw_sec_idx = tgtsec;
+
 	return 0;
 
 relo_err:
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index be1972b..a41a79a 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -41,6 +41,41 @@
 	return task_thread_info(current)->thr_ptr;
 }
 
+SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
+{
+	struct pt_regs *regs = current_pt_regs();
+	int uval = -EFAULT;
+
+	/*
+	 * This is only for old cores lacking LLOCK/SCOND, which by defintion
+	 * can't possibly be SMP. Thus doesn't need to be SMP safe.
+	 * And this also helps reduce the overhead for serializing in
+	 * the UP case
+	 */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
+
+	/* Z indicates to userspace if operation succeded */
+	regs->status32 &= ~STATUS_Z_MASK;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	preempt_disable();
+
+	if (__get_user(uval, uaddr))
+		goto done;
+
+	if (uval == expected) {
+		if (!__put_user(new, uaddr))
+			regs->status32 |= STATUS_Z_MASK;
+	}
+
+done:
+	preempt_enable();
+
+	return uval;
+}
+
 void arch_cpu_idle(void)
 {
 	/* sleep, but enable all interrupts before committing */
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 3df7f9c..0385df7 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -40,6 +40,29 @@
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
+static const struct id_to_str arc_cpu_rel[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
+	{ 0x34, "R4.10"},
+	{ 0x35, "R4.11"},
+#else
+	{ 0x51, "R2.0" },
+	{ 0x52, "R2.1" },
+	{ 0x53, "R3.0" },
+#endif
+	{ 0x00, NULL   }
+};
+
+static const struct id_to_str arc_cpu_nm[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
+	{ 0x20, "ARC 600"   },
+	{ 0x30, "ARC 770"   },  /* 750 identified seperately */
+#else
+	{ 0x40, "ARC EM"  },
+	{ 0x50, "ARC HS38"  },
+#endif
+	{ 0x00, "Unknown"   }
+};
+
 static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
 {
 	if (is_isa_arcompact()) {
@@ -92,11 +115,26 @@
 	struct bcr_timer timer;
 	struct bcr_generic bcr;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+	const struct id_to_str *tbl;
+
 	FIX_PTR(cpu);
 
 	READ_BCR(AUX_IDENTITY, cpu->core);
 	READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa);
 
+	for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) {
+		if (cpu->core.family == tbl->id) {
+			cpu->details = tbl->str;
+			break;
+		}
+	}
+
+	for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) {
+		if ((cpu->core.family & 0xF0) == tbl->id)
+			break;
+	}
+	cpu->name = tbl->str;
+
 	READ_BCR(ARC_REG_TIMERS_BCR, timer);
 	cpu->extn.timer0 = timer.t0;
 	cpu->extn.timer1 = timer.t1;
@@ -111,6 +149,9 @@
 	cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0;        /* 1,3 */
 	cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
 	cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
+	cpu->extn.swape = (cpu->core.family >= 0x34) ? 1 :
+				IS_ENABLED(CONFIG_ARC_HAS_SWAPE);
+
 	READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
 
 	/* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
@@ -160,64 +201,38 @@
 	cpu->extn.rtt = bcr.ver ? 1 : 0;
 
 	cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
+
+	/* some hacks for lack of feature BCR info in old ARC700 cores */
+	if (is_isa_arcompact()) {
+		if (!cpu->isa.ver)	/* ISA BCR absent, use Kconfig info */
+			cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+		else
+			cpu->isa.atomic = cpu->isa.atomic1;
+
+		cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+
+		 /* there's no direct way to distinguish 750 vs. 770 */
+		if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
+			cpu->name = "ARC750";
+	}
 }
 
-static const struct cpuinfo_data arc_cpu_tbl[] = {
-#ifdef CONFIG_ISA_ARCOMPACT
-	{ {0x20, "ARC 600"      }, 0x2F},
-	{ {0x30, "ARC 700"      }, 0x33},
-	{ {0x34, "ARC 700 R4.10"}, 0x34},
-	{ {0x35, "ARC 700 R4.11"}, 0x35},
-#else
-	{ {0x50, "ARC HS38 R2.0"}, 0x51},
-	{ {0x52, "ARC HS38 R2.1"}, 0x52},
-	{ {0x53, "ARC HS38 R3.0"}, 0x53},
-#endif
-	{ {0x00, NULL		} }
-};
-
-
 static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
 	struct bcr_identity *core = &cpu->core;
-	const struct cpuinfo_data *tbl;
-	char *isa_nm;
-	int i, be, atomic;
-	int n = 0;
+	int i, n = 0;
 
 	FIX_PTR(cpu);
 
-	if (is_isa_arcompact()) {
-		isa_nm = "ARCompact";
-		be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
-
-		atomic = cpu->isa.atomic1;
-		if (!cpu->isa.ver)	/* ISA BCR absent, use Kconfig info */
-			atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
-	} else {
-		isa_nm = "ARCv2";
-		be = cpu->isa.be;
-		atomic = cpu->isa.atomic;
-	}
-
 	n += scnprintf(buf + n, len - n,
 		       "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
 		       core->family, core->cpu_id, core->chip_id);
 
-	for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) {
-		if ((core->family >= tbl->info.id) &&
-		    (core->family <= tbl->up_range)) {
-			n += scnprintf(buf + n, len - n,
-				       "processor [%d]\t: %s (%s ISA) %s\n",
-				       cpu_id, tbl->info.str, isa_nm,
-				       IS_AVAIL1(be, "[Big-Endian]"));
-			break;
-		}
-	}
-
-	if (tbl->info.id == 0)
-		n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n");
+	n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n",
+		       cpu_id, cpu->name, cpu->details,
+		       is_isa_arcompact() ? "ARCompact" : "ARCv2",
+		       IS_AVAIL1(cpu->isa.be, "[Big-Endian]"));
 
 	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
 		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
@@ -226,7 +241,7 @@
 				 CONFIG_ARC_HAS_RTC));
 
 	n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
-			   IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+			   IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
 			   IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
 			   IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
 
@@ -253,7 +268,7 @@
 		       IS_AVAIL1(cpu->extn.swap, "swap "),
 		       IS_AVAIL1(cpu->extn.minmax, "minmax "),
 		       IS_AVAIL1(cpu->extn.crc, "crc "),
-		       IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE));
+		       IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE));
 
 	if (cpu->bpu.ver)
 		n += scnprintf(buf + n, len - n,
@@ -272,9 +287,7 @@
 
 	FIX_PTR(cpu);
 
-	n += scnprintf(buf + n, len - n,
-		       "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n",
-		       cpu->vec_base, perip_base, perip_end);
+	n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
 
 	if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
 		n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
@@ -507,7 +520,7 @@
 	 * way to pass it w/o having to kmalloc/free a 2 byte string.
 	 * Encode cpu-id as 0xFFcccc, which is decoded by show routine.
 	 */
-	return *pos < num_possible_cpus() ? cpu_to_ptr(*pos) : NULL;
+	return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index f183cc6..88674d9 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -22,6 +22,7 @@
 #include <linux/atomic.h>
 #include <linux/cpumask.h>
 #include <linux/reboot.h>
+#include <linux/irqdomain.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
@@ -67,11 +68,13 @@
 	int i;
 
 	/*
-	 * Initialise the present map, which describes the set of CPUs
-	 * actually populated at the present time.
+	 * if platform didn't set the present map already, do it now
+	 * boot cpu is set to present already by init/main.c
 	 */
-	for (i = 0; i < max_cpus; i++)
-		set_cpu_present(i, true);
+	if (num_present_cpus() <= 1) {
+		for (i = 0; i < max_cpus; i++)
+			set_cpu_present(i, true);
+	}
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -351,20 +354,24 @@
  */
 static DEFINE_PER_CPU(int, ipi_dev);
 
-int smp_ipi_irq_setup(int cpu, int irq)
+int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq)
 {
 	int *dev = per_cpu_ptr(&ipi_dev, cpu);
+	unsigned int virq = irq_find_mapping(NULL, hwirq);
+
+	if (!virq)
+		panic("Cannot find virq for root domain and hwirq=%lu", hwirq);
 
 	/* Boot cpu calls request, all call enable */
 	if (!cpu) {
 		int rc;
 
-		rc = request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev);
+		rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev);
 		if (rc)
-			panic("Percpu IRQ request failed for %d\n", irq);
+			panic("Percpu IRQ request failed for %u\n", virq);
 	}
 
-	enable_percpu_irq(irq, 0);
+	enable_percpu_irq(virq, 0);
 
 	return 0;
 }
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index f927b8d..c10390d 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -152,14 +152,17 @@
 		cycle_t  full;
 	} stamp;
 
-
-	__asm__ __volatile(
-	"1:						\n"
-	"	lr		%0, [AUX_RTC_LOW]	\n"
-	"	lr		%1, [AUX_RTC_HIGH]	\n"
-	"	lr		%2, [AUX_RTC_CTRL]	\n"
-	"	bbit0.nt	%2, 31, 1b		\n"
-	: "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
+	/*
+	 * hardware has an internal state machine which tracks readout of
+	 * low/high and updates the CTRL.status if
+	 *  - interrupt/exception taken between the two reads
+	 *  - high increments after low has been read
+	 */
+	do {
+		stamp.low = read_aux_reg(AUX_RTC_LOW);
+		stamp.high = read_aux_reg(AUX_RTC_HIGH);
+		status = read_aux_reg(AUX_RTC_CTRL);
+	} while (!(status & _BITUL(31)));
 
 	return stamp.full;
 }
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 934150e..82f9bc8 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -237,113 +237,3 @@
 	if (!user_mode(regs))
 		show_stacktrace(current, regs);
 }
-
-#ifdef CONFIG_DEBUG_FS
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/debugfs.h>
-
-static struct dentry *test_dentry;
-static struct dentry *test_dir;
-static struct dentry *test_u32_dentry;
-
-static u32 clr_on_read = 1;
-
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-u32 numitlb, numdtlb, num_pte_not_present;
-
-static int fill_display_data(char *kbuf)
-{
-	size_t num = 0;
-	num += sprintf(kbuf + num, "I-TLB Miss %x\n", numitlb);
-	num += sprintf(kbuf + num, "D-TLB Miss %x\n", numdtlb);
-	num += sprintf(kbuf + num, "PTE not present %x\n", num_pte_not_present);
-
-	if (clr_on_read)
-		numitlb = numdtlb = num_pte_not_present = 0;
-
-	return num;
-}
-
-static int tlb_stats_open(struct inode *inode, struct file *file)
-{
-	file->private_data = (void *)__get_free_page(GFP_KERNEL);
-	return 0;
-}
-
-/* called on user read(): display the counters */
-static ssize_t tlb_stats_output(struct file *file,	/* file descriptor */
-				char __user *user_buf,	/* user buffer */
-				size_t len,		/* length of buffer */
-				loff_t *offset)		/* offset in the file */
-{
-	size_t num;
-	char *kbuf = (char *)file->private_data;
-
-	/* All of the data can he shoved in one iteration */
-	if (*offset != 0)
-		return 0;
-
-	num = fill_display_data(kbuf);
-
-	/* simple_read_from_buffer() is helper for copy to user space
-	   It copies up to @2 (num) bytes from kernel buffer @4 (kbuf) at offset
-	   @3 (offset) into the user space address starting at @1 (user_buf).
-	   @5 (len) is max size of user buffer
-	 */
-	return simple_read_from_buffer(user_buf, num, offset, kbuf, len);
-}
-
-/* called on user write : clears the counters */
-static ssize_t tlb_stats_clear(struct file *file, const char __user *user_buf,
-			       size_t length, loff_t *offset)
-{
-	numitlb = numdtlb = num_pte_not_present = 0;
-	return length;
-}
-
-static int tlb_stats_close(struct inode *inode, struct file *file)
-{
-	free_page((unsigned long)(file->private_data));
-	return 0;
-}
-
-static const struct file_operations tlb_stats_file_ops = {
-	.read = tlb_stats_output,
-	.write = tlb_stats_clear,
-	.open = tlb_stats_open,
-	.release = tlb_stats_close
-};
-#endif
-
-static int __init arc_debugfs_init(void)
-{
-	test_dir = debugfs_create_dir("arc", NULL);
-
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-	test_dentry = debugfs_create_file("tlb_stats", 0444, test_dir, NULL,
-					  &tlb_stats_file_ops);
-#endif
-
-	test_u32_dentry =
-	    debugfs_create_u32("clr_on_read", 0444, test_dir, &clr_on_read);
-
-	return 0;
-}
-
-module_init(arc_debugfs_init);
-
-static void __exit arc_debugfs_exit(void)
-{
-	debugfs_remove(test_u32_dentry);
-	debugfs_remove(test_dentry);
-	debugfs_remove(test_dir);
-}
-module_exit(arc_debugfs_exit);
-
-#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 97dddbe..2b96cfc 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -22,8 +22,8 @@
 #include <asm/setup.h>
 
 static int l2_line_sz;
-int ioc_exists;
-volatile int slc_enable = 1, ioc_enable = 1;
+static int ioc_exists;
+int slc_enable = 1, ioc_enable = 1;
 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
@@ -53,18 +53,15 @@
 	PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
 	PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
 
-	if (!is_isa_arcv2())
-                return buf;
-
 	p = &cpuinfo_arc700[c].slc;
 	if (p->ver)
 		n += scnprintf(buf + n, len - n,
 			       "SLC\t\t: %uK, %uB Line%s\n",
 			       p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
 
-	if (ioc_exists)
-		n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
-				IS_DISABLED_RUN(ioc_enable));
+	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+		       perip_base,
+		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));
 
 	return buf;
 }
@@ -113,8 +110,10 @@
 	}
 
 	READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
-	if (cbcr.c && ioc_enable)
+	if (cbcr.c)
 		ioc_exists = 1;
+	else
+		ioc_enable = 0;
 
 	/* HS 2.0 didn't have AUX_VOL */
 	if (cpuinfo_arc700[cpu].core.family > 0x51) {
@@ -1002,7 +1001,7 @@
 			read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
 	}
 
-	if (is_isa_arcv2() && ioc_exists) {
+	if (is_isa_arcv2() && ioc_enable) {
 		/* IO coherency base - 0x8z */
 		write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
 		/* IO coherency aperture size - 512Mb: 0x8z-0xAz */
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 20afc65..cd8aad8 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -45,7 +45,7 @@
 	 *   -For coherent data, Read/Write to buffers terminate early in cache
 	 *   (vs. always going to memory - thus are faster)
 	 */
-	if ((is_isa_arcv2() && ioc_exists) ||
+	if ((is_isa_arcv2() && ioc_enable) ||
 	    (attrs & DMA_ATTR_NON_CONSISTENT))
 		need_coh = 0;
 
@@ -97,7 +97,7 @@
 	int is_non_coh = 1;
 
 	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-			(is_isa_arcv2() && ioc_exists);
+			(is_isa_arcv2() && ioc_enable);
 
 	if (PageHighMem(page) || !is_non_coh)
 		iounmap((void __force __iomem *)vaddr);
@@ -105,6 +105,31 @@
 	__free_pages(page, get_order(size));
 }
 
+static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			unsigned long attrs)
+{
+	unsigned long user_count = vma_pages(vma);
+	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr));
+	unsigned long off = vma->vm_pgoff;
+	int ret = -ENXIO;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < count && user_count <= (count - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      user_count << PAGE_SHIFT,
+				      vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
 /*
  * streaming DMA Mapping API...
  * CPU accesses page via normal paddr, thus needs to explicitly made
@@ -193,6 +218,7 @@
 struct dma_map_ops arc_dma_ops = {
 	.alloc			= arc_dma_alloc,
 	.free			= arc_dma_free,
+	.mmap			= arc_dma_mmap,
 	.map_page		= arc_dma_map_page,
 	.map_sg			= arc_dma_map_sg,
 	.sync_single_for_device	= arc_dma_sync_single_for_device,
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index ec868a9..bdb295e 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -793,16 +793,16 @@
 	char super_pg[64] = "";
 
 	if (p_mmu->s_pg_sz_m)
-		scnprintf(super_pg, 64, "%dM Super Page%s, ",
+		scnprintf(super_pg, 64, "%dM Super Page %s",
 			  p_mmu->s_pg_sz_m,
 			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
-		       IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
+		       IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
 
 	return buf;
 }
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index f1967ee..b30e4e3 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -237,15 +237,6 @@
 
 2:
 
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-	and.f 0, r0, _PAGE_PRESENT
-	bz   1f
-	ld   r3, [num_pte_not_present]
-	add  r3, r3, 1
-	st   r3, [num_pte_not_present]
-1:
-#endif
-
 .endm
 
 ;-----------------------------------------------------------------
@@ -309,12 +300,6 @@
 
 	TLBMISS_FREEUP_REGS
 
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-	ld  r0, [@numitlb]
-	add r0, r0, 1
-	st  r0, [@numitlb]
-#endif
-
 	;----------------------------------------------------------------
 	; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA
 	LOAD_FAULT_PTE
@@ -349,12 +334,6 @@
 
 	TLBMISS_FREEUP_REGS
 
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-	ld  r0, [@numdtlb]
-	add r0, r0, 1
-	st  r0, [@numdtlb]
-#endif
-
 	;----------------------------------------------------------------
 	; Get the PTE corresponding to V-addr accessed
 	; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA
diff --git a/arch/arc/plat-eznps/smp.c b/arch/arc/plat-eznps/smp.c
index 5e901f8..56a4c85 100644
--- a/arch/arc/plat-eznps/smp.c
+++ b/arch/arc/plat-eznps/smp.c
@@ -140,16 +140,10 @@
 	mtm_enable_core(cpu);
 }
 
-static void eznps_ipi_clear(int irq)
-{
-	write_aux_reg(CTOP_AUX_IACK, 1 << irq);
-}
-
 struct plat_smp_ops plat_smp_ops = {
 	.info		= smp_cpuinfo_buf,
 	.init_early_smp	= eznps_init_cpumasks,
 	.cpu_kick	= eznps_smp_wakeup_cpu,
 	.ipi_send	= eznps_ipi_send,
 	.init_per_cpu	= eznps_init_per_cpu,
-	.ipi_clear	= eznps_ipi_clear,
 };
diff --git a/arch/arm/boot/dts/imx53-qsb.dts b/arch/arm/boot/dts/imx53-qsb.dts
index dec4b07..3799396 100644
--- a/arch/arm/boot/dts/imx53-qsb.dts
+++ b/arch/arm/boot/dts/imx53-qsb.dts
@@ -64,8 +64,8 @@
 			};
 
 			ldo3_reg: ldo3 {
-				regulator-min-microvolt = <600000>;
-				regulator-max-microvolt = <1800000>;
+				regulator-min-microvolt = <1725000>;
+				regulator-max-microvolt = <3300000>;
 				regulator-always-on;
 			};
 
@@ -76,8 +76,8 @@
 			};
 
 			ldo5_reg: ldo5 {
-				regulator-min-microvolt = <1725000>;
-				regulator-max-microvolt = <3300000>;
+				regulator-min-microvolt = <1200000>;
+				regulator-max-microvolt = <3600000>;
 				regulator-always-on;
 			};
 
@@ -100,14 +100,14 @@
 			};
 
 			ldo9_reg: ldo9 {
-				regulator-min-microvolt = <1200000>;
+				regulator-min-microvolt = <1250000>;
 				regulator-max-microvolt = <3600000>;
 				regulator-always-on;
 			};
 
 			ldo10_reg: ldo10 {
-				regulator-min-microvolt = <1250000>;
-				regulator-max-microvolt = <3650000>;
+				regulator-min-microvolt = <1200000>;
+				regulator-max-microvolt = <3600000>;
 				regulator-always-on;
 			};
 		};
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 0ff1c2d..26cce4d 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -13,6 +13,11 @@
 		};
 	};
 
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0>;
+	};
+
 	wl12xx_vmmc: wl12xx_vmmc {
 		compatible = "regulator-fixed";
 		regulator-name = "vwl1271";
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 731ec37..8f9a69c 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -13,9 +13,9 @@
 		};
 	};
 
-	memory@0 {
+	memory@80000000 {
 		device_type = "memory";
-		reg = <0 0>;
+		reg = <0x80000000 0>;
 	};
 
 	leds {
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 6365635..4caadb2 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -124,6 +124,7 @@
 		compatible = "ti,abe-twl6040";
 		ti,model = "omap5-uevm";
 
+		ti,jack-detection;
 		ti,mclk-freq = <19200000>;
 
 		ti,mcpdm = <&mcpdm>;
@@ -415,7 +416,7 @@
 			ti,backup-battery-charge-high-current;
 		};
 
-		gpadc {
+		gpadc: gpadc {
 			compatible = "ti,palmas-gpadc";
 			interrupts = <18 0
 				      16 0
@@ -475,8 +476,8 @@
 				smps6_reg: smps6 {
 					/* VDD_DDR3 - over VDD_SMPS6 */
 					regulator-name = "smps6";
-					regulator-min-microvolt = <1200000>;
-					regulator-max-microvolt = <1200000>;
+					regulator-min-microvolt = <1350000>;
+					regulator-max-microvolt = <1350000>;
 					regulator-always-on;
 					regulator-boot-on;
 				};
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index b3df1c6..386eee6 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -239,14 +239,25 @@
 			arm,primecell-periphid = <0x10480180>;
 			max-frequency = <100000000>;
 			bus-width = <4>;
+			cap-sd-highspeed;
 			cap-mmc-highspeed;
+			sd-uhs-sdr12;
+			sd-uhs-sdr25;
+			/* All direction control is used */
+			st,sig-dir-cmd;
+			st,sig-dir-dat0;
+			st,sig-dir-dat2;
+			st,sig-dir-dat31;
+			st,sig-pin-fbclk;
+			full-pwr-cycle;
 			vmmc-supply = <&ab8500_ldo_aux3_reg>;
 			vqmmc-supply = <&vmmci>;
 			pinctrl-names = "default", "sleep";
 			pinctrl-0 = <&sdi0_default_mode>;
 			pinctrl-1 = <&sdi0_sleep_mode>;
 
-			cd-gpios  = <&gpio6 26 GPIO_ACTIVE_LOW>; // 218
+			/* GPIO218 MMC_CD */
+			cd-gpios  = <&gpio6 26 GPIO_ACTIVE_LOW>;
 
 			status = "okay";
 		};
@@ -549,7 +560,7 @@
 					/* VMMCI level-shifter enable */
 					snowball_cfg3 {
 						pins = "GPIO217_AH12";
-						ste,config = <&gpio_out_lo>;
+						ste,config = <&gpio_out_hi>;
 					};
 					/* VMMCI level-shifter voltage select */
 					snowball_cfg4 {
diff --git a/arch/arm/boot/dts/stih410-b2260.dts b/arch/arm/boot/dts/stih410-b2260.dts
index ef2ff2f..7fb507f 100644
--- a/arch/arm/boot/dts/stih410-b2260.dts
+++ b/arch/arm/boot/dts/stih410-b2260.dts
@@ -74,7 +74,7 @@
 		/* Low speed expansion connector */
 		spi0: spi@9844000 {
 			label = "LS-SPI0";
-			cs-gpio = <&pio30 3 0>;
+			cs-gpios = <&pio30 3 0>;
 			status = "okay";
 		};
 
diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
index 48fc24f..300a1bd 100644
--- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
@@ -282,11 +282,15 @@
 			uart1_pins_a: uart1@0 {
 				allwinner,pins = "PG6", "PG7";
 				allwinner,function = "uart1";
+				allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+				allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
 			};
 
 			uart1_pins_cts_rts_a: uart1-cts-rts@0 {
 				allwinner,pins = "PG8", "PG9";
 				allwinner,function = "uart1";
+				allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+				allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
 			};
 
 			mmc0_pins_a: mmc0@0 {
diff --git a/arch/arm/boot/dts/uniphier-pro5.dtsi b/arch/arm/boot/dts/uniphier-pro5.dtsi
index 2c49c36..5357ea9 100644
--- a/arch/arm/boot/dts/uniphier-pro5.dtsi
+++ b/arch/arm/boot/dts/uniphier-pro5.dtsi
@@ -184,11 +184,11 @@
 };
 
 &mio_clk {
-	compatible = "socionext,uniphier-pro5-mio-clock";
+	compatible = "socionext,uniphier-pro5-sd-clock";
 };
 
 &mio_rst {
-	compatible = "socionext,uniphier-pro5-mio-reset";
+	compatible = "socionext,uniphier-pro5-sd-reset";
 };
 
 &peri_clk {
diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi
index 8789cd5..950f07b 100644
--- a/arch/arm/boot/dts/uniphier-pxs2.dtsi
+++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi
@@ -197,11 +197,11 @@
 };
 
 &mio_clk {
-	compatible = "socionext,uniphier-pxs2-mio-clock";
+	compatible = "socionext,uniphier-pxs2-sd-clock";
 };
 
 &mio_rst {
-	compatible = "socionext,uniphier-pxs2-mio-reset";
+	compatible = "socionext,uniphier-pxs2-sd-reset";
 };
 
 &peri_clk {
diff --git a/arch/arm/boot/dts/vf500.dtsi b/arch/arm/boot/dts/vf500.dtsi
index a3824e6..d7fdb2a 100644
--- a/arch/arm/boot/dts/vf500.dtsi
+++ b/arch/arm/boot/dts/vf500.dtsi
@@ -70,7 +70,7 @@
 			global_timer: timer@40002200 {
 				compatible = "arm,cortex-a9-global-timer";
 				reg = <0x40002200 0x20>;
-				interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+				interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
 				interrupt-parent = <&intc>;
 				clocks = <&clks VF610_CLK_PLATFORM_BUS>;
 			};
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 437d074..11f37ed 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -850,6 +850,7 @@
 CONFIG_PWM_TEGRA=y
 CONFIG_PWM_VT8500=y
 CONFIG_PHY_HIX5HD2_SATA=y
+CONFIG_E1000E=y
 CONFIG_PWM_STI=y
 CONFIG_PWM_BCM2835=y
 CONFIG_PWM_BRCMSTB=m
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index d7ea6bc..8ef0538 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -66,6 +66,7 @@
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 2d19e02..d5423ab 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -57,6 +57,9 @@
 	/* VTTBR value associated with below pgd and vmid */
 	u64    vttbr;
 
+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* Timer */
 	struct arch_timer_kvm	timer;
 
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 343135e..5850890 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -71,6 +71,7 @@
 #define ICIALLUIS	__ACCESS_CP15(c7, 0, c1, 0)
 #define ATS1CPR		__ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS	__ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALL		__ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS	__ACCESS_CP15(c8, 4, c3, 4)
 #define PRRR		__ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR		__ACCESS_CP15(c10, 0, c2, 1)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 03e9273..19b5f5c 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -114,11 +114,18 @@
  */
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	int ret = 0;
+	int ret, cpu;
 
 	if (type)
 		return -EINVAL;
 
+	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+	if (!kvm->arch.last_vcpu_ran)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
+
 	ret = kvm_alloc_stage2_pgd(kvm);
 	if (ret)
 		goto out_fail_alloc;
@@ -141,6 +148,8 @@
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
 out_fail_alloc:
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
 	return ret;
 }
 
@@ -168,6 +177,9 @@
 {
 	int i;
 
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -312,6 +324,19 @@
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	int *last_ran;
+
+	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+
+	/*
+	 * We might get preempted before the vCPU actually runs, but
+	 * over-invalidation doesn't affect correctness.
+	 */
+	if (*last_ran != vcpu->vcpu_id) {
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+		*last_ran = vcpu->vcpu_id;
+	}
+
 	vcpu->cpu = cpu;
 	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
@@ -1312,6 +1337,13 @@
 		goto out_err;
 	}
 
+	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
+				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
+	if (err) {
+		kvm_err("Cannot map bss section\n");
+		goto out_err;
+	}
+
 	/*
 	 * Map the Hyp stack pages
 	 */
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 7296528..6d810af 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -55,6 +55,21 @@
 	__kvm_tlb_flush_vmid(kvm);
 }
 
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+	/* Switch to requested VMID */
+	write_sysreg(kvm->arch.vttbr, VTTBR);
+	isb();
+
+	write_sysreg(0, TLBIALL);
+	dsb(nsh);
+	isb();
+
+	write_sysreg(0, VTTBR);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	write_sysreg(0, TLBIALLNSNHIS);
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 0df062d..b54db47 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -408,7 +408,7 @@
 static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 {
 	struct clk *clk;
-	int i;
+	int i, ret;
 
 	imx6q_pu_domain.reg = pu_reg;
 
@@ -430,13 +430,22 @@
 	if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
 		return 0;
 
-	pm_genpd_init(&imx6q_pu_domain.base, NULL, false);
-	return of_genpd_add_provider_onecell(dev->of_node,
-					     &imx_gpc_onecell_data);
+	for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++)
+		pm_genpd_init(imx_gpc_domains[i], NULL, false);
 
+	ret =  of_genpd_add_provider_onecell(dev->of_node,
+					     &imx_gpc_onecell_data);
+	if (ret)
+		goto power_off;
+
+	return 0;
+
+power_off:
+	imx6q_pm_pu_power_off(&imx6q_pu_domain.base);
 clk_err:
 	while (i--)
 		clk_put(imx6q_pu_domain.clk[i]);
+	imx6q_pu_domain.reg = NULL;
 	return -EINVAL;
 }
 
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 97fd251..45801b27 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -173,7 +173,7 @@
 				ksz9021rn_phy_fixup);
 		phy_register_fixup_for_uid(PHY_ID_KSZ9031, MICREL_PHY_ID_MASK,
 				ksz9031rn_phy_fixup);
-		phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffff,
+		phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffef,
 				ar8031_phy_fixup);
 		phy_register_fixup_for_uid(PHY_ID_AR8035, 0xffffffef,
 				ar8035_phy_fixup);
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index f9b6bd3..541647f 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -23,6 +23,7 @@
 	select CACHE_L2X0
 	select ARM_CPU_SUSPEND
 	select MACH_MVEBU_ANY
+	select MVEBU_CLK_COREDIV
 
 config MACH_ARMADA_370
 	bool "Marvell Armada 370 boards"
@@ -32,7 +33,6 @@
 	select CPU_PJ4B
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_370
-	select MVEBU_CLK_COREDIV
 	help
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Armada 370 SoC with device tree.
@@ -50,7 +50,6 @@
 	select HAVE_SMP
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_375
-	select MVEBU_CLK_COREDIV
 	help
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Armada 375 SoC with device tree.
@@ -68,7 +67,6 @@
 	select HAVE_SMP
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_38X
-	select MVEBU_CLK_COREDIV
 	help
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Armada 380/385 SoC with device tree.
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index a9afeeb..0465338 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -71,6 +71,7 @@
 	select HAVE_ARM_TWD
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_775420
+	select OMAP_INTERCONNECT
 
 config SOC_DRA7XX
 	bool "TI DRA7XX"
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 2abd53a..cc6d9fa 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -205,11 +205,15 @@
 
 #define OMAP3_SHOW_FEATURE(feat)		\
 	if (omap3_has_ ##feat())		\
-		printk(#feat" ");
+		n += scnprintf(buf + n, sizeof(buf) - n, #feat " ");
 
 static void __init omap3_cpuinfo(void)
 {
 	const char *cpu_name;
+	char buf[64];
+	int n = 0;
+
+	memset(buf, 0, sizeof(buf));
 
 	/*
 	 * OMAP3430 and OMAP3530 are assumed to be same.
@@ -241,10 +245,10 @@
 		cpu_name = "OMAP3503";
 	}
 
-	sprintf(soc_name, "%s", cpu_name);
+	scnprintf(soc_name, sizeof(soc_name), "%s", cpu_name);
 
 	/* Print verbose information */
-	pr_info("%s %s (", soc_name, soc_rev);
+	n += scnprintf(buf, sizeof(buf) - n, "%s %s (", soc_name, soc_rev);
 
 	OMAP3_SHOW_FEATURE(l2cache);
 	OMAP3_SHOW_FEATURE(iva);
@@ -252,8 +256,10 @@
 	OMAP3_SHOW_FEATURE(neon);
 	OMAP3_SHOW_FEATURE(isp);
 	OMAP3_SHOW_FEATURE(192mhz_clk);
-
-	printk(")\n");
+	if (*(buf + n - 1) == ' ')
+		n--;
+	n += scnprintf(buf + n, sizeof(buf) - n, ")\n");
+	pr_info("%s", buf);
 }
 
 #define OMAP3_CHECK_FEATURE(status,feat)				\
diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c
index 62680aa..718981b 100644
--- a/arch/arm/mach-omap2/prm3xxx.c
+++ b/arch/arm/mach-omap2/prm3xxx.c
@@ -319,6 +319,9 @@
 	if (has_uart4) {
 		en_uart4_mask = OMAP3630_EN_UART4_MASK;
 		grpsel_uart4_mask = OMAP3630_GRPSEL_UART4_MASK;
+	} else {
+		en_uart4_mask = 0;
+		grpsel_uart4_mask = 0;
 	}
 
 	/* Enable wakeups in PER */
diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c
index cba8cad..cd15dbd 100644
--- a/arch/arm/mach-omap2/voltage.c
+++ b/arch/arm/mach-omap2/voltage.c
@@ -87,6 +87,12 @@
 		return -ENODATA;
 	}
 
+	if (!voltdm->volt_data) {
+		pr_err("%s: No voltage data defined for vdd_%s\n",
+			__func__, voltdm->name);
+		return -ENODATA;
+	}
+
 	/* Adjust voltage to the exact voltage from the OPP table */
 	for (i = 0; voltdm->volt_data[i].volt_nominal != 0; i++) {
 		if (voltdm->volt_data[i].volt_nominal >= target_volt) {
diff --git a/arch/arm/mach-uniphier/Kconfig b/arch/arm/mach-uniphier/Kconfig
index 82dddee..3930fbb 100644
--- a/arch/arm/mach-uniphier/Kconfig
+++ b/arch/arm/mach-uniphier/Kconfig
@@ -1,6 +1,7 @@
 config ARCH_UNIPHIER
 	bool "Socionext UniPhier SoCs"
 	depends on ARCH_MULTI_V7
+	select ARCH_HAS_RESET_CONTROLLER
 	select ARM_AMBA
 	select ARM_GLOBAL_TIMER
 	select ARM_GIC
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 30398db..969ef88 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -915,7 +915,7 @@
 
 config RANDOMIZE_MODULE_REGION_FULL
 	bool "Randomize the module region independently from the core kernel"
-	depends on RANDOMIZE_BASE
+	depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
 	default y
 	help
 	  Randomizes the location of the module region without considering the
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index cfbdf02..101794f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -190,6 +190,7 @@
 
 config ARCH_UNIPHIER
 	bool "Socionext UniPhier SoC Family"
+	select ARCH_HAS_RESET_CONTROLLER
 	select PINCTRL
 	help
 	  This enables support for Socionext UniPhier SoC family.
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index ab51aed..3635b86 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -15,7 +15,7 @@
 GZFLAGS		:=-9
 
 ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux		+= -pie -Bsymbolic
+LDFLAGS_vmlinux		+= -pie -shared -Bsymbolic
 endif
 
 ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts
index 2d7872a..b09f3bc 100644
--- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts
+++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts
@@ -164,6 +164,8 @@
 		nand-ecc-mode = "hw";
 		nand-ecc-strength = <8>;
 		nand-ecc-step-size = <512>;
+		nand-bus-width = <16>;
+		brcm,nand-oob-sector-size = <16>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 	};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index 220ac70..97d331e 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -123,6 +123,7 @@
 			     <1 14 0xf08>, /* Physical Non-Secure PPI */
 			     <1 11 0xf08>, /* Virtual PPI */
 			     <1 10 0xf08>; /* Hypervisor PPI */
+		fsl,erratum-a008585;
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index 337da90..7f0dc13 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -195,6 +195,7 @@
 			     <1 14 4>, /* Physical Non-Secure PPI, active-low */
 			     <1 11 4>, /* Virtual PPI, active-low */
 			     <1 10 4>; /* Hypervisor PPI, active-low */
+		fsl,erratum-a008585;
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index c476253..e9bd587 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -105,7 +105,7 @@
 				status = "disabled";
 			};
 
-			nb_perih_clk: nb-periph-clk@13000{
+			nb_periph_clk: nb-periph-clk@13000 {
 				compatible = "marvell,armada-3700-periph-clock-nb";
 				reg = <0x13000 0x100>;
 				clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
@@ -113,7 +113,7 @@
 				#clock-cells = <1>;
 			};
 
-			sb_perih_clk: sb-periph-clk@18000{
+			sb_periph_clk: sb-periph-clk@18000 {
 				compatible = "marvell,armada-3700-periph-clock-sb";
 				reg = <0x18000 0x100>;
 				clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index e5e3ed6..602e2c2 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -131,7 +131,7 @@
 				#address-cells = <0x1>;
 				#size-cells = <0x0>;
 				cell-index = <1>;
-				clocks = <&cpm_syscon0 0 3>;
+				clocks = <&cpm_syscon0 1 21>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 842fb33..6bf9e24 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -130,8 +130,8 @@
 				reg = <0x700600 0x50>;
 				#address-cells = <0x1>;
 				#size-cells = <0x0>;
-				cell-index = <1>;
-				clocks = <&cps_syscon0 0 3>;
+				cell-index = <3>;
+				clocks = <&cps_syscon0 1 21>;
 				status = "disabled";
 			};
 
@@ -140,7 +140,7 @@
 				reg = <0x700680 0x50>;
 				#address-cells = <1>;
 				#size-cells = <0>;
-				cell-index = <2>;
+				cell-index = <4>;
 				clocks = <&cps_syscon0 1 21>;
 				status = "disabled";
 			};
diff --git a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts
index 46cdddf..e5eeca2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts
@@ -116,7 +116,6 @@
 	cap-mmc-highspeed;
 	clock-frequency = <150000000>;
 	disable-wp;
-	keep-power-in-suspend;
 	non-removable;
 	num-slots = <1>;
 	vmmc-supply = <&vcc_io>;
@@ -258,8 +257,6 @@
 			};
 
 			vcc_sd: SWITCH_REG1 {
-				regulator-always-on;
-				regulator-boot-on;
 				regulator-name = "vcc_sd";
 			};
 
diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts
index 5797933..ea0a8ec 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts
@@ -152,8 +152,6 @@
 		gpio = <&gpio3 11 GPIO_ACTIVE_LOW>;
 		regulator-min-microvolt = <1800000>;
 		regulator-max-microvolt = <3300000>;
-		regulator-always-on;
-		regulator-boot-on;
 		vin-supply = <&vcc_io>;
 	};
 
@@ -201,7 +199,6 @@
 	bus-width = <8>;
 	cap-mmc-highspeed;
 	disable-wp;
-	keep-power-in-suspend;
 	mmc-pwrseq = <&emmc_pwrseq>;
 	mmc-hs200-1_2v;
 	mmc-hs200-1_8v;
@@ -350,7 +347,6 @@
 	clock-freq-min-max = <400000 50000000>;
 	cap-sd-highspeed;
 	card-detect-delay = <200>;
-	keep-power-in-suspend;
 	num-slots = <1>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index b65c193..7afbfb0 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -300,8 +300,11 @@
 		ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x600000
 			  0x81000000 0x0 0xfa600000 0x0 0xfa600000 0x0 0x100000>;
 		resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
-			 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>;
-		reset-names = "core", "mgmt", "mgmt-sticky", "pipe";
+			 <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>,
+			 <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>,
+			 <&cru SRST_A_PCIE>;
+		reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
+			      "pm", "pclk", "aclk";
 		status = "disabled";
 
 		pcie0_intc: interrupt-controller {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index 08fd7cf..56a1b2e 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -257,18 +257,18 @@
 			reg = <0x59801000 0x400>;
 		};
 
-		mioctrl@59810000 {
-			compatible = "socionext,uniphier-mioctrl",
+		sdctrl@59810000 {
+			compatible = "socionext,uniphier-ld20-sdctrl",
 				     "simple-mfd", "syscon";
 			reg = <0x59810000 0x800>;
 
-			mio_clk: clock {
-				compatible = "socionext,uniphier-ld20-mio-clock";
+			sd_clk: clock {
+				compatible = "socionext,uniphier-ld20-sd-clock";
 				#clock-cells = <1>;
 			};
 
-			mio_rst: reset {
-				compatible = "socionext,uniphier-ld20-mio-reset";
+			sd_rst: reset {
+				compatible = "socionext,uniphier-ld20-sd-reset";
 				#reset-cells = <1>;
 			};
 		};
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 39feb85..6e1cb8c 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
-#include <asm/cpufeature.h>
+#include <asm/cpucaps.h>
 #include <asm/insn.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
new file mode 100644
index 0000000..87b4465
--- /dev/null
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -0,0 +1,40 @@
+/*
+ * arch/arm64/include/asm/cpucaps.h
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPUCAPS_H
+#define __ASM_CPUCAPS_H
+
+#define ARM64_WORKAROUND_CLEAN_CACHE		0
+#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
+#define ARM64_WORKAROUND_845719			2
+#define ARM64_HAS_SYSREG_GIC_CPUIF		3
+#define ARM64_HAS_PAN				4
+#define ARM64_HAS_LSE_ATOMICS			5
+#define ARM64_WORKAROUND_CAVIUM_23154		6
+#define ARM64_WORKAROUND_834220			7
+#define ARM64_HAS_NO_HW_PREFETCH		8
+#define ARM64_HAS_UAO				9
+#define ARM64_ALT_PAN_NOT_UAO			10
+#define ARM64_HAS_VIRT_HOST_EXTN		11
+#define ARM64_WORKAROUND_CAVIUM_27456		12
+#define ARM64_HAS_32BIT_EL0			13
+#define ARM64_HYP_OFFSET_LOW			14
+#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
+
+#define ARM64_NCAPS				16
+
+#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 758d74f..0bc0b1d 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -11,6 +11,7 @@
 
 #include <linux/jump_label.h>
 
+#include <asm/cpucaps.h>
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>
 
@@ -24,25 +25,6 @@
 #define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
 #define cpu_feature(x)		ilog2(HWCAP_ ## x)
 
-#define ARM64_WORKAROUND_CLEAN_CACHE		0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
-#define ARM64_WORKAROUND_845719			2
-#define ARM64_HAS_SYSREG_GIC_CPUIF		3
-#define ARM64_HAS_PAN				4
-#define ARM64_HAS_LSE_ATOMICS			5
-#define ARM64_WORKAROUND_CAVIUM_23154		6
-#define ARM64_WORKAROUND_834220			7
-#define ARM64_HAS_NO_HW_PREFETCH		8
-#define ARM64_HAS_UAO				9
-#define ARM64_ALT_PAN_NOT_UAO			10
-#define ARM64_HAS_VIRT_HOST_EXTN		11
-#define ARM64_WORKAROUND_CAVIUM_27456		12
-#define ARM64_HAS_32BIT_EL0			13
-#define ARM64_HYP_OFFSET_LOW			14
-#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
-
-#define ARM64_NCAPS				16
-
 #ifndef __ASSEMBLY__
 
 #include <linux/kernel.h>
@@ -94,7 +76,7 @@
 	u16 capability;
 	int def_scope;			/* default scope */
 	bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
-	void (*enable)(void *);		/* Called on all active CPUs */
+	int (*enable)(void *);		/* Called on all active CPUs */
 	union {
 		struct {	/* To be used for erratum handling only */
 			u32 midr_model;
diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h
index db0563c..f7865dd 100644
--- a/arch/arm64/include/asm/exec.h
+++ b/arch/arm64/include/asm/exec.h
@@ -18,6 +18,9 @@
 #ifndef __ASM_EXEC_H
 #define __ASM_EXEC_H
 
+#include <linux/sched.h>
+
 extern unsigned long arch_align_stack(unsigned long sp);
+void uao_thread_switch(struct task_struct *next);
 
 #endif	/* __ASM_EXEC_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 18f7465..ec3553eb 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -54,6 +54,7 @@
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fd9d5fd..f5ea0ba 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -178,11 +178,6 @@
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
 }
 
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
-{
-	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR);
-}
-
 static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
 {
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
@@ -203,6 +198,12 @@
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
 }
 
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
+		kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
+}
+
 static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
 {
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bd94e67..e505038 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -62,6 +62,9 @@
 	/* VTTBR value associated with above pgd and vmid */
 	u64    vttbr;
 
+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* The maximum number of vCPUs depends on the used GIC model */
 	int max_vcpus;
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a79b969..6f72fe8 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -128,7 +128,7 @@
 	return v;
 }
 
-#define kern_hyp_va(v) 	(typeof(v))(__kern_hyp_va((unsigned long)(v)))
+#define kern_hyp_va(v) 	((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
 /*
  * We currently only support a 40bit IPA.
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 23acc00..fc756e2 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -5,7 +5,6 @@
 
 #include <linux/stringify.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
 
 #ifdef __ASSEMBLER__
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index ba62df8..b71086d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -217,7 +217,7 @@
 #define _virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 #else
 #define __virt_to_pgoff(kaddr)	(((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
-#define __page_to_voff(kaddr)	(((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
+#define __page_to_voff(page)	(((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
 
 #define page_to_virt(page)	((void *)((__page_to_voff(page)) | PAGE_OFFSET))
 #define virt_to_page(vaddr)	((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index e12af67..06ff7fd 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -17,6 +17,7 @@
 #define __ASM_MODULE_H
 
 #include <asm-generic/module.h>
+#include <asm/memory.h>
 
 #define MODULE_ARCH_VERMAGIC	"aarch64"
 
@@ -32,6 +33,10 @@
 			  Elf64_Sym *sym);
 
 #ifdef CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_MODVERSIONS
+#define ARCH_RELOCATES_KCRCTAB
+#define reloc_start 		(kimage_vaddr - KIMAGE_VADDR)
+#endif
 extern u64 module_alloc_base;
 #else
 #define module_alloc_base	((u64)_etext - MODULES_VSIZE)
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 2fee2f5..5394c84 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -44,48 +44,44 @@
 									\
 	switch (size) {							\
 	case 1:								\
-		do {							\
-			asm ("//__per_cpu_" #op "_1\n"			\
-			"ldxrb	  %w[ret], %[ptr]\n"			\
+		asm ("//__per_cpu_" #op "_1\n"				\
+		"1:	ldxrb	  %w[ret], %[ptr]\n"			\
 			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
-			"stxrb	  %w[loop], %w[ret], %[ptr]\n"		\
-			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
-			  [ptr] "+Q"(*(u8 *)ptr)			\
-			: [val] "Ir" (val));				\
-		} while (loop);						\
+		"	stxrb	  %w[loop], %w[ret], %[ptr]\n"		\
+		"	cbnz	  %w[loop], 1b"				\
+		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
+		  [ptr] "+Q"(*(u8 *)ptr)				\
+		: [val] "Ir" (val));					\
 		break;							\
 	case 2:								\
-		do {							\
-			asm ("//__per_cpu_" #op "_2\n"			\
-			"ldxrh	  %w[ret], %[ptr]\n"			\
+		asm ("//__per_cpu_" #op "_2\n"				\
+		"1:	ldxrh	  %w[ret], %[ptr]\n"			\
 			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
-			"stxrh	  %w[loop], %w[ret], %[ptr]\n"		\
-			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
-			  [ptr]  "+Q"(*(u16 *)ptr)			\
-			: [val] "Ir" (val));				\
-		} while (loop);						\
+		"	stxrh	  %w[loop], %w[ret], %[ptr]\n"		\
+		"	cbnz	  %w[loop], 1b"				\
+		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
+		  [ptr]  "+Q"(*(u16 *)ptr)				\
+		: [val] "Ir" (val));					\
 		break;							\
 	case 4:								\
-		do {							\
-			asm ("//__per_cpu_" #op "_4\n"			\
-			"ldxr	  %w[ret], %[ptr]\n"			\
+		asm ("//__per_cpu_" #op "_4\n"				\
+		"1:	ldxr	  %w[ret], %[ptr]\n"			\
 			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
-			"stxr	  %w[loop], %w[ret], %[ptr]\n"		\
-			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
-			  [ptr] "+Q"(*(u32 *)ptr)			\
-			: [val] "Ir" (val));				\
-		} while (loop);						\
+		"	stxr	  %w[loop], %w[ret], %[ptr]\n"		\
+		"	cbnz	  %w[loop], 1b"				\
+		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
+		  [ptr] "+Q"(*(u32 *)ptr)				\
+		: [val] "Ir" (val));					\
 		break;							\
 	case 8:								\
-		do {							\
-			asm ("//__per_cpu_" #op "_8\n"			\
-			"ldxr	  %[ret], %[ptr]\n"			\
+		asm ("//__per_cpu_" #op "_8\n"				\
+		"1:	ldxr	  %[ret], %[ptr]\n"			\
 			#asm_op " %[ret], %[ret], %[val]\n"		\
-			"stxr	  %w[loop], %[ret], %[ptr]\n"		\
-			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
-			  [ptr] "+Q"(*(u64 *)ptr)			\
-			: [val] "Ir" (val));				\
-		} while (loop);						\
+		"	stxr	  %w[loop], %[ret], %[ptr]\n"		\
+		"	cbnz	  %w[loop], 1b"				\
+		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
+		  [ptr] "+Q"(*(u64 *)ptr)				\
+		: [val] "Ir" (val));					\
 		break;							\
 	default:							\
 		BUILD_BUG();						\
@@ -150,44 +146,40 @@
 
 	switch (size) {
 	case 1:
-		do {
-			asm ("//__percpu_xchg_1\n"
-			"ldxrb %w[ret], %[ptr]\n"
-			"stxrb %w[loop], %w[val], %[ptr]\n"
-			: [loop] "=&r"(loop), [ret] "=&r"(ret),
-			  [ptr] "+Q"(*(u8 *)ptr)
-			: [val] "r" (val));
-		} while (loop);
+		asm ("//__percpu_xchg_1\n"
+		"1:	ldxrb	%w[ret], %[ptr]\n"
+		"	stxrb	%w[loop], %w[val], %[ptr]\n"
+		"	cbnz	%w[loop], 1b"
+		: [loop] "=&r"(loop), [ret] "=&r"(ret),
+		  [ptr] "+Q"(*(u8 *)ptr)
+		: [val] "r" (val));
 		break;
 	case 2:
-		do {
-			asm ("//__percpu_xchg_2\n"
-			"ldxrh %w[ret], %[ptr]\n"
-			"stxrh %w[loop], %w[val], %[ptr]\n"
-			: [loop] "=&r"(loop), [ret] "=&r"(ret),
-			  [ptr] "+Q"(*(u16 *)ptr)
-			: [val] "r" (val));
-		} while (loop);
+		asm ("//__percpu_xchg_2\n"
+		"1:	ldxrh	%w[ret], %[ptr]\n"
+		"	stxrh	%w[loop], %w[val], %[ptr]\n"
+		"	cbnz	%w[loop], 1b"
+		: [loop] "=&r"(loop), [ret] "=&r"(ret),
+		  [ptr] "+Q"(*(u16 *)ptr)
+		: [val] "r" (val));
 		break;
 	case 4:
-		do {
-			asm ("//__percpu_xchg_4\n"
-			"ldxr %w[ret], %[ptr]\n"
-			"stxr %w[loop], %w[val], %[ptr]\n"
-			: [loop] "=&r"(loop), [ret] "=&r"(ret),
-			  [ptr] "+Q"(*(u32 *)ptr)
-			: [val] "r" (val));
-		} while (loop);
+		asm ("//__percpu_xchg_4\n"
+		"1:	ldxr	%w[ret], %[ptr]\n"
+		"	stxr	%w[loop], %w[val], %[ptr]\n"
+		"	cbnz	%w[loop], 1b"
+		: [loop] "=&r"(loop), [ret] "=&r"(ret),
+		  [ptr] "+Q"(*(u32 *)ptr)
+		: [val] "r" (val));
 		break;
 	case 8:
-		do {
-			asm ("//__percpu_xchg_8\n"
-			"ldxr %[ret], %[ptr]\n"
-			"stxr %w[loop], %[val], %[ptr]\n"
-			: [loop] "=&r"(loop), [ret] "=&r"(ret),
-			  [ptr] "+Q"(*(u64 *)ptr)
-			: [val] "r" (val));
-		} while (loop);
+		asm ("//__percpu_xchg_8\n"
+		"1:	ldxr	%[ret], %[ptr]\n"
+		"	stxr	%w[loop], %[val], %[ptr]\n"
+		"	cbnz	%w[loop], 1b"
+		: [loop] "=&r"(loop), [ret] "=&r"(ret),
+		  [ptr] "+Q"(*(u64 *)ptr)
+		: [val] "r" (val));
 		break;
 	default:
 		BUILD_BUG();
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 2065f46..38b6a2b 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -46,7 +46,15 @@
 #define	ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
 #define	ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
 
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR	0	/* Software increment event */
+/*
+ * PMUv3 event types: required events
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
 
 /*
  * Event filters for PMUv3
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index df2e53d..60e3482 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -188,8 +188,8 @@
 
 #endif
 
-void cpu_enable_pan(void *__unused);
-void cpu_enable_uao(void *__unused);
-void cpu_enable_cache_maint_trap(void *__unused);
+int cpu_enable_pan(void *__unused);
+int cpu_enable_uao(void *__unused);
+int cpu_enable_cache_maint_trap(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e8d46e8..6c80b36 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -286,7 +286,7 @@
 
 #define write_sysreg_s(v, r) do {					\
 	u64 __val = (u64)v;						\
-	asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val));	\
+	asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val));	\
 } while (0)
 
 static inline void config_sctlr_el1(u32 clear, u32 set)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index bcaf6fb..55d0adb 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -21,6 +21,7 @@
 /*
  * User space memory access functions
  */
+#include <linux/bitops.h>
 #include <linux/kasan-checks.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
@@ -102,6 +103,13 @@
 	flag;								\
 })
 
+/*
+ * When dealing with data aborts or instruction traps we may end up with
+ * a tagged userland pointer. Clear the tag to get a sane pointer to pass
+ * on to access_ok(), for instance.
+ */
+#define untagged_addr(addr)		sign_extend64(addr, 55)
+
 #define access_ok(type, addr, size)	__range_ok(addr, size)
 #define user_addr_max			get_fs
 
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 42ffdb54..b0988bb 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -280,35 +280,43 @@
 /*
  * Error-checking SWP macros implemented using ldxr{b}/stxr{b}
  */
-#define __user_swpX_asm(data, addr, res, temp, B)		\
+
+/* Arbitrary constant to ensure forward-progress of the LL/SC loop */
+#define __SWP_LL_SC_LOOPS	4
+
+#define __user_swpX_asm(data, addr, res, temp, temp2, B)	\
 	__asm__ __volatile__(					\
+	"	mov		%w3, %w7\n"			\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
 		    CONFIG_ARM64_PAN)				\
-	"0:	ldxr"B"		%w2, [%3]\n"			\
-	"1:	stxr"B"		%w0, %w1, [%3]\n"		\
+	"0:	ldxr"B"		%w2, [%4]\n"			\
+	"1:	stxr"B"		%w0, %w1, [%4]\n"		\
 	"	cbz		%w0, 2f\n"			\
-	"	mov		%w0, %w4\n"			\
+	"	sub		%w3, %w3, #1\n"			\
+	"	cbnz		%w3, 0b\n"			\
+	"	mov		%w0, %w5\n"			\
 	"	b		3f\n"				\
 	"2:\n"							\
 	"	mov		%w1, %w2\n"			\
 	"3:\n"							\
 	"	.pushsection	 .fixup,\"ax\"\n"		\
 	"	.align		2\n"				\
-	"4:	mov		%w0, %w5\n"			\
+	"4:	mov		%w0, %w6\n"			\
 	"	b		3b\n"				\
 	"	.popsection"					\
 	_ASM_EXTABLE(0b, 4b)					\
 	_ASM_EXTABLE(1b, 4b)					\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
 		CONFIG_ARM64_PAN)				\
-	: "=&r" (res), "+r" (data), "=&r" (temp)		\
-	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)		\
+	: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2)	\
+	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT),		\
+	  "i" (__SWP_LL_SC_LOOPS)				\
 	: "memory")
 
-#define __user_swp_asm(data, addr, res, temp) \
-	__user_swpX_asm(data, addr, res, temp, "")
-#define __user_swpb_asm(data, addr, res, temp) \
-	__user_swpX_asm(data, addr, res, temp, "b")
+#define __user_swp_asm(data, addr, res, temp, temp2) \
+	__user_swpX_asm(data, addr, res, temp, temp2, "")
+#define __user_swpb_asm(data, addr, res, temp, temp2) \
+	__user_swpX_asm(data, addr, res, temp, temp2, "b")
 
 /*
  * Bit 22 of the instruction encoding distinguishes between
@@ -328,12 +336,12 @@
 	}
 
 	while (1) {
-		unsigned long temp;
+		unsigned long temp, temp2;
 
 		if (type == TYPE_SWPB)
-			__user_swpb_asm(*data, address, res, temp);
+			__user_swpb_asm(*data, address, res, temp, temp2);
 		else
-			__user_swp_asm(*data, address, res, temp);
+			__user_swp_asm(*data, address, res, temp, temp2);
 
 		if (likely(res != -EAGAIN) || signal_pending(current))
 			break;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 0150394..b75e917 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -39,10 +39,11 @@
 		(arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
 }
 
-static void cpu_enable_trap_ctr_access(void *__unused)
+static int cpu_enable_trap_ctr_access(void *__unused)
 {
 	/* Clear SCTLR_EL1.UCT */
 	config_sctlr_el1(SCTLR_EL1_UCT, 0);
+	return 0;
 }
 
 #define MIDR_RANGE(model, min, max) \
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d577f26..c02504e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -19,7 +19,9 @@
 #define pr_fmt(fmt) "CPU features: " fmt
 
 #include <linux/bsearch.h>
+#include <linux/cpumask.h>
 #include <linux/sort.h>
+#include <linux/stop_machine.h>
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
@@ -941,7 +943,13 @@
 {
 	for (; caps->matches; caps++)
 		if (caps->enable && cpus_have_cap(caps->capability))
-			on_each_cpu(caps->enable, NULL, true);
+			/*
+			 * Use stop_machine() as it schedules the work allowing
+			 * us to modify PSTATE, instead of on_each_cpu() which
+			 * uses an IPI, giving us a PSTATE that disappears when
+			 * we return.
+			 */
+			stop_machine(caps->enable, NULL, cpu_online_mask);
 }
 
 /*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 427f6d3..332e331 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -586,8 +586,9 @@
 	b.lt	4f				// Skip if no PMU present
 	mrs	x0, pmcr_el0			// Disable debug access traps
 	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
-	msr	mdcr_el2, x0			// all PMU counters from EL1
 4:
+	csel	x0, xzr, x0, lt			// all PMU counters from EL1
+	msr	mdcr_el2, x0			// (if they exist)
 
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a9310a6..57ae9d9 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -31,17 +31,9 @@
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
+ * Common event types (some are defined in asm/perf_event.h).
  */
 
-/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
-
 /* At least one of the following is required. */
 #define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x08
 #define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x1B
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 27b2f138..01753cd 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -49,6 +49,7 @@
 #include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
+#include <asm/exec.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
@@ -186,10 +187,19 @@
 	printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
 	       regs->pc, lr, regs->pstate);
 	printk("sp : %016llx\n", sp);
-	for (i = top_reg; i >= 0; i--) {
+
+	i = top_reg;
+
+	while (i >= 0) {
 		printk("x%-2d: %016llx ", i, regs->regs[i]);
-		if (i % 2 == 0)
-			printk("\n");
+		i--;
+
+		if (i % 2 == 0) {
+			pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
+			i--;
+		}
+
+		pr_cont("\n");
 	}
 	printk("\n");
 }
@@ -301,7 +311,7 @@
 }
 
 /* Restore the UAO state depending on next's addr_limit */
-static void uao_thread_switch(struct task_struct *next)
+void uao_thread_switch(struct task_struct *next)
 {
 	if (IS_ENABLED(CONFIG_ARM64_UAO)) {
 		if (task_thread_info(next)->addr_limit == KERNEL_DS)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index b8799e7..1bec41b 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -135,7 +135,7 @@
 
 #ifdef CONFIG_KASAN
 	mov	x0, sp
-	bl	kasan_unpoison_remaining_stack
+	bl	kasan_unpoison_task_stack_below
 #endif
 
 	ldp	x19, x20, [x29, #16]
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d3f151c..8507703 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -544,6 +544,7 @@
 			return;
 		}
 		bootcpu_valid = true;
+		early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid));
 		return;
 	}
 
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index ad73414..bb0cd78 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -1,8 +1,11 @@
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
+#include <asm/exec.h>
 #include <asm/pgtable.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
@@ -50,6 +53,14 @@
 	set_my_cpu_offset(per_cpu_offset(cpu));
 
 	/*
+	 * PSTATE was not saved over suspend/resume, re-enable any detected
+	 * features that might not have been set correctly.
+	 */
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
+			CONFIG_ARM64_PAN));
+	uao_thread_switch(current);
+
+	/*
 	 * Restore HW breakpoint registers to sane values
 	 * before debug exceptions are possibly reenabled
 	 * through local_dbg_restore.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 5ff020f..c9986b3 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -428,24 +428,28 @@
 	force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
 }
 
-void cpu_enable_cache_maint_trap(void *__unused)
+int cpu_enable_cache_maint_trap(void *__unused)
 {
 	config_sctlr_el1(SCTLR_EL1_UCI, 0);
+	return 0;
 }
 
 #define __user_cache_maint(insn, address, res)			\
-	asm volatile (						\
-		"1:	" insn ", %1\n"				\
-		"	mov	%w0, #0\n"			\
-		"2:\n"						\
-		"	.pushsection .fixup,\"ax\"\n"		\
-		"	.align	2\n"				\
-		"3:	mov	%w0, %w2\n"			\
-		"	b	2b\n"				\
-		"	.popsection\n"				\
-		_ASM_EXTABLE(1b, 3b)				\
-		: "=r" (res)					\
-		: "r" (address), "i" (-EFAULT) )
+	if (untagged_addr(address) >= user_addr_max())		\
+		res = -EFAULT;					\
+	else							\
+		asm volatile (					\
+			"1:	" insn ", %1\n"			\
+			"	mov	%w0, #0\n"		\
+			"2:\n"					\
+			"	.pushsection .fixup,\"ax\"\n"	\
+			"	.align	2\n"			\
+			"3:	mov	%w0, %w2\n"		\
+			"	b	2b\n"			\
+			"	.popsection\n"			\
+			_ASM_EXTABLE(1b, 3b)			\
+			: "=r" (res)				\
+			: "r" (address), "i" (-EFAULT) )
 
 static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 9cc0ea7..88e2f2b 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -64,6 +64,21 @@
 	write_sysreg(0, vttbr_el2);
 }
 
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+	/* Switch to requested VMID */
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	asm volatile("tlbi vmalle1" : : );
+	dsb(nsh);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	dsb(ishst);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f302fdb..87e7e66 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -597,8 +597,14 @@
 
 			idx = ARMV8_PMU_CYCLE_IDX;
 		} else {
-			BUG();
+			return false;
 		}
+	} else if (r->CRn == 0 && r->CRm == 9) {
+		/* PMCCNTR */
+		if (pmu_access_event_counter_el0_disabled(vcpu))
+			return false;
+
+		idx = ARMV8_PMU_CYCLE_IDX;
 	} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
 		/* PMEVCNTRn_EL0 */
 		if (pmu_access_event_counter_el0_disabled(vcpu))
@@ -606,7 +612,7 @@
 
 		idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
 	} else {
-		BUG();
+		return false;
 	}
 
 	if (!pmu_counter_idx_valid(vcpu, idx))
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 53d9159..0f87883 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -29,7 +29,9 @@
 #include <linux/sched.h>
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
+#include <linux/preempt.h>
 
+#include <asm/bug.h>
 #include <asm/cpufeature.h>
 #include <asm/exception.h>
 #include <asm/debug-monitors.h>
@@ -670,9 +672,17 @@
 NOKPROBE_SYMBOL(do_debug_exception);
 
 #ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void *__unused)
+int cpu_enable_pan(void *__unused)
 {
+	/*
+	 * We modify PSTATE. This won't work from irq context as the PSTATE
+	 * is discarded once we return from the exception.
+	 */
+	WARN_ON_ONCE(in_interrupt());
+
 	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
+	asm(SET_PSTATE_PAN(1));
+	return 0;
 }
 #endif /* CONFIG_ARM64_PAN */
 
@@ -683,8 +693,9 @@
  * We need to enable the feature at runtime (instead of adding it to
  * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
  */
-void cpu_enable_uao(void *__unused)
+int cpu_enable_uao(void *__unused)
 {
 	asm(SET_PSTATE_UAO(1));
+	return 0;
 }
 #endif /* CONFIG_ARM64_UAO */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 21c489b..212c4d1 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -421,35 +421,35 @@
 
 	pr_notice("Virtual kernel memory layout:\n");
 #ifdef CONFIG_KASAN
-	pr_cont("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+	pr_notice("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
 		MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
 #endif
-	pr_cont("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+	pr_notice("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 		MLM(MODULES_VADDR, MODULES_END));
-	pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+	pr_notice("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
 		MLG(VMALLOC_START, VMALLOC_END));
-	pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+	pr_notice("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(_text, _etext));
-	pr_cont("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+	pr_notice("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(__start_rodata, __init_begin));
-	pr_cont("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+	pr_notice("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(__init_begin, __init_end));
-	pr_cont("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+	pr_notice("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(_sdata, _edata));
-	pr_cont("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+	pr_notice("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(__bss_start, __bss_stop));
-	pr_cont("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
+	pr_notice("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
 		MLK(FIXADDR_START, FIXADDR_TOP));
-	pr_cont("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+	pr_notice("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 		MLM(PCI_IO_START, PCI_IO_END));
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-	pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
+	pr_notice("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
 		MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
-	pr_cont("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
+	pr_notice("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
 		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
 		    (unsigned long)virt_to_page(high_memory)));
 #endif
-	pr_cont("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+	pr_notice("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 		MLM(__phys_to_virt(memblock_start_of_DRAM()),
 		    (unsigned long)high_memory));
 
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 778a985..4b32168 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -147,7 +147,7 @@
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-	return node_distance(from, to);
+	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
 }
 
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
@@ -223,8 +223,11 @@
 	void *nd;
 	int tnid;
 
-	pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
-		nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
+	if (start_pfn < end_pfn)
+		pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+			start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
+	else
+		pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
 
 	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
 	nd = __va(nd_pa);
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index 8b8fe67..8d79286 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -271,7 +271,7 @@
 			case BFIN_MEM_ACCESS_CORE:
 			case BFIN_MEM_ACCESS_CORE_ONLY:
 				copied = access_process_vm(child, addr, &tmp,
-				                           to_copy, 0);
+							   to_copy, FOLL_FORCE);
 				if (copied)
 					break;
 
@@ -324,7 +324,8 @@
 			case BFIN_MEM_ACCESS_CORE:
 			case BFIN_MEM_ACCESS_CORE_ONLY:
 				copied = access_process_vm(child, addr, &data,
-				                           to_copy, 1);
+				                           to_copy,
+							   FOLL_FORCE | FOLL_WRITE);
 				break;
 			case BFIN_MEM_ACCESS_DMA:
 				if (safe_dma_memcpy(paddr, &data, to_copy))
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index b5698c8..0068fd4 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -2722,7 +2722,6 @@
 	err = get_user_pages((unsigned long int)(oper.indata + prev_ix),
 			     noinpages,
 			     0,  /* read access only for in data */
-			     0, /* no force */
 			     inpages,
 			     NULL);
 
@@ -2736,8 +2735,7 @@
 	if (oper.do_cipher){
 		err = get_user_pages((unsigned long int)oper.cipher_outdata,
 				     nooutpages,
-				     1, /* write access for out data */
-				     0, /* no force */
+				     FOLL_WRITE, /* write access for out data */
 				     outpages,
 				     NULL);
 		up_read(&current->mm->mmap_sem);
@@ -3151,7 +3149,7 @@
 	printk("print_dma_descriptors start\n");
 
 	printk("iop:\n");
-	printk("\tsid: 0x%lld\n", iop->sid);
+	printk("\tsid: 0x%llx\n", iop->sid);
 
 	printk("\tcdesc_out: 0x%p\n", iop->cdesc_out);
 	printk("\tcdesc_in: 0x%p\n", iop->cdesc_in);
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
index f085229..f0df654 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -147,7 +147,7 @@
 				/* The trampoline page is globally mapped, no page table to traverse.*/
 				tmp = *(unsigned long*)addr;
 			} else {
-				copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+				copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
 
 				if (copied != sizeof(tmp))
 					break;
@@ -279,7 +279,7 @@
   int opsize = 0;
 
   /* Read the opcode at pc (do what PTRACE_PEEKTEXT would do). */
-  copied = access_process_vm(child, pc, &opcode, sizeof(opcode), 0);
+  copied = access_process_vm(child, pc, &opcode, sizeof(opcode), FOLL_FORCE);
   if (copied != sizeof(opcode))
     return 0;
 
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index b408fe6..3cef068 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -31,7 +31,6 @@
 	int		   cpu;			/* cpu we're on */
 	int		   preempt_count;	/* 0 => preemptable, <0 => BUG */
 	mm_segment_t		addr_limit;
-	struct restart_block restart_block;
 };
 
 /*
@@ -44,9 +43,6 @@
 	.cpu =		0,			\
 	.preempt_count = INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
-	.restart_block	= {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index ad1f81f..7138303 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -79,7 +79,7 @@
 	unsigned int er0;
 
 	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+	current->restart_block.fn = do_no_restart_syscall;
 
 	/* restore passed registers */
 #define COPY(r)  do { err |= get_user(regs->r, &usc->sc_##r); } while (0)
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index 09f8457..5ed0ea9 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -142,7 +142,7 @@
 	u64 virt_addr=simple_strtoull(buf, NULL, 16);
 	int ret;
 
-	ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL);
+	ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
 	if (ret<=0) {
 #ifdef ERR_INJ_DEBUG
 		printk("Virtual address %lx is not existing.\n",virt_addr);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 6f54d51..31aa8c0 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -453,7 +453,7 @@
 			return 0;
 		}
 	}
-	copied = access_process_vm(child, addr, &ret, sizeof(ret), 0);
+	copied = access_process_vm(child, addr, &ret, sizeof(ret), FOLL_FORCE);
 	if (copied != sizeof(ret))
 		return -EIO;
 	*val = ret;
@@ -489,7 +489,8 @@
 				*ia64_rse_skip_regs(krbs, regnum) = val;
 			}
 		}
-	} else if (access_process_vm(child, addr, &val, sizeof(val), 1)
+	} else if (access_process_vm(child, addr, &val, sizeof(val),
+				FOLL_FORCE | FOLL_WRITE)
 		   != sizeof(val))
 		return -EIO;
 	return 0;
@@ -543,7 +544,8 @@
 		ret = ia64_peek(child, sw, user_rbs_end, addr, &val);
 		if (ret < 0)
 			return ret;
-		if (access_process_vm(child, addr, &val, sizeof(val), 1)
+		if (access_process_vm(child, addr, &val, sizeof(val),
+				FOLL_FORCE | FOLL_WRITE)
 		    != sizeof(val))
 			return -EIO;
 	}
@@ -559,7 +561,8 @@
 
 	/* now copy word for word from user rbs to kernel rbs: */
 	for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
-		if (access_process_vm(child, addr, &val, sizeof(val), 0)
+		if (access_process_vm(child, addr, &val, sizeof(val),
+				FOLL_FORCE)
 				!= sizeof(val))
 			return -EIO;
 
@@ -1156,7 +1159,8 @@
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
 		/* read word at location addr */
-		if (access_process_vm(child, addr, &data, sizeof(data), 0)
+		if (access_process_vm(child, addr, &data, sizeof(data),
+				FOLL_FORCE)
 		    != sizeof(data))
 			return -EIO;
 		/* ensure return value is not mistaken for error code */
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 51f5e9a..c145605 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -493,7 +493,8 @@
 	int i;
 
 	for (i = 0; i < p->nr_trap; i++)
-		access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]), 1);
+		access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]),
+				FOLL_FORCE | FOLL_WRITE);
 	p->nr_trap = 0;
 }
 
@@ -537,7 +538,8 @@
 	unsigned long next_insn, code;
 	unsigned long addr = next_pc & ~3;
 
-	if (access_process_vm(child, addr, &next_insn, sizeof(next_insn), 0)
+	if (access_process_vm(child, addr, &next_insn, sizeof(next_insn),
+			FOLL_FORCE)
 	    != sizeof(next_insn)) {
 		return -1; /* error */
 	}
@@ -546,7 +548,8 @@
 	if (register_debug_trap(child, next_pc, next_insn, &code)) {
 		return -1; /* error */
 	}
-	if (access_process_vm(child, addr, &code, sizeof(code), 1)
+	if (access_process_vm(child, addr, &code, sizeof(code),
+			FOLL_FORCE | FOLL_WRITE)
 	    != sizeof(code)) {
 		return -1; /* error */
 	}
@@ -562,7 +565,8 @@
  	addr = (regs->bpc - 2) & ~3;
 	regs->bpc -= 2;
 	if (unregister_debug_trap(current, addr, &code)) {
-	    access_process_vm(current, addr, &code, sizeof(code), 1);
+	    access_process_vm(current, addr, &code, sizeof(code),
+		    FOLL_FORCE | FOLL_WRITE);
 	    invalidate_cache();
 	}
 }
@@ -589,7 +593,8 @@
 	/* Compute next pc.  */
 	pc = get_stack_long(child, PT_BPC);
 
-	if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0)
+	if (access_process_vm(child, pc&~3, &insn, sizeof(insn),
+			FOLL_FORCE)
 	    != sizeof(insn))
 		return;
 
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index fbf40d3..1a6bac7 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -263,7 +263,7 @@
 
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y) \
 		  VMLINUX_ENTRY_ADDRESS=$(entry-y) \
-		  PLATFORM=$(platform-y)
+		  PLATFORM="$(platform-y)"
 ifdef CONFIG_32BIT
 bootvars-y	+= ADDR_BITS=32
 endif
diff --git a/arch/mips/boot/dts/mti/malta.dts b/arch/mips/boot/dts/mti/malta.dts
index f604a27..ffe3a15 100644
--- a/arch/mips/boot/dts/mti/malta.dts
+++ b/arch/mips/boot/dts/mti/malta.dts
@@ -84,12 +84,13 @@
 	fpga_regs: system-controller@1f000000 {
 		compatible = "mti,malta-fpga", "syscon", "simple-mfd";
 		reg = <0x1f000000 0x1000>;
+		native-endian;
 
 		reboot {
 			compatible = "syscon-reboot";
 			regmap = <&fpga_regs>;
 			offset = <0x500>;
-			mask = <0x4d>;
+			mask = <0x42>;
 		};
 	};
 
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index 0ea73e8..d493ccb 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -30,9 +30,19 @@
 
 void __init prom_init(void)
 {
+	plat_get_fdt();
+	BUG_ON(!fdt);
+}
+
+void __init *plat_get_fdt(void)
+{
 	const struct mips_machine *check_mach;
 	const struct of_device_id *match;
 
+	if (fdt)
+		/* Already set up */
+		return (void *)fdt;
+
 	if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_arg1)) {
 		/*
 		 * We booted using the UHI boot protocol, so we have been
@@ -75,12 +85,6 @@
 		/* Retrieve the machine's FDT */
 		fdt = mach->fdt;
 	}
-
-	BUG_ON(!fdt);
-}
-
-void __init *plat_get_fdt(void)
-{
 	return (void *)fdt;
 }
 
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index 355dc25..c05369e 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -63,6 +63,8 @@
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
 				    struct mips_fpu_struct *ctx, int has_fpu,
 				    void *__user *fault_addr);
+void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
+		     struct task_struct *tsk);
 int process_fpemu_return(int sig, void __user *fault_addr,
 			 unsigned long fcr31);
 int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
@@ -81,4 +83,15 @@
 		set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
 }
 
+/*
+ * Mask the FCSR Cause bits according to the Enable bits, observing
+ * that Unimplemented is always enabled.
+ */
+static inline unsigned long mask_fcr31_x(unsigned long fcr31)
+{
+	return fcr31 & (FPU_CSR_UNI_X |
+			((fcr31 & FPU_CSR_ALL_E) <<
+			 (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E))));
+}
+
 #endif /* _ASM_FPU_EMULATOR_H */
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 07f58cf..bebec37 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -293,7 +293,10 @@
 	/* Host KSEG0 address of the EI/DI offset */
 	void *kseg0_commpage;
 
-	u32 io_gpr;		/* GPR used as IO source/target */
+	/* Resume PC after MMIO completion */
+	unsigned long io_pc;
+	/* GPR used as IO source/target */
+	u32 io_gpr;
 
 	struct hrtimer comparecount_timer;
 	/* Count timer control KVM register */
@@ -315,8 +318,6 @@
 	/* Bitmask of pending exceptions to be cleared */
 	unsigned long pending_exceptions_clr;
 
-	u32 pending_load_cause;
-
 	/* Save/Restore the entryhi register when are are preempted/scheduled back in */
 	unsigned long preempt_entryhi;
 
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index ebb5c0f..c0ae279 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -76,6 +76,22 @@
 } while (0)
 
 /*
+ * Check FCSR for any unmasked exceptions pending set with `ptrace',
+ * clear them and send a signal.
+ */
+#define __sanitize_fcr31(next)						\
+do {									\
+	unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31);	\
+	void __user *pc;						\
+									\
+	if (unlikely(fcr31)) {						\
+		pc = (void __user *)task_pt_regs(next)->cp0_epc;	\
+		next->thread.fpu.fcr31 &= ~fcr31;			\
+		force_fcr31_sig(fcr31, pc, next);			\
+	}								\
+} while (0)
+
+/*
  * For newly created kernel threads switch_to() will return to
  * ret_from_kernel_thread, newly created user threads to ret_from_fork.
  * That is, everything following resume() will be skipped for new threads.
@@ -85,6 +101,8 @@
 do {									\
 	__mips_mt_fpaff_switch_to(prev);				\
 	lose_fpu_inatomic(1, prev);					\
+	if (tsk_used_math(next))					\
+		__sanitize_fcr31(next);					\
 	if (cpu_has_dsp) {						\
 		__save_dsp(prev);					\
 		__restore_dsp(next);					\
diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
index 2a45867..a4964c3 100644
--- a/arch/mips/kernel/mips-cpc.c
+++ b/arch/mips/kernel/mips-cpc.c
@@ -21,6 +21,11 @@
 
 static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
 
+phys_addr_t __weak mips_cpc_default_phys_base(void)
+{
+	return 0;
+}
+
 /**
  * mips_cpc_phys_base - retrieve the physical base address of the CPC
  *
@@ -43,8 +48,12 @@
 	if (cpc_base & CM_GCR_CPC_BASE_CPCEN_MSK)
 		return cpc_base & CM_GCR_CPC_BASE_CPCBASE_MSK;
 
-	/* Otherwise, give it the default address & enable it */
+	/* Otherwise, use the default address */
 	cpc_base = mips_cpc_default_phys_base();
+	if (!cpc_base)
+		return cpc_base;
+
+	/* Enable the CPC, mapped at the default address */
 	write_gcr_cpc_base(cpc_base | CM_GCR_CPC_BASE_CPCEN_MSK);
 	return cpc_base;
 }
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 22dedd6..bd09853 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -899,7 +899,7 @@
  * mipsr2_decoder: Decode and emulate a MIPS R2 instruction
  * @regs: Process register set
  * @inst: Instruction to decode and emulate
- * @fcr31: Floating Point Control and Status Register returned
+ * @fcr31: Floating Point Control and Status Register Cause bits returned
  */
 int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31)
 {
@@ -1172,13 +1172,13 @@
 
 		err = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
 					       &fault_addr);
-		*fcr31 = current->thread.fpu.fcr31;
 
 		/*
-		 * We can't allow the emulated instruction to leave any of
-		 * the cause bits set in $fcr31.
+		 * We can't allow the emulated instruction to leave any
+		 * enabled Cause bits set in $fcr31.
 		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		*fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31);
+		current->thread.fpu.fcr31 &= ~res;
 
 		/*
 		 * this is a tricky issue - lose_fpu() uses LL/SC atomics
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 6103b24..a92994d 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -79,16 +79,15 @@
 }
 
 /*
- * Poke at FCSR according to its mask.  Don't set the cause bits as
- * this is currently not handled correctly in FP context restoration
- * and will cause an oops if a corresponding enable bit is set.
+ * Poke at FCSR according to its mask.  Set the Cause bits even
+ * if a corresponding Enable bit is set.  This will be noticed at
+ * the time the thread is switched to and SIGFPE thrown accordingly.
  */
 static void ptrace_setfcr31(struct task_struct *child, u32 value)
 {
 	u32 fcr31;
 	u32 mask;
 
-	value &= ~FPU_CSR_ALL_X;
 	fcr31 = child->thread.fpu.fcr31;
 	mask = boot_cpu_data.fpu_msk31;
 	child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
@@ -817,6 +816,7 @@
 			break;
 #endif
 		case FPC_CSR:
+			init_fp_ctx(child);
 			ptrace_setfcr31(child, data);
 			break;
 		case DSP_BASE ... DSP_BASE + 5: {
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 283b5a1..7e71a4e 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -70,7 +70,7 @@
 			break;
 
 		copied = access_process_vm(child, (u64)addrOthers, &tmp,
-				sizeof(tmp), 0);
+				sizeof(tmp), FOLL_FORCE);
 		if (copied != sizeof(tmp))
 			break;
 		ret = put_user(tmp, (u32 __user *) (unsigned long) data);
@@ -179,7 +179,8 @@
 			break;
 		ret = 0;
 		if (access_process_vm(child, (u64)addrOthers, &data,
-					sizeof(data), 1) == sizeof(data))
+					sizeof(data),
+					FOLL_FORCE | FOLL_WRITE) == sizeof(data))
 			break;
 		ret = -EIO;
 		break;
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index b4ac637..918f2f6 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -21,106 +21,84 @@
 #define EX(a,b)							\
 9:	a,##b;							\
 	.section __ex_table,"a";				\
+	PTR	9b,fault;					\
+	.previous
+
+#define EX2(a,b)						\
+9:	a,##b;							\
+	.section __ex_table,"a";				\
 	PTR	9b,bad_stack;					\
+	PTR	9b+4,bad_stack;					\
 	.previous
 
 	.set	noreorder
 	.set	mips1
-	/* Save floating point context */
+
+/**
+ * _save_fp_context() - save FP context from the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
+ *
+ * Save FP context, including the 32 FP data registers and the FP
+ * control & status register, from the FPU to signal context.
+ */
 LEAF(_save_fp_context)
 	.set	push
 	SET_HARDFLOAT
 	li	v0, 0					# assume success
-	cfc1	t1,fcr31
-	EX(swc1 $f0,(SC_FPREGS+0)(a0))
-	EX(swc1 $f1,(SC_FPREGS+8)(a0))
-	EX(swc1 $f2,(SC_FPREGS+16)(a0))
-	EX(swc1 $f3,(SC_FPREGS+24)(a0))
-	EX(swc1 $f4,(SC_FPREGS+32)(a0))
-	EX(swc1 $f5,(SC_FPREGS+40)(a0))
-	EX(swc1 $f6,(SC_FPREGS+48)(a0))
-	EX(swc1 $f7,(SC_FPREGS+56)(a0))
-	EX(swc1 $f8,(SC_FPREGS+64)(a0))
-	EX(swc1 $f9,(SC_FPREGS+72)(a0))
-	EX(swc1 $f10,(SC_FPREGS+80)(a0))
-	EX(swc1 $f11,(SC_FPREGS+88)(a0))
-	EX(swc1 $f12,(SC_FPREGS+96)(a0))
-	EX(swc1 $f13,(SC_FPREGS+104)(a0))
-	EX(swc1 $f14,(SC_FPREGS+112)(a0))
-	EX(swc1 $f15,(SC_FPREGS+120)(a0))
-	EX(swc1 $f16,(SC_FPREGS+128)(a0))
-	EX(swc1 $f17,(SC_FPREGS+136)(a0))
-	EX(swc1 $f18,(SC_FPREGS+144)(a0))
-	EX(swc1 $f19,(SC_FPREGS+152)(a0))
-	EX(swc1 $f20,(SC_FPREGS+160)(a0))
-	EX(swc1 $f21,(SC_FPREGS+168)(a0))
-	EX(swc1 $f22,(SC_FPREGS+176)(a0))
-	EX(swc1 $f23,(SC_FPREGS+184)(a0))
-	EX(swc1 $f24,(SC_FPREGS+192)(a0))
-	EX(swc1 $f25,(SC_FPREGS+200)(a0))
-	EX(swc1 $f26,(SC_FPREGS+208)(a0))
-	EX(swc1 $f27,(SC_FPREGS+216)(a0))
-	EX(swc1 $f28,(SC_FPREGS+224)(a0))
-	EX(swc1 $f29,(SC_FPREGS+232)(a0))
-	EX(swc1 $f30,(SC_FPREGS+240)(a0))
-	EX(swc1 $f31,(SC_FPREGS+248)(a0))
-	EX(sw	t1,(SC_FPC_CSR)(a0))
-	cfc1	t0,$0				# implementation/version
+	cfc1	t1, fcr31
+	EX2(s.d $f0, 0(a0))
+	EX2(s.d $f2, 16(a0))
+	EX2(s.d $f4, 32(a0))
+	EX2(s.d $f6, 48(a0))
+	EX2(s.d $f8, 64(a0))
+	EX2(s.d $f10, 80(a0))
+	EX2(s.d $f12, 96(a0))
+	EX2(s.d $f14, 112(a0))
+	EX2(s.d $f16, 128(a0))
+	EX2(s.d $f18, 144(a0))
+	EX2(s.d $f20, 160(a0))
+	EX2(s.d $f22, 176(a0))
+	EX2(s.d $f24, 192(a0))
+	EX2(s.d $f26, 208(a0))
+	EX2(s.d $f28, 224(a0))
+	EX2(s.d $f30, 240(a0))
 	jr	ra
+	 EX(sw	t1, (a1))
 	.set	pop
-	.set	nomacro
-	 EX(sw	t0,(SC_FPC_EIR)(a0))
-	.set	macro
 	END(_save_fp_context)
 
-/*
- * Restore FPU state:
- *  - fp gp registers
- *  - cp1 status/control register
+/**
+ * _restore_fp_context() - restore FP context to the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
  *
- * We base the decision which registers to restore from the signal stack
- * frame on the current content of c0_status, not on the content of the
- * stack frame which might have been changed by the user.
+ * Restore FP context, including the 32 FP data registers and the FP
+ * control & status register, from signal context to the FPU.
  */
 LEAF(_restore_fp_context)
 	.set	push
 	SET_HARDFLOAT
 	li	v0, 0					# assume success
-	EX(lw t0,(SC_FPC_CSR)(a0))
-	EX(lwc1 $f0,(SC_FPREGS+0)(a0))
-	EX(lwc1 $f1,(SC_FPREGS+8)(a0))
-	EX(lwc1 $f2,(SC_FPREGS+16)(a0))
-	EX(lwc1 $f3,(SC_FPREGS+24)(a0))
-	EX(lwc1 $f4,(SC_FPREGS+32)(a0))
-	EX(lwc1 $f5,(SC_FPREGS+40)(a0))
-	EX(lwc1 $f6,(SC_FPREGS+48)(a0))
-	EX(lwc1 $f7,(SC_FPREGS+56)(a0))
-	EX(lwc1 $f8,(SC_FPREGS+64)(a0))
-	EX(lwc1 $f9,(SC_FPREGS+72)(a0))
-	EX(lwc1 $f10,(SC_FPREGS+80)(a0))
-	EX(lwc1 $f11,(SC_FPREGS+88)(a0))
-	EX(lwc1 $f12,(SC_FPREGS+96)(a0))
-	EX(lwc1 $f13,(SC_FPREGS+104)(a0))
-	EX(lwc1 $f14,(SC_FPREGS+112)(a0))
-	EX(lwc1 $f15,(SC_FPREGS+120)(a0))
-	EX(lwc1 $f16,(SC_FPREGS+128)(a0))
-	EX(lwc1 $f17,(SC_FPREGS+136)(a0))
-	EX(lwc1 $f18,(SC_FPREGS+144)(a0))
-	EX(lwc1 $f19,(SC_FPREGS+152)(a0))
-	EX(lwc1 $f20,(SC_FPREGS+160)(a0))
-	EX(lwc1 $f21,(SC_FPREGS+168)(a0))
-	EX(lwc1 $f22,(SC_FPREGS+176)(a0))
-	EX(lwc1 $f23,(SC_FPREGS+184)(a0))
-	EX(lwc1 $f24,(SC_FPREGS+192)(a0))
-	EX(lwc1 $f25,(SC_FPREGS+200)(a0))
-	EX(lwc1 $f26,(SC_FPREGS+208)(a0))
-	EX(lwc1 $f27,(SC_FPREGS+216)(a0))
-	EX(lwc1 $f28,(SC_FPREGS+224)(a0))
-	EX(lwc1 $f29,(SC_FPREGS+232)(a0))
-	EX(lwc1 $f30,(SC_FPREGS+240)(a0))
-	EX(lwc1 $f31,(SC_FPREGS+248)(a0))
+	EX(lw t0, (a1))
+	EX2(l.d $f0, 0(a0))
+	EX2(l.d $f2, 16(a0))
+	EX2(l.d $f4, 32(a0))
+	EX2(l.d $f6, 48(a0))
+	EX2(l.d $f8, 64(a0))
+	EX2(l.d $f10, 80(a0))
+	EX2(l.d $f12, 96(a0))
+	EX2(l.d $f14, 112(a0))
+	EX2(l.d $f16, 128(a0))
+	EX2(l.d $f18, 144(a0))
+	EX2(l.d $f20, 160(a0))
+	EX2(l.d $f22, 176(a0))
+	EX2(l.d $f24, 192(a0))
+	EX2(l.d $f26, 208(a0))
+	EX2(l.d $f28, 224(a0))
+	EX2(l.d $f30, 240(a0))
 	jr	ra
-	 ctc1	t0,fcr31
+	 ctc1	t0, fcr31
 	.set	pop
 	END(_restore_fp_context)
 	.set	reorder
diff --git a/arch/mips/kernel/r6000_fpu.S b/arch/mips/kernel/r6000_fpu.S
index 4707738..9cc7bfa 100644
--- a/arch/mips/kernel/r6000_fpu.S
+++ b/arch/mips/kernel/r6000_fpu.S
@@ -21,7 +21,14 @@
 	.set	push
 	SET_HARDFLOAT
 
-	/* Save floating point context */
+/**
+ * _save_fp_context() - save FP context from the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
+ *
+ * Save FP context, including the 32 FP data registers and the FP
+ * control & status register, from the FPU to signal context.
+ */
 	LEAF(_save_fp_context)
 	mfc0	t0,CP0_STATUS
 	sll	t0,t0,2
@@ -30,59 +37,59 @@
 
 	cfc1	t1,fcr31
 	/* Store the 16 double precision registers */
-	sdc1	$f0,(SC_FPREGS+0)(a0)
-	sdc1	$f2,(SC_FPREGS+16)(a0)
-	sdc1	$f4,(SC_FPREGS+32)(a0)
-	sdc1	$f6,(SC_FPREGS+48)(a0)
-	sdc1	$f8,(SC_FPREGS+64)(a0)
-	sdc1	$f10,(SC_FPREGS+80)(a0)
-	sdc1	$f12,(SC_FPREGS+96)(a0)
-	sdc1	$f14,(SC_FPREGS+112)(a0)
-	sdc1	$f16,(SC_FPREGS+128)(a0)
-	sdc1	$f18,(SC_FPREGS+144)(a0)
-	sdc1	$f20,(SC_FPREGS+160)(a0)
-	sdc1	$f22,(SC_FPREGS+176)(a0)
-	sdc1	$f24,(SC_FPREGS+192)(a0)
-	sdc1	$f26,(SC_FPREGS+208)(a0)
-	sdc1	$f28,(SC_FPREGS+224)(a0)
-	sdc1	$f30,(SC_FPREGS+240)(a0)
+	sdc1	$f0,0(a0)
+	sdc1	$f2,16(a0)
+	sdc1	$f4,32(a0)
+	sdc1	$f6,48(a0)
+	sdc1	$f8,64(a0)
+	sdc1	$f10,80(a0)
+	sdc1	$f12,96(a0)
+	sdc1	$f14,112(a0)
+	sdc1	$f16,128(a0)
+	sdc1	$f18,144(a0)
+	sdc1	$f20,160(a0)
+	sdc1	$f22,176(a0)
+	sdc1	$f24,192(a0)
+	sdc1	$f26,208(a0)
+	sdc1	$f28,224(a0)
+	sdc1	$f30,240(a0)
 	jr	ra
-	 sw	t0,SC_FPC_CSR(a0)
+	 sw	t0,(a1)
 1:	jr	ra
 	 nop
 	END(_save_fp_context)
 
-/* Restore FPU state:
- *  - fp gp registers
- *  - cp1 status/control register
+/**
+ * _restore_fp_context() - restore FP context to the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
  *
- * We base the decision which registers to restore from the signal stack
- * frame on the current content of c0_status, not on the content of the
- * stack frame which might have been changed by the user.
+ * Restore FP context, including the 32 FP data registers and the FP
+ * control & status register, from signal context to the FPU.
  */
 	LEAF(_restore_fp_context)
 	mfc0	t0,CP0_STATUS
 	sll	t0,t0,2
 
 	bgez	t0,1f
-	 lw	t0,SC_FPC_CSR(a0)
+	 lw	t0,(a1)
 	/* Restore the 16 double precision registers */
-	ldc1	$f0,(SC_FPREGS+0)(a0)
-	ldc1	$f2,(SC_FPREGS+16)(a0)
-	ldc1	$f4,(SC_FPREGS+32)(a0)
-	ldc1	$f6,(SC_FPREGS+48)(a0)
-	ldc1	$f8,(SC_FPREGS+64)(a0)
-	ldc1	$f10,(SC_FPREGS+80)(a0)
-	ldc1	$f12,(SC_FPREGS+96)(a0)
-	ldc1	$f14,(SC_FPREGS+112)(a0)
-	ldc1	$f16,(SC_FPREGS+128)(a0)
-	ldc1	$f18,(SC_FPREGS+144)(a0)
-	ldc1	$f20,(SC_FPREGS+160)(a0)
-	ldc1	$f22,(SC_FPREGS+176)(a0)
-	ldc1	$f24,(SC_FPREGS+192)(a0)
-	ldc1	$f26,(SC_FPREGS+208)(a0)
-	ldc1	$f28,(SC_FPREGS+224)(a0)
-	ldc1	$f30,(SC_FPREGS+240)(a0)
+	ldc1	$f0,0(a0)
+	ldc1	$f2,16(a0)
+	ldc1	$f4,32(a0)
+	ldc1	$f6,48(a0)
+	ldc1	$f8,64(a0)
+	ldc1	$f10,80(a0)
+	ldc1	$f12,96(a0)
+	ldc1	$f14,112(a0)
+	ldc1	$f16,128(a0)
+	ldc1	$f18,144(a0)
+	ldc1	$f20,160(a0)
+	ldc1	$f22,176(a0)
+	ldc1	$f24,192(a0)
+	ldc1	$f26,208(a0)
+	ldc1	$f28,224(a0)
+	ldc1	$f30,240(a0)
 	jr	ra
 	 ctc1	t0,fcr31
 1:	jr	ra
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c
index ca1cc30..1958910 100644
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -200,7 +200,7 @@
 
 #if defined(CONFIG_USE_OF)
 	/* Get any additional entropy passed in device tree */
-	{
+	if (initial_boot_params) {
 		int node, len;
 		u64 *prop;
 
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 0d57909..f66e5ce 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -368,6 +368,19 @@
 		end = PFN_DOWN(boot_mem_map.map[i].addr
 				+ boot_mem_map.map[i].size);
 
+#ifndef CONFIG_HIGHMEM
+		/*
+		 * Skip highmem here so we get an accurate max_low_pfn if low
+		 * memory stops short of high memory.
+		 * If the region overlaps HIGHMEM_START, end is clipped so
+		 * max_pfn excludes the highmem portion.
+		 */
+		if (start >= PFN_DOWN(HIGHMEM_START))
+			continue;
+		if (end > PFN_DOWN(HIGHMEM_START))
+			end = PFN_DOWN(HIGHMEM_START);
+#endif
+
 		if (end > max_low_pfn)
 			max_low_pfn = end;
 		if (start < min_low_pfn)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 1f5fdee..3905003 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -156,7 +156,7 @@
 		print_ip_sym(pc);
 		pc = unwind_stack(task, &sp, pc, &ra);
 	} while (pc);
-	printk("\n");
+	pr_cont("\n");
 }
 
 /*
@@ -174,22 +174,24 @@
 	printk("Stack :");
 	i = 0;
 	while ((unsigned long) sp & (PAGE_SIZE - 1)) {
-		if (i && ((i % (64 / field)) == 0))
-			printk("\n	 ");
+		if (i && ((i % (64 / field)) == 0)) {
+			pr_cont("\n");
+			printk("       ");
+		}
 		if (i > 39) {
-			printk(" ...");
+			pr_cont(" ...");
 			break;
 		}
 
 		if (__get_user(stackdata, sp++)) {
-			printk(" (Bad stack address)");
+			pr_cont(" (Bad stack address)");
 			break;
 		}
 
-		printk(" %0*lx", field, stackdata);
+		pr_cont(" %0*lx", field, stackdata);
 		i++;
 	}
-	printk("\n");
+	pr_cont("\n");
 	show_backtrace(task, regs);
 }
 
@@ -229,18 +231,19 @@
 	long i;
 	unsigned short __user *pc16 = NULL;
 
-	printk("\nCode:");
+	printk("Code:");
 
 	if ((unsigned long)pc & 1)
 		pc16 = (unsigned short __user *)((unsigned long)pc & ~1);
 	for(i = -3 ; i < 6 ; i++) {
 		unsigned int insn;
 		if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) {
-			printk(" (Bad address in epc)\n");
+			pr_cont(" (Bad address in epc)\n");
 			break;
 		}
-		printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
+		pr_cont("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
 	}
+	pr_cont("\n");
 }
 
 static void __show_regs(const struct pt_regs *regs)
@@ -259,15 +262,15 @@
 		if ((i % 4) == 0)
 			printk("$%2d   :", i);
 		if (i == 0)
-			printk(" %0*lx", field, 0UL);
+			pr_cont(" %0*lx", field, 0UL);
 		else if (i == 26 || i == 27)
-			printk(" %*s", field, "");
+			pr_cont(" %*s", field, "");
 		else
-			printk(" %0*lx", field, regs->regs[i]);
+			pr_cont(" %0*lx", field, regs->regs[i]);
 
 		i++;
 		if ((i % 4) == 0)
-			printk("\n");
+			pr_cont("\n");
 	}
 
 #ifdef CONFIG_CPU_HAS_SMARTMIPS
@@ -288,46 +291,46 @@
 
 	if (cpu_has_3kex) {
 		if (regs->cp0_status & ST0_KUO)
-			printk("KUo ");
+			pr_cont("KUo ");
 		if (regs->cp0_status & ST0_IEO)
-			printk("IEo ");
+			pr_cont("IEo ");
 		if (regs->cp0_status & ST0_KUP)
-			printk("KUp ");
+			pr_cont("KUp ");
 		if (regs->cp0_status & ST0_IEP)
-			printk("IEp ");
+			pr_cont("IEp ");
 		if (regs->cp0_status & ST0_KUC)
-			printk("KUc ");
+			pr_cont("KUc ");
 		if (regs->cp0_status & ST0_IEC)
-			printk("IEc ");
+			pr_cont("IEc ");
 	} else if (cpu_has_4kex) {
 		if (regs->cp0_status & ST0_KX)
-			printk("KX ");
+			pr_cont("KX ");
 		if (regs->cp0_status & ST0_SX)
-			printk("SX ");
+			pr_cont("SX ");
 		if (regs->cp0_status & ST0_UX)
-			printk("UX ");
+			pr_cont("UX ");
 		switch (regs->cp0_status & ST0_KSU) {
 		case KSU_USER:
-			printk("USER ");
+			pr_cont("USER ");
 			break;
 		case KSU_SUPERVISOR:
-			printk("SUPERVISOR ");
+			pr_cont("SUPERVISOR ");
 			break;
 		case KSU_KERNEL:
-			printk("KERNEL ");
+			pr_cont("KERNEL ");
 			break;
 		default:
-			printk("BAD_MODE ");
+			pr_cont("BAD_MODE ");
 			break;
 		}
 		if (regs->cp0_status & ST0_ERL)
-			printk("ERL ");
+			pr_cont("ERL ");
 		if (regs->cp0_status & ST0_EXL)
-			printk("EXL ");
+			pr_cont("EXL ");
 		if (regs->cp0_status & ST0_IE)
-			printk("IE ");
+			pr_cont("IE ");
 	}
-	printk("\n");
+	pr_cont("\n");
 
 	exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
 	printk("Cause : %08x (ExcCode %02x)\n", cause, exccode);
@@ -705,6 +708,32 @@
 	exception_exit(prev_state);
 }
 
+/*
+ * Send SIGFPE according to FCSR Cause bits, which must have already
+ * been masked against Enable bits.  This is impotant as Inexact can
+ * happen together with Overflow or Underflow, and `ptrace' can set
+ * any bits.
+ */
+void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
+		     struct task_struct *tsk)
+{
+	struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE };
+
+	if (fcr31 & FPU_CSR_INV_X)
+		si.si_code = FPE_FLTINV;
+	else if (fcr31 & FPU_CSR_DIV_X)
+		si.si_code = FPE_FLTDIV;
+	else if (fcr31 & FPU_CSR_OVF_X)
+		si.si_code = FPE_FLTOVF;
+	else if (fcr31 & FPU_CSR_UDF_X)
+		si.si_code = FPE_FLTUND;
+	else if (fcr31 & FPU_CSR_INE_X)
+		si.si_code = FPE_FLTRES;
+	else
+		si.si_code = __SI_FAULT;
+	force_sig_info(SIGFPE, &si, tsk);
+}
+
 int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
 	struct siginfo si = { 0 };
@@ -715,27 +744,7 @@
 		return 0;
 
 	case SIGFPE:
-		si.si_addr = fault_addr;
-		si.si_signo = sig;
-		/*
-		 * Inexact can happen together with Overflow or Underflow.
-		 * Respect the mask to deliver the correct exception.
-		 */
-		fcr31 &= (fcr31 & FPU_CSR_ALL_E) <<
-			 (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E));
-		if (fcr31 & FPU_CSR_INV_X)
-			si.si_code = FPE_FLTINV;
-		else if (fcr31 & FPU_CSR_DIV_X)
-			si.si_code = FPE_FLTDIV;
-		else if (fcr31 & FPU_CSR_OVF_X)
-			si.si_code = FPE_FLTOVF;
-		else if (fcr31 & FPU_CSR_UDF_X)
-			si.si_code = FPE_FLTUND;
-		else if (fcr31 & FPU_CSR_INE_X)
-			si.si_code = FPE_FLTRES;
-		else
-			si.si_code = __SI_FAULT;
-		force_sig_info(sig, &si, current);
+		force_fcr31_sig(fcr31, fault_addr, current);
 		return 1;
 
 	case SIGBUS:
@@ -799,13 +808,13 @@
 	/* Run the emulator */
 	sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 				       &fault_addr);
-	fcr31 = current->thread.fpu.fcr31;
 
 	/*
-	 * We can't allow the emulated instruction to leave any of
-	 * the cause bits set in $fcr31.
+	 * We can't allow the emulated instruction to leave any
+	 * enabled Cause bits set in $fcr31.
 	 */
-	current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+	fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+	current->thread.fpu.fcr31 &= ~fcr31;
 
 	/* Restore the hardware register state */
 	own_fpu(1);
@@ -831,7 +840,7 @@
 		goto out;
 
 	/* Clear FCSR.Cause before enabling interrupts */
-	write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+	write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31));
 	local_irq_enable();
 
 	die_if_kernel("FP exception in kernel code", regs);
@@ -853,13 +862,13 @@
 		/* Run the emulator */
 		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
 					       &fault_addr);
-		fcr31 = current->thread.fpu.fcr31;
 
 		/*
-		 * We can't allow the emulated instruction to leave any of
-		 * the cause bits set in $fcr31.
+		 * We can't allow the emulated instruction to leave any
+		 * enabled Cause bits set in $fcr31.
 		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+		current->thread.fpu.fcr31 &= ~fcr31;
 
 		/* Restore the hardware register state */
 		own_fpu(1);	/* Using the FPU again.	 */
@@ -1424,13 +1433,13 @@
 
 		sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
 					       &fault_addr);
-		fcr31 = current->thread.fpu.fcr31;
 
 		/*
 		 * We can't allow the emulated instruction to leave
-		 * any of the cause bits set in $fcr31.
+		 * any enabled Cause bits set in $fcr31.
 		 */
-		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+		fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+		current->thread.fpu.fcr31 &= ~fcr31;
 
 		/* Send a signal if required.  */
 		if (!process_fpemu_return(sig, fault_addr, fcr31) && !err)
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 8770f32..aa09374 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -790,15 +790,15 @@
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
 	enum emulation_result er = EMULATE_DONE;
 
-	if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
+	if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
+		kvm_clear_c0_guest_status(cop0, ST0_ERL);
+		vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
+	} else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
 		kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc,
 			  kvm_read_c0_guest_epc(cop0));
 		kvm_clear_c0_guest_status(cop0, ST0_EXL);
 		vcpu->arch.pc = kvm_read_c0_guest_epc(cop0);
 
-	} else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
-		kvm_clear_c0_guest_status(cop0, ST0_ERL);
-		vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
 	} else {
 		kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n",
 			vcpu->arch.pc);
@@ -1528,13 +1528,25 @@
 					    struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er = EMULATE_DO_MMIO;
+	unsigned long curr_pc;
 	u32 op, rt;
 	u32 bytes;
 
 	rt = inst.i_format.rt;
 	op = inst.i_format.opcode;
 
-	vcpu->arch.pending_load_cause = cause;
+	/*
+	 * Find the resume PC now while we have safe and easy access to the
+	 * prior branch instruction, and save it for
+	 * kvm_mips_complete_mmio_load() to restore later.
+	 */
+	curr_pc = vcpu->arch.pc;
+	er = update_pc(vcpu, cause);
+	if (er == EMULATE_FAIL)
+		return er;
+	vcpu->arch.io_pc = vcpu->arch.pc;
+	vcpu->arch.pc = curr_pc;
+
 	vcpu->arch.io_gpr = rt;
 
 	switch (op) {
@@ -2494,9 +2506,8 @@
 		goto done;
 	}
 
-	er = update_pc(vcpu, vcpu->arch.pending_load_cause);
-	if (er == EMULATE_FAIL)
-		return er;
+	/* Restore saved resume PC */
+	vcpu->arch.pc = vcpu->arch.io_pc;
 
 	switch (run->mmio.len) {
 	case 4:
@@ -2518,11 +2529,6 @@
 		break;
 	}
 
-	if (vcpu->arch.pending_load_cause & CAUSEF_BD)
-		kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n",
-			  vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr,
-			  vcpu->mmio_needed);
-
 done:
 	return er;
 }
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index ce96149..06a60b1 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
@@ -425,7 +426,7 @@
 static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
 {
 	struct mips_coproc *cop0 = vcpu->arch.cop0;
-	int cpu = smp_processor_id();
+	int i, cpu = smp_processor_id();
 	unsigned int gasid;
 
 	/*
@@ -441,6 +442,9 @@
 						vcpu);
 			vcpu->arch.guest_user_asid[cpu] =
 				vcpu->arch.guest_user_mm.context.asid[cpu];
+			for_each_possible_cpu(i)
+				if (i != cpu)
+					vcpu->arch.guest_user_asid[cpu] = 0;
 			vcpu->arch.last_user_gasid = gasid;
 		}
 	}
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 03883ba..3b677c8 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -260,13 +260,9 @@
 
 	if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) &
 						asid_version_mask(cpu)) {
-		u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) &
-				KVM_ENTRYHI_ASID;
-
 		kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
 		vcpu->arch.guest_user_asid[cpu] =
 		    vcpu->arch.guest_user_mm.context.asid[cpu];
-		vcpu->arch.last_user_gasid = gasid;
 		newasid++;
 
 		kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c
index 0f80b93..6eb50a7 100644
--- a/arch/mips/lib/dump_tlb.c
+++ b/arch/mips/lib/dump_tlb.c
@@ -135,42 +135,42 @@
 		c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
 		c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
 
-		printk("va=%0*lx asid=%0*lx",
-		       vwidth, (entryhi & ~0x1fffUL),
-		       asidwidth, entryhi & asidmask);
+		pr_cont("va=%0*lx asid=%0*lx",
+			vwidth, (entryhi & ~0x1fffUL),
+			asidwidth, entryhi & asidmask);
 		if (cpu_has_guestid)
-			printk(" gid=%02lx",
-			       (guestctl1 & MIPS_GCTL1_RID)
+			pr_cont(" gid=%02lx",
+				(guestctl1 & MIPS_GCTL1_RID)
 					>> MIPS_GCTL1_RID_SHIFT);
 		/* RI/XI are in awkward places, so mask them off separately */
 		pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
 		if (xpa)
 			pa |= (unsigned long long)readx_c0_entrylo0() << 30;
 		pa = (pa << 6) & PAGE_MASK;
-		printk("\n\t[");
+		pr_cont("\n\t[");
 		if (cpu_has_rixi)
-			printk("ri=%d xi=%d ",
-			       (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
-			       (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0);
-		printk("pa=%0*llx c=%d d=%d v=%d g=%d] [",
-		       pwidth, pa, c0,
-		       (entrylo0 & ENTRYLO_D) ? 1 : 0,
-		       (entrylo0 & ENTRYLO_V) ? 1 : 0,
-		       (entrylo0 & ENTRYLO_G) ? 1 : 0);
+			pr_cont("ri=%d xi=%d ",
+				(entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
+				(entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0);
+		pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d] [",
+			pwidth, pa, c0,
+			(entrylo0 & ENTRYLO_D) ? 1 : 0,
+			(entrylo0 & ENTRYLO_V) ? 1 : 0,
+			(entrylo0 & ENTRYLO_G) ? 1 : 0);
 		/* RI/XI are in awkward places, so mask them off separately */
 		pa = entrylo1 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
 		if (xpa)
 			pa |= (unsigned long long)readx_c0_entrylo1() << 30;
 		pa = (pa << 6) & PAGE_MASK;
 		if (cpu_has_rixi)
-			printk("ri=%d xi=%d ",
-			       (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0,
-			       (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0);
-		printk("pa=%0*llx c=%d d=%d v=%d g=%d]\n",
-		       pwidth, pa, c1,
-		       (entrylo1 & ENTRYLO_D) ? 1 : 0,
-		       (entrylo1 & ENTRYLO_V) ? 1 : 0,
-		       (entrylo1 & ENTRYLO_G) ? 1 : 0);
+			pr_cont("ri=%d xi=%d ",
+				(entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0,
+				(entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0);
+		pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d]\n",
+			pwidth, pa, c1,
+			(entrylo1 & ENTRYLO_D) ? 1 : 0,
+			(entrylo1 & ENTRYLO_V) ? 1 : 0,
+			(entrylo1 & ENTRYLO_G) ? 1 : 0);
 	}
 	printk("\n");
 
diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c
index 744f4a7..85b4086 100644
--- a/arch/mips/lib/r3k_dump_tlb.c
+++ b/arch/mips/lib/r3k_dump_tlb.c
@@ -53,15 +53,15 @@
 			 */
 			printk("Index: %2d ", i);
 
-			printk("va=%08lx asid=%08lx"
-			       "  [pa=%06lx n=%d d=%d v=%d g=%d]",
-			       entryhi & PAGE_MASK,
-			       entryhi & asid_mask,
-			       entrylo0 & PAGE_MASK,
-			       (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
-			       (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
-			       (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0,
-			       (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0);
+			pr_cont("va=%08lx asid=%08lx"
+				"  [pa=%06lx n=%d d=%d v=%d g=%d]",
+				entryhi & PAGE_MASK,
+				entryhi & asid_mask,
+				entrylo0 & PAGE_MASK,
+				(entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
+				(entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
+				(entrylo0 & R3K_ENTRYLO_V) ? 1 : 0,
+				(entrylo0 & R3K_ENTRYLO_G) ? 1 : 0);
 		}
 	}
 	printk("\n");
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 42d124f..d8c3c15 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -287,7 +287,7 @@
 	pages += nr;
 
 	ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
-				      write, 0, pages);
+				      pages, write ? FOLL_WRITE : 0);
 
 	/* Have to be a bit careful with return values */
 	if (nr > 0) {
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index d9563dd..746bf5c 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -324,6 +324,7 @@
 		ret = nios2_clocksource_init(timer);
 		break;
 	default:
+		ret = 0;
 		break;
 	}
 
diff --git a/arch/openrisc/include/asm/cache.h b/arch/openrisc/include/asm/cache.h
index 4ce7a01..5f55da9 100644
--- a/arch/openrisc/include/asm/cache.h
+++ b/arch/openrisc/include/asm/cache.h
@@ -23,6 +23,8 @@
  * they shouldn't be hard-coded!
  */
 
+#define __ro_after_init __read_mostly
+
 #define L1_CACHE_BYTES 16
 #define L1_CACHE_SHIFT 4
 
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index a9b9407..6b0741e 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -368,7 +368,9 @@
 
 #define __IGNORE_select		/* newselect */
 #define __IGNORE_fadvise64	/* fadvise64_64 */
-
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
 
 #define LINUX_GATEWAY_ADDR      0x100
 
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index f815066..700e2d2 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -873,11 +873,11 @@
 
 	if (dev->num_addrs) {
 		int k;
-		printk(", additional addresses: ");
+		pr_cont(", additional addresses: ");
 		for (k = 0; k < dev->num_addrs; k++)
-			printk("0x%lx ", dev->addr[k]);
+			pr_cont("0x%lx ", dev->addr[k]);
 	}
-	printk("\n");
+	pr_cont("\n");
 }
 
 /**
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index d03422e..23de307 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -100,14 +100,12 @@
 	.endr
 
 /* This address must remain fixed at 0x100 for glibc's syscalls to work */
-	.align 256
+	.align LINUX_GATEWAY_ADDR
 linux_gateway_entry:
 	gate	.+8, %r0			/* become privileged */
 	mtsp	%r0,%sr4			/* get kernel space into sr4 */
 	mtsp	%r0,%sr5			/* get kernel space into sr5 */
 	mtsp	%r0,%sr6			/* get kernel space into sr6 */
-	mfsp    %sr7,%r1                        /* save user sr7 */
-	mtsp    %r1,%sr3                        /* and store it in sr3 */
 
 #ifdef CONFIG_64BIT
 	/* for now we can *always* set the W bit on entry to the syscall
@@ -133,6 +131,14 @@
 	depdi	0, 31, 32, %r21
 1:	
 #endif
+
+	/* We use a rsm/ssm pair to prevent sr3 from being clobbered
+	 * by external interrupts.
+	 */
+	mfsp    %sr7,%r1                        /* save user sr7 */
+	rsm	PSW_SM_I, %r0			/* disable interrupts */
+	mtsp    %r1,%sr3                        /* and store it in sr3 */
+
 	mfctl   %cr30,%r1
 	xor     %r1,%r30,%r30                   /* ye olde xor trick */
 	xor     %r1,%r30,%r1
@@ -147,6 +153,7 @@
 	 */
 
 	mtsp	%r0,%sr7			/* get kernel space into sr7 */
+	ssm	PSW_SM_I, %r0			/* enable interrupts */
 	STREGM	%r1,FRAME_SIZE(%r30)		/* save r1 (usp) here for now */
 	mfctl	%cr30,%r1			/* get task ptr in %r1 */
 	LDREG	TI_TASK(%r1),%r1
@@ -474,11 +481,6 @@
 	comiclr,>>	__NR_lws_entries, %r20, %r0
 	b,n	lws_exit_nosys
 
-	/* WARNING: Trashing sr2 and sr3 */
-	mfsp	%sr7,%r1			/* get userspace into sr3 */
-	mtsp	%r1,%sr3
-	mtsp	%r0,%sr2			/* get kernel space into sr2 */
-
 	/* Load table start */
 	ldil	L%lws_table, %r1
 	ldo	R%lws_table(%r1), %r28	/* Scratch use of r28 */
@@ -627,9 +629,9 @@
 	stw	%r1, 4(%sr2,%r20)
 #endif
 	/* The load and store could fail */
-1:	ldw,ma	0(%sr3,%r26), %r28
+1:	ldw,ma	0(%r26), %r28
 	sub,<>	%r28, %r25, %r0
-2:	stw,ma	%r24, 0(%sr3,%r26)
+2:	stw,ma	%r24, 0(%r26)
 	/* Free lock */
 	stw,ma	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
@@ -706,9 +708,9 @@
 	nop
 
 	/* 8bit load */
-4:	ldb	0(%sr3,%r25), %r25
+4:	ldb	0(%r25), %r25
 	b	cas2_lock_start
-5:	ldb	0(%sr3,%r24), %r24
+5:	ldb	0(%r24), %r24
 	nop
 	nop
 	nop
@@ -716,9 +718,9 @@
 	nop
 
 	/* 16bit load */
-6:	ldh	0(%sr3,%r25), %r25
+6:	ldh	0(%r25), %r25
 	b	cas2_lock_start
-7:	ldh	0(%sr3,%r24), %r24
+7:	ldh	0(%r24), %r24
 	nop
 	nop
 	nop
@@ -726,9 +728,9 @@
 	nop
 
 	/* 32bit load */
-8:	ldw	0(%sr3,%r25), %r25
+8:	ldw	0(%r25), %r25
 	b	cas2_lock_start
-9:	ldw	0(%sr3,%r24), %r24
+9:	ldw	0(%r24), %r24
 	nop
 	nop
 	nop
@@ -737,14 +739,14 @@
 
 	/* 64bit load */
 #ifdef CONFIG_64BIT
-10:	ldd	0(%sr3,%r25), %r25
-11:	ldd	0(%sr3,%r24), %r24
+10:	ldd	0(%r25), %r25
+11:	ldd	0(%r24), %r24
 #else
 	/* Load new value into r22/r23 - high/low */
-10:	ldw	0(%sr3,%r25), %r22
-11:	ldw	4(%sr3,%r25), %r23
+10:	ldw	0(%r25), %r22
+11:	ldw	4(%r25), %r23
 	/* Load new value into fr4 for atomic store later */
-12:	flddx	0(%sr3,%r24), %fr4
+12:	flddx	0(%r24), %fr4
 #endif
 
 cas2_lock_start:
@@ -794,30 +796,30 @@
 	ldo	1(%r0),%r28
 
 	/* 8bit CAS */
-13:	ldb,ma	0(%sr3,%r26), %r29
+13:	ldb,ma	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-14:	stb,ma	%r24, 0(%sr3,%r26)
+14:	stb,ma	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
 	nop
 
 	/* 16bit CAS */
-15:	ldh,ma	0(%sr3,%r26), %r29
+15:	ldh,ma	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-16:	sth,ma	%r24, 0(%sr3,%r26)
+16:	sth,ma	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
 	nop
 
 	/* 32bit CAS */
-17:	ldw,ma	0(%sr3,%r26), %r29
+17:	ldw,ma	0(%r26), %r29
 	sub,=	%r29, %r25, %r0
 	b,n	cas2_end
-18:	stw,ma	%r24, 0(%sr3,%r26)
+18:	stw,ma	%r24, 0(%r26)
 	b	cas2_end
 	copy	%r0, %r28
 	nop
@@ -825,22 +827,22 @@
 
 	/* 64bit CAS */
 #ifdef CONFIG_64BIT
-19:	ldd,ma	0(%sr3,%r26), %r29
+19:	ldd,ma	0(%r26), %r29
 	sub,*=	%r29, %r25, %r0
 	b,n	cas2_end
-20:	std,ma	%r24, 0(%sr3,%r26)
+20:	std,ma	%r24, 0(%r26)
 	copy	%r0, %r28
 #else
 	/* Compare first word */
-19:	ldw,ma	0(%sr3,%r26), %r29
+19:	ldw,ma	0(%r26), %r29
 	sub,=	%r29, %r22, %r0
 	b,n	cas2_end
 	/* Compare second word */
-20:	ldw,ma	4(%sr3,%r26), %r29
+20:	ldw,ma	4(%r26), %r29
 	sub,=	%r29, %r23, %r0
 	b,n	cas2_end
 	/* Perform the store */
-21:	fstdx	%fr4, 0(%sr3,%r26)
+21:	fstdx	%fr4, 0(%r26)
 	copy	%r0, %r28
 #endif
 
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index f7a184b..57d42d1 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -32,9 +32,16 @@
 	void *addr = 0;
 	struct elf_info ei;
 	long len;
+	int uncompressed_image = 0;
 
-	partial_decompress(vmlinuz_addr, vmlinuz_size,
+	len = partial_decompress(vmlinuz_addr, vmlinuz_size,
 		elfheader, sizeof(elfheader), 0);
+	/* assume uncompressed data if -1 is returned */
+	if (len == -1) {
+		uncompressed_image = 1;
+		memcpy(elfheader, vmlinuz_addr, sizeof(elfheader));
+		printf("No valid compressed data found, assume uncompressed data\n\r");
+	}
 
 	if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
 		fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
@@ -67,6 +74,13 @@
 					"device tree\n\r");
 	}
 
+	if (uncompressed_image) {
+		memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize);
+		printf("0x%lx bytes of uncompressed data copied\n\r",
+		       ei.loadsize);
+		goto out;
+	}
+
 	/* Finally, decompress the kernel */
 	printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
 	       vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
@@ -82,7 +96,7 @@
 			 len, ei.loadsize);
 
 	printf("Done! Decompressed 0x%lx bytes\n\r", len);
-
+out:
 	flush_cache(addr, ei.loadsize);
 
 	return (struct addr_range){addr, ei.memsize};
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index ee655ed..1e8fceb 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,10 +53,8 @@
 	return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }
 
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-                                     unsigned short len,
-                                     unsigned short proto,
-                                     __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
 {
 #ifdef __powerpc64__
 	unsigned long s = (__force u32)sum;
@@ -83,10 +81,8 @@
  * computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented
  */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-					unsigned short len,
-					unsigned short proto,
-					__wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
 {
 	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 01b8a13..3919332 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -26,7 +26,7 @@
 	std	r0,0(r1);					\
 	ptesync;						\
 	ld	r0,0(r1);					\
-1:	cmp	cr0,r0,r0;					\
+1:	cmpd	cr0,r0,r0;					\
 	bne	1b;						\
 	IDLE_INST;						\
 	b	.
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 2e4e7d8..9a3eee6 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -91,7 +91,11 @@
  */
 #define LOAD_HANDLER(reg, label)					\
 	ld	reg,PACAKBASE(r13);	/* get high part of &label */	\
-	ori	reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l;
+	ori	reg,reg,FIXED_SYMBOL_ABS_ADDR(label);
+
+#define __LOAD_HANDLER(reg, label)					\
+	ld	reg,PACAKBASE(r13);					\
+	ori	reg,reg,(ABS_ADDR(label))@l;
 
 /* Exception register prefixes */
 #define EXC_HV	H
@@ -154,14 +158,17 @@
 	std	ra,offset(r13);						\
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define EXCEPTION_PROLOG_0(area)					\
-	GET_PACA(r13);							\
+#define EXCEPTION_PROLOG_0_PACA(area)					\
 	std	r9,area+EX_R9(r13);	/* save r9 */			\
 	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);			\
 	HMT_MEDIUM;							\
 	std	r10,area+EX_R10(r13);	/* save r10 - r12 */		\
 	OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
+#define EXCEPTION_PROLOG_0(area)					\
+	GET_PACA(r13);							\
+	EXCEPTION_PROLOG_0_PACA(area)
+
 #define __EXCEPTION_PROLOG_1(area, extra, vec)				\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);		\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);		\
@@ -192,6 +199,12 @@
 	EXCEPTION_PROLOG_1(area, extra, vec);				\
 	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
+/* Have the PACA in r13 already */
+#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec)	\
+	EXCEPTION_PROLOG_0_PACA(area);					\
+	EXCEPTION_PROLOG_1(area, extra, vec);				\
+	EXCEPTION_PROLOG_PSERIES_1(label, h);
+
 #define __KVMTEST(h, n)							\
 	lbz	r10,HSTATE_IN_GUEST(r13);				\
 	cmpwi	r10,0;							\
@@ -208,6 +221,18 @@
 #define kvmppc_interrupt kvmppc_interrupt_pr
 #endif
 
+#ifdef CONFIG_RELOCATABLE
+#define BRANCH_TO_COMMON(reg, label)					\
+	__LOAD_HANDLER(reg, label);					\
+	mtctr	reg;							\
+	bctr
+
+#else
+#define BRANCH_TO_COMMON(reg, label)					\
+	b	label
+
+#endif
+
 #define __KVM_HANDLER_PROLOG(area, n)					\
 	BEGIN_FTR_SECTION_NESTED(947)					\
 	ld	r10,area+EX_CFAR(r13);					\
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831..c56ea8c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -460,5 +460,6 @@
 
 #define PPC_SLBIA(IH)	stringify_in_c(.long PPC_INST_SLBIA | \
 				       ((IH & 0x7) << 21))
+#define PPC_INVALIDATE_ERAT	PPC_SLBIA(7)
 
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index f6f68f7..99e1397 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -52,11 +52,23 @@
 	return cpumask_subset(mm_cpumask(mm),
 			      topology_sibling_cpumask(smp_processor_id()));
 }
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+	return cpumask_equal(mm_cpumask(mm),
+			      cpumask_of(smp_processor_id()));
+}
+
 #else
 static inline int mm_is_core_local(struct mm_struct *mm)
 {
 	return 1;
 }
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+	return 1;
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index cf12c58..e8cdfec 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -16,6 +16,10 @@
 
 #define __NR__exit __NR_exit
 
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
+
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f129408..1ba82ea 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -95,19 +95,40 @@
 /* No virt vectors corresponding with 0x0..0x100 */
 EXC_VIRT_NONE(0x4000, 0x4100)
 
+
+#ifdef CONFIG_PPC_P7_NAP
+	/*
+	 * If running native on arch 2.06 or later, check if we are waking up
+	 * from nap/sleep/winkle, and branch to idle handler.
+	 */
+#define IDLETEST(n)							\
+	BEGIN_FTR_SECTION ;						\
+	mfspr	r10,SPRN_SRR1 ;						\
+	rlwinm.	r10,r10,47-31,30,31 ;					\
+	beq-	1f ;							\
+	cmpwi	cr3,r10,2 ;						\
+	BRANCH_TO_COMMON(r10, system_reset_idle_common) ;		\
+1:									\
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#else
+#define IDLETEST NOTEST
+#endif
+
 EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
 	SET_SCRATCH0(r13)
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-	/* Running native on arch 2.06 or later, check if we are
-	 * waking up from nap/sleep/winkle.
-	 */
-	mfspr	r13,SPRN_SRR1
-	rlwinm.	r13,r13,47-31,30,31
-	beq	9f
-
-	cmpwi	cr3,r13,2
 	GET_PACA(r13)
+	clrrdi	r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
+	EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+				 IDLETEST, 0x100)
+
+EXC_REAL_END(system_reset, 0x100, 0x200)
+EXC_VIRT_NONE(0x4100, 0x4200)
+
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(system_reset_idle_common)
+BEGIN_FTR_SECTION
+	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	bl	pnv_restore_hyp_resource
 
 	li	r0,PNV_THREAD_RUNNING
@@ -130,14 +151,8 @@
 	blt	cr3,2f
 	b	pnv_wakeup_loss
 2:	b	pnv_wakeup_noloss
+#endif
 
-9:
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
-				 NOTEST, 0x100)
-EXC_REAL_END(system_reset, 0x100, 0x200)
-EXC_VIRT_NONE(0x4100, 0x4200)
 EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
 
 #ifdef CONFIG_PPC_PSERIES
@@ -159,7 +174,7 @@
 	SET_SCRATCH0(r13)		/* save r13 */
 	/*
 	 * Running native on arch 2.06 or later, we may wakeup from winkle
-	 * inside machine check. If yes, then last bit of HSPGR0 would be set
+	 * inside machine check. If yes, then last bit of HSPRG0 would be set
 	 * to 1. Hence clear it unconditionally.
 	 */
 	GET_PACA(r13)
@@ -378,7 +393,7 @@
 	/*
 	 * Go back to winkle. Please note that this thread was woken up in
 	 * machine check from winkle and have not restored the per-subcore
-	 * state. Hence before going back to winkle, set last bit of HSPGR0
+	 * state. Hence before going back to winkle, set last bit of HSPRG0
 	 * to 1. This will make sure that if this thread gets woken up
 	 * again at reset vector 0x100 then it will get chance to restore
 	 * the subcore state.
@@ -817,10 +832,8 @@
 TRAMP_KVM(PACA_EXGEN, 0xb00)
 EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 
-
-#define LOAD_SYSCALL_HANDLER(reg)				\
-	ld	reg,PACAKBASE(r13);				\
-	ori	reg,reg,(ABS_ADDR(system_call_common))@l;
+#define LOAD_SYSCALL_HANDLER(reg)					\
+	__LOAD_HANDLER(reg, system_call_common)
 
 /* Syscall routine is used twice, in reloc-off and reloc-on paths */
 #define SYSCALL_PSERIES_1 					\
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 9781c69..03d089b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -275,7 +275,7 @@
 	if (!stepped) {
 		WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
 			"0x%lx will be disabled.", info->address);
-		perf_event_disable(bp);
+		perf_event_disable_inatomic(bp);
 		goto out;
 	}
 	/*
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index bd739fe..72dac0b 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -90,6 +90,7 @@
  * Threads will spin in HMT_LOW until the lock bit is cleared.
  * r14 - pointer to core_idle_state
  * r15 - used to load contents of core_idle_state
+ * r9  - used as a temporary variable
  */
 
 core_idle_lock_held:
@@ -99,6 +100,8 @@
 	bne	3b
 	HMT_MEDIUM
 	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_held
 	blr
 
 /*
@@ -163,12 +166,6 @@
 	std	r9,_MSR(r1)
 	std	r1,PACAR1(r13)
 
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* Tell KVM we're entering idle */
-	li	r4,KVM_HWTHREAD_IN_IDLE
-	stb	r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
-
 	/*
 	 * Go to real mode to do the nap, as required by the architecture.
 	 * Also, we need to be in real mode before setting hwthread_state,
@@ -185,6 +182,26 @@
 
 	.globl pnv_enter_arch207_idle_mode
 pnv_enter_arch207_idle_mode:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* Tell KVM we're entering idle */
+	li	r4,KVM_HWTHREAD_IN_IDLE
+	/******************************************************/
+	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
+	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
+	/* MUST occur in real mode, i.e. with the MMU off,    */
+	/* and the MMU must stay off until we clear this flag */
+	/* and test HSTATE_HWTHREAD_REQ(r13) in the system    */
+	/* reset interrupt vector in exceptions-64s.S.        */
+	/* The reason is that another thread can switch the   */
+	/* MMU to a guest context whenever this flag is set   */
+	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
+	/* that would potentially cause this thread to start  */
+	/* executing instructions from guest memory in        */
+	/* hypervisor mode, leading to a host crash or data   */
+	/* corruption, or worse.                              */
+	/******************************************************/
+	stb	r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
 	stb	r3,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi	cr3,r3,PNV_THREAD_SLEEP
 	bge	cr3,2f
@@ -250,6 +267,12 @@
  * r3 - requested stop state
  */
 power_enter_stop:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* Tell KVM we're entering idle */
+	li	r4,KVM_HWTHREAD_IN_IDLE
+	/* DO THIS IN REAL MODE!  See comment above. */
+	stb	r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
 /*
  * Check if the requested state is a deep idle state.
  */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9e7c10f..49a680d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1012,7 +1012,7 @@
 	/* Ensure that restore_math() will restore */
 	if (msr_diff & MSR_FP)
 		current->thread.load_fp = 1;
-#ifdef CONFIG_ALIVEC
+#ifdef CONFIG_ALTIVEC
 	if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
 		current->thread.load_vec = 1;
 #endif
@@ -1215,7 +1215,7 @@
 		int instr;
 
 		if (!(i % 8))
-			printk("\n");
+			pr_cont("\n");
 
 #if !defined(CONFIG_BOOKE)
 		/* If executing with the IMMU off, adjust pc rather
@@ -1227,18 +1227,18 @@
 
 		if (!__kernel_text_address(pc) ||
 		     probe_kernel_address((unsigned int __user *)pc, instr)) {
-			printk(KERN_CONT "XXXXXXXX ");
+			pr_cont("XXXXXXXX ");
 		} else {
 			if (regs->nip == pc)
-				printk(KERN_CONT "<%08x> ", instr);
+				pr_cont("<%08x> ", instr);
 			else
-				printk(KERN_CONT "%08x ", instr);
+				pr_cont("%08x ", instr);
 		}
 
 		pc += sizeof(int);
 	}
 
-	printk("\n");
+	pr_cont("\n");
 }
 
 struct regbit {
@@ -1282,7 +1282,7 @@
 
 	for (; bits->bit; ++bits)
 		if (val & bits->bit) {
-			printk("%s%s", s, bits->name);
+			pr_cont("%s%s", s, bits->name);
 			s = sep;
 		}
 }
@@ -1305,9 +1305,9 @@
  *   T: Transactional	(bit 34)
  */
 	if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
-		printk(",TM[");
+		pr_cont(",TM[");
 		print_bits(val, msr_tm_bits, "");
-		printk("]");
+		pr_cont("]");
 	}
 }
 #else
@@ -1316,10 +1316,10 @@
 
 static void print_msr_bits(unsigned long val)
 {
-	printk("<");
+	pr_cont("<");
 	print_bits(val, msr_bits, ",");
 	print_tm_bits(val);
-	printk(">");
+	pr_cont(">");
 }
 
 #ifdef CONFIG_PPC64
@@ -1347,29 +1347,29 @@
 	printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
 	trap = TRAP(regs);
 	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
-		printk("CFAR: "REG" ", regs->orig_gpr3);
+		pr_cont("CFAR: "REG" ", regs->orig_gpr3);
 	if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-		printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+		pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
 #else
-		printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+		pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 #endif
 #ifdef CONFIG_PPC64
-	printk("SOFTE: %ld ", regs->softe);
+	pr_cont("SOFTE: %ld ", regs->softe);
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (MSR_TM_ACTIVE(regs->msr))
-		printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+		pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
 #endif
 
 	for (i = 0;  i < 32;  i++) {
 		if ((i % REGS_PER_LINE) == 0)
-			printk("\nGPR%02d: ", i);
-		printk(REG " ", regs->gpr[i]);
+			pr_cont("\nGPR%02d: ", i);
+		pr_cont(REG " ", regs->gpr[i]);
 		if (i == LAST_VOLATILE && !FULL_REGS(regs))
 			break;
 	}
-	printk("\n");
+	pr_cont("\n");
 #ifdef CONFIG_KALLSYMS
 	/*
 	 * Lookup NIP late so we have the best change of getting the
@@ -1900,14 +1900,14 @@
 			printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 			if ((ip == rth) && curr_frame >= 0) {
-				printk(" (%pS)",
+				pr_cont(" (%pS)",
 				       (void *)current->ret_stack[curr_frame].ret);
 				curr_frame--;
 			}
 #endif
 			if (firstframe)
-				printk(" (unreliable)");
-			printk("\n");
+				pr_cont(" (unreliable)");
+			pr_cont("\n");
 		}
 		firstframe = 0;
 
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index f52b7db3..010b7b3 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -74,7 +74,7 @@
 			break;
 
 		copied = access_process_vm(child, (u64)addrOthers, &tmp,
-				sizeof(tmp), 0);
+				sizeof(tmp), FOLL_FORCE);
 		if (copied != sizeof(tmp))
 			break;
 		ret = put_user(tmp, (u32 __user *)data);
@@ -179,7 +179,8 @@
 			break;
 		ret = 0;
 		if (access_process_vm(child, (u64)addrOthers, &tmp,
-					sizeof(tmp), 1) == sizeof(tmp))
+					sizeof(tmp),
+					FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
 			break;
 		ret = -EIO;
 		break;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7ac8e6e..8d586cf 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -226,17 +226,25 @@
 		if (firmware_has_feature(FW_FEATURE_OPAL))
 			opal_configure_cores();
 
-		/* Enable AIL if supported, and we are in hypervisor mode */
-		if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
-		    early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
-			unsigned long lpcr = mfspr(SPRN_LPCR);
-			mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
-		}
+		/* AIL on native is done in cpu_ready_for_interrupts() */
 	}
 }
 
 static void cpu_ready_for_interrupts(void)
 {
+	/*
+	 * Enable AIL if supported, and we are in hypervisor mode. This
+	 * is called once for every processor.
+	 *
+	 * If we are not in hypervisor mode the job is done once for
+	 * the whole partition in configure_exceptions().
+	 */
+	if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+	    early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		unsigned long lpcr = mfspr(SPRN_LPCR);
+		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+	}
+
 	/* Set IR and DR in PACA MSR */
 	get_paca()->kernel_msr = MSR_KERNEL;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 82ff5de..a0ea63a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -23,6 +23,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/pnv-pci.h>
 #include <asm/opal.h>
+#include <asm/smp.h>
 
 #include "book3s_xics.h"
 
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index bb03542..362954f 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -106,6 +106,8 @@
 	switch (REGION_ID(ea)) {
 	case USER_REGION_ID:
 		pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+		if (mm == NULL)
+			return 1;
 		psize = get_slice_psize(mm, ea);
 		ssize = user_segment_size(ea);
 		vsid = get_vsid(mm->context.id, ea, ssize);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 44d3c3a..5503078 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1029,6 +1029,10 @@
 {
 	/* Initialize hash table for that CPU */
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			update_hid_for_hash();
+
 		if (!cpu_has_feature(CPU_FTR_ARCH_300))
 			mtspr(SPRN_SDR1, _SDR1);
 		else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 75b9cd6..a51c188 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -845,7 +845,7 @@
 		return;
 
 	for_each_online_node(node) {
-		printk(KERN_DEBUG "Node %d CPUs:", node);
+		pr_info("Node %d CPUs:", node);
 
 		count = 0;
 		/*
@@ -856,52 +856,18 @@
 			if (cpumask_test_cpu(cpu,
 					node_to_cpumask_map[node])) {
 				if (count == 0)
-					printk(" %u", cpu);
+					pr_cont(" %u", cpu);
 				++count;
 			} else {
 				if (count > 1)
-					printk("-%u", cpu - 1);
+					pr_cont("-%u", cpu - 1);
 				count = 0;
 			}
 		}
 
 		if (count > 1)
-			printk("-%u", nr_cpu_ids - 1);
-		printk("\n");
-	}
-}
-
-static void __init dump_numa_memory_topology(void)
-{
-	unsigned int node;
-	unsigned int count;
-
-	if (min_common_depth == -1 || !numa_enabled)
-		return;
-
-	for_each_online_node(node) {
-		unsigned long i;
-
-		printk(KERN_DEBUG "Node %d Memory:", node);
-
-		count = 0;
-
-		for (i = 0; i < memblock_end_of_DRAM();
-		     i += (1 << SECTION_SIZE_BITS)) {
-			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
-				if (count == 0)
-					printk(" 0x%lx", i);
-				++count;
-			} else {
-				if (count > 0)
-					printk("-0x%lx", i);
-				count = 0;
-			}
-		}
-
-		if (count > 0)
-			printk("-0x%lx", i);
-		printk("\n");
+			pr_cont("-%u", nr_cpu_ids - 1);
+		pr_cont("\n");
 	}
 }
 
@@ -947,8 +913,6 @@
 
 	if (parse_numa_properties())
 		setup_nonnuma();
-	else
-		dump_numa_memory_topology();
 
 	memblock_dump_all();
 
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ed7bddc..688b545 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -388,6 +388,10 @@
 	 * update partition table control register and UPRT
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			update_hid_for_radix();
+
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 0e49ec5..3493cf4 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -50,6 +50,8 @@
 	for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
 		__tlbiel_pid(pid, set, ric);
 	}
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+		asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 	return;
 }
 
@@ -83,6 +85,8 @@
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("ptesync": : :"memory");
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+		asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 }
 
 static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -175,7 +179,7 @@
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto no_context;
 
-	if (!mm_is_core_local(mm)) {
+	if (!mm_is_thread_local(mm)) {
 		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
@@ -201,7 +205,7 @@
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto no_context;
 
-	if (!mm_is_core_local(mm)) {
+	if (!mm_is_thread_local(mm)) {
 		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
@@ -226,7 +230,7 @@
 	pid = mm ? mm->context.id : 0;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto bail;
-	if (!mm_is_core_local(mm)) {
+	if (!mm_is_thread_local(mm)) {
 		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
@@ -321,7 +325,7 @@
 {
 	unsigned long pid;
 	unsigned long addr;
-	int local = mm_is_core_local(mm);
+	int local = mm_is_thread_local(mm);
 	unsigned long ap = mmu_get_ap(psize);
 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 	unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 28f03ca..794bebb 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -363,11 +363,11 @@
 static int diag224_get_name_table(void)
 {
 	/* memory must be below 2GB */
-	diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
 	if (!diag224_cpu_names)
 		return -ENOMEM;
 	if (diag224(diag224_cpu_names)) {
-		kfree(diag224_cpu_names);
+		free_page((unsigned long) diag224_cpu_names);
 		return -EOPNOTSUPP;
 	}
 	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
@@ -376,7 +376,7 @@
 
 static void diag224_delete_name_table(void)
 {
-	kfree(diag224_cpu_names);
+	free_page((unsigned long) diag224_cpu_names);
 }
 
 static int diag224_idx2name(int index, char *name)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 64053d9..836c562 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -12,9 +12,7 @@
 
 #ifndef __ASSEMBLY__
 
-unsigned long return_address(int depth);
-
-#define ftrace_return_address(n) return_address(n)
+#define ftrace_return_address(n) __builtin_return_address(n)
 
 void _mcount(void);
 void ftrace_caller(void);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0332317..602af69 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -192,7 +192,7 @@
 struct mm_struct;
 struct seq_file;
 
-typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
 void dump_trace(dump_trace_func_t func, void *data,
 		struct task_struct *task, unsigned long sp);
 
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 02613ba..3066031 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -9,6 +9,9 @@
 #include <uapi/asm/unistd.h>
 
 #define __IGNORE_time
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 43446fa..c74c592 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -2014,12 +2014,12 @@
 			*ptr++ = '\t';
 		ptr += print_insn(ptr, code + start, addr);
 		start += opsize;
-		printk("%s", buffer);
+		pr_cont("%s", buffer);
 		ptr = buffer;
 		ptr += sprintf(ptr, "\n          ");
 		hops++;
 	}
-	printk("\n");
+	pr_cont("\n");
 }
 
 void print_fn_code(unsigned char *code, unsigned long len)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 6693383..55d4fe1 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,10 +38,10 @@
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
+		if (func(data, sf->gprs[8], 0))
+			return sp;
 		/* Follow the backchain. */
 		while (1) {
-			if (func(data, sf->gprs[8]))
-				return sp;
 			low = sp;
 			sp = sf->back_chain;
 			if (!sp)
@@ -49,6 +49,8 @@
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
+			if (func(data, sf->gprs[8], 1))
+				return sp;
 		}
 		/* Zero backchain detected, check for interrupt frame. */
 		sp = (unsigned long) (sf + 1);
@@ -56,7 +58,7 @@
 			return sp;
 		regs = (struct pt_regs *) sp;
 		if (!user_mode(regs)) {
-			if (func(data, regs->psw.addr))
+			if (func(data, regs->psw.addr, 1))
 				return sp;
 		}
 		low = sp;
@@ -85,33 +87,12 @@
 }
 EXPORT_SYMBOL_GPL(dump_trace);
 
-struct return_address_data {
-	unsigned long address;
-	int depth;
-};
-
-static int __return_address(void *data, unsigned long address)
+static int show_address(void *data, unsigned long address, int reliable)
 {
-	struct return_address_data *rd = data;
-
-	if (rd->depth--)
-		return 0;
-	rd->address = address;
-	return 1;
-}
-
-unsigned long return_address(int depth)
-{
-	struct return_address_data rd = { .depth = depth + 2 };
-
-	dump_trace(__return_address, &rd, NULL, current_stack_pointer());
-	return rd.address;
-}
-EXPORT_SYMBOL_GPL(return_address);
-
-static int show_address(void *data, unsigned long address)
-{
-	printk("([<%016lx>] %pSR)\n", address, (void *)address);
+	if (reliable)
+		printk(" [<%016lx>] %pSR \n", address, (void *)address);
+	else
+		printk("([<%016lx>] %pSR)\n", address, (void *)address);
 	return 0;
 }
 
@@ -138,14 +119,14 @@
 		else
 			stack = (unsigned long *)task->thread.ksp;
 	}
+	printk(KERN_DEFAULT "Stack:\n");
 	for (i = 0; i < 20; i++) {
 		if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
 			break;
-		if ((i * sizeof(long) % 32) == 0)
-			printk("%s       ", i == 0 ? "" : "\n");
-		printk("%016lx ", *stack++);
+		if (i % 4 == 0)
+			printk(KERN_DEFAULT "       ");
+		pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' ');
 	}
-	printk("\n");
 	show_trace(task, (unsigned long)sp);
 }
 
@@ -163,13 +144,13 @@
 	mode = user_mode(regs) ? "User" : "Krnl";
 	printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
 	if (!user_mode(regs))
-		printk(" (%pSR)", (void *)regs->psw.addr);
-	printk("\n");
+		pr_cont(" (%pSR)", (void *)regs->psw.addr);
+	pr_cont("\n");
 	printk("           R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
 	       "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
 	       psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
-	printk(" RI:%x EA:%x", psw->ri, psw->eaba);
-	printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+	pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
+	printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
 	       regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
 	printk("           %016lx %016lx %016lx %016lx\n",
 	       regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
@@ -205,14 +186,14 @@
 	printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
 	       regs->int_code >> 17, ++die_counter);
 #ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
+	pr_cont("PREEMPT ");
 #endif
 #ifdef CONFIG_SMP
-	printk("SMP ");
+	pr_cont("SMP ");
 #endif
 	if (debug_pagealloc_enabled())
-		printk("DEBUG_PAGEALLOC");
-	printk("\n");
+		pr_cont("DEBUG_PAGEALLOC");
+	pr_cont("\n");
 	notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
 	print_modules();
 	show_regs(regs);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 17431f6..955a7b6 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -222,7 +222,7 @@
 }
 arch_initcall(service_level_perf_register);
 
-static int __perf_callchain_kernel(void *data, unsigned long address)
+static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
 {
 	struct perf_callchain_entry_ctx *entry = data;
 
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 44f84b2..355db9d 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -27,12 +27,12 @@
 	return 1;
 }
 
-static int save_address(void *data, unsigned long address)
+static int save_address(void *data, unsigned long address, int reliable)
 {
 	return __save_address(data, address, 0);
 }
 
-static int save_address_nosched(void *data, unsigned long address)
+static int save_address_nosched(void *data, unsigned long address, int reliable)
 {
 	return __save_address(data, address, 1);
 }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 000e6e91..3667d20 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -62,9 +62,11 @@
 
 	. = ALIGN(PAGE_SIZE);
 	__start_ro_after_init = .;
+	__start_data_ro_after_init = .;
 	.data..ro_after_init : {
 		 *(.data..ro_after_init)
 	}
+	__end_data_ro_after_init = .;
 	EXCEPTION_TABLE(16)
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 1cab8a1..7a27eeb 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -119,8 +119,13 @@
 
 	vcpu->stat.exit_validity++;
 	trace_kvm_s390_intercept_validity(vcpu, viwhy);
-	WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
-	return -EOPNOTSUPP;
+	KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+		  current->pid, vcpu->kvm);
+
+	/* do not warn on invalid runtime instrumentation mode */
+	WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n",
+		  viwhy);
+	return -EINVAL;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index bd98b7d..05c98bb 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -315,7 +315,7 @@
 	if (r < 0)
 		goto out;
 
-	diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
 	if (!diag224_buf || diag224(diag224_buf))
 		goto out;
 
@@ -378,7 +378,7 @@
 	sctns->par.infpval1 |= PAR_WGHT_VLD;
 
 out:
-	kfree(diag224_buf);
+	free_page((unsigned long)diag224_buf);
 	vfree(diag204_buf);
 }
 
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index adb0c34..18d4107e 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -266,7 +266,8 @@
 	/* Try to get the remaining pages with get_user_pages */
 	start += nr << PAGE_SHIFT;
 	pages += nr;
-	ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+	ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+				      write ? FOLL_WRITE : 0);
 	/* Have to be a bit careful with return values */
 	if (nr > 0)
 		ret = (ret < 0) ? nr : ret + nr;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index cd404aa..4a0c5bc 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -217,6 +217,7 @@
 	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 	} else {
+		hugetlb_bad_size();
 		pr_err("hugepagesz= specifies an unsupported page size %s\n",
 			string);
 		return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f56a39b..b3e9d18 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -151,36 +151,40 @@
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
-	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
+	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	unsigned long nr_pages;
-	int rc, zone_enum;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	struct zone *zone;
+	int rc, i;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
 
-	while (size_pages > 0) {
-		if (start_pfn < dma_end_pfn) {
-			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
-				   dma_end_pfn - start_pfn : size_pages;
-			zone_enum = ZONE_DMA;
-		} else if (start_pfn < normal_end_pfn) {
-			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
-				   normal_end_pfn - start_pfn : size_pages;
-			zone_enum = ZONE_NORMAL;
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		zone = pgdat->node_zones + i;
+		if (zone_idx(zone) != ZONE_MOVABLE) {
+			/* Add range within existing zone limits, if possible */
+			zone_start_pfn = zone->zone_start_pfn;
+			zone_end_pfn = zone->zone_start_pfn +
+				       zone->spanned_pages;
 		} else {
-			nr_pages = size_pages;
-			zone_enum = ZONE_MOVABLE;
+			/* Add remaining range to ZONE_MOVABLE */
+			zone_start_pfn = start_pfn;
+			zone_end_pfn = start_pfn + size_pages;
 		}
-		rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
-				 start_pfn, size_pages);
+		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
+			continue;
+		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
+			   zone_end_pfn - start_pfn : size_pages;
+		rc = __add_pages(nid, zone, start_pfn, nr_pages);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
 		size_pages -= nr_pages;
+		if (!size_pages)
+			break;
 	}
 	if (rc)
 		vmem_remove_mapping(start, size);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 16f4c39..9a4de45 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <asm/processor.h>
 
-static int __s390_backtrace(void *data, unsigned long address)
+static int __s390_backtrace(void *data, unsigned long address, int reliable)
 {
 	unsigned int *depth = data;
 
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 7350c8b..6b2f72f 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -423,7 +423,7 @@
 	dma_addr_t dma_addr_base, dma_addr;
 	int flags = ZPCI_PTE_VALID;
 	struct scatterlist *s;
-	unsigned long pa;
+	unsigned long pa = 0;
 	int ret;
 
 	size = PAGE_ALIGN(size);
diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c
index 5583618..4f7314d 100644
--- a/arch/score/kernel/ptrace.c
+++ b/arch/score/kernel/ptrace.c
@@ -131,7 +131,7 @@
 {
 	int copied;
 
-	copied = access_process_vm(child, addr, res, sizeof(*res), 0);
+	copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE);
 
 	return copied != sizeof(*res) ? -EIO : 0;
 }
@@ -142,7 +142,7 @@
 {
 	int copied;
 
-	copied = access_process_vm(child, addr, res, sizeof(*res), 0);
+	copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE);
 
 	return copied != sizeof(*res) ? -EIO : 0;
 }
@@ -153,7 +153,8 @@
 {
 	int copied;
 
-	copied = access_process_vm(child, addr, &val, sizeof(val), 1);
+	copied = access_process_vm(child, addr, &val, sizeof(val),
+			FOLL_FORCE | FOLL_WRITE);
 
 	return copied != sizeof(val) ? -EIO : 0;
 }
@@ -164,7 +165,8 @@
 {
 	int copied;
 
-	copied = access_process_vm(child, addr, &val, sizeof(val), 1);
+	copied = access_process_vm(child, addr, &val, sizeof(val),
+			FOLL_FORCE | FOLL_WRITE);
 
 	return copied != sizeof(val) ? -EIO : 0;
 }
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 0047666..336f33a 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -31,7 +31,7 @@
 endif
 
 cflags-$(CONFIG_CPU_SH2)		:= $(call cc-option,-m2,)
-cflags-$(CONFIG_CPU_J2)			:= $(call cc-option,-mj2,)
+cflags-$(CONFIG_CPU_J2)			+= $(call cc-option,-mj2,)
 cflags-$(CONFIG_CPU_SH2A)		+= $(call cc-option,-m2a,) \
 					   $(call cc-option,-m2a-nofpu,) \
 					   $(call cc-option,-m4-nofpu,)
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index e9c2c42..4e21949 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -22,6 +22,16 @@
 	  have sufficient driver coverage to use this option; do not
 	  select it if you are using original SuperH hardware.
 
+config SH_JCORE_SOC
+	bool "J-Core SoC"
+	depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2)
+	select CLKSRC_JCORE_PIT
+	select JCORE_AIC
+	default y if CPU_J2
+	help
+	  Select this option to include drivers core components of the
+	  J-Core SoC, including interrupt controllers and timers.
+
 config SH_SOLUTION_ENGINE
 	bool "SolutionEngine"
 	select SOLUTION_ENGINE
diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig
index 94d1eca..2eb81ebe 100644
--- a/arch/sh/configs/j2_defconfig
+++ b/arch/sh/configs/j2_defconfig
@@ -8,6 +8,7 @@
 CONFIG_MEMORY_SIZE=0x04000000
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_SH_DEVICE_TREE=y
+CONFIG_SH_JCORE_SOC=y
 CONFIG_HZ_100=y
 CONFIG_CMDLINE_OVERWRITE=y
 CONFIG_CMDLINE="console=ttyUL0 earlycon"
@@ -20,6 +21,7 @@
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_NETDEVICES=y
+CONFIG_SERIAL_EARLYCON=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
 CONFIG_I2C=y
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index 40fa6c8..063c298 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -258,7 +258,8 @@
 		pages += nr;
 
 		ret = get_user_pages_unlocked(start,
-			(end - start) >> PAGE_SHIFT, write, 0, pages);
+			(end - start) >> PAGE_SHIFT, pages,
+			write ? FOLL_WRITE : 0);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index a6cfdab..5b0ed48 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -24,9 +24,10 @@
 	unsigned int	icache_line_size;
 	unsigned int	ecache_size;
 	unsigned int	ecache_line_size;
-	unsigned short	sock_id;
+	unsigned short	sock_id;	/* physical package */
 	unsigned short	core_id;
-	int		proc_id;
+	unsigned short  max_cache_id;	/* groupings of highest shared cache */
+	unsigned short	proc_id;	/* strand (aka HW thread) id */
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index d9c5876..8011e79 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -134,7 +134,7 @@
 	*(volatile __u32 *)&lp->lock = ~0U;
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
 "	st		%%g0, [%0]"
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 87990b7..07c9f2e 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -96,7 +96,7 @@
 
 /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
 
-static void inline arch_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
@@ -119,7 +119,7 @@
 	: "memory");
 }
 
-static int inline arch_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
 	int tmp1, tmp2;
 
@@ -140,7 +140,7 @@
 	return tmp1;
 }
 
-static void inline arch_read_unlock(arch_rwlock_t *lock)
+static inline void arch_read_unlock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
@@ -156,7 +156,7 @@
 	: "memory");
 }
 
-static void inline arch_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2;
 
@@ -181,7 +181,7 @@
 	: "memory");
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
 "	stw		%%g0, [%0]"
@@ -190,7 +190,7 @@
 	: "memory");
 }
 
-static int inline arch_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2, result;
 
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index bec481a..7b4898a 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -44,14 +44,20 @@
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
 #define topology_core_cpumask(cpu)		(&cpu_core_sib_map[cpu])
+#define topology_core_cache_cpumask(cpu)	(&cpu_core_sib_cache_map[cpu])
 #define topology_sibling_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #endif /* CONFIG_SMP */
 
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern cpumask_t cpu_core_sib_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
+
+/**
+ * Return cores that shares the last level cache.
+ */
 static inline const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-        return &cpu_core_map[cpu];
+	return &cpu_core_sib_cache_map[cpu];
 }
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index b68acc5..5373136 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -82,7 +82,6 @@
 	return 1;
 }
 
-void __ret_efault(void);
 void __retl_efault(void);
 
 /* Uh, these should become the main single-value transfer routines..
@@ -189,55 +188,34 @@
 unsigned long __must_check ___copy_from_user(void *to,
 					     const void __user *from,
 					     unsigned long size);
-unsigned long copy_from_user_fixup(void *to, const void __user *from,
-				   unsigned long size);
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long size)
 {
-	unsigned long ret;
-
 	check_object_size(to, size, false);
 
-	ret = ___copy_from_user(to, from, size);
-	if (unlikely(ret))
-		ret = copy_from_user_fixup(to, from, size);
-
-	return ret;
+	return ___copy_from_user(to, from, size);
 }
 #define __copy_from_user copy_from_user
 
 unsigned long __must_check ___copy_to_user(void __user *to,
 					   const void *from,
 					   unsigned long size);
-unsigned long copy_to_user_fixup(void __user *to, const void *from,
-				 unsigned long size);
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long size)
 {
-	unsigned long ret;
-
 	check_object_size(from, size, true);
 
-	ret = ___copy_to_user(to, from, size);
-	if (unlikely(ret))
-		ret = copy_to_user_fixup(to, from, size);
-	return ret;
+	return ___copy_to_user(to, from, size);
 }
 #define __copy_to_user copy_to_user
 
 unsigned long __must_check ___copy_in_user(void __user *to,
 					   const void __user *from,
 					   unsigned long size);
-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
-				 unsigned long size);
 static inline unsigned long __must_check
 copy_in_user(void __user *to, void __user *from, unsigned long size)
 {
-	unsigned long ret = ___copy_in_user(to, from, size);
-
-	if (unlikely(ret))
-		ret = copy_in_user_fixup(to, from, size);
-	return ret;
+	return ___copy_in_user(to, from, size);
 }
 #define __copy_in_user copy_in_user
 
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index beba6c1..6aa3da1 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -926,48 +926,11 @@
 EXPORT_SYMBOL(tlb_type)
 	.section	".fixup",#alloc,#execinstr
 
-	.globl	__ret_efault, __retl_efault, __ret_one, __retl_one
-ENTRY(__ret_efault)
-	ret
-	 restore %g0, -EFAULT, %o0
-ENDPROC(__ret_efault)
-EXPORT_SYMBOL(__ret_efault)
-
 ENTRY(__retl_efault)
 	retl
 	 mov	-EFAULT, %o0
 ENDPROC(__retl_efault)
 
-ENTRY(__retl_one)
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one)
-
-ENTRY(__retl_one_fp)
-	VISExitHalf
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one_fp)
-
-ENTRY(__ret_one_asi)
-	wr	%g0, ASI_AIUS, %asi
-	ret
-	 restore %g0, 1, %o0
-ENDPROC(__ret_one_asi)
-
-ENTRY(__retl_one_asi)
-	wr	%g0, ASI_AIUS, %asi
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one_asi)
-
-ENTRY(__retl_one_asi_fp)
-	wr	%g0, ASI_AIUS, %asi
-	VISExitHalf
-	retl
-	 mov	1, %o0
-ENDPROC(__retl_one_asi_fp)
-
 ENTRY(__retl_o1)
 	retl
 	 mov	%o1, %o0
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 59bbeff..07933b9 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -13,19 +13,30 @@
 void arch_jump_label_transform(struct jump_entry *entry,
 			       enum jump_label_type type)
 {
-	u32 val;
 	u32 *insn = (u32 *) (unsigned long) entry->code;
+	u32 val;
 
 	if (type == JUMP_LABEL_JMP) {
 		s32 off = (s32)entry->target - (s32)entry->code;
+		bool use_v9_branch = false;
+
+		BUG_ON(off & 3);
 
 #ifdef CONFIG_SPARC64
-		/* ba,pt %xcc, . + (off << 2) */
-		val = 0x10680000 | ((u32) off >> 2);
-#else
-		/* ba . + (off << 2) */
-		val = 0x10800000 | ((u32) off >> 2);
+		if (off <= 0xfffff && off >= -0x100000)
+			use_v9_branch = true;
 #endif
+		if (use_v9_branch) {
+			/* WDISP19 - target is . + immed << 2 */
+			/* ba,pt %xcc, . + off */
+			val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
+		} else {
+			/* WDISP22 - target is . + immed << 2 */
+			BUG_ON(off > 0x7fffff);
+			BUG_ON(off < -0x800000);
+			/* ba . + off */
+			val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
+		}
 	} else {
 		val = 0x01000000;
 	}
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 1122886..8a6982d 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -645,13 +645,20 @@
 		cpu_data(*id).core_id = core_id;
 }
 
-static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
-			   int sock_id)
+static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
+				int max_cache_id)
 {
 	const u64 *id = mdesc_get_property(hp, node, "id", NULL);
 
-	if (*id < num_possible_cpus())
-		cpu_data(*id).sock_id = sock_id;
+	if (*id < num_possible_cpus()) {
+		cpu_data(*id).max_cache_id = max_cache_id;
+
+		/**
+		 * On systems without explicit socket descriptions socket
+		 * is max_cache_id
+		 */
+		cpu_data(*id).sock_id = max_cache_id;
+	}
 }
 
 static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
@@ -660,10 +667,11 @@
 	find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
 }
 
-static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
-			  int sock_id)
+static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
+			       int max_cache_id)
 {
-	find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+	find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
+			     max_cache_id, 10);
 }
 
 static void set_core_ids(struct mdesc_handle *hp)
@@ -694,14 +702,15 @@
 	}
 }
 
-static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
 {
 	u64 mp;
 	int idx = 1;
 	int fnd = 0;
 
-	/* Identify unique sockets by looking for cpus backpointed to by
-	 * shared level n caches.
+	/**
+	 * Identify unique highest level of shared cache by looking for cpus
+	 * backpointed to by shared level N caches.
 	 */
 	mdesc_for_each_node_by_name(hp, mp, "cache") {
 		const u64 *cur_lvl;
@@ -709,8 +718,7 @@
 		cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
 		if (*cur_lvl != level)
 			continue;
-
-		mark_sock_ids(hp, mp, idx);
+		mark_max_cache_ids(hp, mp, idx);
 		idx++;
 		fnd = 1;
 	}
@@ -745,15 +753,17 @@
 {
 	u64 mp;
 
-	/* If machine description exposes sockets data use it.
-	 * Otherwise fallback to use shared L3 or L2 caches.
+	/**
+	 * Find the highest level of shared cache which pre-T7 is also
+	 * the socket.
 	 */
+	if (!set_max_cache_ids_by_cache(hp, 3))
+		set_max_cache_ids_by_cache(hp, 2);
+
+	/* If machine description exposes sockets data use it.*/
 	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
 	if (mp != MDESC_NODE_NULL)
-		return set_sock_ids_by_socket(hp, mp);
-
-	if (!set_sock_ids_by_cache(hp, 3))
-		set_sock_ids_by_cache(hp, 2);
+		set_sock_ids_by_socket(hp, mp);
 }
 
 static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 9ddc492..ac082dd 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -127,7 +127,8 @@
 		if (copy_from_user(kbuf, (void __user *) uaddr, len))
 			return -EFAULT;
 	} else {
-		int len2 = access_process_vm(target, uaddr, kbuf, len, 0);
+		int len2 = access_process_vm(target, uaddr, kbuf, len,
+				FOLL_FORCE);
 		if (len2 != len)
 			return -EFAULT;
 	}
@@ -141,7 +142,8 @@
 		if (copy_to_user((void __user *) uaddr, kbuf, len))
 			return -EFAULT;
 	} else {
-		int len2 = access_process_vm(target, uaddr, kbuf, len, 1);
+		int len2 = access_process_vm(target, uaddr, kbuf, len,
+				FOLL_FORCE | FOLL_WRITE);
 		if (len2 != len)
 			return -EFAULT;
 	}
@@ -505,7 +507,8 @@
 				if (access_process_vm(target,
 						      (unsigned long)
 						      &reg_window[pos],
-						      k, sizeof(*k), 0)
+						      k, sizeof(*k),
+						      FOLL_FORCE)
 				    != sizeof(*k))
 					return -EFAULT;
 				k++;
@@ -531,12 +534,14 @@
 				if (access_process_vm(target,
 						      (unsigned long)
 						      &reg_window[pos],
-						      &reg, sizeof(reg), 0)
+						      &reg, sizeof(reg),
+						      FOLL_FORCE)
 				    != sizeof(reg))
 					return -EFAULT;
 				if (access_process_vm(target,
 						      (unsigned long) u,
-						      &reg, sizeof(reg), 1)
+						      &reg, sizeof(reg),
+						      FOLL_FORCE | FOLL_WRITE)
 				    != sizeof(reg))
 					return -EFAULT;
 				pos++;
@@ -615,7 +620,8 @@
 						      (unsigned long)
 						      &reg_window[pos],
 						      (void *) k,
-						      sizeof(*k), 1)
+						      sizeof(*k),
+						      FOLL_FORCE | FOLL_WRITE)
 				    != sizeof(*k))
 					return -EFAULT;
 				k++;
@@ -642,13 +648,15 @@
 				if (access_process_vm(target,
 						      (unsigned long)
 						      u,
-						      &reg, sizeof(reg), 0)
+						      &reg, sizeof(reg),
+						      FOLL_FORCE)
 				    != sizeof(reg))
 					return -EFAULT;
 				if (access_process_vm(target,
 						      (unsigned long)
 						      &reg_window[pos],
-						      &reg, sizeof(reg), 1)
+						      &reg, sizeof(reg),
+						      FOLL_FORCE | FOLL_WRITE)
 				    != sizeof(reg))
 					return -EFAULT;
 				pos++;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba..8182f7c 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -63,9 +63,13 @@
 cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS - 1] = CPU_MASK_NONE };
+
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
 EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
 
 static cpumask_t smp_commenced_mask;
 
@@ -1265,6 +1269,10 @@
 		unsigned int j;
 
 		for_each_present_cpu(j)  {
+			if (cpu_data(i).max_cache_id ==
+			    cpu_data(j).max_cache_id)
+				cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+
 			if (cpu_data(i).sock_id == cpu_data(j).sock_id)
 				cpumask_set_cpu(j, &cpu_core_sib_map[i]);
 		}
diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
index b7d0bd6..69a439f 100644
--- a/arch/sparc/lib/GENcopy_from_user.S
+++ b/arch/sparc/lib/GENcopy_from_user.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
index 780550e..9947427 100644
--- a/arch/sparc/lib/GENcopy_to_user.S
+++ b/arch/sparc/lib/GENcopy_to_user.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
index 89358ee..059ea24 100644
--- a/arch/sparc/lib/GENmemcpy.S
+++ b/arch/sparc/lib/GENmemcpy.S
@@ -4,21 +4,18 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #define GLOBAL_SPARE	%g7
 #else
 #define GLOBAL_SPARE	%g5
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -45,6 +42,29 @@
 	.register	%g3,#scratch
 
 	.text
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+ENTRY(GEN_retl_o4_1)
+	add	%o4, %o2, %o4
+	retl
+	 add	%o4, 1, %o0
+ENDPROC(GEN_retl_o4_1)
+ENTRY(GEN_retl_g1_8)
+	add	%g1, %o2, %g1
+	retl
+	 add	%g1, 8, %o0
+ENDPROC(GEN_retl_g1_8)
+ENTRY(GEN_retl_o2_4)
+	retl
+	 add	%o2, 4, %o0
+ENDPROC(GEN_retl_o2_4)
+ENTRY(GEN_retl_o2_1)
+	retl
+	 add	%o2, 1, %o0
+ENDPROC(GEN_retl_o2_1)
+#endif
+
 	.align		64
 
 	.globl	FUNC_NAME
@@ -73,8 +93,8 @@
 	sub		%g0, %o4, %o4
 	sub		%o2, %o4, %o2
 1:	subcc		%o4, 1, %o4
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o0))
+	EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
+	EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
 	add		%o1, 1, %o1
 	bne,pt		%XCC, 1b
 	add		%o0, 1, %o0
@@ -82,8 +102,8 @@
 	andn		%o2, 0x7, %g1
 	sub		%o2, %g1, %o2
 1:	subcc		%g1, 0x8, %g1
-	EX_LD(LOAD(ldx, %o1, %g2))
-	EX_ST(STORE(stx, %g2, %o0))
+	EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
+	EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
 	add		%o1, 0x8, %o1
 	bne,pt		%XCC, 1b
 	 add		%o0, 0x8, %o0
@@ -100,8 +120,8 @@
 
 1:
 	subcc		%o2, 4, %o2
-	EX_LD(LOAD(lduw, %o1, %g1))
-	EX_ST(STORE(stw, %g1, %o1 + %o3))
+	EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
+	EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 4, %o1
 
@@ -111,8 +131,8 @@
 	.align		32
 90:
 	subcc		%o2, 1, %o2
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o1 + %o3))
+	EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
+	EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
 	bgu,pt		%XCC, 90b
 	 add		%o1, 1, %o1
 	retl
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 885f00e..69912d2 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -38,7 +38,7 @@
 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
 
-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-$(CONFIG_SPARC64) += iomap.o
diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
index d5242b8..b79a699 100644
--- a/arch/sparc/lib/NG2copy_from_user.S
+++ b/arch/sparc/lib/NG2copy_from_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_LD_FP(x)		\
+#define EX_LD_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
index 4e962d9..dcec55f 100644
--- a/arch/sparc/lib/NG2copy_to_user.S
+++ b/arch/sparc/lib/NG2copy_to_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_ST_FP(x)		\
+#define EX_ST_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index d5f585d..c629dbd 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE	%g7
@@ -32,21 +33,17 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)	x
+#define EX_LD_FP(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
+#define EX_ST(x,y)	x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST_FP(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -140,45 +137,110 @@
 	fsrc2		%x6, %f12; \
 	fsrc2		%x7, %f14;
 #define FREG_LOAD_1(base, x0) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
 #define FREG_LOAD_2(base, x0, x1) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_3(base, x0, x1, x2) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
-	EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
-	EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-	EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-	EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-	EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-	EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
-	EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
-	EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
 
 	.register	%g2,#scratch
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_fp:
+	VISExitHalf
+__restore_asi:
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+ENTRY(NG2_retl_o2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%o2, %o0
+ENDPROC(NG2_retl_o2)
+ENTRY(NG2_retl_o2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 1, %o0
+ENDPROC(NG2_retl_o2_plus_1)
+ENTRY(NG2_retl_o2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 4, %o0
+ENDPROC(NG2_retl_o2_plus_4)
+ENTRY(NG2_retl_o2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(NG2_retl_o2_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_1)
+	add	%o4, 1, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_1)
+ENTRY(NG2_retl_o2_plus_o4_plus_8)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_16)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_16)
+ENTRY(NG2_retl_o2_plus_g1_fp)
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
+	add	%g1, 64, %g1
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_1)
+ENTRY(NG2_retl_o2_and_7_plus_o4)
+	and	%o2, 7, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4)
+ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
+	and	%o2, 7, %o2
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
+#endif
+
 	.align		64
 
 	.globl	FUNC_NAME
@@ -230,8 +292,8 @@
 	sub		%g0, %o4, %o4	! bytes to align dst
 	sub		%o2, %o4, %o2
 1:	subcc		%o4, 1, %o4
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o0))
+	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
+	EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
 	add		%o1, 1, %o1
 	bne,pt		%XCC, 1b
 	add		%o0, 1, %o0
@@ -281,11 +343,11 @@
 	 nop
 	/* fall through for 0 < low bits < 8 */
 110:	sub		%o4, 64, %g2
-	EX_LD_FP(LOAD_BLK(%g2, %f0))
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+	EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -296,10 +358,10 @@
 
 120:	sub		%o4, 56, %g2
 	FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -310,10 +372,10 @@
 
 130:	sub		%o4, 48, %g2
 	FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -324,10 +386,10 @@
 
 140:	sub		%o4, 40, %g2
 	FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_5(f22, f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -338,10 +400,10 @@
 
 150:	sub		%o4, 32, %g2
 	FREG_LOAD_4(%g2, f0, f2, f4, f6)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_4(f24, f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -352,10 +414,10 @@
 
 160:	sub		%o4, 24, %g2
 	FREG_LOAD_3(%g2, f0, f2, f4)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_3(f26, f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -366,10 +428,10 @@
 
 170:	sub		%o4, 16, %g2
 	FREG_LOAD_2(%g2, f0, f2)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_2(f28, f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -380,10 +442,10 @@
 
 180:	sub		%o4, 8, %g2
 	FREG_LOAD_1(%g2, f0)
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-	EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
 	FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	FREG_MOVE_1(f30)
 	subcc		%g1, 64, %g1
 	add		%o4, 64, %o4
@@ -393,10 +455,10 @@
 	 nop
 
 190:
-1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
 	subcc		%g1, 64, %g1
-	EX_LD_FP(LOAD_BLK(%o4, %f0))
-	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+	EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
 	add		%o4, 64, %o4
 	bne,pt		%xcc, 1b
 	 LOAD(prefetch, %o4 + 64, #one_read)
@@ -423,28 +485,28 @@
 	andn		%o2, 0xf, %o4
 	and		%o2, 0xf, %o2
 1:	subcc		%o4, 0x10, %o4
-	EX_LD(LOAD(ldx, %o1, %o5))
+	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
 	add		%o1, 0x08, %o1
-	EX_LD(LOAD(ldx, %o1, %g1))
+	EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
 	sub		%o1, 0x08, %o1
-	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
 	add		%o1, 0x8, %o1
-	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
+	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x8, %o1
 73:	andcc		%o2, 0x8, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x8, %o2
-	EX_LD(LOAD(ldx, %o1, %o5))
-	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
+	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
 	add		%o1, 0x8, %o1
 1:	andcc		%o2, 0x4, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x4, %o2
-	EX_LD(LOAD(lduw, %o1, %o5))
-	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
+	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
 	add		%o1, 0x4, %o1
 1:	cmp		%o2, 0
 	be,pt		%XCC, 85f
@@ -460,8 +522,8 @@
 	sub		%o2, %g1, %o2
 
 1:	subcc		%g1, 1, %g1
-	EX_LD(LOAD(ldub, %o1, %o5))
-	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
 	bgu,pt		%icc, 1b
 	 add		%o1, 1, %o1
 
@@ -477,16 +539,16 @@
 
 8:	mov		64, GLOBAL_SPARE
 	andn		%o1, 0x7, %o1
-	EX_LD(LOAD(ldx, %o1, %g2))
+	EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
 	sub		GLOBAL_SPARE, %g1, GLOBAL_SPARE
 	andn		%o2, 0x7, %o4
 	sllx		%g2, %g1, %g2
 1:	add		%o1, 0x8, %o1
-	EX_LD(LOAD(ldx, %o1, %g3))
+	EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
 	subcc		%o4, 0x8, %o4
 	srlx		%g3, GLOBAL_SPARE, %o5
 	or		%o5, %g2, %o5
-	EX_ST(STORE(stx, %o5, %o0))
+	EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
 	add		%o0, 0x8, %o0
 	bgu,pt		%icc, 1b
 	 sllx		%g3, %g1, %g2
@@ -506,8 +568,8 @@
 
 1:
 	subcc		%o2, 4, %o2
-	EX_LD(LOAD(lduw, %o1, %g1))
-	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
+	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 4, %o1
 
@@ -517,8 +579,8 @@
 	.align		32
 90:
 	subcc		%o2, 1, %o2
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
+	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
+	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
 	bgu,pt		%XCC, 90b
 	 add		%o1, 1, %o1
 	retl
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
index 2e8ee7a..16a286c 100644
--- a/arch/sparc/lib/NG4copy_from_user.S
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x, y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_LD_FP(x)		\
+#define EX_LD_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
index be0bf45..6b0276f 100644
--- a/arch/sparc/lib/NG4copy_to_user.S
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_ST_FP(x)		\
+#define EX_ST_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_asi_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 8e13ee1..75bb93b 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE	%g7
@@ -46,22 +47,19 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)	x
+#define EX_LD_FP(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
+#define EX_ST(x,y)	x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)	x
+#define EX_ST_FP(x,y)	x
 #endif
 
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
-#endif
 
 #ifndef LOAD
 #define LOAD(type,addr,dest)	type [addr], dest
@@ -94,6 +92,158 @@
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_asi_fp:
+	VISExitHalf
+__restore_asi:
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+
+ENTRY(NG4_retl_o2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%o2, %o0
+ENDPROC(NG4_retl_o2)
+ENTRY(NG4_retl_o2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 1, %o0
+ENDPROC(NG4_retl_o2_plus_1)
+ENTRY(NG4_retl_o2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 4, %o0
+ENDPROC(NG4_retl_o2_plus_4)
+ENTRY(NG4_retl_o2_plus_o5)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5)
+ENTRY(NG4_retl_o2_plus_o5_plus_4)
+	add	%o5, 4, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_4)
+ENTRY(NG4_retl_o2_plus_o5_plus_8)
+	add	%o5, 8, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_8)
+ENTRY(NG4_retl_o2_plus_o5_plus_16)
+	add	%o5, 16, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_16)
+ENTRY(NG4_retl_o2_plus_o5_plus_24)
+	add	%o5, 24, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_24)
+ENTRY(NG4_retl_o2_plus_o5_plus_32)
+	add	%o5, 32, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_32)
+ENTRY(NG4_retl_o2_plus_g1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1)
+ENTRY(NG4_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_1)
+ENTRY(NG4_retl_o2_plus_g1_plus_8)
+	add	%g1, 8, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_8)
+ENTRY(NG4_retl_o2_plus_o4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4)
+ENTRY(NG4_retl_o2_plus_o4_plus_8)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8)
+ENTRY(NG4_retl_o2_plus_o4_plus_16)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16)
+ENTRY(NG4_retl_o2_plus_o4_plus_24)
+	add	%o4, 24, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24)
+ENTRY(NG4_retl_o2_plus_o4_plus_32)
+	add	%o4, 32, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32)
+ENTRY(NG4_retl_o2_plus_o4_plus_40)
+	add	%o4, 40, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40)
+ENTRY(NG4_retl_o2_plus_o4_plus_48)
+	add	%o4, 48, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48)
+ENTRY(NG4_retl_o2_plus_o4_plus_56)
+	add	%o4, 56, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56)
+ENTRY(NG4_retl_o2_plus_o4_plus_64)
+	add	%o4, 64, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64)
+ENTRY(NG4_retl_o2_plus_o4_fp)
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
+	add	%o4, 24, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
+	add	%o4, 32, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
+	add	%o4, 40, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
+	add	%o4, 48, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
+	add	%o4, 56, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
+	add	%o4, 64, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
+#endif
 	.align		64
 
 	.globl	FUNC_NAME
@@ -124,12 +274,13 @@
 	brz,pt		%g1, 51f
 	 sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
 	add		%o1, 1, %o1
 	subcc		%g1, 1, %g1
 	add		%o0, 1, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stb, %g2, %o0 - 0x01))
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 
 51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
 	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
@@ -154,43 +305,43 @@
 	brz,pt		%g1, .Llarge_aligned
 	 sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
 	add		%o1, 8, %o1
 	subcc		%g1, 8, %g1
 	add		%o0, 8, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stx, %g2, %o0 - 0x08))
+	 EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
 
 .Llarge_aligned:
 	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
 	andn		%o2, 0x3f, %o4
 	sub		%o2, %o4, %o2
 
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
 	add		%o1, 0x40, %o1
-	EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
+	EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
 	subcc		%o4, 0x40, %o4
-	EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
-	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
-	EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
-	EX_ST(STORE_INIT(%g1, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
+	EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(%g2, %o0))
+	EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
 	add		%o0, 0x08, %o0
-	EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
-	EX_ST(STORE_INIT(%g3, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
+	EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
 	add		%o0, 0x08, %o0
-	EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
-	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
 	add		%o0, 0x08, %o0
-	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
-	EX_ST(STORE_INIT(%o5, %o0))
+	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
+	EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(%g2, %o0))
+	EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(%g3, %o0))
+	EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
 	add		%o0, 0x08, %o0
-	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
 	add		%o0, 0x08, %o0
 	bne,pt		%icc, 1b
 	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
@@ -216,17 +367,17 @@
 	sub		%o2, %o4, %o2
 	alignaddr	%o1, %g0, %g1
 	add		%o1, %o4, %o1
-	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
-1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
+1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
 	subcc		%o4, 0x40, %o4
-	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
-	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
 	faligndata	%f0, %f2, %f16
-	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
+	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
 	faligndata	%f2, %f4, %f18
 	add		%g1, 0x40, %g1
 	faligndata	%f4, %f6, %f20
@@ -235,14 +386,14 @@
 	faligndata	%f10, %f12, %f26
 	faligndata	%f12, %f14, %f28
 	faligndata	%f14, %f0, %f30
-	EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
-	EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
-	EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
-	EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
-	EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
-	EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
-	EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
-	EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
+	EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
+	EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
+	EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
+	EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
+	EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
+	EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
+	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
+	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
 	add		%o0, 0x40, %o0
 	bne,pt		%icc, 1b
 	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
@@ -270,37 +421,38 @@
 	andncc		%o2, 0x20 - 1, %o5
 	be,pn		%icc, 2f
 	 sub		%o2, %o5, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
-	EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
-	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
-	EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
 	add		%o1, 0x20, %o1
 	subcc		%o5, 0x20, %o5
-	EX_ST(STORE(stx, %g1, %o0 + 0x00))
-	EX_ST(STORE(stx, %g2, %o0 + 0x08))
-	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
-	EX_ST(STORE(stx, %o4, %o0 + 0x18))
+	EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
+	EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
 	bne,pt		%icc, 1b
 	 add		%o0, 0x20, %o0
 2:	andcc		%o2, 0x18, %o5
 	be,pt		%icc, 3f
 	 sub		%o2, %o5, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
 	add		%o1, 0x08, %o1
 	add		%o0, 0x08, %o0
 	subcc		%o5, 0x08, %o5
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stx, %g1, %o0 - 0x08))
+	 EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
 3:	brz,pt		%o2, .Lexit
 	 cmp		%o2, 0x04
 	bl,pn		%icc, .Ltiny
 	 nop
-	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
 	add		%o1, 0x04, %o1
 	add		%o0, 0x04, %o0
 	subcc		%o2, 0x04, %o2
 	bne,pn		%icc, .Ltiny
-	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
 	ba,a,pt		%icc, .Lexit
 .Lmedium_unaligned:
 	/* First get dest 8 byte aligned.  */
@@ -309,12 +461,12 @@
 	brz,pt		%g1, 2f
 	 sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
 	add		%o1, 1, %o1
 	subcc		%g1, 1, %g1
 	add		%o0, 1, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stb, %g2, %o0 - 0x01))
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 2:
 	and		%o1, 0x7, %g1
 	brz,pn		%g1, .Lmedium_noprefetch
@@ -322,16 +474,16 @@
 	mov		64, %g2
 	sub		%g2, %g1, %g2
 	andn		%o1, 0x7, %o1
-	EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
 	sllx		%o4, %g1, %o4
 	andn		%o2, 0x08 - 1, %o5
 	sub		%o2, %o5, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
 	add		%o1, 0x08, %o1
 	subcc		%o5, 0x08, %o5
 	srlx		%g3, %g2, GLOBAL_SPARE
 	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
-	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
 	add		%o0, 0x08, %o0
 	bne,pt		%icc, 1b
 	 sllx		%g3, %g1, %o4
@@ -342,17 +494,17 @@
 	ba,pt		%icc, .Lsmall_unaligned
 
 .Ltiny:
-	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
 	subcc		%o2, 1, %o2
 	be,pn		%icc, .Lexit
-	 EX_ST(STORE(stb, %g1, %o0 + 0x00))
-	EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+	 EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
 	subcc		%o2, 1, %o2
 	be,pn		%icc, .Lexit
-	 EX_ST(STORE(stb, %g1, %o0 + 0x01))
-	EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+	 EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
 	ba,pt		%icc, .Lexit
-	 EX_ST(STORE(stb, %g1, %o0 + 0x02))
+	 EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
 
 .Lsmall:
 	andcc		%g2, 0x3, %g0
@@ -360,22 +512,22 @@
 	 andn		%o2, 0x4 - 1, %o5
 	sub		%o2, %o5, %o2
 1:
-	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
 	add		%o1, 0x04, %o1
 	subcc		%o5, 0x04, %o5
 	add		%o0, 0x04, %o0
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stw, %g1, %o0 - 0x04))
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
 	brz,pt		%o2, .Lexit
 	 nop
 	ba,a,pt		%icc, .Ltiny
 
 .Lsmall_unaligned:
-1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
 	add		%o1, 1, %o1
 	add		%o0, 1, %o0
 	subcc		%o2, 1, %o2
 	bne,pt		%icc, 1b
-	 EX_ST(STORE(stb, %g1, %o0 - 0x01))
+	 EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
 	ba,a,pt		%icc, .Lexit
 	.size		FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
index 5d1e4d1..9cd42fc 100644
--- a/arch/sparc/lib/NGcopy_from_user.S
+++ b/arch/sparc/lib/NGcopy_from_user.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __ret_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
index ff630dc..5c358af 100644
--- a/arch/sparc/lib/NGcopy_to_user.S
+++ b/arch/sparc/lib/NGcopy_to_user.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __ret_one_asi;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
index 96a14ca..d88c4ed 100644
--- a/arch/sparc/lib/NGmemcpy.S
+++ b/arch/sparc/lib/NGmemcpy.S
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/thread_info.h>
 #define GLOBAL_SPARE	%g7
@@ -27,15 +28,11 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -79,6 +76,92 @@
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_asi:
+	ret
+	wr	%g0, ASI_AIUS, %asi
+	 restore
+ENTRY(NG_ret_i2_plus_i4_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i5, %i0
+ENDPROC(NG_ret_i2_plus_i4_plus_1)
+ENTRY(NG_ret_i2_plus_g1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1)
+ENTRY(NG_ret_i2_plus_g1_minus_8)
+	sub	%g1, 8, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_8)
+ENTRY(NG_ret_i2_plus_g1_minus_16)
+	sub	%g1, 16, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_16)
+ENTRY(NG_ret_i2_plus_g1_minus_24)
+	sub	%g1, 24, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_24)
+ENTRY(NG_ret_i2_plus_g1_minus_32)
+	sub	%g1, 32, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_32)
+ENTRY(NG_ret_i2_plus_g1_minus_40)
+	sub	%g1, 40, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_40)
+ENTRY(NG_ret_i2_plus_g1_minus_48)
+	sub	%g1, 48, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_48)
+ENTRY(NG_ret_i2_plus_g1_minus_56)
+	sub	%g1, 56, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_56)
+ENTRY(NG_ret_i2_plus_i4)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4)
+ENTRY(NG_ret_i2_plus_i4_minus_8)
+	sub	%i4, 8, %i4
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4_minus_8)
+ENTRY(NG_ret_i2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 8, %i0
+ENDPROC(NG_ret_i2_plus_8)
+ENTRY(NG_ret_i2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 4, %i0
+ENDPROC(NG_ret_i2_plus_4)
+ENTRY(NG_ret_i2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 1, %i0
+ENDPROC(NG_ret_i2_plus_1)
+ENTRY(NG_ret_i2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_plus_1)
+ENTRY(NG_ret_i2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%i2, %i0
+ENDPROC(NG_ret_i2)
+ENTRY(NG_ret_i2_and_7_plus_i4)
+	and	%i2, 7, %i2
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_and_7_plus_i4)
+#endif
+
 	.align		64
 
 	.globl	FUNC_NAME
@@ -126,8 +209,8 @@
 	sub		%g0, %i4, %i4	! bytes to align dst
 	sub		%i2, %i4, %i2
 1:	subcc		%i4, 1, %i4
-	EX_LD(LOAD(ldub, %i1, %g1))
-	EX_ST(STORE(stb, %g1, %o0))
+	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
+	EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
 	add		%i1, 1, %i1
 	bne,pt		%XCC, 1b
 	add		%o0, 1, %o0
@@ -160,7 +243,7 @@
 	and		%i4, 0x7, GLOBAL_SPARE
 	sll		GLOBAL_SPARE, 3, GLOBAL_SPARE
 	mov		64, %i5
-	EX_LD(LOAD_TWIN(%i1, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
 	sub		%i5, GLOBAL_SPARE, %i5
 	mov		16, %o4
 	mov		32, %o5
@@ -178,31 +261,31 @@
 	srlx		WORD3, PRE_SHIFT, TMP; \
 	or		WORD2, TMP, WORD2;
 
-8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
 	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 	LOAD(prefetch, %i1 + %i3, #one_read)
 
-	EX_ST(STORE_INIT(%g2, %o0 + 0x00))
-	EX_ST(STORE_INIT(%g3, %o0 + 0x08))
+	EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
+	EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 
-	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
 	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o2, %o0 + 0x10))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 
-	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%g2, %o0 + 0x20))
-	EX_ST(STORE_INIT(%g3, %o0 + 0x28))
+	EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 
-	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
 	add		%i1, 64, %i1
 	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o2, %o0 + 0x30))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 
 	subcc		%g1, 64, %g1
 	bne,pt		%XCC, 8b
@@ -211,31 +294,31 @@
 	ba,pt		%XCC, 60f
 	 add		%i1, %i4, %i1
 
-9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
 	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 	LOAD(prefetch, %i1 + %i3, #one_read)
 
-	EX_ST(STORE_INIT(%g3, %o0 + 0x00))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+	EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 
-	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
 	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o3, %o0 + 0x10))
-	EX_ST(STORE_INIT(%g2, %o0 + 0x18))
+	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 
-	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%g3, %o0 + 0x20))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+	EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 
-	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
 	add		%i1, 64, %i1
 	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 
-	EX_ST(STORE_INIT(%o3, %o0 + 0x30))
-	EX_ST(STORE_INIT(%g2, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 
 	subcc		%g1, 64, %g1
 	bne,pt		%XCC, 9b
@@ -249,25 +332,25 @@
 	 * one twin load ahead, then add 8 back into source when
 	 * we finish the loop.
 	 */
-	EX_LD(LOAD_TWIN(%i1, %o4, %o5))
+	EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
 	mov	16, %o7
 	mov	32, %g2
 	mov	48, %g3
 	mov	64, %o1
-1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
 	LOAD(prefetch, %i1 + %o1, #one_read)
-	EX_ST(STORE_INIT(%o5, %o0 + 0x00))	! initializes cache line
-	EX_ST(STORE_INIT(%o2, %o0 + 0x08))
-	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x10))
-	EX_ST(STORE_INIT(%o4, %o0 + 0x18))
-	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
-	EX_ST(STORE_INIT(%o5, %o0 + 0x20))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x28))
-	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
+	EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
+	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
 	add		%i1, 64, %i1
-	EX_ST(STORE_INIT(%o3, %o0 + 0x30))
-	EX_ST(STORE_INIT(%o4, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 	subcc		%g1, 64, %g1
 	bne,pt		%XCC, 1b
 	 add		%o0, 64, %o0
@@ -282,20 +365,20 @@
 	mov	32, %g2
 	mov	48, %g3
 	mov	64, %o1
-1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
-	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
 	LOAD(prefetch, %i1 + %o1, #one_read)
-	EX_ST(STORE_INIT(%o4, %o0 + 0x00))	! initializes cache line
-	EX_ST(STORE_INIT(%o5, %o0 + 0x08))
-	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x10))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x18))
-	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+	EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
+	EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
 	add	%i1, 64, %i1
-	EX_ST(STORE_INIT(%o4, %o0 + 0x20))
-	EX_ST(STORE_INIT(%o5, %o0 + 0x28))
-	EX_ST(STORE_INIT(%o2, %o0 + 0x30))
-	EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+	EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 	subcc	%g1, 64, %g1
 	bne,pt	%XCC, 1b
 	 add	%o0, 64, %o0
@@ -321,28 +404,28 @@
 	andn		%i2, 0xf, %i4
 	and		%i2, 0xf, %i2
 1:	subcc		%i4, 0x10, %i4
-	EX_LD(LOAD(ldx, %i1, %o4))
+	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
 	add		%i1, 0x08, %i1
-	EX_LD(LOAD(ldx, %i1, %g1))
+	EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
 	sub		%i1, 0x08, %i1
-	EX_ST(STORE(stx, %o4, %i1 + %i3))
+	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
 	add		%i1, 0x8, %i1
-	EX_ST(STORE(stx, %g1, %i1 + %i3))
+	EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
 	bgu,pt		%XCC, 1b
 	 add		%i1, 0x8, %i1
 73:	andcc		%i2, 0x8, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%i2, 0x8, %i2
-	EX_LD(LOAD(ldx, %i1, %o4))
-	EX_ST(STORE(stx, %o4, %i1 + %i3))
+	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
+	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
 	add		%i1, 0x8, %i1
 1:	andcc		%i2, 0x4, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%i2, 0x4, %i2
-	EX_LD(LOAD(lduw, %i1, %i5))
-	EX_ST(STORE(stw, %i5, %i1 + %i3))
+	EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
+	EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
 	add		%i1, 0x4, %i1
 1:	cmp		%i2, 0
 	be,pt		%XCC, 85f
@@ -358,8 +441,8 @@
 	sub		%i2, %g1, %i2
 
 1:	subcc		%g1, 1, %g1
-	EX_LD(LOAD(ldub, %i1, %i5))
-	EX_ST(STORE(stb, %i5, %i1 + %i3))
+	EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
 	bgu,pt		%icc, 1b
 	 add		%i1, 1, %i1
 
@@ -375,16 +458,16 @@
 
 8:	mov		64, %i3
 	andn		%i1, 0x7, %i1
-	EX_LD(LOAD(ldx, %i1, %g2))
+	EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
 	sub		%i3, %g1, %i3
 	andn		%i2, 0x7, %i4
 	sllx		%g2, %g1, %g2
 1:	add		%i1, 0x8, %i1
-	EX_LD(LOAD(ldx, %i1, %g3))
+	EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
 	subcc		%i4, 0x8, %i4
 	srlx		%g3, %i3, %i5
 	or		%i5, %g2, %i5
-	EX_ST(STORE(stx, %i5, %o0))
+	EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
 	add		%o0, 0x8, %o0
 	bgu,pt		%icc, 1b
 	 sllx		%g3, %g1, %g2
@@ -404,8 +487,8 @@
 
 1:
 	subcc		%i2, 4, %i2
-	EX_LD(LOAD(lduw, %i1, %g1))
-	EX_ST(STORE(stw, %g1, %i1 + %i3))
+	EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
+	EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
 	bgu,pt		%XCC, 1b
 	 add		%i1, 4, %i1
 
@@ -415,8 +498,8 @@
 	.align		32
 90:
 	subcc		%i2, 1, %i2
-	EX_LD(LOAD(ldub, %i1, %g1))
-	EX_ST(STORE(stb, %g1, %i1 + %i3))
+	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
+	EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
 	bgu,pt		%XCC, 90b
 	 add		%i1, 1, %i1
 	ret
diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
index ecc5692..bb6ff73 100644
--- a/arch/sparc/lib/U1copy_from_user.S
+++ b/arch/sparc/lib/U1copy_from_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_LD_FP(x)		\
+#define EX_LD_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_fp;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
index 9eea392..ed92ce73 100644
--- a/arch/sparc/lib/U1copy_to_user.S
+++ b/arch/sparc/lib/U1copy_to_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_ST_FP(x)		\
+#define EX_ST_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_fp;\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index 97e1b21..4f0d50b 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -5,6 +5,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #include <asm/export.h>
@@ -24,21 +25,17 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)	x
+#define EX_LD_FP(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
+#define EX_ST(x,y)	x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST_FP(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -79,53 +76,169 @@
 	faligndata		%f7, %f8, %f60;			\
 	faligndata		%f8, %f9, %f62;
 
-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
-	EX_LD_FP(LOAD_BLK(%src, %fdest));				\
-	EX_ST_FP(STORE_BLK(%fsrc, %dest));				\
-	add			%src, 0x40, %src;		\
-	subcc			%len, 0x40, %len;		\
-	be,pn			%xcc, jmptgt;			\
-	 add			%dest, 0x40, %dest;		\
+#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)			\
+	EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);			\
+	add			%src, 0x40, %src;			\
+	subcc			%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE;	\
+	be,pn			%xcc, jmptgt;				\
+	 add			%dest, 0x40, %dest;			\
 
-#define LOOP_CHUNK1(src, dest, len, branch_dest)		\
-	MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
-#define LOOP_CHUNK2(src, dest, len, branch_dest)		\
-	MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
-#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
-	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
+#define LOOP_CHUNK1(src, dest, branch_dest)		\
+	MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
+#define LOOP_CHUNK2(src, dest, branch_dest)		\
+	MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
+#define LOOP_CHUNK3(src, dest, branch_dest)		\
+	MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
 
 #define DO_SYNC			membar	#Sync;
 #define STORE_SYNC(dest, fsrc)				\
-	EX_ST_FP(STORE_BLK(%fsrc, %dest));			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);	\
 	add			%dest, 0x40, %dest;	\
 	DO_SYNC
 
 #define STORE_JUMP(dest, fsrc, target)			\
-	EX_ST_FP(STORE_BLK(%fsrc, %dest));			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp);	\
 	add			%dest, 0x40, %dest;	\
 	ba,pt			%xcc, target;		\
 	 nop;
 
-#define FINISH_VISCHUNK(dest, f0, f1, left)	\
-	subcc			%left, 8, %left;\
-	bl,pn			%xcc, 95f;	\
-	 faligndata		%f0, %f1, %f48;	\
-	EX_ST_FP(STORE(std, %f48, %dest));		\
+#define FINISH_VISCHUNK(dest, f0, f1)			\
+	subcc			%g3, 8, %g3;		\
+	bl,pn			%xcc, 95f;		\
+	 faligndata		%f0, %f1, %f48;		\
+	EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);	\
 	add			%dest, 8, %dest;
 
-#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
-	subcc			%left, 8, %left;	\
-	bl,pn			%xcc, 95f;		\
+#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
+	subcc			%g3, 8, %g3;	\
+	bl,pn			%xcc, 95f;	\
 	 fsrc2			%f0, %f1;
 
-#define UNEVEN_VISCHUNK(dest, f0, f1, left)		\
-	UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
+#define UNEVEN_VISCHUNK(dest, f0, f1)		\
+	UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
 	ba,a,pt			%xcc, 93f;
 
 	.register	%g2,#scratch
 	.register	%g3,#scratch
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+ENTRY(U1_g1_1_fp)
+	VISExitHalf
+	add		%g1, 1, %g1
+	add		%g1, %g2, %g1
+	retl
+	 add		%g1, %o2, %o0
+ENDPROC(U1_g1_1_fp)
+ENTRY(U1_g2_0_fp)
+	VISExitHalf
+	retl
+	 add		%g2, %o2, %o0
+ENDPROC(U1_g2_0_fp)
+ENTRY(U1_g2_8_fp)
+	VISExitHalf
+	add		%g2, 8, %g2
+	retl
+	 add		%g2, %o2, %o0
+ENDPROC(U1_g2_8_fp)
+ENTRY(U1_gs_0_fp)
+	VISExitHalf
+	add		%GLOBAL_SPARE, %g3, %o0
+	retl
+	 add		%o0, %o2, %o0
+ENDPROC(U1_gs_0_fp)
+ENTRY(U1_gs_80_fp)
+	VISExitHalf
+	add		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+	add		%GLOBAL_SPARE, %g3, %o0
+	retl
+	 add		%o0, %o2, %o0
+ENDPROC(U1_gs_80_fp)
+ENTRY(U1_gs_40_fp)
+	VISExitHalf
+	add		%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
+	add		%GLOBAL_SPARE, %g3, %o0
+	retl
+	 add		%o0, %o2, %o0
+ENDPROC(U1_gs_40_fp)
+ENTRY(U1_g3_0_fp)
+	VISExitHalf
+	retl
+	 add		%g3, %o2, %o0
+ENDPROC(U1_g3_0_fp)
+ENTRY(U1_g3_8_fp)
+	VISExitHalf
+	add		%g3, 8, %g3
+	retl
+	 add		%g3, %o2, %o0
+ENDPROC(U1_g3_8_fp)
+ENTRY(U1_o2_0_fp)
+	VISExitHalf
+	retl
+	 mov		%o2, %o0
+ENDPROC(U1_o2_0_fp)
+ENTRY(U1_o2_1_fp)
+	VISExitHalf
+	retl
+	 add		%o2, 1, %o0
+ENDPROC(U1_o2_1_fp)
+ENTRY(U1_gs_0)
+	VISExitHalf
+	retl
+	 add		%GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0)
+ENTRY(U1_gs_8)
+	VISExitHalf
+	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+	retl
+	 add		%GLOBAL_SPARE, 0x8, %o0
+ENDPROC(U1_gs_8)
+ENTRY(U1_gs_10)
+	VISExitHalf
+	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+	retl
+	 add		%GLOBAL_SPARE, 0x10, %o0
+ENDPROC(U1_gs_10)
+ENTRY(U1_o2_0)
+	retl
+	 mov		%o2, %o0
+ENDPROC(U1_o2_0)
+ENTRY(U1_o2_8)
+	retl
+	 add		%o2, 8, %o0
+ENDPROC(U1_o2_8)
+ENTRY(U1_o2_4)
+	retl
+	 add		%o2, 4, %o0
+ENDPROC(U1_o2_4)
+ENTRY(U1_o2_1)
+	retl
+	 add		%o2, 1, %o0
+ENDPROC(U1_o2_1)
+ENTRY(U1_g1_0)
+	retl
+	 add		%g1, %o2, %o0
+ENDPROC(U1_g1_0)
+ENTRY(U1_g1_1)
+	add		%g1, 1, %g1
+	retl
+	 add		%g1, %o2, %o0
+ENDPROC(U1_g1_1)
+ENTRY(U1_gs_0_o2_adj)
+	and		%o2, 7, %o2
+	retl
+	 add		%GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0_o2_adj)
+ENTRY(U1_gs_8_o2_adj)
+	and		%o2, 7, %o2
+	add		%GLOBAL_SPARE, 8, %GLOBAL_SPARE
+	retl
+	 add		%GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_8_o2_adj)
+#endif
+
 	.align		64
 
 	.globl		FUNC_NAME
@@ -167,8 +280,8 @@
 	 and		%g2, 0x38, %g2
 
 1:	subcc		%g1, 0x1, %g1
-	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
-	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
+	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
+	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x1, %o1
 
@@ -179,20 +292,20 @@
 	be,pt		%icc, 3f
 	 alignaddr	%o1, %g0, %o1
 
-	EX_LD_FP(LOAD(ldd, %o1, %f4))
-1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
+	EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f4, %f6, %f0
-	EX_ST_FP(STORE(std, %f0, %o0))
+	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
 	be,pn		%icc, 3f
 	 add		%o0, 0x8, %o0
 
-	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f6, %f4, %f0
-	EX_ST_FP(STORE(std, %f0, %o0))
+	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
 	bne,pt		%icc, 1b
 	 add		%o0, 0x8, %o0
 
@@ -215,13 +328,13 @@
 	add		%g1, %GLOBAL_SPARE, %g1
 	subcc		%o2, %g3, %o2
 
-	EX_LD_FP(LOAD_BLK(%o1, %f0))
+	EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
 	add		%o1, 0x40, %o1
 	add		%g1, %g3, %g1
-	EX_LD_FP(LOAD_BLK(%o1, %f16))
+	EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
 	add		%o1, 0x40, %o1
 	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
-	EX_LD_FP(LOAD_BLK(%o1, %f32))
+	EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
 	add		%o1, 0x40, %o1
 
 	/* There are 8 instances of the unrolled loop,
@@ -241,11 +354,11 @@
 
 	.align		64
 1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f0, %f2, %f48
 1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
@@ -262,11 +375,11 @@
 	STORE_JUMP(o0, f48, 56f)
 
 1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f2, %f4, %f48
 1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
@@ -283,11 +396,11 @@
 	STORE_JUMP(o0, f48, 57f)
 
 1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f4, %f6, %f48
 1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
@@ -304,11 +417,11 @@
 	STORE_JUMP(o0, f48, 58f)
 
 1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f6, %f8, %f48
 1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
@@ -325,11 +438,11 @@
 	STORE_JUMP(o0, f48, 59f)
 
 1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f8, %f10, %f48
 1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
@@ -346,11 +459,11 @@
 	STORE_JUMP(o0, f48, 60f)
 
 1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f10, %f12, %f48
 1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
@@ -367,11 +480,11 @@
 	STORE_JUMP(o0, f48, 61f)
 
 1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f12, %f14, %f48
 1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
@@ -388,11 +501,11 @@
 	STORE_JUMP(o0, f48, 62f)
 
 1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+	LOOP_CHUNK1(o1, o0, 1f)
 	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+	LOOP_CHUNK2(o1, o0, 2f)
 	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+	LOOP_CHUNK3(o1, o0, 3f)
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f14, %f16, %f48
 1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
@@ -408,53 +521,53 @@
 	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
 	STORE_JUMP(o0, f48, 63f)
 
-40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
-41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)
-42:	FINISH_VISCHUNK(o0, f4,  f6,  g3)
-43:	FINISH_VISCHUNK(o0, f6,  f8,  g3)
-44:	FINISH_VISCHUNK(o0, f8,  f10, g3)
-45:	FINISH_VISCHUNK(o0, f10, f12, g3)
-46:	FINISH_VISCHUNK(o0, f12, f14, g3)
-47:	UNEVEN_VISCHUNK(o0, f14, f0,  g3)
-48:	FINISH_VISCHUNK(o0, f16, f18, g3)
-49:	FINISH_VISCHUNK(o0, f18, f20, g3)
-50:	FINISH_VISCHUNK(o0, f20, f22, g3)
-51:	FINISH_VISCHUNK(o0, f22, f24, g3)
-52:	FINISH_VISCHUNK(o0, f24, f26, g3)
-53:	FINISH_VISCHUNK(o0, f26, f28, g3)
-54:	FINISH_VISCHUNK(o0, f28, f30, g3)
-55:	UNEVEN_VISCHUNK(o0, f30, f0,  g3)
-56:	FINISH_VISCHUNK(o0, f32, f34, g3)
-57:	FINISH_VISCHUNK(o0, f34, f36, g3)
-58:	FINISH_VISCHUNK(o0, f36, f38, g3)
-59:	FINISH_VISCHUNK(o0, f38, f40, g3)
-60:	FINISH_VISCHUNK(o0, f40, f42, g3)
-61:	FINISH_VISCHUNK(o0, f42, f44, g3)
-62:	FINISH_VISCHUNK(o0, f44, f46, g3)
-63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
+40:	FINISH_VISCHUNK(o0, f0,  f2)
+41:	FINISH_VISCHUNK(o0, f2,  f4)
+42:	FINISH_VISCHUNK(o0, f4,  f6)
+43:	FINISH_VISCHUNK(o0, f6,  f8)
+44:	FINISH_VISCHUNK(o0, f8,  f10)
+45:	FINISH_VISCHUNK(o0, f10, f12)
+46:	FINISH_VISCHUNK(o0, f12, f14)
+47:	UNEVEN_VISCHUNK(o0, f14, f0)
+48:	FINISH_VISCHUNK(o0, f16, f18)
+49:	FINISH_VISCHUNK(o0, f18, f20)
+50:	FINISH_VISCHUNK(o0, f20, f22)
+51:	FINISH_VISCHUNK(o0, f22, f24)
+52:	FINISH_VISCHUNK(o0, f24, f26)
+53:	FINISH_VISCHUNK(o0, f26, f28)
+54:	FINISH_VISCHUNK(o0, f28, f30)
+55:	UNEVEN_VISCHUNK(o0, f30, f0)
+56:	FINISH_VISCHUNK(o0, f32, f34)
+57:	FINISH_VISCHUNK(o0, f34, f36)
+58:	FINISH_VISCHUNK(o0, f36, f38)
+59:	FINISH_VISCHUNK(o0, f38, f40)
+60:	FINISH_VISCHUNK(o0, f40, f42)
+61:	FINISH_VISCHUNK(o0, f42, f44)
+62:	FINISH_VISCHUNK(o0, f44, f46)
+63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0)
 
-93:	EX_LD_FP(LOAD(ldd, %o1, %f2))
+93:	EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
 	add		%o1, 8, %o1
 	subcc		%g3, 8, %g3
 	faligndata	%f0, %f2, %f8
-	EX_ST_FP(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
 	bl,pn		%xcc, 95f
 	 add		%o0, 8, %o0
-	EX_LD_FP(LOAD(ldd, %o1, %f0))
+	EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
 	add		%o1, 8, %o1
 	subcc		%g3, 8, %g3
 	faligndata	%f2, %f0, %f8
-	EX_ST_FP(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
 	bge,pt		%xcc, 93b
 	 add		%o0, 8, %o0
 
 95:	brz,pt		%o2, 2f
 	 mov		%g1, %o1
 
-1:	EX_LD_FP(LOAD(ldub, %o1, %o3))
+1:	EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
 	add		%o1, 1, %o1
 	subcc		%o2, 1, %o2
-	EX_ST_FP(STORE(stb, %o3, %o0))
+	EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
 	bne,pt		%xcc, 1b
 	 add		%o0, 1, %o0
 
@@ -470,27 +583,27 @@
 
 72:	andn		%o2, 0xf, %GLOBAL_SPARE
 	and		%o2, 0xf, %o2
-1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
-	EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
 	subcc		%GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
-	EX_ST(STORE(stx, %o5, %o1 + %o3))
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
 	add		%o1, 0x8, %o1
-	EX_ST(STORE(stx, %g1, %o1 + %o3))
+	EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x8, %o1
 73:	andcc		%o2, 0x8, %g0
 	be,pt		%XCC, 1f
 	 nop
-	EX_LD(LOAD(ldx, %o1, %o5))
+	EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
 	sub		%o2, 0x8, %o2
-	EX_ST(STORE(stx, %o5, %o1 + %o3))
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
 	add		%o1, 0x8, %o1
 1:	andcc		%o2, 0x4, %g0
 	be,pt		%XCC, 1f
 	 nop
-	EX_LD(LOAD(lduw, %o1, %o5))
+	EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
 	sub		%o2, 0x4, %o2
-	EX_ST(STORE(stw, %o5, %o1 + %o3))
+	EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
 	add		%o1, 0x4, %o1
 1:	cmp		%o2, 0
 	be,pt		%XCC, 85f
@@ -504,9 +617,9 @@
 	 sub		%g0, %g1, %g1
 	sub		%o2, %g1, %o2
 
-1:	EX_LD(LOAD(ldub, %o1, %o5))
+1:	EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
 	subcc		%g1, 1, %g1
-	EX_ST(STORE(stb, %o5, %o1 + %o3))
+	EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
 	bgu,pt		%icc, 1b
 	 add		%o1, 1, %o1
 
@@ -522,16 +635,16 @@
 
 8:	mov		64, %o3
 	andn		%o1, 0x7, %o1
-	EX_LD(LOAD(ldx, %o1, %g2))
+	EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
 	sub		%o3, %g1, %o3
 	andn		%o2, 0x7, %GLOBAL_SPARE
 	sllx		%g2, %g1, %g2
-1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
+1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
 	subcc		%GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
 	add		%o1, 0x8, %o1
 	srlx		%g3, %o3, %o5
 	or		%o5, %g2, %o5
-	EX_ST(STORE(stx, %o5, %o0))
+	EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
 	add		%o0, 0x8, %o0
 	bgu,pt		%icc, 1b
 	 sllx		%g3, %g1, %g2
@@ -549,9 +662,9 @@
 	bne,pn		%XCC, 90f
 	 sub		%o0, %o1, %o3
 
-1:	EX_LD(LOAD(lduw, %o1, %g1))
+1:	EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
 	subcc		%o2, 4, %o2
-	EX_ST(STORE(stw, %g1, %o1 + %o3))
+	EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 4, %o1
 
@@ -559,9 +672,9 @@
 	 mov		EX_RETVAL(%o4), %o0
 
 	.align		32
-90:	EX_LD(LOAD(ldub, %o1, %g1))
+90:	EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
 	subcc		%o2, 1, %o2
-	EX_ST(STORE(stb, %g1, %o1 + %o3))
+	EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
 	bgu,pt		%XCC, 90b
 	 add		%o1, 1, %o1
 	retl
diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
index 88ad73d..db73010 100644
--- a/arch/sparc/lib/U3copy_from_user.S
+++ b/arch/sparc/lib/U3copy_from_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_LD(x)		\
+#define EX_LD(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_LD_FP(x)		\
+#define EX_LD_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
index 845139d..c4ee858 100644
--- a/arch/sparc/lib/U3copy_to_user.S
+++ b/arch/sparc/lib/U3copy_to_user.S
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_ST(x)		\
+#define EX_ST(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, y;		\
 	.text;			\
 	.align 4;
 
-#define EX_ST_FP(x)		\
+#define EX_ST_FP(x,y)		\
 98:	x;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one_fp;\
+	.word 98b, y##_fp;	\
 	.text;			\
 	.align 4;
 
diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
index 491ee69..54f9870 100644
--- a/arch/sparc/lib/U3memcpy.S
+++ b/arch/sparc/lib/U3memcpy.S
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE	%g7
@@ -22,21 +23,17 @@
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)	x
+#define EX_LD(x,y)	x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)	x
+#define EX_LD_FP(x,y)	x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)	x
+#define EX_ST(x,y)	x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)	x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)	x
+#define EX_ST_FP(x,y)	x
 #endif
 
 #ifndef LOAD
@@ -77,6 +74,87 @@
 	 */
 
 	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_fp:
+	VISExitHalf
+	retl
+	 nop
+ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+	add	%g1, 1, %g1
+	add	%g2, %g1, %g2
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+ENTRY(U3_retl_o2_plus_g2_fp)
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_fp)
+ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
+	add	%g2, 8, %g2
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
+ENTRY(U3_retl_o2)
+	retl
+	 mov	%o2, %o0
+ENDPROC(U3_retl_o2)
+ENTRY(U3_retl_o2_plus_1)
+	retl
+	 add	%o2, 1, %o0
+ENDPROC(U3_retl_o2_plus_1)
+ENTRY(U3_retl_o2_plus_4)
+	retl
+	 add	%o2, 4, %o0
+ENDPROC(U3_retl_o2_plus_4)
+ENTRY(U3_retl_o2_plus_8)
+	retl
+	 add	%o2, 8, %o0
+ENDPROC(U3_retl_o2_plus_8)
+ENTRY(U3_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	retl
+	 add	%o2, %g1, %o0
+ENDPROC(U3_retl_o2_plus_g1_plus_1)
+ENTRY(U3_retl_o2_fp)
+	ba,pt	%xcc, __restore_fp
+	 mov	%o2, %o0
+ENDPROC(U3_retl_o2_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+	sll	%o3, 6, %o3
+	add	%o3, 0x80, %o3
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %o3, %o0
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+	sll	%o3, 6, %o3
+	add	%o3, 0x40, %o3
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %o3, %o0
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+ENTRY(U3_retl_o2_plus_GS_plus_0x10)
+	add	GLOBAL_SPARE, 0x10, GLOBAL_SPARE
+	retl
+	 add	%o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
+ENTRY(U3_retl_o2_plus_GS_plus_0x08)
+	add	GLOBAL_SPARE, 0x08, GLOBAL_SPARE
+	retl
+	 add	%o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
+ENTRY(U3_retl_o2_and_7_plus_GS)
+	and	%o2, 7, %o2
+	retl
+	 add	%o2, GLOBAL_SPARE, %o2
+ENDPROC(U3_retl_o2_and_7_plus_GS)
+ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
+	add	GLOBAL_SPARE, 8, GLOBAL_SPARE
+	and	%o2, 7, %o2
+	retl
+	 add	%o2, GLOBAL_SPARE, %o2
+ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
+#endif
+
 	.align		64
 
 	/* The cheetah's flexible spine, oversized liver, enlarged heart,
@@ -126,8 +204,8 @@
 	 and		%g2, 0x38, %g2
 
 1:	subcc		%g1, 0x1, %g1
-	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
-	EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
+	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
+	EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x1, %o1
 
@@ -138,20 +216,20 @@
 	be,pt		%icc, 3f
 	 alignaddr	%o1, %g0, %o1
 
-	EX_LD_FP(LOAD(ldd, %o1, %f4))
-1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
+	EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f4, %f6, %f0
-	EX_ST_FP(STORE(std, %f0, %o0))
+	EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
 	be,pn		%icc, 3f
 	 add		%o0, 0x8, %o0
 
-	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f6, %f4, %f2
-	EX_ST_FP(STORE(std, %f2, %o0))
+	EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
 	bne,pt		%icc, 1b
 	 add		%o0, 0x8, %o0
 
@@ -161,25 +239,25 @@
 	LOAD(prefetch, %o1 + 0x080, #one_read)
 	LOAD(prefetch, %o1 + 0x0c0, #one_read)
 	LOAD(prefetch, %o1 + 0x100, #one_read)
-	EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
 	LOAD(prefetch, %o1 + 0x140, #one_read)
-	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
 	LOAD(prefetch, %o1 + 0x180, #one_read)
-	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
 	LOAD(prefetch, %o1 + 0x1c0, #one_read)
 	faligndata	%f0, %f2, %f16
-	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
 	faligndata	%f2, %f4, %f18
-	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
 	faligndata	%f4, %f6, %f20
-	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
 	faligndata	%f6, %f8, %f22
 
-	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
 	faligndata	%f8, %f10, %f24
-	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
 	faligndata	%f10, %f12, %f26
-	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
 
 	subcc		GLOBAL_SPARE, 0x80, GLOBAL_SPARE
 	add		%o1, 0x40, %o1
@@ -190,26 +268,26 @@
 
 	.align		64
 1:
-	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 	faligndata	%f12, %f14, %f28
-	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 	faligndata	%f14, %f0, %f30
-	EX_ST_FP(STORE_BLK(%f16, %o0))
-	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f0, %f2, %f16
 	add		%o0, 0x40, %o0
 
-	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f2, %f4, %f18
-	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f4, %f6, %f20
-	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	subcc		%o3, 0x01, %o3
 	faligndata	%f6, %f8, %f22
-	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 
 	faligndata	%f8, %f10, %f24
-	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 	LOAD(prefetch, %o1 + 0x1c0, #one_read)
 	faligndata	%f10, %f12, %f26
 	bg,pt		%XCC, 1b
@@ -217,29 +295,29 @@
 
 	/* Finally we copy the last full 64-byte block. */
 2:
-	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 	faligndata	%f12, %f14, %f28
-	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 	faligndata	%f14, %f0, %f30
-	EX_ST_FP(STORE_BLK(%f16, %o0))
-	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f0, %f2, %f16
-	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f2, %f4, %f18
-	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f4, %f6, %f20
-	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f6, %f8, %f22
-	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	faligndata	%f8, %f10, %f24
 	cmp		%g1, 0
 	be,pt		%XCC, 1f
 	 add		%o0, 0x40, %o0
-	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 1:	faligndata	%f10, %f12, %f26
 	faligndata	%f12, %f14, %f28
 	faligndata	%f14, %f0, %f30
-	EX_ST_FP(STORE_BLK(%f16, %o0))
+	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 	add		%o0, 0x40, %o0
 	add		%o1, 0x40, %o1
 	membar		#Sync
@@ -259,20 +337,20 @@
 
 	sub		%o2, %g2, %o2
 	be,a,pt		%XCC, 1f
-	 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
+	 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
 
-1:	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f0, %f2, %f8
-	EX_ST_FP(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
 	be,pn		%XCC, 2f
 	 add		%o0, 0x8, %o0
-	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
+	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
 	add		%o1, 0x8, %o1
 	subcc		%g2, 0x8, %g2
 	faligndata	%f2, %f0, %f8
-	EX_ST_FP(STORE(std, %f8, %o0))
+	EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
 	bne,pn		%XCC, 1b
 	 add		%o0, 0x8, %o0
 
@@ -292,30 +370,33 @@
 	 andcc		%o2, 0x8, %g0
 	be,pt		%icc, 1f
 	 nop
-	EX_LD(LOAD(ldx, %o1, %o5))
-	EX_ST(STORE(stx, %o5, %o1 + %o3))
+	EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
 	add		%o1, 0x8, %o1
+	sub		%o2, 8, %o2
 
 1:	andcc		%o2, 0x4, %g0
 	be,pt		%icc, 1f
 	 nop
-	EX_LD(LOAD(lduw, %o1, %o5))
-	EX_ST(STORE(stw, %o5, %o1 + %o3))
+	EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
+	EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
 	add		%o1, 0x4, %o1
+	sub		%o2, 4, %o2
 
 1:	andcc		%o2, 0x2, %g0
 	be,pt		%icc, 1f
 	 nop
-	EX_LD(LOAD(lduh, %o1, %o5))
-	EX_ST(STORE(sth, %o5, %o1 + %o3))
+	EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
+	EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
 	add		%o1, 0x2, %o1
+	sub		%o2, 2, %o2
 
 1:	andcc		%o2, 0x1, %g0
 	be,pt		%icc, 85f
 	 nop
-	EX_LD(LOAD(ldub, %o1, %o5))
+	EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
 	ba,pt		%xcc, 85f
-	 EX_ST(STORE(stb, %o5, %o1 + %o3))
+	 EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
 
 	.align		64
 70: /* 16 < len <= 64 */
@@ -326,26 +407,26 @@
 	andn		%o2, 0xf, GLOBAL_SPARE
 	and		%o2, 0xf, %o2
 1:	subcc		GLOBAL_SPARE, 0x10, GLOBAL_SPARE
-	EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
-	EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
-	EX_ST(STORE(stx, %o5, %o1 + %o3))
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
 	add		%o1, 0x8, %o1
-	EX_ST(STORE(stx, %g1, %o1 + %o3))
+	EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 0x8, %o1
 73:	andcc		%o2, 0x8, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x8, %o2
-	EX_LD(LOAD(ldx, %o1, %o5))
-	EX_ST(STORE(stx, %o5, %o1 + %o3))
+	EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
 	add		%o1, 0x8, %o1
 1:	andcc		%o2, 0x4, %g0
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x4, %o2
-	EX_LD(LOAD(lduw, %o1, %o5))
-	EX_ST(STORE(stw, %o5, %o1 + %o3))
+	EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
+	EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
 	add		%o1, 0x4, %o1
 1:	cmp		%o2, 0
 	be,pt		%XCC, 85f
@@ -361,8 +442,8 @@
 	sub		%o2, %g1, %o2
 
 1:	subcc		%g1, 1, %g1
-	EX_LD(LOAD(ldub, %o1, %o5))
-	EX_ST(STORE(stb, %o5, %o1 + %o3))
+	EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
 	bgu,pt		%icc, 1b
 	 add		%o1, 1, %o1
 
@@ -378,16 +459,16 @@
 
 8:	mov		64, %o3
 	andn		%o1, 0x7, %o1
-	EX_LD(LOAD(ldx, %o1, %g2))
+	EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
 	sub		%o3, %g1, %o3
 	andn		%o2, 0x7, GLOBAL_SPARE
 	sllx		%g2, %g1, %g2
-1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
+1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
 	subcc		GLOBAL_SPARE, 0x8, GLOBAL_SPARE
 	add		%o1, 0x8, %o1
 	srlx		%g3, %o3, %o5
 	or		%o5, %g2, %o5
-	EX_ST(STORE(stx, %o5, %o0))
+	EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
 	add		%o0, 0x8, %o0
 	bgu,pt		%icc, 1b
 	 sllx		%g3, %g1, %g2
@@ -407,8 +488,8 @@
 
 1:
 	subcc		%o2, 4, %o2
-	EX_LD(LOAD(lduw, %o1, %g1))
-	EX_ST(STORE(stw, %g1, %o1 + %o3))
+	EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
+	EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
 	bgu,pt		%XCC, 1b
 	 add		%o1, 4, %o1
 
@@ -418,8 +499,8 @@
 	.align		32
 90:
 	subcc		%o2, 1, %o2
-	EX_LD(LOAD(ldub, %o1, %g1))
-	EX_ST(STORE(stb, %g1, %o1 + %o3))
+	EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
+	EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
 	bgu,pt		%XCC, 90b
 	 add		%o1, 1, %o1
 	retl
diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
index 482de09..0252b21 100644
--- a/arch/sparc/lib/copy_in_user.S
+++ b/arch/sparc/lib/copy_in_user.S
@@ -9,18 +9,33 @@
 
 #define XCC xcc
 
-#define EX(x,y)			\
+#define EX(x,y,z)		\
 98:	x,y;			\
 	.section __ex_table,"a";\
 	.align 4;		\
-	.word 98b, __retl_one;	\
+	.word 98b, z;		\
 	.text;			\
 	.align 4;
 
+#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
+#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
+#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
+
 	.register	%g2,#scratch
 	.register	%g3,#scratch
 
 	.text
+__retl_o4_plus_8:
+	add	%o4, %o2, %o4
+	retl
+	 add	%o4, 8, %o0
+__retl_o2_plus_4:
+	retl
+	 add	%o2, 4, %o0
+__retl_o2_plus_1:
+	retl
+	 add	%o2, 1, %o0
+
 	.align	32
 
 	/* Don't try to get too fancy here, just nice and
@@ -45,8 +60,8 @@
 	andn		%o2, 0x7, %o4
 	and		%o2, 0x7, %o2
 1:	subcc		%o4, 0x8, %o4
-	EX(ldxa [%o1] %asi, %o5)
-	EX(stxa %o5, [%o0] %asi)
+	EX_O4(ldxa [%o1] %asi, %o5)
+	EX_O4(stxa %o5, [%o0] %asi)
 	add		%o1, 0x8, %o1
 	bgu,pt		%XCC, 1b
 	 add		%o0, 0x8, %o0
@@ -54,8 +69,8 @@
 	be,pt		%XCC, 1f
 	 nop
 	sub		%o2, 0x4, %o2
-	EX(lduwa [%o1] %asi, %o5)
-	EX(stwa %o5, [%o0] %asi)
+	EX_O2_4(lduwa [%o1] %asi, %o5)
+	EX_O2_4(stwa %o5, [%o0] %asi)
 	add		%o1, 0x4, %o1
 	add		%o0, 0x4, %o0
 1:	cmp		%o2, 0
@@ -71,8 +86,8 @@
 
 82:
 	subcc		%o2, 4, %o2
-	EX(lduwa [%o1] %asi, %g1)
-	EX(stwa %g1, [%o0] %asi)
+	EX_O2_4(lduwa [%o1] %asi, %g1)
+	EX_O2_4(stwa %g1, [%o0] %asi)
 	add		%o1, 4, %o1
 	bgu,pt		%XCC, 82b
 	 add		%o0, 4, %o0
@@ -83,8 +98,8 @@
 	.align	32
 90:
 	subcc		%o2, 1, %o2
-	EX(lduba [%o1] %asi, %g1)
-	EX(stba %g1, [%o0] %asi)
+	EX_O2_1(lduba [%o1] %asi, %g1)
+	EX_O2_1(stba %g1, [%o0] %asi)
 	add		%o1, 1, %o1
 	bgu,pt		%XCC, 90b
 	 add		%o0, 1, %o0
diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
deleted file mode 100644
index ac96ae2..0000000
--- a/arch/sparc/lib/user_fixup.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* user_fixup.c: Fix up user copy faults.
- *
- * Copyright (C) 2004 David S. Miller <davem@redhat.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-
-#include <asm/uaccess.h>
-
-/* Calculating the exact fault address when using
- * block loads and stores can be very complicated.
- *
- * Instead of trying to be clever and handling all
- * of the cases, just fix things up simply here.
- */
-
-static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
-{
-	unsigned long fault_addr = current_thread_info()->fault_address;
-	unsigned long end = start + size;
-
-	if (fault_addr < start || fault_addr >= end) {
-		*offset = 0;
-	} else {
-		*offset = fault_addr - start;
-		size = end - fault_addr;
-	}
-	return size;
-}
-
-unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
-{
-	unsigned long offset;
-
-	size = compute_size((unsigned long) from, size, &offset);
-	if (likely(size))
-		memset(to + offset, 0, size);
-
-	return size;
-}
-EXPORT_SYMBOL(copy_from_user_fixup);
-
-unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
-{
-	unsigned long offset;
-
-	return compute_size((unsigned long) to, size, &offset);
-}
-EXPORT_SYMBOL(copy_to_user_fixup);
-
-unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
-{
-	unsigned long fault_addr = current_thread_info()->fault_address;
-	unsigned long start = (unsigned long) to;
-	unsigned long end = start + size;
-
-	if (fault_addr >= start && fault_addr < end)
-		return end - fault_addr;
-
-	start = (unsigned long) from;
-	end = start + size;
-	if (fault_addr >= start && fault_addr < end)
-		return end - fault_addr;
-
-	return size;
-}
-EXPORT_SYMBOL(copy_in_user_fixup);
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 4e06750..cd0e32b 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -238,7 +238,8 @@
 		pages += nr;
 
 		ret = get_user_pages_unlocked(start,
-			(end - start) >> PAGE_SHIFT, write, 0, pages);
+			(end - start) >> PAGE_SHIFT, pages,
+			write ? FOLL_WRITE : 0);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index f2b7711..e20fbba 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -27,6 +27,20 @@
 	return (tag == (vaddr >> 22));
 }
 
+static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
+		struct tsb *ent = &swapper_tsb[idx];
+		unsigned long match = idx << 13;
+
+		match |= (ent->tag << 22);
+		if (match >= start && match < end)
+			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+}
+
 /* TSB flushes need only occur on the processor initiating the address
  * space modification, not on each cpu the address space has run on.
  * Only the TLB flush needs that treatment.
@@ -36,6 +50,9 @@
 {
 	unsigned long v;
 
+	if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
+		return flush_tsb_kernel_range_scan(start, end);
+
 	for (v = start; v < end; v += PAGE_SIZE) {
 		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
 					      KERNEL_TSB_NENTRIES);
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index b4f4733..5d2fd6c 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -30,7 +30,7 @@
 	.text
 	.align		32
 	.globl		__flush_tlb_mm
-__flush_tlb_mm:		/* 18 insns */
+__flush_tlb_mm:		/* 19 insns */
 	/* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
 	ldxa		[%o1] ASI_DMMU, %g2
 	cmp		%g2, %o0
@@ -81,7 +81,7 @@
 
 	.align		32
 	.globl		__flush_tlb_pending
-__flush_tlb_pending:	/* 26 insns */
+__flush_tlb_pending:	/* 27 insns */
 	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
 	rdpr		%pstate, %g7
 	sllx		%o1, 3, %o1
@@ -113,12 +113,14 @@
 
 	.align		32
 	.globl		__flush_tlb_kernel_range
-__flush_tlb_kernel_range:	/* 16 insns */
+__flush_tlb_kernel_range:	/* 31 insns */
 	/* %o0=start, %o1=end */
 	cmp		%o0, %o1
 	be,pn		%xcc, 2f
+	 sub		%o1, %o0, %o3
+	srlx		%o3, 18, %o4
+	brnz,pn		%o4, __spitfire_flush_tlb_kernel_range_slow
 	 sethi		%hi(PAGE_SIZE), %o4
-	sub		%o1, %o0, %o3
 	sub		%o3, %o4, %o3
 	or		%o0, 0x20, %o0		! Nucleus
 1:	stxa		%g0, [%o0 + %o3] ASI_DMMU_DEMAP
@@ -131,6 +133,41 @@
 	retl
 	 nop
 	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+__spitfire_flush_tlb_kernel_range_slow:
+	mov		63 * 8, %o4
+1:	ldxa		[%o4] ASI_ITLB_DATA_ACCESS, %o3
+	andcc		%o3, 0x40, %g0			/* _PAGE_L_4U */
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %o3
+	stxa		%g0, [%o3] ASI_IMMU
+	stxa		%g0, [%o4] ASI_ITLB_DATA_ACCESS
+	membar		#Sync
+2:	ldxa		[%o4] ASI_DTLB_DATA_ACCESS, %o3
+	andcc		%o3, 0x40, %g0
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %o3
+	stxa		%g0, [%o3] ASI_DMMU
+	stxa		%g0, [%o4] ASI_DTLB_DATA_ACCESS
+	membar		#Sync
+2:	sub		%o4, 8, %o4
+	brgez,pt	%o4, 1b
+	 nop
+	retl
+	 nop
 
 __spitfire_flush_tlb_mm_slow:
 	rdpr		%pstate, %g1
@@ -285,6 +322,40 @@
 	retl
 	 wrpr		%g7, 0x0, %pstate
 
+__cheetah_flush_tlb_kernel_range:	/* 31 insns */
+	/* %o0=start, %o1=end */
+	cmp		%o0, %o1
+	be,pn		%xcc, 2f
+	 sub		%o1, %o0, %o3
+	srlx		%o3, 18, %o4
+	brnz,pn		%o4, 3f
+	 sethi		%hi(PAGE_SIZE), %o4
+	sub		%o3, %o4, %o3
+	or		%o0, 0x20, %o0		! Nucleus
+1:	stxa		%g0, [%o0 + %o3] ASI_DMMU_DEMAP
+	stxa		%g0, [%o0 + %o3] ASI_IMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%o3, 1b
+	 sub		%o3, %o4, %o3
+2:	sethi		%hi(KERNBASE), %o3
+	flush		%o3
+	retl
+	 nop
+3:	mov		0x80, %o4
+	stxa		%g0, [%o4] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g0, [%o4] ASI_IMMU_DEMAP
+	membar		#Sync
+	retl
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
 #ifdef DCACHE_ALIASING_POSSIBLE
 __cheetah_flush_dcache_page: /* 11 insns */
 	sethi		%hi(PAGE_OFFSET), %g1
@@ -309,19 +380,28 @@
 	ret
 	 restore
 
-__hypervisor_flush_tlb_mm: /* 10 insns */
+__hypervisor_flush_tlb_mm: /* 19 insns */
 	mov		%o0, %o2	/* ARG2: mmu context */
 	mov		0, %o0		/* ARG0: CPU lists unimplemented */
 	mov		0, %o1		/* ARG1: CPU lists unimplemented */
 	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
 	mov		HV_FAST_MMU_DEMAP_CTX, %o5
 	ta		HV_FAST_TRAP
-	brnz,pn		%o0, __hypervisor_tlb_tl0_error
+	brnz,pn		%o0, 1f
 	 mov		HV_FAST_MMU_DEMAP_CTX, %o1
 	retl
 	 nop
+1:	sethi		%hi(__hypervisor_tlb_tl0_error), %o5
+	jmpl		%o5 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 
-__hypervisor_flush_tlb_page: /* 11 insns */
+__hypervisor_flush_tlb_page: /* 22 insns */
 	/* %o0 = context, %o1 = vaddr */
 	mov		%o0, %g2
 	mov		%o1, %o0              /* ARG0: vaddr + IMMU-bit */
@@ -330,12 +410,23 @@
 	srlx		%o0, PAGE_SHIFT, %o0
 	sllx		%o0, PAGE_SHIFT, %o0
 	ta		HV_MMU_UNMAP_ADDR_TRAP
-	brnz,pn		%o0, __hypervisor_tlb_tl0_error
+	brnz,pn		%o0, 1f
 	 mov		HV_MMU_UNMAP_ADDR_TRAP, %o1
 	retl
 	 nop
+1:	sethi		%hi(__hypervisor_tlb_tl0_error), %o2
+	jmpl		%o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 
-__hypervisor_flush_tlb_pending: /* 16 insns */
+__hypervisor_flush_tlb_pending: /* 27 insns */
 	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
 	sllx		%o1, 3, %g1
 	mov		%o2, %g2
@@ -347,31 +438,57 @@
 	srlx		%o0, PAGE_SHIFT, %o0
 	sllx		%o0, PAGE_SHIFT, %o0
 	ta		HV_MMU_UNMAP_ADDR_TRAP
-	brnz,pn		%o0, __hypervisor_tlb_tl0_error
+	brnz,pn		%o0, 1f
 	 mov		HV_MMU_UNMAP_ADDR_TRAP, %o1
 	brnz,pt		%g1, 1b
 	 nop
 	retl
 	 nop
+1:	sethi		%hi(__hypervisor_tlb_tl0_error), %o2
+	jmpl		%o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 
-__hypervisor_flush_tlb_kernel_range: /* 16 insns */
+__hypervisor_flush_tlb_kernel_range: /* 31 insns */
 	/* %o0=start, %o1=end */
 	cmp		%o0, %o1
 	be,pn		%xcc, 2f
-	 sethi		%hi(PAGE_SIZE), %g3
-	mov		%o0, %g1
-	sub		%o1, %g1, %g2
+	 sub		%o1, %o0, %g2
+	srlx		%g2, 18, %g3
+	brnz,pn		%g3, 4f
+	 mov		%o0, %g1
+	sethi		%hi(PAGE_SIZE), %g3
 	sub		%g2, %g3, %g2
 1:	add		%g1, %g2, %o0	/* ARG0: virtual address */
 	mov		0, %o1		/* ARG1: mmu context */
 	mov		HV_MMU_ALL, %o2	/* ARG2: flags */
 	ta		HV_MMU_UNMAP_ADDR_TRAP
-	brnz,pn		%o0, __hypervisor_tlb_tl0_error
+	brnz,pn		%o0, 3f
 	 mov		HV_MMU_UNMAP_ADDR_TRAP, %o1
 	brnz,pt		%g2, 1b
 	 sub		%g2, %g3, %g2
 2:	retl
 	 nop
+3:	sethi		%hi(__hypervisor_tlb_tl0_error), %o2
+	jmpl		%o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+4:	mov		0, %o0		/* ARG0: CPU lists unimplemented */
+	mov		0, %o1		/* ARG1: CPU lists unimplemented */
+	mov		0, %o2		/* ARG2: mmu context == nucleus */
+	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
+	mov		HV_FAST_MMU_DEMAP_CTX, %o5
+	ta		HV_FAST_TRAP
+	brnz,pn		%o0, 3b
+	 mov		HV_FAST_MMU_DEMAP_CTX, %o1
+	retl
+	 nop
 
 #ifdef DCACHE_ALIASING_POSSIBLE
 	/* XXX Niagara and friends have an 8K cache, so no aliasing is
@@ -394,43 +511,6 @@
 	retl
 	 nop
 
-	.globl		cheetah_patch_cachetlbops
-cheetah_patch_cachetlbops:
-	save		%sp, -128, %sp
-
-	sethi		%hi(__flush_tlb_mm), %o0
-	or		%o0, %lo(__flush_tlb_mm), %o0
-	sethi		%hi(__cheetah_flush_tlb_mm), %o1
-	or		%o1, %lo(__cheetah_flush_tlb_mm), %o1
-	call		tlb_patch_one
-	 mov		19, %o2
-
-	sethi		%hi(__flush_tlb_page), %o0
-	or		%o0, %lo(__flush_tlb_page), %o0
-	sethi		%hi(__cheetah_flush_tlb_page), %o1
-	or		%o1, %lo(__cheetah_flush_tlb_page), %o1
-	call		tlb_patch_one
-	 mov		22, %o2
-
-	sethi		%hi(__flush_tlb_pending), %o0
-	or		%o0, %lo(__flush_tlb_pending), %o0
-	sethi		%hi(__cheetah_flush_tlb_pending), %o1
-	or		%o1, %lo(__cheetah_flush_tlb_pending), %o1
-	call		tlb_patch_one
-	 mov		27, %o2
-
-#ifdef DCACHE_ALIASING_POSSIBLE
-	sethi		%hi(__flush_dcache_page), %o0
-	or		%o0, %lo(__flush_dcache_page), %o0
-	sethi		%hi(__cheetah_flush_dcache_page), %o1
-	or		%o1, %lo(__cheetah_flush_dcache_page), %o1
-	call		tlb_patch_one
-	 mov		11, %o2
-#endif /* DCACHE_ALIASING_POSSIBLE */
-
-	ret
-	 restore
-
 #ifdef CONFIG_SMP
 	/* These are all called by the slaves of a cross call, at
 	 * trap level 1, with interrupts fully disabled.
@@ -447,7 +527,7 @@
 	 */
 	.align		32
 	.globl		xcall_flush_tlb_mm
-xcall_flush_tlb_mm:	/* 21 insns */
+xcall_flush_tlb_mm:	/* 24 insns */
 	mov		PRIMARY_CONTEXT, %g2
 	ldxa		[%g2] ASI_DMMU, %g3
 	srlx		%g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -469,9 +549,12 @@
 	nop
 	nop
 	nop
+	nop
+	nop
+	nop
 
 	.globl		xcall_flush_tlb_page
-xcall_flush_tlb_page:	/* 17 insns */
+xcall_flush_tlb_page:	/* 20 insns */
 	/* %g5=context, %g1=vaddr */
 	mov		PRIMARY_CONTEXT, %g4
 	ldxa		[%g4] ASI_DMMU, %g2
@@ -490,15 +573,20 @@
 	retry
 	nop
 	nop
+	nop
+	nop
+	nop
 
 	.globl		xcall_flush_tlb_kernel_range
-xcall_flush_tlb_kernel_range:	/* 25 insns */
+xcall_flush_tlb_kernel_range:	/* 44 insns */
 	sethi		%hi(PAGE_SIZE - 1), %g2
 	or		%g2, %lo(PAGE_SIZE - 1), %g2
 	andn		%g1, %g2, %g1
 	andn		%g7, %g2, %g7
 	sub		%g7, %g1, %g3
-	add		%g2, 1, %g2
+	srlx		%g3, 18, %g2
+	brnz,pn		%g2, 2f
+	 add		%g2, 1, %g2
 	sub		%g3, %g2, %g3
 	or		%g1, 0x20, %g1		! Nucleus
 1:	stxa		%g0, [%g1 + %g3] ASI_DMMU_DEMAP
@@ -507,8 +595,25 @@
 	brnz,pt		%g3, 1b
 	 sub		%g3, %g2, %g3
 	retry
-	nop
-	nop
+2:	mov		63 * 8, %g1
+1:	ldxa		[%g1] ASI_ITLB_DATA_ACCESS, %g2
+	andcc		%g2, 0x40, %g0			/* _PAGE_L_4U */
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %g2
+	stxa		%g0, [%g2] ASI_IMMU
+	stxa		%g0, [%g1] ASI_ITLB_DATA_ACCESS
+	membar		#Sync
+2:	ldxa		[%g1] ASI_DTLB_DATA_ACCESS, %g2
+	andcc		%g2, 0x40, %g0
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %g2
+	stxa		%g0, [%g2] ASI_DMMU
+	stxa		%g0, [%g1] ASI_DTLB_DATA_ACCESS
+	membar		#Sync
+2:	sub		%g1, 8, %g1
+	brgez,pt	%g1, 1b
+	 nop
+	retry
 	nop
 	nop
 	nop
@@ -637,6 +742,52 @@
 
 	retry
 
+__cheetah_xcall_flush_tlb_kernel_range:	/* 44 insns */
+	sethi		%hi(PAGE_SIZE - 1), %g2
+	or		%g2, %lo(PAGE_SIZE - 1), %g2
+	andn		%g1, %g2, %g1
+	andn		%g7, %g2, %g7
+	sub		%g7, %g1, %g3
+	srlx		%g3, 18, %g2
+	brnz,pn		%g2, 2f
+	 add		%g2, 1, %g2
+	sub		%g3, %g2, %g3
+	or		%g1, 0x20, %g1		! Nucleus
+1:	stxa		%g0, [%g1 + %g3] ASI_DMMU_DEMAP
+	stxa		%g0, [%g1 + %g3] ASI_IMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%g3, 1b
+	 sub		%g3, %g2, %g3
+	retry
+2:	mov		0x80, %g2
+	stxa		%g0, [%g2] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g0, [%g2] ASI_IMMU_DEMAP
+	membar		#Sync
+	retry
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
 #ifdef DCACHE_ALIASING_POSSIBLE
 	.align		32
 	.globl		xcall_flush_dcache_page_cheetah
@@ -700,7 +851,7 @@
 	ba,a,pt	%xcc, rtrap
 
 	.globl		__hypervisor_xcall_flush_tlb_mm
-__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
 	/* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
 	mov		%o0, %g2
 	mov		%o1, %g3
@@ -714,7 +865,7 @@
 	mov		HV_FAST_MMU_DEMAP_CTX, %o5
 	ta		HV_FAST_TRAP
 	mov		HV_FAST_MMU_DEMAP_CTX, %g6
-	brnz,pn		%o0, __hypervisor_tlb_xcall_error
+	brnz,pn		%o0, 1f
 	 mov		%o0, %g5
 	mov		%g2, %o0
 	mov		%g3, %o1
@@ -723,9 +874,12 @@
 	mov		%g7, %o5
 	membar		#Sync
 	retry
+1:	sethi		%hi(__hypervisor_tlb_xcall_error), %g4
+	jmpl		%g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+	 nop
 
 	.globl		__hypervisor_xcall_flush_tlb_page
-__hypervisor_xcall_flush_tlb_page: /* 17 insns */
+__hypervisor_xcall_flush_tlb_page: /* 20 insns */
 	/* %g5=ctx, %g1=vaddr */
 	mov		%o0, %g2
 	mov		%o1, %g3
@@ -737,42 +891,64 @@
 	sllx		%o0, PAGE_SHIFT, %o0
 	ta		HV_MMU_UNMAP_ADDR_TRAP
 	mov		HV_MMU_UNMAP_ADDR_TRAP, %g6
-	brnz,a,pn	%o0, __hypervisor_tlb_xcall_error
+	brnz,a,pn	%o0, 1f
 	 mov		%o0, %g5
 	mov		%g2, %o0
 	mov		%g3, %o1
 	mov		%g4, %o2
 	membar		#Sync
 	retry
+1:	sethi		%hi(__hypervisor_tlb_xcall_error), %g4
+	jmpl		%g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+	 nop
 
 	.globl		__hypervisor_xcall_flush_tlb_kernel_range
-__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
+__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
 	/* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
 	sethi		%hi(PAGE_SIZE - 1), %g2
 	or		%g2, %lo(PAGE_SIZE - 1), %g2
 	andn		%g1, %g2, %g1
 	andn		%g7, %g2, %g7
 	sub		%g7, %g1, %g3
+	srlx		%g3, 18, %g7
 	add		%g2, 1, %g2
 	sub		%g3, %g2, %g3
 	mov		%o0, %g2
 	mov		%o1, %g4
-	mov		%o2, %g7
+	brnz,pn		%g7, 2f
+	 mov		%o2, %g7
 1:	add		%g1, %g3, %o0	/* ARG0: virtual address */
 	mov		0, %o1		/* ARG1: mmu context */
 	mov		HV_MMU_ALL, %o2	/* ARG2: flags */
 	ta		HV_MMU_UNMAP_ADDR_TRAP
 	mov		HV_MMU_UNMAP_ADDR_TRAP, %g6
-	brnz,pn		%o0, __hypervisor_tlb_xcall_error
+	brnz,pn		%o0, 1f
 	 mov		%o0, %g5
 	sethi		%hi(PAGE_SIZE), %o2
 	brnz,pt		%g3, 1b
 	 sub		%g3, %o2, %g3
-	mov		%g2, %o0
+5:	mov		%g2, %o0
 	mov		%g4, %o1
 	mov		%g7, %o2
 	membar		#Sync
 	retry
+1:	sethi		%hi(__hypervisor_tlb_xcall_error), %g4
+	jmpl		%g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+	 nop
+2:	mov		%o3, %g1
+	mov		%o5, %g3
+	mov		0, %o0		/* ARG0: CPU lists unimplemented */
+	mov		0, %o1		/* ARG1: CPU lists unimplemented */
+	mov		0, %o2		/* ARG2: mmu context == nucleus */
+	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
+	mov		HV_FAST_MMU_DEMAP_CTX, %o5
+	ta		HV_FAST_TRAP
+	mov		%g1, %o3
+	brz,pt		%o0, 5b
+	 mov		%g3, %o5
+	mov		HV_FAST_MMU_DEMAP_CTX, %g6
+	ba,pt		%xcc, 1b
+	 clr		%g5
 
 	/* These just get rescheduled to PIL vectors. */
 	.globl		xcall_call_function
@@ -809,6 +985,58 @@
 
 #endif /* CONFIG_SMP */
 
+	.globl		cheetah_patch_cachetlbops
+cheetah_patch_cachetlbops:
+	save		%sp, -128, %sp
+
+	sethi		%hi(__flush_tlb_mm), %o0
+	or		%o0, %lo(__flush_tlb_mm), %o0
+	sethi		%hi(__cheetah_flush_tlb_mm), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_mm), %o1
+	call		tlb_patch_one
+	 mov		19, %o2
+
+	sethi		%hi(__flush_tlb_page), %o0
+	or		%o0, %lo(__flush_tlb_page), %o0
+	sethi		%hi(__cheetah_flush_tlb_page), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_page), %o1
+	call		tlb_patch_one
+	 mov		22, %o2
+
+	sethi		%hi(__flush_tlb_pending), %o0
+	or		%o0, %lo(__flush_tlb_pending), %o0
+	sethi		%hi(__cheetah_flush_tlb_pending), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_pending), %o1
+	call		tlb_patch_one
+	 mov		27, %o2
+
+	sethi		%hi(__flush_tlb_kernel_range), %o0
+	or		%o0, %lo(__flush_tlb_kernel_range), %o0
+	sethi		%hi(__cheetah_flush_tlb_kernel_range), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
+	call		tlb_patch_one
+	 mov		31, %o2
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	sethi		%hi(__flush_dcache_page), %o0
+	or		%o0, %lo(__flush_dcache_page), %o0
+	sethi		%hi(__cheetah_flush_dcache_page), %o1
+	or		%o1, %lo(__cheetah_flush_dcache_page), %o1
+	call		tlb_patch_one
+	 mov		11, %o2
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+#ifdef CONFIG_SMP
+	sethi		%hi(xcall_flush_tlb_kernel_range), %o0
+	or		%o0, %lo(xcall_flush_tlb_kernel_range), %o0
+	sethi		%hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
+	or		%o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
+	call		tlb_patch_one
+	 mov		44, %o2
+#endif /* CONFIG_SMP */
+
+	ret
+	 restore
 
 	.globl		hypervisor_patch_cachetlbops
 hypervisor_patch_cachetlbops:
@@ -819,28 +1047,28 @@
 	sethi		%hi(__hypervisor_flush_tlb_mm), %o1
 	or		%o1, %lo(__hypervisor_flush_tlb_mm), %o1
 	call		tlb_patch_one
-	 mov		10, %o2
+	 mov		19, %o2
 
 	sethi		%hi(__flush_tlb_page), %o0
 	or		%o0, %lo(__flush_tlb_page), %o0
 	sethi		%hi(__hypervisor_flush_tlb_page), %o1
 	or		%o1, %lo(__hypervisor_flush_tlb_page), %o1
 	call		tlb_patch_one
-	 mov		11, %o2
+	 mov		22, %o2
 
 	sethi		%hi(__flush_tlb_pending), %o0
 	or		%o0, %lo(__flush_tlb_pending), %o0
 	sethi		%hi(__hypervisor_flush_tlb_pending), %o1
 	or		%o1, %lo(__hypervisor_flush_tlb_pending), %o1
 	call		tlb_patch_one
-	 mov		16, %o2
+	 mov		27, %o2
 
 	sethi		%hi(__flush_tlb_kernel_range), %o0
 	or		%o0, %lo(__flush_tlb_kernel_range), %o0
 	sethi		%hi(__hypervisor_flush_tlb_kernel_range), %o1
 	or		%o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
 	call		tlb_patch_one
-	 mov		16, %o2
+	 mov		31, %o2
 
 #ifdef DCACHE_ALIASING_POSSIBLE
 	sethi		%hi(__flush_dcache_page), %o0
@@ -857,21 +1085,21 @@
 	sethi		%hi(__hypervisor_xcall_flush_tlb_mm), %o1
 	or		%o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
 	call		tlb_patch_one
-	 mov		21, %o2
+	 mov		24, %o2
 
 	sethi		%hi(xcall_flush_tlb_page), %o0
 	or		%o0, %lo(xcall_flush_tlb_page), %o0
 	sethi		%hi(__hypervisor_xcall_flush_tlb_page), %o1
 	or		%o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
 	call		tlb_patch_one
-	 mov		17, %o2
+	 mov		20, %o2
 
 	sethi		%hi(xcall_flush_tlb_kernel_range), %o0
 	or		%o0, %lo(xcall_flush_tlb_kernel_range), %o0
 	sethi		%hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
 	or		%o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
 	call		tlb_patch_one
-	 mov		25, %o2
+	 mov		44, %o2
 #endif /* CONFIG_SMP */
 
 	ret
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 6160761..4810e48 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -61,4 +61,7 @@
  */
 #define __write_once __read_mostly
 
+/* __ro_after_init is the generic name for the tile arch __write_once. */
+#define __ro_after_init __read_mostly
+
 #endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 0ab5ee1..aa8b067 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -888,7 +888,7 @@
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
 	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	struct scatter_walk src_sg_walk;
-	struct scatter_walk dst_sg_walk;
+	struct scatter_walk dst_sg_walk = {};
 	unsigned int i;
 
 	/* Assuming we are supporting rfc4106 64-bit extended */
@@ -968,7 +968,7 @@
 	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
-	struct scatter_walk dst_sg_walk;
+	struct scatter_walk dst_sg_walk = {};
 	unsigned int i;
 
 	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 77f28ce..9976fce 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,8 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
-CFLAGS_syscall_64.o		+= -Wno-override-init
-CFLAGS_syscall_32.o		+= -Wno-override-init
+CFLAGS_syscall_64.o		+= $(call cc-option,-Wno-override-init,)
+CFLAGS_syscall_32.o		+= $(call cc-option,-Wno-override-init,)
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ff6ef7b..2b36185 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -389,5 +389,3 @@
 380	i386	pkey_mprotect		sys_pkey_mprotect
 381	i386	pkey_alloc		sys_pkey_alloc
 382	i386	pkey_free		sys_pkey_free
-#383	i386	pkey_get		sys_pkey_get
-#384	i386	pkey_set		sys_pkey_set
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 2f024d0..e93ef0b 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -338,8 +338,6 @@
 329	common	pkey_mprotect		sys_pkey_mprotect
 330	common	pkey_alloc		sys_pkey_alloc
 331	common	pkey_free		sys_pkey_free
-#332	common	pkey_get		sys_pkey_get
-#333	common	pkey_set		sys_pkey_set
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a3a9eb8..a74a2db 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3607,10 +3607,14 @@
 
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
-	 * assume at least 3 events:
+	 * assume at least 3 events, when not running in a hypervisor:
 	 */
-	if (version > 1)
-		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+	if (version > 1) {
+		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
+
+		x86_pmu.num_counters_fixed =
+			max((int)edx.split.num_counters_fixed, assume);
+	}
 
 	if (boot_cpu_has(X86_FEATURE_PDCM)) {
 		u64 capabilities;
@@ -3898,6 +3902,7 @@
 		break;
 
 	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
 		memcpy(hw_cache_event_ids,
 		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs,
@@ -3912,7 +3917,7 @@
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
-		pr_cont("Knights Landing events, ");
+		pr_cont("Knights Landing/Mill events, ");
 		break;
 
 	case INTEL_FAM6_SKYLAKE_MOBILE:
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 3ca87b5..4f5ac72 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -48,7 +48,8 @@
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
- *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
+ *						SKL,KNL
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -56,15 +57,16 @@
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
- *			       Available model: SNB,IVB,HSW,BDW,SKL
+ *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
- *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
- *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
+ *						SKL,KNL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
@@ -118,6 +120,7 @@
 
 /* Quirk flags */
 #define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
+#define KNL_CORE_C6_MSR		(1UL << 1)
 
 struct perf_cstate_msr {
 	u64	msr;
@@ -488,6 +491,18 @@
 	.quirks			= SLM_PKG_C6_USE_C7_MSR,
 };
 
+
+static const struct cstate_model knl_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES),
+	.quirks			= KNL_CORE_C6_MSR,
+};
+
+
+
 #define X86_CSTATES_MODEL(model, states)				\
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
 
@@ -523,6 +538,8 @@
 
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@ -558,6 +575,11 @@
 	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
 		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
 
+	/* KNL has different MSR for CORE C6 */
+	if (cm->quirks & KNL_CORE_C6_MSR)
+		pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
+
+
 	has_cstate_core = cstate_probe_msr(cm->core_events,
 					   PERF_CSTATE_CORE_EVENT_MAX,
 					   core_msr, core_events_attrs);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index fc6cf21..81b321a 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -458,8 +458,8 @@
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-					event->ctx->task_ctx_data) {
+	if (branch_user_callstack(cpuc->br_sel) &&
+	    event->ctx->task_ctx_data) {
 		task_ctx = event->ctx->task_ctx_data;
 		task_ctx->lbr_callstack_users--;
 	}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index b0f0e83..0a535ce 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -763,6 +763,7 @@
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d9844cc..efca268 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1349,6 +1349,7 @@
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  bdx_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,	  knl_uncore_init),
+	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM,	  knl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,      skx_uncore_init),
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 5f845ee..81195cc 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -8,8 +8,12 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_IMC	0x191f
-#define PCI_DEVICE_ID_INTEL_SKL_U_IMC	0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC	0x1904
+#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC	0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC	0x1900
+#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC	0x1910
+#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC	0x190f
+#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC	0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -616,13 +620,29 @@
 
 static const struct pci_device_id skl_uncore_pci_ids[] = {
 	{ /* IMC */
-		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
 	{ /* IMC */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 
 	{ /* end: all zeroes */ },
 };
@@ -666,8 +686,12 @@
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
-	IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
+	IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core Y */
 	IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core U */
+	IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Dual Core */
+	IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Quad Core */
+	IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Dual Core */
+	IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Quad Core */
 	{  /* end marker */ }
 };
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1188bc8..a396292 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -194,6 +194,8 @@
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 9ae5ab8..34a46dc 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -64,5 +64,6 @@
 /* Xeon Phi */
 
 #define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
+#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */
 
 #endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 5b6753d..49da9f4 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -17,6 +17,7 @@
 
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev);
 
 extern void intel_mid_pwr_power_off(void);
 
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index de25aad..d34bd37 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -351,4 +351,10 @@
 #define arch_phys_wc_add arch_phys_wc_add
 #endif
 
+#ifdef CONFIG_X86_PAT
+extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
+extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
+#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
+#endif
+
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4b20f73..bdde807 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -948,7 +948,6 @@
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
 	bool (*invpcid_supported)(void);
-	void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);
 
 	void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 
@@ -958,8 +957,6 @@
 
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
-	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
-
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 56f4c66..78f3760 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -88,7 +88,6 @@
 
 #define MSR_IA32_RTIT_CTL		0x00000570
 #define MSR_IA32_RTIT_STATUS		0x00000571
-#define MSR_IA32_RTIT_STATUS		0x00000571
 #define MSR_IA32_RTIT_ADDR0_A		0x00000580
 #define MSR_IA32_RTIT_ADDR0_B		0x00000581
 #define MSR_IA32_RTIT_ADDR1_A		0x00000582
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 3d33a71..a34e0d4 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,8 +103,10 @@
 ({							\
 	long tmp;					\
 	struct rw_semaphore* ret;			\
+	register void *__sp asm(_ASM_SP);		\
+							\
 	asm volatile("# beginning down_write\n\t"	\
-		     LOCK_PREFIX "  xadd      %1,(%3)\n\t"	\
+		     LOCK_PREFIX "  xadd      %1,(%4)\n\t"	\
 		     /* adds 0xffff0001, returns the old value */ \
 		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
 		     /* was the active mask 0 before? */\
@@ -112,7 +114,7 @@
 		     "  call " slow_path "\n"		\
 		     "1:\n"				\
 		     "# ending down_write"		\
-		     : "+m" (sem->count), "=d" (tmp), "=a" (ret)	\
+		     : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
 		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
 		     : "memory", "cc");			\
 	ret;						\
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2aaca53..ad6f5eb0 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,6 +52,15 @@
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
+struct thread_info {
+	unsigned long		flags;		/* low level flags */
+};
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.flags		= 0,			\
+}
+
 #define init_stack		(init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8a5abaa..931ced8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -454,6 +454,7 @@
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
 	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
 	/*
 	 * stash over-ride to indicate we've been here
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index c7364bd..51287cd 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1042,8 +1042,11 @@
 
 	if (apm_info.get_power_status_broken)
 		return APM_32_UNSUPPORTED;
-	if (apm_bios_call(&call))
+	if (apm_bios_call(&call)) {
+		if (!call.err)
+			return APM_NO_ERROR;
 		return call.err;
+	}
 	*status = call.ebx;
 	*bat = call.ecx;
 	if (apm_info.get_power_status_swabinminutes) {
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b81fe2d..1e81a37 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -347,7 +347,6 @@
 #ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
-	unsigned int socket_id, core_complex_id;
 
 	bits = c->x86_coreid_bits;
 	/* Low order bits define the core id (index of core in socket) */
@@ -365,10 +364,7 @@
 	 if (c->x86 != 0x17 || !cpuid_edx(0x80000006))
 		return;
 
-	socket_id	= (c->apicid >> bits) - 1;
-	core_complex_id	= (c->apicid & ((1 << bits) - 1)) >> 3;
-
-	per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id;
+	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bd910a..cc9e980 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -979,6 +979,35 @@
 }
 
 /*
+ * The physical to logical package id mapping is initialized from the
+ * acpi/mptables information. Make sure that CPUID actually agrees with
+ * that.
+ */
+static void sanitize_package_id(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	unsigned int pkg, apicid, cpu = smp_processor_id();
+
+	apicid = apic->cpu_present_to_apicid(cpu);
+	pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+	if (apicid != c->initial_apicid) {
+		pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n",
+		       cpu, apicid, c->initial_apicid);
+		c->initial_apicid = apicid;
+	}
+	if (pkg != c->phys_proc_id) {
+		pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n",
+		       cpu, pkg, c->phys_proc_id);
+		c->phys_proc_id = pkg;
+	}
+	c->logical_proc_id = topology_phys_to_logical_pkg(pkg);
+#else
+	c->logical_proc_id = 0;
+#endif
+}
+
+/*
  * This does the hard work of actually picking apart the CPU stuff...
  */
 static void identify_cpu(struct cpuinfo_x86 *c)
@@ -1103,8 +1132,7 @@
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
-	/* The boot/hotplug time assigment got cleared, restore it */
-	c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
+	sanitize_package_id(c);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 620ab06..017bda1 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -429,7 +429,7 @@
 	 * We need the physical address of the container for both bitness since
 	 * boot_params.hdr.ramdisk_image is a physical address.
 	 */
-	cont    = __pa(container);
+	cont    = __pa_nodebug(container);
 	cont_va = container;
 #endif
 
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 8cb57df..1db8dc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,8 @@
 
 	static const struct cpuid_bit cpuid_bits[] = {
 		{ X86_FEATURE_INTEL_PT,		CR_EBX,25, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4VNNIW,	CR_EDX, 2, 0x00000007, 0 },
+		{ X86_FEATURE_AVX512_4FMAPS,	CR_EDX, 3, 0x00000007, 0 },
 		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006, 0 },
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 8116057..5130985 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -27,6 +27,7 @@
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
+#include <asm/timer.h>
 #include <asm/apic.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
@@ -94,6 +95,10 @@
 	} else {
 		pr_warn("Failed to get TSC freq from the hypervisor\n");
 	}
+
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
 }
 
 /*
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b85fe5f..90e8dde 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -350,7 +350,7 @@
 		 * continue building up new bios map based on this
 		 * information
 		 */
-		if (current_type != last_type) {
+		if (current_type != last_type || current_type == E820_PRAM) {
 			if (last_type != 0)	 {
 				new_bios[new_bios_entry].size =
 					change_point[chgidx]->addr - last_addr;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 124aa5c..095ef7d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 28cee01..d9d8d16 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -50,6 +50,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 #include <linux/frame.h>
+#include <linux/kasan.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -1057,9 +1058,10 @@
 	 * tailcall optimization. So, to be absolutely safe
 	 * we also save and restore enough stack bytes to cover
 	 * the argument area.
+	 * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
+	 * raw stack chunk with redzones:
 	 */
-	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
-	       MIN_STACK_SIZE(addr));
+	__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
 	regs->flags &= ~X86_EFLAGS_IF;
 	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
@@ -1080,6 +1082,9 @@
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+	/* Unpoison stack redzones in the frames we are going to jump over. */
+	kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
+
 	asm volatile (
 #ifdef CONFIG_X86_64
 			"       xchg   %%rbx,%%rsp	\n"
@@ -1118,7 +1123,7 @@
 		/* It's OK to start function graph tracing again */
 		unpause_graph_tracing();
 		*regs = kcb->jprobe_saved_regs;
-		memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
+		__memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
 		preempt_enable_no_resched();
 		return 1;
 	}
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index efe73aa..7b0d3da 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -18,8 +18,10 @@
 
 #ifdef CC_USING_FENTRY
 # define function_hook	__fentry__
+EXPORT_SYMBOL(__fentry__)
 #else
 # define function_hook	mcount
+EXPORT_SYMBOL(mcount)
 #endif
 
 /* All cases save the original rbp (8 bytes) */
@@ -295,7 +297,6 @@
 	jmp fgraph_trace
 END(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
-EXPORT_SYMBOL(function_hook)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 51402a7..0bee04d 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -625,8 +625,6 @@
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
 			amd_disable_seq_and_redirect_scrub);
 
-#endif
-
 #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
 #include <linux/jump_label.h>
 #include <asm/string_64.h>
@@ -657,3 +655,4 @@
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
+#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bbfbca5..9c337b0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1221,11 +1221,16 @@
 	 */
 	get_smp_config();
 
+	/*
+	 * Systems w/o ACPI and mptables might not have it mapped the local
+	 * APIC yet, but prefill_possible_map() might need to access it.
+	 */
+	init_apic_mappings();
+
 	prefill_possible_map();
 
 	init_cpu_to_node();
 
-	init_apic_mappings();
 	io_apic_init_mappings();
 
 	kvm_guest_init();
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 40df337..ec1f756 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -105,9 +105,6 @@
 	/* Don't let flags to be set from userspace */
 	act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
 
-	if (user_64bit_mode(current_pt_regs()))
-		return;
-
 	if (in_ia32_syscall())
 		act->sa.sa_flags |= SA_IA32_ABI;
 	if (in_x32_syscall())
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 68f8cc2..c00cb64 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -261,8 +261,10 @@
 
 __visible void smp_reschedule_interrupt(struct pt_regs *regs)
 {
+	irq_enter();
 	ack_APIC_irq();
 	__smp_reschedule_interrupt();
+	irq_exit();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 951f093..42f5eb7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1409,15 +1409,17 @@
 
 	/* No boot processor was found in mptable or ACPI MADT */
 	if (!num_processors) {
-		int apicid = boot_cpu_physical_apicid;
-		int cpu = hard_smp_processor_id();
+		if (boot_cpu_has(X86_FEATURE_APIC)) {
+			int apicid = boot_cpu_physical_apicid;
+			int cpu = hard_smp_processor_id();
 
-		pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
+			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
 
-		/* Make sure boot cpu is enumerated */
-		if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
-		    apic->apic_id_valid(apicid))
-			generic_processor_info(apicid, boot_cpu_apic_version);
+			/* Make sure boot cpu is enumerated */
+			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
+			    apic->apic_id_valid(apicid))
+				generic_processor_info(apicid, boot_cpu_apic_version);
+		}
 
 		if (!num_processors)
 			num_processors = 1;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c9a0738..a23ce84 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -57,7 +57,8 @@
 	unsigned char opcode[15];
 	unsigned long addr = convert_ip_to_linear(child, regs);
 
-	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
+	copied = access_process_vm(child, addr, opcode, sizeof(opcode),
+			FOLL_FORCE);
 	for (i = 0; i < copied; i++) {
 		switch (opcode[i]) {
 		/* popf and iret */
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 9298993..2d721e5 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -47,7 +47,14 @@
 	get_stack_info(first_frame, state->task, &state->stack_info,
 		       &state->stack_mask);
 
-	if (!__kernel_text_address(*first_frame))
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.  Skip ahead until we reach it.
+	 */
+	if (!unwind_done(state) &&
+	    (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+	    !__kernel_text_address(*first_frame)))
 		unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4e95d3e..cbd7b92 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5045,7 +5045,7 @@
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-	if (ctxt->rip_relative)
+	if (ctxt->rip_relative && likely(ctxt->memopp))
 		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
 					ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index c7220ba..1a22de7 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -594,7 +594,7 @@
 	ioapic->irr = 0;
 	ioapic->irr_delivered = 0;
 	ioapic->id = 0;
-	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
+	memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi));
 	rtc_irq_eoi_tracking_reset(ioapic);
 }
 
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 25810b1..4da0303 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -156,6 +156,16 @@
 }
 
 
+static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
+		    struct kvm *kvm, int irq_source_id, int level,
+		    bool line_status)
+{
+	if (!level)
+		return -1;
+
+	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
+}
+
 int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 			      struct kvm *kvm, int irq_source_id, int level,
 			      bool line_status)
@@ -163,18 +173,26 @@
 	struct kvm_lapic_irq irq;
 	int r;
 
-	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
-		return -EWOULDBLOCK;
+	switch (e->type) {
+	case KVM_IRQ_ROUTING_HV_SINT:
+		return kvm_hv_set_sint(e, kvm, irq_source_id, level,
+				       line_status);
 
-	if (kvm_msi_route_invalid(kvm, e))
-		return -EINVAL;
+	case KVM_IRQ_ROUTING_MSI:
+		if (kvm_msi_route_invalid(kvm, e))
+			return -EINVAL;
 
-	kvm_set_msi_irq(kvm, e, &irq);
+		kvm_set_msi_irq(kvm, e, &irq);
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
-		return r;
-	else
-		return -EWOULDBLOCK;
+		if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+			return r;
+		break;
+
+	default:
+		break;
+	}
+
+	return -EWOULDBLOCK;
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)
@@ -254,16 +272,6 @@
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int irq_source_id, int level,
-		    bool line_status)
-{
-	if (!level)
-		return -1;
-
-	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
-}
-
 int kvm_set_routing_entry(struct kvm *kvm,
 			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
@@ -423,18 +431,6 @@
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
-		     int irq_source_id, int level, bool line_status)
-{
-	switch (irq->type) {
-	case KVM_IRQ_ROUTING_HV_SINT:
-		return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
-				       line_status);
-	default:
-		return -EWOULDBLOCK;
-	}
-}
-
 void kvm_arch_irq_routing_update(struct kvm *kvm)
 {
 	kvm_hv_irq_routing_update(kvm);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f8157a3..8ca1eca 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1138,21 +1138,6 @@
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	svm->vmcb->control.tsc_offset += adjustment;
-	if (is_guest_mode(vcpu))
-		svm->nested.hsave->control.tsc_offset += adjustment;
-	else
-		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
-				     svm->vmcb->control.tsc_offset - adjustment,
-				     svm->vmcb->control.tsc_offset);
-
-	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-}
-
 static void avic_init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = svm->vmcb;
@@ -3449,12 +3434,6 @@
 	return 0;
 }
 
-static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
-	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
-	return vmcb->control.tsc_offset + host_tsc;
-}
-
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -5422,8 +5401,6 @@
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
 	.write_tsc_offset = svm_write_tsc_offset,
-	.adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
-	.read_l1_tsc = svm_read_l1_tsc,
 
 	.set_tdp_cr3 = set_tdp_cr3,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cf1b16d..5382b82 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -187,6 +187,7 @@
  */
 struct loaded_vmcs {
 	struct vmcs *vmcs;
+	struct vmcs *shadow_vmcs;
 	int cpu;
 	int launched;
 	struct list_head loaded_vmcss_on_cpu_link;
@@ -411,7 +412,6 @@
 	 * memory during VMXOFF, VMCLEAR, VMPTRLD.
 	 */
 	struct vmcs12 *cached_vmcs12;
-	struct vmcs *current_shadow_vmcs;
 	/*
 	 * Indicates if the shadow vmcs must be updated with the
 	 * data hold by vmcs12
@@ -421,7 +421,6 @@
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
-	u64 vmcs01_tsc_offset;
 	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
@@ -1419,6 +1418,8 @@
 static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 {
 	vmcs_clear(loaded_vmcs->vmcs);
+	if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
+		vmcs_clear(loaded_vmcs->shadow_vmcs);
 	loaded_vmcs->cpu = -1;
 	loaded_vmcs->launched = 0;
 }
@@ -2605,20 +2606,6 @@
 }
 
 /*
- * Like guest_read_tsc, but always returns L1's notion of the timestamp
- * counter, even if a nested guest (L2) is currently running.
- */
-static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
-	u64 tsc_offset;
-
-	tsc_offset = is_guest_mode(vcpu) ?
-		to_vmx(vcpu)->nested.vmcs01_tsc_offset :
-		vmcs_read64(TSC_OFFSET);
-	return host_tsc + tsc_offset;
-}
-
-/*
  * writes 'offset' into guest's timestamp counter offset register
  */
 static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -2631,7 +2618,6 @@
 		 * to the newly set TSC to get L2's TSC.
 		 */
 		struct vmcs12 *vmcs12;
-		to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
 		/* recalculate vmcs02.TSC_OFFSET: */
 		vmcs12 = get_vmcs12(vcpu);
 		vmcs_write64(TSC_OFFSET, offset +
@@ -2644,19 +2630,6 @@
 	}
 }
 
-static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
-	u64 offset = vmcs_read64(TSC_OFFSET);
-
-	vmcs_write64(TSC_OFFSET, offset + adjustment);
-	if (is_guest_mode(vcpu)) {
-		/* Even when running L2, the adjustment needs to apply to L1 */
-		to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
-	} else
-		trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
-					   offset + adjustment);
-}
-
 static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
@@ -3562,6 +3535,7 @@
 	loaded_vmcs_clear(loaded_vmcs);
 	free_vmcs(loaded_vmcs->vmcs);
 	loaded_vmcs->vmcs = NULL;
+	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
 }
 
 static void free_kvm_area(void)
@@ -6696,6 +6670,7 @@
 	if (!item)
 		return NULL;
 	item->vmcs02.vmcs = alloc_vmcs();
+	item->vmcs02.shadow_vmcs = NULL;
 	if (!item->vmcs02.vmcs) {
 		kfree(item);
 		return NULL;
@@ -7072,7 +7047,7 @@
 		shadow_vmcs->revision_id |= (1u << 31);
 		/* init shadow vmcs */
 		vmcs_clear(shadow_vmcs);
-		vmx->nested.current_shadow_vmcs = shadow_vmcs;
+		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
 	}
 
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
@@ -7174,8 +7149,11 @@
 		free_page((unsigned long)vmx->nested.msr_bitmap);
 		vmx->nested.msr_bitmap = NULL;
 	}
-	if (enable_shadow_vmcs)
-		free_vmcs(vmx->nested.current_shadow_vmcs);
+	if (enable_shadow_vmcs) {
+		vmcs_clear(vmx->vmcs01.shadow_vmcs);
+		free_vmcs(vmx->vmcs01.shadow_vmcs);
+		vmx->vmcs01.shadow_vmcs = NULL;
+	}
 	kfree(vmx->nested.cached_vmcs12);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
@@ -7352,7 +7330,7 @@
 	int i;
 	unsigned long field;
 	u64 field_value;
-	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 	const unsigned long *fields = shadow_read_write_fields;
 	const int num_fields = max_shadow_read_write_fields;
 
@@ -7401,7 +7379,7 @@
 	int i, q;
 	unsigned long field;
 	u64 field_value = 0;
-	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 
 	vmcs_load(shadow_vmcs);
 
@@ -7591,7 +7569,7 @@
 			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
 				      SECONDARY_EXEC_SHADOW_VMCS);
 			vmcs_write64(VMCS_LINK_POINTER,
-				     __pa(vmx->nested.current_shadow_vmcs));
+				     __pa(vmx->vmcs01.shadow_vmcs));
 			vmx->nested.sync_shadow_vmcs = true;
 		}
 	}
@@ -7659,7 +7637,7 @@
 
 	types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
-	if (!(types & (1UL << type))) {
+	if (type >= 32 || !(types & (1 << type))) {
 		nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 		skip_emulated_instruction(vcpu);
@@ -7722,7 +7700,7 @@
 
 	types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
 
-	if (!(types & (1UL << type))) {
+	if (type >= 32 || !(types & (1 << type))) {
 		nested_vmx_failValid(vcpu,
 			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 		skip_emulated_instruction(vcpu);
@@ -9156,6 +9134,7 @@
 
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	vmx->loaded_vmcs->vmcs = alloc_vmcs();
+	vmx->loaded_vmcs->shadow_vmcs = NULL;
 	if (!vmx->loaded_vmcs->vmcs)
 		goto free_msrs;
 	if (!vmm_exclusive)
@@ -10061,9 +10040,9 @@
 
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
-			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+			vcpu->arch.tsc_offset + vmcs12->tsc_offset);
 	else
-		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+		vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (kvm_has_tsc_control)
 		decache_tsc_multiplier(vmx);
 
@@ -10293,8 +10272,6 @@
 
 	enter_guest_mode(vcpu);
 
-	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
-
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
@@ -10818,7 +10795,7 @@
 	load_vmcs12_host_state(vcpu, vmcs12);
 
 	/* Update any VMCS fields that might have changed while L2 ran */
-	vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (vmx->hv_deadline_tsc == -1)
 		vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
 				PIN_BASED_VMX_PREEMPTION_TIMER);
@@ -11339,8 +11316,6 @@
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
 	.write_tsc_offset = vmx_write_tsc_offset,
-	.adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
-	.read_l1_tsc = vmx_read_l1_tsc,
 
 	.set_tdp_cr3 = vmx_set_cr3,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c633de..04c5d96 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -210,7 +210,18 @@
 	struct kvm_shared_msrs *locals
 		= container_of(urn, struct kvm_shared_msrs, urn);
 	struct kvm_shared_msr_values *values;
+	unsigned long flags;
 
+	/*
+	 * Disabling irqs at this point since the following code could be
+	 * interrupted and executed through kvm_arch_hardware_disable()
+	 */
+	local_irq_save(flags);
+	if (locals->registered) {
+		locals->registered = false;
+		user_return_notifier_unregister(urn);
+	}
+	local_irq_restore(flags);
 	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
 		values = &locals->values[slot];
 		if (values->host != values->curr) {
@@ -218,8 +229,6 @@
 			values->curr = values->host;
 		}
 	}
-	locals->registered = false;
-	user_return_notifier_unregister(urn);
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
@@ -1409,7 +1418,7 @@
 
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-	return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
+	return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
@@ -1547,7 +1556,7 @@
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
 					   s64 adjustment)
 {
-	kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+	kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
 }
 
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -1555,7 +1564,7 @@
 	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
 		WARN_ON(adjustment < 0);
 	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
-	kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+	adjust_tsc_offset_guest(vcpu, adjustment);
 }
 
 #ifdef CONFIG_X86_64
@@ -1724,18 +1733,23 @@
 
 static u64 __get_kvmclock_ns(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
 	struct kvm_arch *ka = &kvm->arch;
-	s64 ns;
+	struct pvclock_vcpu_time_info hv_clock;
 
-	if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
-		u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-		ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
-	} else {
-		ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	if (!ka->use_master_clock) {
+		spin_unlock(&ka->pvclock_gtod_sync_lock);
+		return ktime_get_boot_ns() + ka->kvmclock_offset;
 	}
 
-	return ns;
+	hv_clock.tsc_timestamp = ka->master_cycle_now;
+	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+			   &hv_clock.tsc_shift,
+			   &hv_clock.tsc_to_system_mul);
+	return __pvclock_read_cycles(&hv_clock, rdtsc());
 }
 
 u64 get_kvmclock_ns(struct kvm *kvm)
@@ -2262,7 +2276,7 @@
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
 		 */
-		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
+		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
 		if (!guest_cpuid_has_osvw(vcpu))
@@ -2280,11 +2294,11 @@
 		if (kvm_pmu_is_valid_msr(vcpu, msr))
 			return kvm_pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
-			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
 				    msr, data);
 			return 1;
 		} else {
-			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
 				    msr, data);
 			break;
 		}
@@ -2596,7 +2610,6 @@
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
 	case KVM_CAP_XEN_HVM:
-	case KVM_CAP_ADJUST_CLOCK:
 	case KVM_CAP_VCPU_EVENTS:
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:
@@ -2623,6 +2636,9 @@
 #endif
 		r = 1;
 		break;
+	case KVM_CAP_ADJUST_CLOCK:
+		r = KVM_CLOCK_TSC_STABLE;
+		break;
 	case KVM_CAP_X86_SMM:
 		/* SMBASE is usually relocated above 1M on modern chipsets,
 		 * and SMM handlers might indeed rely on 4G segment limits,
@@ -3415,6 +3431,7 @@
 	};
 	case KVM_SET_VAPIC_ADDR: {
 		struct kvm_vapic_addr va;
+		int idx;
 
 		r = -EINVAL;
 		if (!lapic_in_kernel(vcpu))
@@ -3422,7 +3439,9 @@
 		r = -EFAULT;
 		if (copy_from_user(&va, argp, sizeof va))
 			goto out;
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
 	case KVM_X86_SETUP_MCE: {
@@ -4103,9 +4122,11 @@
 		struct kvm_clock_data user_ns;
 		u64 now_ns;
 
-		now_ns = get_kvmclock_ns(kvm);
+		local_irq_disable();
+		now_ns = __get_kvmclock_ns(kvm);
 		user_ns.clock = now_ns;
-		user_ns.flags = 0;
+		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
+		local_irq_enable();
 		memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
 		r = -EFAULT;
@@ -5733,13 +5754,13 @@
 
 static void kvm_timer_init(void)
 {
-	int cpu;
-
 	max_tsc_khz = tsc_khz;
 
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_CPU_FREQ
 		struct cpufreq_policy policy;
+		int cpu;
+
 		memset(&policy, 0, sizeof(policy));
 		cpu = get_cpu();
 		cpufreq_get_policy(&policy, cpu);
@@ -7410,10 +7431,12 @@
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
+
 	kvmclock_reset(vcpu);
 
-	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 	kvm_x86_ops->vcpu_free(vcpu);
+	free_cpumask_var(wbinvd_dirty_mask);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index b8b6a60..0d4fb3e 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -435,7 +435,7 @@
 
 		ret = get_user_pages_unlocked(start,
 					      (end - start) >> PAGE_SHIFT,
-					      write, 0, pages);
+					      pages, write ? FOLL_WRITE : 0);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index ddd2661..887e571 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -104,10 +104,10 @@
 	 * consistent with the vaddr_start/vaddr_end variables.
 	 */
 	BUILD_BUG_ON(vaddr_start >= vaddr_end);
-	BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
 		     vaddr_end >= EFI_VA_START);
-	BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
-		      config_enabled(CONFIG_EFI)) &&
+	BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
+		      IS_ENABLED(CONFIG_EFI)) &&
 		     vaddr_end >= __START_KERNEL_map);
 	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 8047687..e4f8009 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -544,10 +544,9 @@
 {
 	long gup_ret;
 	int nr_pages = 1;
-	int force = 0;
 
-	gup_ret = get_user_pages((unsigned long)addr, nr_pages, write,
-			force, NULL, NULL);
+	gup_ret = get_user_pages((unsigned long)addr, nr_pages,
+			write ? FOLL_WRITE : 0,	NULL, NULL);
 	/*
 	 * get_user_pages() returns number of pages gotten.
 	 * 0 means we failed to fault in and get anything,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 170cc4f..83e701f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -730,6 +730,20 @@
 	free_memtype(start, end);
 }
 
+int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
+{
+	enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
+
+	return io_reserve_memtype(start, start + size, &type);
+}
+EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
+
+void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
+{
+	io_free_memtype(start, start + size);
+}
+EXPORT_SYMBOL(arch_io_free_memtype_wc);
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t vma_prot)
 {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index bf99aa7..936a488 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -861,7 +861,7 @@
 	int count = 0, pg_shift = 0;
 	void *new_memmap = NULL;
 	efi_status_t status;
-	phys_addr_t pa;
+	unsigned long pa;
 
 	efi.systab = NULL;
 
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 58b0f80..319148b 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -31,6 +31,7 @@
 #include <linux/io.h>
 #include <linux/reboot.h>
 #include <linux/slab.h>
+#include <linux/ucs2_string.h>
 
 #include <asm/setup.h>
 #include <asm/page.h>
@@ -211,6 +212,35 @@
 	memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
 }
 
+/*
+ * Wrapper for slow_virt_to_phys() that handles NULL addresses.
+ */
+static inline phys_addr_t
+virt_to_phys_or_null_size(void *va, unsigned long size)
+{
+	bool bad_size;
+
+	if (!va)
+		return 0;
+
+	if (virt_addr_valid(va))
+		return virt_to_phys(va);
+
+	/*
+	 * A fully aligned variable on the stack is guaranteed not to
+	 * cross a page bounary. Try to catch strings on the stack by
+	 * checking that 'size' is a power of two.
+	 */
+	bad_size = size > PAGE_SIZE || !is_power_of_2(size);
+
+	WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size);
+
+	return slow_virt_to_phys(va);
+}
+
+#define virt_to_phys_or_null(addr)				\
+	virt_to_phys_or_null_size((addr), sizeof(*(addr)))
+
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	unsigned long pfn, text;
@@ -494,8 +524,8 @@
 
 	spin_lock(&rtc_lock);
 
-	phys_tm = virt_to_phys(tm);
-	phys_tc = virt_to_phys(tc);
+	phys_tm = virt_to_phys_or_null(tm);
+	phys_tc = virt_to_phys_or_null(tc);
 
 	status = efi_thunk(get_time, phys_tm, phys_tc);
 
@@ -511,7 +541,7 @@
 
 	spin_lock(&rtc_lock);
 
-	phys_tm = virt_to_phys(tm);
+	phys_tm = virt_to_phys_or_null(tm);
 
 	status = efi_thunk(set_time, phys_tm);
 
@@ -529,9 +559,9 @@
 
 	spin_lock(&rtc_lock);
 
-	phys_enabled = virt_to_phys(enabled);
-	phys_pending = virt_to_phys(pending);
-	phys_tm = virt_to_phys(tm);
+	phys_enabled = virt_to_phys_or_null(enabled);
+	phys_pending = virt_to_phys_or_null(pending);
+	phys_tm = virt_to_phys_or_null(tm);
 
 	status = efi_thunk(get_wakeup_time, phys_enabled,
 			     phys_pending, phys_tm);
@@ -549,7 +579,7 @@
 
 	spin_lock(&rtc_lock);
 
-	phys_tm = virt_to_phys(tm);
+	phys_tm = virt_to_phys_or_null(tm);
 
 	status = efi_thunk(set_wakeup_time, enabled, phys_tm);
 
@@ -558,6 +588,10 @@
 	return status;
 }
 
+static unsigned long efi_name_size(efi_char16_t *name)
+{
+	return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1;
+}
 
 static efi_status_t
 efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
@@ -567,11 +601,11 @@
 	u32 phys_name, phys_vendor, phys_attr;
 	u32 phys_data_size, phys_data;
 
-	phys_data_size = virt_to_phys(data_size);
-	phys_vendor = virt_to_phys(vendor);
-	phys_name = virt_to_phys(name);
-	phys_attr = virt_to_phys(attr);
-	phys_data = virt_to_phys(data);
+	phys_data_size = virt_to_phys_or_null(data_size);
+	phys_vendor = virt_to_phys_or_null(vendor);
+	phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+	phys_attr = virt_to_phys_or_null(attr);
+	phys_data = virt_to_phys_or_null_size(data, *data_size);
 
 	status = efi_thunk(get_variable, phys_name, phys_vendor,
 			   phys_attr, phys_data_size, phys_data);
@@ -586,9 +620,9 @@
 	u32 phys_name, phys_vendor, phys_data;
 	efi_status_t status;
 
-	phys_name = virt_to_phys(name);
-	phys_vendor = virt_to_phys(vendor);
-	phys_data = virt_to_phys(data);
+	phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+	phys_vendor = virt_to_phys_or_null(vendor);
+	phys_data = virt_to_phys_or_null_size(data, data_size);
 
 	/* If data_size is > sizeof(u32) we've got problems */
 	status = efi_thunk(set_variable, phys_name, phys_vendor,
@@ -605,9 +639,9 @@
 	efi_status_t status;
 	u32 phys_name_size, phys_name, phys_vendor;
 
-	phys_name_size = virt_to_phys(name_size);
-	phys_vendor = virt_to_phys(vendor);
-	phys_name = virt_to_phys(name);
+	phys_name_size = virt_to_phys_or_null(name_size);
+	phys_vendor = virt_to_phys_or_null(vendor);
+	phys_name = virt_to_phys_or_null_size(name, *name_size);
 
 	status = efi_thunk(get_next_variable, phys_name_size,
 			   phys_name, phys_vendor);
@@ -621,7 +655,7 @@
 	efi_status_t status;
 	u32 phys_count;
 
-	phys_count = virt_to_phys(count);
+	phys_count = virt_to_phys_or_null(count);
 	status = efi_thunk(get_next_high_mono_count, phys_count);
 
 	return status;
@@ -633,7 +667,7 @@
 {
 	u32 phys_data;
 
-	phys_data = virt_to_phys(data);
+	phys_data = virt_to_phys_or_null_size(data, data_size);
 
 	efi_thunk(reset_system, reset_type, status, data_size, phys_data);
 }
@@ -661,9 +695,9 @@
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	phys_storage = virt_to_phys(storage_space);
-	phys_remaining = virt_to_phys(remaining_space);
-	phys_max = virt_to_phys(max_variable_size);
+	phys_storage = virt_to_phys_or_null(storage_space);
+	phys_remaining = virt_to_phys_or_null(remaining_space);
+	phys_max = virt_to_phys_or_null(max_variable_size);
 
 	status = efi_thunk(query_variable_info, attr, phys_storage,
 			   phys_remaining, phys_max);
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 5d3b45a..67375dd 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -272,6 +272,25 @@
 }
 EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
 
+pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	struct mid_pwr *pwr = midpwr;
+	int id, reg, bit;
+	u32 power;
+
+	if (!pwr || !pwr->available)
+		return PCI_UNKNOWN;
+
+	id = intel_mid_pwr_get_lss_id(pdev);
+	if (id < 0)
+		return PCI_UNKNOWN;
+
+	reg = (id * LSS_PWS_BITS) / 32;
+	bit = (id * LSS_PWS_BITS) % 32;
+	power = mid_pwr_get_state(pwr, reg);
+	return (__force pci_power_t)((power >> bit) & 3);
+}
+
 void intel_mid_pwr_power_off(void)
 {
 	struct mid_pwr *pwr = midpwr;
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index b4d5e95..4a6a5a2 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -40,7 +40,15 @@
 		 */
 		return BIOS_STATUS_UNIMPLEMENTED;
 
-	ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+	/*
+	 * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+	 * callback method, which uses efi_call() directly, with the kernel page tables:
+	 */
+	if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+		ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
+	else
+		ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(uv_bios_call);
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index ac58c16..555b9fa 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -16,6 +16,7 @@
 
 KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
 KBUILD_CFLAGS += -m$(BITS)
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 5766ead..60a5a5a 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -36,7 +36,8 @@
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
 		 */
-		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		n = access_process_vm(current, addr, &instr, sizeof(instr),
+				FOLL_FORCE);
 		if (n != sizeof(instr)) {
 			printk(KERN_ERR "is_syscall : failed to read "
 			       "instruction from 0x%lx\n", addr);
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 0b5c184..e30202b 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -212,7 +212,8 @@
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
 		 */
-		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		n = access_process_vm(current, addr, &instr, sizeof(instr),
+				FOLL_FORCE);
 		if (n != sizeof(instr)) {
 			printk("is_syscall : failed to read instruction from "
 			       "0x%lx\n", addr);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0fdd57..bdd8556 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1837,6 +1837,7 @@
 
 	xen_domain_type = XEN_HVM_DOMAIN;
 }
+#endif
 
 static int xen_cpu_up_prepare(unsigned int cpu)
 {
@@ -1887,6 +1888,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_XEN_PVHVM
 #ifdef CONFIG_KEXEC_CORE
 static void xen_hvm_shutdown(void)
 {
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index de9b14b..cd400af 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -767,7 +767,14 @@
 #define __NR_pwritev2				347
 __SYSCALL(347, sys_pwritev2, 6)
 
-#define __NR_syscall_count			348
+#define __NR_pkey_mprotect			348
+__SYSCALL(348, sys_pkey_mprotect, 4)
+#define __NR_pkey_alloc				349
+__SYSCALL(349, sys_pkey_alloc, 2)
+#define __NR_pkey_free				350
+__SYSCALL(350, sys_pkey_free, 1)
+
+#define __NR_syscall_count			351
 
 /*
  * sysxtensa syscall handler
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 9a5bcd0..be81e69 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -172,10 +172,11 @@
 {
 	of_clk_init(NULL);
 #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
-	printk("Calibrating CPU frequency ");
+	pr_info("Calibrating CPU frequency ");
 	calibrate_ccount();
-	printk("%d.%02d MHz\n", (int)ccount_freq/1000000,
-			(int)(ccount_freq/10000)%100);
+	pr_cont("%d.%02d MHz\n",
+		(int)ccount_freq / 1000000,
+		(int)(ccount_freq / 10000) % 100);
 #else
 	ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL;
 #endif
@@ -210,9 +211,8 @@
 void calibrate_delay(void)
 {
 	loops_per_jiffy = ccount_freq / HZ;
-	printk("Calibrating delay loop (skipped)... "
-	       "%lu.%02lu BogoMIPS preset\n",
-	       loops_per_jiffy/(1000000/HZ),
-	       (loops_per_jiffy/(10000/HZ)) % 100);
+	pr_info("Calibrating delay loop (skipped)... %lu.%02lu BogoMIPS preset\n",
+		loops_per_jiffy / (1000000 / HZ),
+		(loops_per_jiffy / (10000 / HZ)) % 100);
 }
 #endif
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index d02fc30..ce37d5b 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -465,26 +465,25 @@
 
 	for (i = 0; i < 16; i++) {
 		if ((i % 8) == 0)
-			printk(KERN_INFO "a%02d:", i);
-		printk(KERN_CONT " %08lx", regs->areg[i]);
+			pr_info("a%02d:", i);
+		pr_cont(" %08lx", regs->areg[i]);
 	}
-	printk(KERN_CONT "\n");
-
-	printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n",
-	       regs->pc, regs->ps, regs->depc, regs->excvaddr);
-	printk("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n",
-	       regs->lbeg, regs->lend, regs->lcount, regs->sar);
+	pr_cont("\n");
+	pr_info("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n",
+		regs->pc, regs->ps, regs->depc, regs->excvaddr);
+	pr_info("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n",
+		regs->lbeg, regs->lend, regs->lcount, regs->sar);
 	if (user_mode(regs))
-		printk("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n",
-		       regs->windowbase, regs->windowstart, regs->wmask,
-		       regs->syscall);
+		pr_cont("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n",
+			regs->windowbase, regs->windowstart, regs->wmask,
+			regs->syscall);
 }
 
 static int show_trace_cb(struct stackframe *frame, void *data)
 {
 	if (kernel_text_address(frame->pc)) {
-		printk(" [<%08lx>] ", frame->pc);
-		print_symbol("%s\n", frame->pc);
+		pr_cont(" [<%08lx>]", frame->pc);
+		print_symbol(" %s\n", frame->pc);
 	}
 	return 0;
 }
@@ -494,19 +493,13 @@
 	if (!sp)
 		sp = stack_pointer(task);
 
-	printk("Call Trace:");
-#ifdef CONFIG_KALLSYMS
-	printk("\n");
-#endif
+	pr_info("Call Trace:\n");
 	walk_stackframe(sp, show_trace_cb, NULL);
-	printk("\n");
+#ifndef CONFIG_KALLSYMS
+	pr_cont("\n");
+#endif
 }
 
-/*
- * This routine abuses get_user()/put_user() to reference pointers
- * with at least a bit of error checking ...
- */
-
 static int kstack_depth_to_print = 24;
 
 void show_stack(struct task_struct *task, unsigned long *sp)
@@ -518,52 +511,29 @@
 		sp = stack_pointer(task);
 	stack = sp;
 
-	printk("\nStack: ");
+	pr_info("Stack:\n");
 
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (kstack_end(sp))
 			break;
-		if (i && ((i % 8) == 0))
-			printk("\n       ");
-		printk("%08lx ", *sp++);
+		pr_cont(" %08lx", *sp++);
+		if (i % 8 == 7)
+			pr_cont("\n");
 	}
-	printk("\n");
 	show_trace(task, stack);
 }
 
-void show_code(unsigned int *pc)
-{
-	long i;
-
-	printk("\nCode:");
-
-	for(i = -3 ; i < 6 ; i++) {
-		unsigned long insn;
-		if (__get_user(insn, pc + i)) {
-			printk(" (Bad address in pc)\n");
-			break;
-		}
-		printk("%c%08lx%c",(i?' ':'<'),insn,(i?' ':'>'));
-	}
-}
-
 DEFINE_SPINLOCK(die_lock);
 
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static int die_counter;
-	int nl = 0;
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
 
-	printk("%s: sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
-	nl = 1;
-#endif
-	if (nl)
-		printk("\n");
+	pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter,
+		IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "");
 	show_regs(regs);
 	if (!user_mode(regs))
 		show_stack(NULL, (unsigned long*)regs->areg[1]);
diff --git a/block/badblocks.c b/block/badblocks.c
index 7be53cb..6ebcef2 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -133,6 +133,26 @@
 }
 EXPORT_SYMBOL_GPL(badblocks_check);
 
+static void badblocks_update_acked(struct badblocks *bb)
+{
+	u64 *p = bb->page;
+	int i;
+	bool unacked = false;
+
+	if (!bb->unacked_exist)
+		return;
+
+	for (i = 0; i < bb->count ; i++) {
+		if (!BB_ACK(p[i])) {
+			unacked = true;
+			break;
+		}
+	}
+
+	if (!unacked)
+		bb->unacked_exist = 0;
+}
+
 /**
  * badblocks_set() - Add a range of bad blocks to the table.
  * @bb:		the badblocks structure that holds all badblock information
@@ -294,6 +314,8 @@
 	bb->changed = 1;
 	if (!acknowledged)
 		bb->unacked_exist = 1;
+	else
+		badblocks_update_acked(bb);
 	write_sequnlock_irqrestore(&bb->lock, flags);
 
 	return rv;
@@ -354,7 +376,8 @@
 		 * current range.  Earlier ranges could also overlap,
 		 * but only this one can overlap the end of the range.
 		 */
-		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
+		    (BB_OFFSET(p[lo]) < target)) {
 			/* Partial overlap, leave the tail of this range */
 			int ack = BB_ACK(p[lo]);
 			sector_t a = BB_OFFSET(p[lo]);
@@ -377,7 +400,8 @@
 			lo--;
 		}
 		while (lo >= 0 &&
-		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
+		       (BB_OFFSET(p[lo]) < target)) {
 			/* This range does overlap */
 			if (BB_OFFSET(p[lo]) < s) {
 				/* Keep the early parts of this range. */
@@ -399,6 +423,7 @@
 		}
 	}
 
+	badblocks_update_acked(bb);
 	bb->changed = 1;
 out:
 	write_sequnlock_irq(&bb->lock);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6a14b68..3c882cb 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -343,6 +343,34 @@
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
 	/*
+	 * Updating q->in_flight[] here for making this tag usable
+	 * early. Because in blk_queue_start_tag(),
+	 * q->in_flight[BLK_RW_ASYNC] is used to limit async I/O and
+	 * reserve tags for sync I/O.
+	 *
+	 * More importantly this way can avoid the following I/O
+	 * deadlock:
+	 *
+	 * - suppose there are 40 fua requests comming to flush queue
+	 *   and queue depth is 31
+	 * - 30 rqs are scheduled then blk_queue_start_tag() can't alloc
+	 *   tag for async I/O any more
+	 * - all the 30 rqs are completed before FLUSH_PENDING_TIMEOUT
+	 *   and flush_data_end_io() is called
+	 * - the other rqs still can't go ahead if not updating
+	 *   q->in_flight[BLK_RW_ASYNC] here, meantime these rqs
+	 *   are held in flush data queue and make no progress of
+	 *   handling post flush rq
+	 * - only after the post flush rq is handled, all these rqs
+	 *   can be completed
+	 */
+
+	elv_completed_request(q, rq);
+
+	/* for avoiding double accounting */
+	rq->cmd_flags &= ~REQ_STARTED;
+
+	/*
 	 * After populating an empty queue, kick it to avoid stall.  Read
 	 * the comment in flush_end_io().
 	 */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ddc2eed..f3d27a6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1217,9 +1217,9 @@
 	blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
 	rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
 
-	hctx->queued++;
-	data->hctx = hctx;
-	data->ctx = ctx;
+	data->hctx = alloc_data.hctx;
+	data->ctx = alloc_data.ctx;
+	data->hctx->queued++;
 	return rq;
 }
 
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 2d8466f..05e21b4 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -214,23 +214,26 @@
 
 	ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
 
-	if (ctx->more) {
+	if (!result) {
+		err = af_alg_wait_for_completion(
+				crypto_ahash_init(&ctx->req),
+				&ctx->completion);
+		if (err)
+			goto unlock;
+	}
+
+	if (!result || ctx->more) {
 		ctx->more = 0;
 		err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
 						 &ctx->completion);
 		if (err)
 			goto unlock;
-	} else if (!result) {
-		err = af_alg_wait_for_completion(
-				crypto_ahash_digest(&ctx->req),
-				&ctx->completion);
 	}
 
 	err = memcpy_to_msg(msg, ctx->result, len);
 
-	hash_free_result(sk, ctx);
-
 unlock:
+	hash_free_result(sk, ctx);
 	release_sock(sk);
 
 	return err ?: len;
diff --git a/drivers/Makefile b/drivers/Makefile
index f0afdfb..194d20b 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -21,7 +21,7 @@
 obj-y				+= idle/
 
 # IPMI must come before ACPI in order to provide IPMI opregion support
-obj-$(CONFIG_IPMI_HANDLER)	+= char/ipmi/
+obj-y				+= char/ipmi/
 
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_SFI)		+= sfi/
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index d58fbf7..7dd7092 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -122,7 +122,7 @@
 	int ret;
 
 	if (!dev_desc) {
-		pdev = acpi_create_platform_device(adev);
+		pdev = acpi_create_platform_device(adev, NULL);
 		return IS_ERR_OR_NULL(pdev) ? PTR_ERR(pdev) : 1;
 	}
 
@@ -139,14 +139,8 @@
 			goto err_out;
 	}
 
-	if (dev_desc->properties) {
-		ret = device_add_properties(&adev->dev, dev_desc->properties);
-		if (ret)
-			goto err_out;
-	}
-
 	adev->driver_data = pdata;
-	pdev = acpi_create_platform_device(adev);
+	pdev = acpi_create_platform_device(adev, dev_desc->properties);
 	if (!IS_ERR_OR_NULL(pdev))
 		return 1;
 
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 5520102..373657f 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -395,7 +395,7 @@
 
 	dev_desc = (const struct lpss_device_desc *)id->driver_data;
 	if (!dev_desc) {
-		pdev = acpi_create_platform_device(adev);
+		pdev = acpi_create_platform_device(adev, NULL);
 		return IS_ERR_OR_NULL(pdev) ? PTR_ERR(pdev) : 1;
 	}
 	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
@@ -451,14 +451,8 @@
 		goto err_out;
 	}
 
-	if (dev_desc->properties) {
-		ret = device_add_properties(&adev->dev, dev_desc->properties);
-		if (ret)
-			goto err_out;
-	}
-
 	adev->driver_data = pdata;
-	pdev = acpi_create_platform_device(adev);
+	pdev = acpi_create_platform_device(adev, dev_desc->properties);
 	if (!IS_ERR_OR_NULL(pdev)) {
 		return 1;
 	}
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index b200ae1..b4c1a6a 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -50,6 +50,7 @@
 /**
  * acpi_create_platform_device - Create platform device for ACPI device node
  * @adev: ACPI device node to create a platform device for.
+ * @properties: Optional collection of build-in properties.
  *
  * Check if the given @adev can be represented as a platform device and, if
  * that's the case, create and register a platform device, populate its common
@@ -57,7 +58,8 @@
  *
  * Name of the platform device will be the same as @adev's.
  */
-struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
+struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
+					struct property_entry *properties)
 {
 	struct platform_device *pdev = NULL;
 	struct platform_device_info pdevinfo;
@@ -106,6 +108,7 @@
 	pdevinfo.res = resources;
 	pdevinfo.num_res = count;
 	pdevinfo.fwnode = acpi_fwnode_handle(adev);
+	pdevinfo.properties = properties;
 
 	if (acpi_dma_supported(adev))
 		pdevinfo.dma_mask = DMA_BIT_MASK(32);
diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c
index f1e6dcc..54d48b9 100644
--- a/drivers/acpi/acpica/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -46,6 +46,7 @@
 #include "acdispat.h"
 #include "acnamesp.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_DISPATCHER
 ACPI_MODULE_NAME("dsinit")
@@ -214,23 +215,17 @@
 
 	/* Walk entire namespace from the supplied root */
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
 	/*
 	 * We don't use acpi_walk_namespace since we do not want to acquire
 	 * the namespace reader lock.
 	 */
 	status =
 	    acpi_ns_walk_namespace(ACPI_TYPE_ANY, start_node, ACPI_UINT32_MAX,
-				   ACPI_NS_WALK_UNLOCK, acpi_ds_init_one_object,
-				   NULL, &info, NULL);
+				   0, acpi_ds_init_one_object, NULL, &info,
+				   NULL);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace"));
 	}
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 
 	status = acpi_get_table_by_index(table_index, &table);
 	if (ACPI_FAILURE(status)) {
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index 32e9ddc..2b3210f 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -99,14 +99,11 @@
 			  "Method auto-serialization parse [%4.4s] %p\n",
 			  acpi_ut_get_node_name(node), node));
 
-	acpi_ex_enter_interpreter();
-
 	/* Create/Init a root op for the method parse tree */
 
 	op = acpi_ps_alloc_op(AML_METHOD_OP, obj_desc->method.aml_start);
 	if (!op) {
-		status = AE_NO_MEMORY;
-		goto unlock;
+		return_ACPI_STATUS(AE_NO_MEMORY);
 	}
 
 	acpi_ps_set_name(op, node->name.integer);
@@ -118,8 +115,7 @@
 	    acpi_ds_create_walk_state(node->owner_id, NULL, NULL, NULL);
 	if (!walk_state) {
 		acpi_ps_free_op(op);
-		status = AE_NO_MEMORY;
-		goto unlock;
+		return_ACPI_STATUS(AE_NO_MEMORY);
 	}
 
 	status = acpi_ds_init_aml_walk(walk_state, op, node,
@@ -138,8 +134,6 @@
 	status = acpi_ps_parse_aml(walk_state);
 
 	acpi_ps_delete_parse_tree(op);
-unlock:
-	acpi_ex_exit_interpreter();
 	return_ACPI_STATUS(status);
 }
 
@@ -731,26 +725,6 @@
 		acpi_ds_method_data_delete_all(walk_state);
 
 		/*
-		 * If method is serialized, release the mutex and restore the
-		 * current sync level for this thread
-		 */
-		if (method_desc->method.mutex) {
-
-			/* Acquisition Depth handles recursive calls */
-
-			method_desc->method.mutex->mutex.acquisition_depth--;
-			if (!method_desc->method.mutex->mutex.acquisition_depth) {
-				walk_state->thread->current_sync_level =
-				    method_desc->method.mutex->mutex.
-				    original_sync_level;
-
-				acpi_os_release_mutex(method_desc->method.
-						      mutex->mutex.os_mutex);
-				method_desc->method.mutex->mutex.thread_id = 0;
-			}
-		}
-
-		/*
 		 * Delete any namespace objects created anywhere within the
 		 * namespace by the execution of this method. Unless:
 		 * 1) This method is a module-level executable code method, in which
@@ -786,6 +760,26 @@
 				    ~ACPI_METHOD_MODIFIED_NAMESPACE;
 			}
 		}
+
+		/*
+		 * If method is serialized, release the mutex and restore the
+		 * current sync level for this thread
+		 */
+		if (method_desc->method.mutex) {
+
+			/* Acquisition Depth handles recursive calls */
+
+			method_desc->method.mutex->mutex.acquisition_depth--;
+			if (!method_desc->method.mutex->mutex.acquisition_depth) {
+				walk_state->thread->current_sync_level =
+				    method_desc->method.mutex->mutex.
+				    original_sync_level;
+
+				acpi_os_release_mutex(method_desc->method.
+						      mutex->mutex.os_mutex);
+				method_desc->method.mutex->mutex.thread_id = 0;
+			}
+		}
 	}
 
 	/* Decrement the thread count on the method */
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 028b22a..e362182 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -607,11 +607,9 @@
 				}
 			}
 
-			acpi_ex_exit_interpreter();
 			status =
 			    acpi_ev_initialize_region
 			    (acpi_ns_get_attached_object(node), FALSE);
-			acpi_ex_enter_interpreter();
 
 			if (ACPI_FAILURE(status)) {
 				/*
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 3843f1f..75ddd16 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acevents.h"
 #include "acnamesp.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_EVENTS
 ACPI_MODULE_NAME("evrgnini")
@@ -597,9 +598,11 @@
 					}
 				}
 
+				acpi_ex_exit_interpreter();
 				status =
 				    acpi_ev_execute_reg_method(region_obj,
 							       ACPI_REG_CONNECT);
+				acpi_ex_enter_interpreter();
 
 				if (acpi_ns_locked) {
 					status =
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index 334d3c5..d1f2014 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -137,7 +137,9 @@
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 			  "**** Begin Table Object Initialization\n"));
 
+	acpi_ex_enter_interpreter();
 	status = acpi_ds_initialize_objects(table_index, node);
+	acpi_ex_exit_interpreter();
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 			  "**** Completed Table Object Initialization\n"));
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 046c4d0..5fb838e 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -480,19 +480,17 @@
 	u32 i;
 
 	/*
-	 * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which
+	 * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which
 	 * should be zero are indeed zero. This will workaround BIOSs that
 	 * inadvertently place values in these fields.
 	 *
 	 * The ACPI 1.0 reserved fields that will be zeroed are the bytes located
 	 * at offset 45, 55, 95, and the word located at offset 109, 110.
 	 *
-	 * Note: The FADT revision value is unreliable because of BIOS errors.
-	 * The table length is instead used as the final word on the version.
-	 *
-	 * Note: FADT revision 3 is the ACPI 2.0 version of the FADT.
+	 * Note: The FADT revision value is unreliable. Only the length can be
+	 * trusted.
 	 */
-	if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) {
+	if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) {
 		acpi_gbl_FADT.preferred_profile = 0;
 		acpi_gbl_FADT.pstate_control = 0;
 		acpi_gbl_FADT.cst_control = 0;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index f0a029e..0d099a2 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -662,7 +662,7 @@
 	ghes_do_proc(ghes, ghes->estatus);
 out:
 	ghes_clear_estatus(ghes);
-	return 0;
+	return rc;
 }
 
 static void ghes_add_timer(struct ghes *ghes)
diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c
index 33505c6..8636409 100644
--- a/drivers/acpi/dptf/int340x_thermal.c
+++ b/drivers/acpi/dptf/int340x_thermal.c
@@ -34,11 +34,11 @@
 					const struct acpi_device_id *id)
 {
 	if (IS_ENABLED(CONFIG_INT340X_THERMAL))
-		acpi_create_platform_device(adev);
+		acpi_create_platform_device(adev, NULL);
 	/* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */
 	else if (IS_ENABLED(CONFIG_INTEL_SOC_DTS_THERMAL) &&
 		 id->driver_data == INT3401_DEVICE)
-		acpi_create_platform_device(adev);
+		acpi_create_platform_device(adev, NULL);
 	return 1;
 }
 
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index c983bf7..bc3d914 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -87,6 +87,7 @@
 
 static LIST_HEAD(acpi_link_list);
 static DEFINE_MUTEX(acpi_link_lock);
+static int sci_irq = -1, sci_penalty;
 
 /* --------------------------------------------------------------------------
                             PCI Link Device Management
@@ -496,25 +497,13 @@
 {
 	int penalty = 0;
 
-	/*
-	* Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict
-	* with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be
-	* use for PCI IRQs.
-	*/
-	if (irq == acpi_gbl_FADT.sci_interrupt) {
-		u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK;
-
-		if (type != IRQ_TYPE_LEVEL_LOW)
-			penalty += PIRQ_PENALTY_ISA_ALWAYS;
-		else
-			penalty += PIRQ_PENALTY_PCI_USING;
-	}
+	if (irq == sci_irq)
+		penalty += sci_penalty;
 
 	if (irq < ACPI_MAX_ISA_IRQS)
 		return penalty + acpi_isa_irq_penalty[irq];
 
-	penalty += acpi_irq_pci_sharing_penalty(irq);
-	return penalty;
+	return penalty + acpi_irq_pci_sharing_penalty(irq);
 }
 
 int __init acpi_irq_penalty_init(void)
@@ -619,6 +608,10 @@
 			    acpi_device_bid(link->device));
 		return -ENODEV;
 	} else {
+		if (link->irq.active < ACPI_MAX_ISA_IRQS)
+			acpi_isa_irq_penalty[link->irq.active] +=
+				PIRQ_PENALTY_PCI_USING;
+
 		printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
 		       acpi_device_name(link->device),
 		       acpi_device_bid(link->device), link->irq.active);
@@ -849,7 +842,7 @@
 			continue;
 
 		if (used)
-			new_penalty = acpi_irq_get_penalty(irq) +
+			new_penalty = acpi_isa_irq_penalty[irq] +
 					PIRQ_PENALTY_ISA_USED;
 		else
 			new_penalty = 0;
@@ -871,7 +864,7 @@
 void acpi_penalize_isa_irq(int irq, int active)
 {
 	if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
-		acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
+		acpi_isa_irq_penalty[irq] +=
 		  (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
 }
 
@@ -881,6 +874,17 @@
 		    acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
 }
 
+void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
+{
+	sci_irq = irq;
+
+	if (trigger == ACPI_MADT_TRIGGER_LEVEL &&
+	    polarity == ACPI_MADT_POLARITY_ACTIVE_LOW)
+		sci_penalty = PIRQ_PENALTY_PCI_USING;
+	else
+		sci_penalty = PIRQ_PENALTY_ISA_ALWAYS;
+}
+
 /*
  * Over-ride default table to reserve additional IRQs for use by ISA
  * e.g. acpi_irq_isa=5
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 035ac64..3d1856f 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1734,7 +1734,7 @@
 			       &is_spi_i2c_slave);
 	acpi_dev_free_resource_list(&resource_list);
 	if (!is_spi_i2c_slave) {
-		acpi_create_platform_device(device);
+		acpi_create_platform_device(device, NULL);
 		acpi_device_set_enumerated(device);
 	} else {
 		blocking_notifier_call_chain(&acpi_reconfig_chain,
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 562af94..3c71b98 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1002,7 +1002,7 @@
 
 
 static struct binder_ref *binder_get_ref(struct binder_proc *proc,
-					 uint32_t desc)
+					 u32 desc, bool need_strong_ref)
 {
 	struct rb_node *n = proc->refs_by_desc.rb_node;
 	struct binder_ref *ref;
@@ -1010,12 +1010,16 @@
 	while (n) {
 		ref = rb_entry(n, struct binder_ref, rb_node_desc);
 
-		if (desc < ref->desc)
+		if (desc < ref->desc) {
 			n = n->rb_left;
-		else if (desc > ref->desc)
+		} else if (desc > ref->desc) {
 			n = n->rb_right;
-		else
+		} else if (need_strong_ref && !ref->strong) {
+			binder_user_error("tried to use weak ref as strong ref\n");
+			return NULL;
+		} else {
 			return ref;
+		}
 	}
 	return NULL;
 }
@@ -1285,7 +1289,10 @@
 		} break;
 		case BINDER_TYPE_HANDLE:
 		case BINDER_TYPE_WEAK_HANDLE: {
-			struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+			struct binder_ref *ref;
+
+			ref = binder_get_ref(proc, fp->handle,
+					     fp->type == BINDER_TYPE_HANDLE);
 
 			if (ref == NULL) {
 				pr_err("transaction release %d bad handle %d\n",
@@ -1380,7 +1387,7 @@
 		if (tr->target.handle) {
 			struct binder_ref *ref;
 
-			ref = binder_get_ref(proc, tr->target.handle);
+			ref = binder_get_ref(proc, tr->target.handle, true);
 			if (ref == NULL) {
 				binder_user_error("%d:%d got transaction to invalid handle\n",
 					proc->pid, thread->pid);
@@ -1577,7 +1584,9 @@
 				fp->type = BINDER_TYPE_HANDLE;
 			else
 				fp->type = BINDER_TYPE_WEAK_HANDLE;
+			fp->binder = 0;
 			fp->handle = ref->desc;
+			fp->cookie = 0;
 			binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE,
 				       &thread->todo);
 
@@ -1589,7 +1598,10 @@
 		} break;
 		case BINDER_TYPE_HANDLE:
 		case BINDER_TYPE_WEAK_HANDLE: {
-			struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+			struct binder_ref *ref;
+
+			ref = binder_get_ref(proc, fp->handle,
+					     fp->type == BINDER_TYPE_HANDLE);
 
 			if (ref == NULL) {
 				binder_user_error("%d:%d got transaction with invalid handle, %d\n",
@@ -1624,7 +1636,9 @@
 					return_error = BR_FAILED_REPLY;
 					goto err_binder_get_ref_for_node_failed;
 				}
+				fp->binder = 0;
 				fp->handle = new_ref->desc;
+				fp->cookie = 0;
 				binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL);
 				trace_binder_transaction_ref_to_ref(t, ref,
 								    new_ref);
@@ -1678,6 +1692,7 @@
 			binder_debug(BINDER_DEBUG_TRANSACTION,
 				     "        fd %d -> %d\n", fp->handle, target_fd);
 			/* TODO: fput? */
+			fp->binder = 0;
 			fp->handle = target_fd;
 		} break;
 
@@ -1800,7 +1815,9 @@
 						ref->desc);
 				}
 			} else
-				ref = binder_get_ref(proc, target);
+				ref = binder_get_ref(proc, target,
+						     cmd == BC_ACQUIRE ||
+						     cmd == BC_RELEASE);
 			if (ref == NULL) {
 				binder_user_error("%d:%d refcount change on invalid ref %d\n",
 					proc->pid, thread->pid, target);
@@ -1996,7 +2013,7 @@
 			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
-			ref = binder_get_ref(proc, target);
+			ref = binder_get_ref(proc, target, false);
 			if (ref == NULL) {
 				binder_user_error("%d:%d %s invalid ref %d\n",
 					proc->pid, thread->pid,
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index ba5f11c..9669fc7 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1418,31 +1418,34 @@
 	 * Message mode could be enforced. In this case assume that advantage
 	 * of multipe MSIs is negated and use single MSI mode instead.
 	 */
-	nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX,
-			PCI_IRQ_MSIX | PCI_IRQ_MSI);
-	if (nvec > 0) {
-		if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) {
-			hpriv->get_irq_vector = ahci_get_irq_vector;
-			hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
-			return nvec;
+	if (n_ports > 1) {
+		nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX,
+				PCI_IRQ_MSIX | PCI_IRQ_MSI);
+		if (nvec > 0) {
+			if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) {
+				hpriv->get_irq_vector = ahci_get_irq_vector;
+				hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
+				return nvec;
+			}
+
+			/*
+			 * Fallback to single MSI mode if the controller
+			 * enforced MRSM mode.
+			 */
+			printk(KERN_INFO
+				"ahci: MRSM is on, fallback to single MSI\n");
+			pci_free_irq_vectors(pdev);
 		}
 
 		/*
-		 * Fallback to single MSI mode if the controller enforced MRSM
-		 * mode.
+		 * -ENOSPC indicated we don't have enough vectors.  Don't bother
+		 * trying a single vectors for any other error:
 		 */
-		printk(KERN_INFO "ahci: MRSM is on, fallback to single MSI\n");
-		pci_free_irq_vectors(pdev);
+		if (nvec < 0 && nvec != -ENOSPC)
+			return nvec;
 	}
 
 	/*
-	 * -ENOSPC indicated we don't have enough vectors.  Don't bother trying
-	 * a single vectors for any other error:
-	 */
-	if (nvec < 0 && nvec != -ENOSPC)
-		return nvec;
-
-	/*
 	 * If the host is not capable of supporting per-port vectors, fall
 	 * back to single MSI before finally attempting single MSI-X.
 	 */
@@ -1617,7 +1620,7 @@
 		/* legacy intx interrupts */
 		pci_intx(pdev, 1);
 	}
-	hpriv->irq = pdev->irq;
+	hpriv->irq = pci_irq_vector(pdev, 0);
 
 	if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss)
 		host->flags |= ATA_HOST_PARALLEL_SCAN;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index fdf44ca..d02e7c0 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -213,14 +213,16 @@
 	  If you are unsure about this, Say N here.
 
 config DEBUG_TEST_DRIVER_REMOVE
-	bool "Test driver remove calls during probe"
+	bool "Test driver remove calls during probe (UNSTABLE)"
 	depends on DEBUG_KERNEL
 	help
 	  Say Y here if you want the Driver core to test driver remove functions
 	  by calling probe, remove, probe. This tests the remove path without
 	  having to unbind the driver or unload the driver module.
 
-	  If you are unsure about this, say N here.
+	  This option is expected to find errors and may render your system
+	  unusable. You should say N here unless you are explicitly looking to
+	  test this functionality.
 
 config SYS_HYPERVISOR
 	bool
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index d22a726..d76cd97 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -324,7 +324,8 @@
 {
 	int ret = -EPROBE_DEFER;
 	int local_trigger_count = atomic_read(&deferred_trigger_count);
-	bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE);
+	bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) &&
+			   !drv->suppress_bind_attrs;
 
 	if (defer_all_probes) {
 		/*
@@ -383,7 +384,7 @@
 	if (test_remove) {
 		test_remove = false;
 
-		if (dev->bus && dev->bus->remove)
+		if (dev->bus->remove)
 			dev->bus->remove(dev);
 		else if (drv->remove)
 			drv->remove(dev);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e44944f..2932a5b 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1027,6 +1027,8 @@
 	TRACE_DEVICE(dev);
 	TRACE_SUSPEND(0);
 
+	dpm_wait_for_children(dev, async);
+
 	if (async_error)
 		goto Complete;
 
@@ -1038,8 +1040,6 @@
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
-	dpm_wait_for_children(dev, async);
-
 	if (dev->pm_domain) {
 		info = "noirq power domain ";
 		callback = pm_noirq_op(&dev->pm_domain->ops, state);
@@ -1174,6 +1174,8 @@
 
 	__pm_runtime_disable(dev, false);
 
+	dpm_wait_for_children(dev, async);
+
 	if (async_error)
 		goto Complete;
 
@@ -1185,8 +1187,6 @@
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
-	dpm_wait_for_children(dev, async);
-
 	if (dev->pm_domain) {
 		info = "late power domain ";
 		callback = pm_late_early_op(&dev->pm_domain->ops, state);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 811e11c..0809cda 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2954,7 +2954,7 @@
 	case DAC960_PD_Controller:
 	  if (!request_region(Controller->IO_Address, 0x80,
 			      Controller->FullModelName)) {
-		DAC960_Error("IO port 0x%d busy for Controller at\n",
+		DAC960_Error("IO port 0x%lx busy for Controller at\n",
 			     Controller, Controller->IO_Address);
 		goto Failure;
 	  }
@@ -2990,7 +2990,7 @@
 	case DAC960_P_Controller:
 	  if (!request_region(Controller->IO_Address, 0x80,
 			      Controller->FullModelName)){
-		DAC960_Error("IO port 0x%d busy for Controller at\n",
+		DAC960_Error("IO port 0x%lx busy for Controller at\n",
 		   	     Controller, Controller->IO_Address);
 		goto Failure;
 	  }
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index ab19adb..3c606c0 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -853,45 +853,6 @@
 	return n;
 }
 
-/* This can be removed if we are certain that no users of the block
- * layer will ever use zero-count pages in bios.  Otherwise we have to
- * protect against the put_page sometimes done by the network layer.
- *
- * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for
- * discussion.
- *
- * We cannot use get_page in the workaround, because it insists on a
- * positive page count as a precondition.  So we use _refcount directly.
- */
-static void
-bio_pageinc(struct bio *bio)
-{
-	struct bio_vec bv;
-	struct page *page;
-	struct bvec_iter iter;
-
-	bio_for_each_segment(bv, bio, iter) {
-		/* Non-zero page count for non-head members of
-		 * compound pages is no longer allowed by the kernel.
-		 */
-		page = compound_head(bv.bv_page);
-		page_ref_inc(page);
-	}
-}
-
-static void
-bio_pagedec(struct bio *bio)
-{
-	struct page *page;
-	struct bio_vec bv;
-	struct bvec_iter iter;
-
-	bio_for_each_segment(bv, bio, iter) {
-		page = compound_head(bv.bv_page);
-		page_ref_dec(page);
-	}
-}
-
 static void
 bufinit(struct buf *buf, struct request *rq, struct bio *bio)
 {
@@ -899,7 +860,6 @@
 	buf->rq = rq;
 	buf->bio = bio;
 	buf->iter = bio->bi_iter;
-	bio_pageinc(bio);
 }
 
 static struct buf *
@@ -1127,7 +1087,6 @@
 	if (buf == d->ip.buf)
 		d->ip.buf = NULL;
 	rq = buf->rq;
-	bio_pagedec(buf->bio);
 	mempool_free(buf, d->bufpool);
 	n = (unsigned long) rq->special;
 	rq->special = (void *) --n;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 100be55..8348272 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1871,7 +1871,7 @@
 		drbd_update_congested(connection);
 	}
 	do {
-		rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
+		rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
 		if (rv == -EAGAIN) {
 			if (we_should_drop_the_connection(connection, sock))
 				break;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ba405b5..7a10487 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -164,7 +164,7 @@
 	spin_lock(&nbd->sock_lock);
 
 	if (!nbd->sock) {
-		spin_unlock_irq(&nbd->sock_lock);
+		spin_unlock(&nbd->sock_lock);
 		return;
 	}
 
@@ -599,7 +599,7 @@
 			return -EINVAL;
 
 		sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
-		if (!sreq)
+		if (IS_ERR(sreq))
 			return -ENOMEM;
 
 		mutex_unlock(&nbd->tx_lock);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index abb7162..7b274ff 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -415,15 +415,15 @@
 };
 
 /*
- * Flag bits for rbd_dev->flags.  If atomicity is required,
- * rbd_dev->lock is used to protect access.
- *
- * Currently, only the "removing" flag (which is coupled with the
- * "open_count" field) requires atomic access.
+ * Flag bits for rbd_dev->flags:
+ * - REMOVING (which is coupled with rbd_dev->open_count) is protected
+ *   by rbd_dev->lock
+ * - BLACKLISTED is protected by rbd_dev->lock_rwsem
  */
 enum rbd_dev_flags {
 	RBD_DEV_FLAG_EXISTS,	/* mapped snapshot has not been deleted */
 	RBD_DEV_FLAG_REMOVING,	/* this mapping is being removed */
+	RBD_DEV_FLAG_BLACKLISTED, /* our ceph_client is blacklisted */
 };
 
 static DEFINE_MUTEX(client_mutex);	/* Serialize client creation */
@@ -3926,6 +3926,7 @@
 	struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
 					    struct rbd_device, watch_dwork);
 	bool was_lock_owner = false;
+	bool need_to_wake = false;
 	int ret;
 
 	dout("%s rbd_dev %p\n", __func__, rbd_dev);
@@ -3935,19 +3936,27 @@
 		was_lock_owner = rbd_release_lock(rbd_dev);
 
 	mutex_lock(&rbd_dev->watch_mutex);
-	if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR)
-		goto fail_unlock;
+	if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
+		mutex_unlock(&rbd_dev->watch_mutex);
+		goto out;
+	}
 
 	ret = __rbd_register_watch(rbd_dev);
 	if (ret) {
 		rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
-		if (ret != -EBLACKLISTED)
+		if (ret == -EBLACKLISTED || ret == -ENOENT) {
+			set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
+			need_to_wake = true;
+		} else {
 			queue_delayed_work(rbd_dev->task_wq,
 					   &rbd_dev->watch_dwork,
 					   RBD_RETRY_DELAY);
-		goto fail_unlock;
+		}
+		mutex_unlock(&rbd_dev->watch_mutex);
+		goto out;
 	}
 
+	need_to_wake = true;
 	rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
 	rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
 	mutex_unlock(&rbd_dev->watch_mutex);
@@ -3963,13 +3972,10 @@
 				 ret);
 	}
 
+out:
 	up_write(&rbd_dev->lock_rwsem);
-	wake_requests(rbd_dev, true);
-	return;
-
-fail_unlock:
-	mutex_unlock(&rbd_dev->watch_mutex);
-	up_write(&rbd_dev->lock_rwsem);
+	if (need_to_wake)
+		wake_requests(rbd_dev, true);
 }
 
 /*
@@ -4074,7 +4080,9 @@
 		up_read(&rbd_dev->lock_rwsem);
 		schedule();
 		down_read(&rbd_dev->lock_rwsem);
-	} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
+	} while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
+		 !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
+
 	finish_wait(&rbd_dev->lock_waitq, &wait);
 }
 
@@ -4166,8 +4174,16 @@
 
 	if (must_be_locked) {
 		down_read(&rbd_dev->lock_rwsem);
-		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
+		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
+		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
 			rbd_wait_state_locked(rbd_dev);
+
+		WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^
+			!test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
+		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+			result = -EBLACKLISTED;
+			goto err_unlock;
+		}
 	}
 
 	img_request = rbd_img_request_create(rbd_dev, offset, length, op_type,
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2dc5c96..5545a67 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -376,7 +376,7 @@
 
 static int init_vq(struct virtio_blk *vblk)
 {
-	int err = 0;
+	int err;
 	int i;
 	vq_callback_t **callbacks;
 	const char **names;
@@ -390,13 +390,13 @@
 	if (err)
 		num_vqs = 1;
 
-	vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
+	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
 	if (!vblk->vqs)
 		return -ENOMEM;
 
-	names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
-	callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
-	vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
+	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
+	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
 	if (!names || !callbacks || !vqs) {
 		err = -ENOMEM;
 		goto out;
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
index ef51c9c..b6bb58c 100644
--- a/drivers/bluetooth/btwilink.c
+++ b/drivers/bluetooth/btwilink.c
@@ -310,7 +310,7 @@
 	BT_DBG("HCI device registered (hdev %p)", hdev);
 
 	dev_set_drvdata(&pdev->dev, hst);
-	return err;
+	return 0;
 }
 
 static int bt_ti_remove(struct platform_device *pdev)
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 5ccb90e..8f6c23c 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -643,6 +643,14 @@
 		},
 		.driver_data = &acpi_active_low,
 	},
+	{	/* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
+		.ident = "Lenovo ThinkPad 8",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
+		},
+		.driver_data = &acpi_active_low,
+	},
 	{ }
 };
 
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 7010dca..7875105 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -111,6 +111,7 @@
 config QCOM_EBI2
 	bool "Qualcomm External Bus Interface 2 (EBI2)"
 	depends on HAS_IOMEM
+	depends on ARCH_QCOM || COMPILE_TEST
 	help
 	  Say y here to enable support for the Qualcomm External Bus
 	  Interface 2, which can be used to connect things like NAND Flash,
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 4827945..d2d2c89 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -84,14 +84,14 @@
 
 static void add_early_randomness(struct hwrng *rng)
 {
-	unsigned char bytes[16];
 	int bytes_read;
+	size_t size = min_t(size_t, 16, rng_buffer_size());
 
 	mutex_lock(&reading_mutex);
-	bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
+	bytes_read = rng_get_data(rng, rng_buffer, size, 1);
 	mutex_unlock(&reading_mutex);
 	if (bytes_read > 0)
-		add_device_randomness(bytes, bytes_read);
+		add_device_randomness(rng_buffer, bytes_read);
 }
 
 static inline void cleanup_rng(struct kref *kref)
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 5a9350b..7f81665 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -76,3 +76,11 @@
 	 the IPMI management controller is capable of this.
 
 endif # IPMI_HANDLER
+
+config ASPEED_BT_IPMI_BMC
+	depends on ARCH_ASPEED
+	tristate "BT IPMI bmc driver"
+	help
+	  Provides a driver for the BT (Block Transfer) IPMI interface
+	  found on Aspeed SOCs (AST2400 and AST2500). The driver
+	  implements the BMC side of the BT interface.
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index f3ffde1..0d98cd9 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -11,3 +11,4 @@
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
+obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
new file mode 100644
index 0000000..fc9e889
--- /dev/null
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2015-2016, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/bt-bmc.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+/*
+ * This is a BMC device used to communicate to the host
+ */
+#define DEVICE_NAME	"ipmi-bt-host"
+
+#define BT_IO_BASE	0xe4
+#define BT_IRQ		10
+
+#define BT_CR0		0x0
+#define   BT_CR0_IO_BASE		16
+#define   BT_CR0_IRQ			12
+#define   BT_CR0_EN_CLR_SLV_RDP		0x8
+#define   BT_CR0_EN_CLR_SLV_WRP		0x4
+#define   BT_CR0_ENABLE_IBT		0x1
+#define BT_CR1		0x4
+#define   BT_CR1_IRQ_H2B	0x01
+#define   BT_CR1_IRQ_HBUSY	0x40
+#define BT_CR2		0x8
+#define   BT_CR2_IRQ_H2B	0x01
+#define   BT_CR2_IRQ_HBUSY	0x40
+#define BT_CR3		0xc
+#define BT_CTRL		0x10
+#define   BT_CTRL_B_BUSY		0x80
+#define   BT_CTRL_H_BUSY		0x40
+#define   BT_CTRL_OEM0			0x20
+#define   BT_CTRL_SMS_ATN		0x10
+#define   BT_CTRL_B2H_ATN		0x08
+#define   BT_CTRL_H2B_ATN		0x04
+#define   BT_CTRL_CLR_RD_PTR		0x02
+#define   BT_CTRL_CLR_WR_PTR		0x01
+#define BT_BMC2HOST	0x14
+#define BT_INTMASK	0x18
+#define   BT_INTMASK_B2H_IRQEN		0x01
+#define   BT_INTMASK_B2H_IRQ		0x02
+#define   BT_INTMASK_BMC_HWRST		0x80
+
+#define BT_BMC_BUFFER_SIZE 256
+
+struct bt_bmc {
+	struct device		dev;
+	struct miscdevice	miscdev;
+	void __iomem		*base;
+	int			irq;
+	wait_queue_head_t	queue;
+	struct timer_list	poll_timer;
+	struct mutex		mutex;
+};
+
+static atomic_t open_count = ATOMIC_INIT(0);
+
+static u8 bt_inb(struct bt_bmc *bt_bmc, int reg)
+{
+	return ioread8(bt_bmc->base + reg);
+}
+
+static void bt_outb(struct bt_bmc *bt_bmc, u8 data, int reg)
+{
+	iowrite8(data, bt_bmc->base + reg);
+}
+
+static void clr_rd_ptr(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_CLR_RD_PTR, BT_CTRL);
+}
+
+static void clr_wr_ptr(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_CLR_WR_PTR, BT_CTRL);
+}
+
+static void clr_h2b_atn(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_H2B_ATN, BT_CTRL);
+}
+
+static void set_b_busy(struct bt_bmc *bt_bmc)
+{
+	if (!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_B_BUSY))
+		bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL);
+}
+
+static void clr_b_busy(struct bt_bmc *bt_bmc)
+{
+	if (bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_B_BUSY)
+		bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL);
+}
+
+static void set_b2h_atn(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_B2H_ATN, BT_CTRL);
+}
+
+static u8 bt_read(struct bt_bmc *bt_bmc)
+{
+	return bt_inb(bt_bmc, BT_BMC2HOST);
+}
+
+static ssize_t bt_readn(struct bt_bmc *bt_bmc, u8 *buf, size_t n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		buf[i] = bt_read(bt_bmc);
+	return n;
+}
+
+static void bt_write(struct bt_bmc *bt_bmc, u8 c)
+{
+	bt_outb(bt_bmc, c, BT_BMC2HOST);
+}
+
+static ssize_t bt_writen(struct bt_bmc *bt_bmc, u8 *buf, size_t n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		bt_write(bt_bmc, buf[i]);
+	return n;
+}
+
+static void set_sms_atn(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_SMS_ATN, BT_CTRL);
+}
+
+static struct bt_bmc *file_bt_bmc(struct file *file)
+{
+	return container_of(file->private_data, struct bt_bmc, miscdev);
+}
+
+static int bt_bmc_open(struct inode *inode, struct file *file)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+	if (atomic_inc_return(&open_count) == 1) {
+		clr_b_busy(bt_bmc);
+		return 0;
+	}
+
+	atomic_dec(&open_count);
+	return -EBUSY;
+}
+
+/*
+ * The BT (Block Transfer) interface means that entire messages are
+ * buffered by the host before a notification is sent to the BMC that
+ * there is data to be read. The first byte is the length and the
+ * message data follows. The read operation just tries to capture the
+ * whole before returning it to userspace.
+ *
+ * BT Message format :
+ *
+ *    Byte 1  Byte 2     Byte 3  Byte 4  Byte 5:N
+ *    Length  NetFn/LUN  Seq     Cmd     Data
+ *
+ */
+static ssize_t bt_bmc_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+	u8 len;
+	int len_byte = 1;
+	u8 kbuffer[BT_BMC_BUFFER_SIZE];
+	ssize_t ret = 0;
+	ssize_t nread;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	WARN_ON(*ppos);
+
+	if (wait_event_interruptible(bt_bmc->queue,
+				     bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))
+		return -ERESTARTSYS;
+
+	mutex_lock(&bt_bmc->mutex);
+
+	if (unlikely(!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
+	set_b_busy(bt_bmc);
+	clr_h2b_atn(bt_bmc);
+	clr_rd_ptr(bt_bmc);
+
+	/*
+	 * The BT frames start with the message length, which does not
+	 * include the length byte.
+	 */
+	kbuffer[0] = bt_read(bt_bmc);
+	len = kbuffer[0];
+
+	/* We pass the length back to userspace as well */
+	if (len + 1 > count)
+		len = count - 1;
+
+	while (len) {
+		nread = min_t(ssize_t, len, sizeof(kbuffer) - len_byte);
+
+		bt_readn(bt_bmc, kbuffer + len_byte, nread);
+
+		if (copy_to_user(buf, kbuffer, nread + len_byte)) {
+			ret = -EFAULT;
+			break;
+		}
+		len -= nread;
+		buf += nread + len_byte;
+		ret += nread + len_byte;
+		len_byte = 0;
+	}
+
+	clr_b_busy(bt_bmc);
+
+out_unlock:
+	mutex_unlock(&bt_bmc->mutex);
+	return ret;
+}
+
+/*
+ * BT Message response format :
+ *
+ *    Byte 1  Byte 2     Byte 3  Byte 4  Byte 5  Byte 6:N
+ *    Length  NetFn/LUN  Seq     Cmd     Code    Data
+ */
+static ssize_t bt_bmc_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+	u8 kbuffer[BT_BMC_BUFFER_SIZE];
+	ssize_t ret = 0;
+	ssize_t nwritten;
+
+	/*
+	 * send a minimum response size
+	 */
+	if (count < 5)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ, buf, count))
+		return -EFAULT;
+
+	WARN_ON(*ppos);
+
+	/*
+	 * There's no interrupt for clearing bmc busy so we have to
+	 * poll
+	 */
+	if (wait_event_interruptible(bt_bmc->queue,
+				     !(bt_inb(bt_bmc, BT_CTRL) &
+				       (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))))
+		return -ERESTARTSYS;
+
+	mutex_lock(&bt_bmc->mutex);
+
+	if (unlikely(bt_inb(bt_bmc, BT_CTRL) &
+		     (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
+	clr_wr_ptr(bt_bmc);
+
+	while (count) {
+		nwritten = min_t(ssize_t, count, sizeof(kbuffer));
+		if (copy_from_user(&kbuffer, buf, nwritten)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		bt_writen(bt_bmc, kbuffer, nwritten);
+
+		count -= nwritten;
+		buf += nwritten;
+		ret += nwritten;
+	}
+
+	set_b2h_atn(bt_bmc);
+
+out_unlock:
+	mutex_unlock(&bt_bmc->mutex);
+	return ret;
+}
+
+static long bt_bmc_ioctl(struct file *file, unsigned int cmd,
+			 unsigned long param)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+	switch (cmd) {
+	case BT_BMC_IOCTL_SMS_ATN:
+		set_sms_atn(bt_bmc);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int bt_bmc_release(struct inode *inode, struct file *file)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+	atomic_dec(&open_count);
+	set_b_busy(bt_bmc);
+	return 0;
+}
+
+static unsigned int bt_bmc_poll(struct file *file, poll_table *wait)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+	unsigned int mask = 0;
+	u8 ctrl;
+
+	poll_wait(file, &bt_bmc->queue, wait);
+
+	ctrl = bt_inb(bt_bmc, BT_CTRL);
+
+	if (ctrl & BT_CTRL_H2B_ATN)
+		mask |= POLLIN;
+
+	if (!(ctrl & (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN)))
+		mask |= POLLOUT;
+
+	return mask;
+}
+
+static const struct file_operations bt_bmc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= bt_bmc_open,
+	.read		= bt_bmc_read,
+	.write		= bt_bmc_write,
+	.release	= bt_bmc_release,
+	.poll		= bt_bmc_poll,
+	.unlocked_ioctl	= bt_bmc_ioctl,
+};
+
+static void poll_timer(unsigned long data)
+{
+	struct bt_bmc *bt_bmc = (void *)data;
+
+	bt_bmc->poll_timer.expires += msecs_to_jiffies(500);
+	wake_up(&bt_bmc->queue);
+	add_timer(&bt_bmc->poll_timer);
+}
+
+static irqreturn_t bt_bmc_irq(int irq, void *arg)
+{
+	struct bt_bmc *bt_bmc = arg;
+	u32 reg;
+
+	reg = ioread32(bt_bmc->base + BT_CR2);
+	reg &= BT_CR2_IRQ_H2B | BT_CR2_IRQ_HBUSY;
+	if (!reg)
+		return IRQ_NONE;
+
+	/* ack pending IRQs */
+	iowrite32(reg, bt_bmc->base + BT_CR2);
+
+	wake_up(&bt_bmc->queue);
+	return IRQ_HANDLED;
+}
+
+static int bt_bmc_config_irq(struct bt_bmc *bt_bmc,
+			     struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	u32 reg;
+	int rc;
+
+	bt_bmc->irq = platform_get_irq(pdev, 0);
+	if (!bt_bmc->irq)
+		return -ENODEV;
+
+	rc = devm_request_irq(dev, bt_bmc->irq, bt_bmc_irq, IRQF_SHARED,
+			      DEVICE_NAME, bt_bmc);
+	if (rc < 0) {
+		dev_warn(dev, "Unable to request IRQ %d\n", bt_bmc->irq);
+		bt_bmc->irq = 0;
+		return rc;
+	}
+
+	/*
+	 * Configure IRQs on the bmc clearing the H2B and HBUSY bits;
+	 * H2B will be asserted when the bmc has data for us; HBUSY
+	 * will be cleared (along with B2H) when we can write the next
+	 * message to the BT buffer
+	 */
+	reg = ioread32(bt_bmc->base + BT_CR1);
+	reg |= BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY;
+	iowrite32(reg, bt_bmc->base + BT_CR1);
+
+	return 0;
+}
+
+static int bt_bmc_probe(struct platform_device *pdev)
+{
+	struct bt_bmc *bt_bmc;
+	struct device *dev;
+	struct resource *res;
+	int rc;
+
+	if (!pdev || !pdev->dev.of_node)
+		return -ENODEV;
+
+	dev = &pdev->dev;
+	dev_info(dev, "Found bt bmc device\n");
+
+	bt_bmc = devm_kzalloc(dev, sizeof(*bt_bmc), GFP_KERNEL);
+	if (!bt_bmc)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, bt_bmc);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	bt_bmc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(bt_bmc->base))
+		return PTR_ERR(bt_bmc->base);
+
+	mutex_init(&bt_bmc->mutex);
+	init_waitqueue_head(&bt_bmc->queue);
+
+	bt_bmc->miscdev.minor	= MISC_DYNAMIC_MINOR,
+		bt_bmc->miscdev.name	= DEVICE_NAME,
+		bt_bmc->miscdev.fops	= &bt_bmc_fops,
+		bt_bmc->miscdev.parent = dev;
+	rc = misc_register(&bt_bmc->miscdev);
+	if (rc) {
+		dev_err(dev, "Unable to register misc device\n");
+		return rc;
+	}
+
+	bt_bmc_config_irq(bt_bmc, pdev);
+
+	if (bt_bmc->irq) {
+		dev_info(dev, "Using IRQ %d\n", bt_bmc->irq);
+	} else {
+		dev_info(dev, "No IRQ; using timer\n");
+		setup_timer(&bt_bmc->poll_timer, poll_timer,
+			    (unsigned long)bt_bmc);
+		bt_bmc->poll_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&bt_bmc->poll_timer);
+	}
+
+	iowrite32((BT_IO_BASE << BT_CR0_IO_BASE) |
+		  (BT_IRQ << BT_CR0_IRQ) |
+		  BT_CR0_EN_CLR_SLV_RDP |
+		  BT_CR0_EN_CLR_SLV_WRP |
+		  BT_CR0_ENABLE_IBT,
+		  bt_bmc->base + BT_CR0);
+
+	clr_b_busy(bt_bmc);
+
+	return 0;
+}
+
+static int bt_bmc_remove(struct platform_device *pdev)
+{
+	struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev);
+
+	misc_deregister(&bt_bmc->miscdev);
+	if (!bt_bmc->irq)
+		del_timer_sync(&bt_bmc->poll_timer);
+	return 0;
+}
+
+static const struct of_device_id bt_bmc_match[] = {
+	{ .compatible = "aspeed,ast2400-ibt-bmc" },
+	{ },
+};
+
+static struct platform_driver bt_bmc_driver = {
+	.driver = {
+		.name		= DEVICE_NAME,
+		.of_match_table = bt_bmc_match,
+	},
+	.probe = bt_bmc_probe,
+	.remove = bt_bmc_remove,
+};
+
+module_platform_driver(bt_bmc_driver);
+
+MODULE_DEVICE_TABLE(of, bt_bmc_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alistair Popple <alistair@popple.id.au>");
+MODULE_DESCRIPTION("Linux device interface to the IPMI BT interface");
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index d8619998..fcdd886 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2891,11 +2891,11 @@
 		intf->curr_channel = IPMI_MAX_CHANNELS;
 	}
 
+	rv = ipmi_bmc_register(intf, i);
+
 	if (rv == 0)
 		rv = add_proc_entries(intf, i);
 
-	rv = ipmi_bmc_register(intf, i);
-
  out:
 	if (rv) {
 		if (intf->proc_dir)
@@ -2982,8 +2982,6 @@
 	int intf_num = intf->intf_num;
 	ipmi_user_t user;
 
-	ipmi_bmc_unregister(intf);
-
 	mutex_lock(&smi_watchers_mutex);
 	mutex_lock(&ipmi_interfaces_mutex);
 	intf->intf_num = -1;
@@ -3007,6 +3005,7 @@
 	mutex_unlock(&ipmi_interfaces_mutex);
 
 	remove_proc_entries(intf);
+	ipmi_bmc_unregister(intf);
 
 	/*
 	 * Call all the watcher interfaces to tell them that
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index d233688..6af1ce0 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -748,10 +748,7 @@
 	}
 
 	if (pp->pdev) {
-		const char *name = pp->pdev->name;
-
 		parport_unregister_device(pp->pdev);
-		kfree(name);
 		pp->pdev = NULL;
 		pr_debug(CHRDEV "%x: unregistered pardevice\n", minor);
 	}
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 8de6187..3a9149c 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -813,9 +813,6 @@
 			continue;
 		}
 
-		if (rc < TPM_HEADER_SIZE)
-			return -EFAULT;
-
 		if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
 			dev_info(&chip->dev,
 				 "TPM is disabled/deactivated (0x%X)\n", rc);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index d433b1d..5649234 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1539,19 +1539,29 @@
 	spin_lock_irq(&port->inbuf_lock);
 	/* Remove unused data this port might have received. */
 	discard_port_data(port);
+	spin_unlock_irq(&port->inbuf_lock);
 
 	/* Remove buffers we queued up for the Host to send us data in. */
-	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-		free_buf(buf, true);
-	spin_unlock_irq(&port->inbuf_lock);
+	do {
+		spin_lock_irq(&port->inbuf_lock);
+		buf = virtqueue_detach_unused_buf(port->in_vq);
+		spin_unlock_irq(&port->inbuf_lock);
+		if (buf)
+			free_buf(buf, true);
+	} while (buf);
 
 	spin_lock_irq(&port->outvq_lock);
 	reclaim_consumed_buffers(port);
+	spin_unlock_irq(&port->outvq_lock);
 
 	/* Free pending buffers from the out-queue. */
-	while ((buf = virtqueue_detach_unused_buf(port->out_vq)))
-		free_buf(buf, true);
-	spin_unlock_irq(&port->outvq_lock);
+	do {
+		spin_lock_irq(&port->outvq_lock);
+		buf = virtqueue_detach_unused_buf(port->out_vq);
+		spin_unlock_irq(&port->outvq_lock);
+		if (buf)
+			free_buf(buf, true);
+	} while (buf);
 }
 
 /*
diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index 190122e64..85a449c 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c
@@ -203,7 +203,7 @@
 	ret = clk_hw_register(NULL, &prog->hw);
 	if (ret) {
 		kfree(prog);
-		hw = &prog->hw;
+		hw = ERR_PTR(ret);
 	}
 
 	return hw;
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index b68bf57..8c7763f 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -502,8 +502,12 @@
 static long bcm2835_pll_round_rate(struct clk_hw *hw, unsigned long rate,
 				   unsigned long *parent_rate)
 {
+	struct bcm2835_pll *pll = container_of(hw, struct bcm2835_pll, hw);
+	const struct bcm2835_pll_data *data = pll->data;
 	u32 ndiv, fdiv;
 
+	rate = clamp(rate, data->min_rate, data->max_rate);
+
 	bcm2835_pll_choose_ndiv_and_fdiv(rate, *parent_rate, &ndiv, &fdiv);
 
 	return bcm2835_pll_rate_from_divisors(*parent_rate, ndiv, fdiv, 1);
@@ -608,13 +612,6 @@
 	u32 ana[4];
 	int i;
 
-	if (rate < data->min_rate || rate > data->max_rate) {
-		dev_err(cprman->dev, "%s: rate out of spec: %lu vs (%lu, %lu)\n",
-			clk_hw_get_name(hw), rate,
-			data->min_rate, data->max_rate);
-		return -EINVAL;
-	}
-
 	if (rate > data->max_fb_rate) {
 		use_fb_prediv = true;
 		rate /= 2;
diff --git a/drivers/clk/clk-max77686.c b/drivers/clk/clk-max77686.c
index b637f59..eb953d3 100644
--- a/drivers/clk/clk-max77686.c
+++ b/drivers/clk/clk-max77686.c
@@ -216,6 +216,7 @@
 		return -EINVAL;
 	}
 
+	drv_data->num_clks = num_clks;
 	drv_data->max_clk_data = devm_kcalloc(dev, num_clks,
 					      sizeof(*drv_data->max_clk_data),
 					      GFP_KERNEL);
diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
index 20b1055..80ae2a5 100644
--- a/drivers/clk/clk-qoriq.c
+++ b/drivers/clk/clk-qoriq.c
@@ -700,6 +700,7 @@
 					     struct mux_hwclock *hwc,
 					     const struct clk_ops *ops,
 					     unsigned long min_rate,
+					     unsigned long max_rate,
 					     unsigned long pct80_rate,
 					     const char *fmt, int idx)
 {
@@ -728,6 +729,8 @@
 			continue;
 		if (rate < min_rate)
 			continue;
+		if (rate > max_rate)
+			continue;
 
 		parent_names[j] = div->name;
 		hwc->parent_to_clksel[j] = i;
@@ -759,7 +762,7 @@
 	struct mux_hwclock *hwc;
 	const struct clockgen_pll_div *div;
 	unsigned long plat_rate, min_rate;
-	u64 pct80_rate;
+	u64 max_rate, pct80_rate;
 	u32 clksel;
 
 	hwc = kzalloc(sizeof(*hwc), GFP_KERNEL);
@@ -787,8 +790,8 @@
 		return NULL;
 	}
 
-	pct80_rate = clk_get_rate(div->clk);
-	pct80_rate *= 8;
+	max_rate = clk_get_rate(div->clk);
+	pct80_rate = max_rate * 8;
 	do_div(pct80_rate, 10);
 
 	plat_rate = clk_get_rate(cg->pll[PLATFORM_PLL].div[PLL_DIV1].clk);
@@ -798,7 +801,7 @@
 	else
 		min_rate = plat_rate / 2;
 
-	return create_mux_common(cg, hwc, &cmux_ops, min_rate,
+	return create_mux_common(cg, hwc, &cmux_ops, min_rate, max_rate,
 				 pct80_rate, "cg-cmux%d", idx);
 }
 
@@ -813,7 +816,7 @@
 	hwc->reg = cg->regs + 0x20 * idx + 0x10;
 	hwc->info = cg->info.hwaccel[idx];
 
-	return create_mux_common(cg, hwc, &hwaccel_ops, 0, 0,
+	return create_mux_common(cg, hwc, &hwaccel_ops, 0, ULONG_MAX, 0,
 				 "cg-hwaccel%d", idx);
 }
 
diff --git a/drivers/clk/clk-xgene.c b/drivers/clk/clk-xgene.c
index 5daddf5..bc37030 100644
--- a/drivers/clk/clk-xgene.c
+++ b/drivers/clk/clk-xgene.c
@@ -463,22 +463,20 @@
 	struct xgene_clk *pclk = to_xgene_clk(hw);
 	unsigned long flags = 0;
 	u32 data;
-	phys_addr_t reg;
 
 	if (pclk->lock)
 		spin_lock_irqsave(pclk->lock, flags);
 
 	if (pclk->param.csr_reg != NULL) {
 		pr_debug("%s clock enabled\n", clk_hw_get_name(hw));
-		reg = __pa(pclk->param.csr_reg);
 		/* First enable the clock */
 		data = xgene_clk_read(pclk->param.csr_reg +
 					pclk->param.reg_clk_offset);
 		data |= pclk->param.reg_clk_mask;
 		xgene_clk_write(data, pclk->param.csr_reg +
 					pclk->param.reg_clk_offset);
-		pr_debug("%s clock PADDR base %pa clk offset 0x%08X mask 0x%08X value 0x%08X\n",
-			clk_hw_get_name(hw), &reg,
+		pr_debug("%s clk offset 0x%08X mask 0x%08X value 0x%08X\n",
+			clk_hw_get_name(hw),
 			pclk->param.reg_clk_offset, pclk->param.reg_clk_mask,
 			data);
 
@@ -488,8 +486,8 @@
 		data &= ~pclk->param.reg_csr_mask;
 		xgene_clk_write(data, pclk->param.csr_reg +
 					pclk->param.reg_csr_offset);
-		pr_debug("%s CSR RESET PADDR base %pa csr offset 0x%08X mask 0x%08X value 0x%08X\n",
-			clk_hw_get_name(hw), &reg,
+		pr_debug("%s csr offset 0x%08X mask 0x%08X value 0x%08X\n",
+			clk_hw_get_name(hw),
 			pclk->param.reg_csr_offset, pclk->param.reg_csr_mask,
 			data);
 	}
diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index fe364e6..c0e8e1f 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -195,7 +195,7 @@
 	hi6220_clk_register_divider(hi6220_div_clks_sys,
 			ARRAY_SIZE(hi6220_div_clks_sys), clk_data);
 }
-CLK_OF_DECLARE(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init);
+CLK_OF_DECLARE_DRIVER(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init);
 
 
 /* clocks in media controller */
@@ -252,7 +252,7 @@
 	hi6220_clk_register_divider(hi6220_div_clks_media,
 				ARRAY_SIZE(hi6220_div_clks_media), clk_data);
 }
-CLK_OF_DECLARE(hi6220_clk_media, "hisilicon,hi6220-mediactrl", hi6220_clk_media_init);
+CLK_OF_DECLARE_DRIVER(hi6220_clk_media, "hisilicon,hi6220-mediactrl", hi6220_clk_media_init);
 
 
 /* clocks in pmctrl */
diff --git