Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux

Pull i2c fixes from Wolfram Sang:
 "i2c driver bugfixes (s3c2410, slave-eeprom, sh_mobile), size
  regression "bugfix" (i2c slave), documentation bugfix (st).

  Also, one documentation update (da9063), so some devicetrees can now
  be verified"

* 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
  i2c: sh_mobile: terminate DMA reads properly
  i2c: Only include slave support if selected
  i2c: s3c2410: fix ABBA deadlock by keeping clock prepared
  i2c: slave-eeprom: fix boundary check when using sysfs
  i2c: st: Rename clock reference to something that exists
  DT: i2c: Add devices handled by the da9063 MFD driver
diff --git a/Documentation/ABI/testing/sysfs-class-mei b/Documentation/ABI/testing/sysfs-class-mei
index 0ec8b81..80d9888 100644
--- a/Documentation/ABI/testing/sysfs-class-mei
+++ b/Documentation/ABI/testing/sysfs-class-mei
@@ -14,3 +14,18 @@
 		The /sys/class/mei/meiN directory is created for
 		each probed mei device
 
+What:		/sys/class/mei/meiN/fw_status
+Date:		Nov 2014
+KernelVersion:	3.19
+Contact:	Tomas Winkler <tomas.winkler@intel.com>
+Description:	Display fw status registers content
+
+		The ME FW writes its status information into fw status
+		registers for BIOS and OS to monitor fw health.
+
+		The register contains running state, power management
+		state, error codes, and others. The way the registers
+		are decoded depends on PCH or SoC generation.
+		Also number of registers varies between 1 and 6
+		depending on generation.
+
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop
deleted file mode 100644
index 7969443..0000000
--- a/Documentation/ABI/testing/sysfs-platform-dell-laptop
+++ /dev/null
@@ -1,60 +0,0 @@
-What:		/sys/class/leds/dell::kbd_backlight/als_setting
-Date:		December 2014
-KernelVersion:	3.19
-Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
-Description:
-		This file allows to control the automatic keyboard
-		illumination mode on some systems that have an ambient
-		light sensor. Write 1 to this file to enable the auto
-		mode, 0 to disable it.
-
-What:		/sys/class/leds/dell::kbd_backlight/start_triggers
-Date:		December 2014
-KernelVersion:	3.19
-Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
-Description:
-		This file allows to control the input triggers that
-		turn on the keyboard backlight illumination that is
-		disabled because of inactivity.
-		Read the file to see the triggers available. The ones
-		enabled are preceded by '+', those disabled by '-'.
-
-		To enable a trigger, write its name preceded by '+' to
-		this file. To disable a trigger, write its name preceded
-		by '-' instead.
-
-		For example, to enable the keyboard as trigger run:
-		    echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
-		To disable it:
-		    echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
-
-		Note that not all the available triggers can be configured.
-
-What:		/sys/class/leds/dell::kbd_backlight/stop_timeout
-Date:		December 2014
-KernelVersion:	3.19
-Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
-Description:
-		This file allows to specify the interval after which the
-		keyboard illumination is disabled because of inactivity.
-		The timeouts are expressed in seconds, minutes, hours and
-		days, for which the symbols are 's', 'm', 'h' and 'd'
-		respectively.
-
-		To configure the timeout, write to this file a value along
-		with any the above units. If no unit is specified, the value
-		is assumed to be expressed in seconds.
-
-		For example, to set the timeout to 10 minutes run:
-		    echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
-
-		Note that when this file is read, the returned value might be
-		expressed in a different unit than the one used when the timeout
-		was set.
-
-		Also note that only some timeouts are supported and that
-		some systems might fall back to a specific timeout in case
-		an invalid timeout is written to this file.
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
index 556c866..b78564b2 100644
--- a/Documentation/devicetree/bindings/arm/arm-boards
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@@ -23,7 +23,7 @@
     range of 0x200 bytes.
 
 - syscon: the root node of the Integrator platforms must have a
-  system controller node pointong to the control registers,
+  system controller node pointing to the control registers,
   with the compatible string
   "arm,integrator-ap-syscon"
   "arm,integrator-cp-syscon"
diff --git a/Documentation/devicetree/bindings/arm/fw-cfg.txt b/Documentation/devicetree/bindings/arm/fw-cfg.txt
new file mode 100644
index 0000000..953fb64
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/fw-cfg.txt
@@ -0,0 +1,72 @@
+* QEMU Firmware Configuration bindings for ARM
+
+QEMU's arm-softmmu and aarch64-softmmu emulation / virtualization targets
+provide the following Firmware Configuration interface on the "virt" machine
+type:
+
+- A write-only, 16-bit wide selector (or control) register,
+- a read-write, 64-bit wide data register.
+
+QEMU exposes the control and data register to ARM guests as memory mapped
+registers; their location is communicated to the guest's UEFI firmware in the
+DTB that QEMU places at the bottom of the guest's DRAM.
+
+The guest writes a selector value (a key) to the selector register, and then
+can read the corresponding data (produced by QEMU) via the data register. If
+the selected entry is writable, the guest can rewrite it through the data
+register.
+
+The selector register takes keys in big endian byte order.
+
+The data register allows accesses with 8, 16, 32 and 64-bit width (only at
+offset 0 of the register). Accesses larger than a byte are interpreted as
+arrays, bundled together only for better performance. The bytes constituting
+such a word, in increasing address order, correspond to the bytes that would
+have been transferred by byte-wide accesses in chronological order.
+
+The interface allows guest firmware to download various parameters and blobs
+that affect how the firmware works and what tables it installs for the guest
+OS. For example, boot order of devices, ACPI tables, SMBIOS tables, kernel and
+initrd images for direct kernel booting, virtual machine UUID, SMP information,
+virtual NUMA topology, and so on.
+
+The authoritative registry of the valid selector values and their meanings is
+the QEMU source code; the structure of the data blobs corresponding to the
+individual key values is also defined in the QEMU source code.
+
+The presence of the registers can be verified by selecting the "signature" blob
+with key 0x0000, and reading four bytes from the data register. The returned
+signature is "QEMU".
+
+The outermost protocol (involving the write / read sequences of the control and
+data registers) is expected to be versioned, and/or described by feature bits.
+The interface revision / feature bitmap can be retrieved with key 0x0001. The
+blob to be read from the data register has size 4, and it is to be interpreted
+as a uint32_t value in little endian byte order. The current value
+(corresponding to the above outer protocol) is zero.
+
+The guest kernel is not expected to use these registers (although it is
+certainly allowed to); the device tree bindings are documented here because
+this is where device tree bindings reside in general.
+
+Required properties:
+
+- compatible: "qemu,fw-cfg-mmio".
+
+- reg: the MMIO region used by the device.
+  * Bytes 0x0 to 0x7 cover the data register.
+  * Bytes 0x8 to 0x9 cover the selector register.
+  * Further registers may be appended to the region in case of future interface
+    revisions / feature bits.
+
+Example:
+
+/ {
+	#size-cells = <0x2>;
+	#address-cells = <0x2>;
+
+	fw-cfg@9020000 {
+		compatible = "qemu,fw-cfg-mmio";
+		reg = <0x0 0x9020000 0x0 0xa>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/graph.txt b/Documentation/devicetree/bindings/graph.txt
index 1a69c07..fcb1c6a 100644
--- a/Documentation/devicetree/bindings/graph.txt
+++ b/Documentation/devicetree/bindings/graph.txt
@@ -19,7 +19,7 @@
 may be described by specialized bindings depending on the type of connection.
 
 To see how this binding applies to video pipelines, for example, see
-Documentation/device-tree/bindings/media/video-interfaces.txt.
+Documentation/devicetree/bindings/media/video-interfaces.txt.
 Here the ports describe data interfaces, and the links between them are
 the connecting data buses. A single port with multiple connections can
 correspond to multiple devices being connected to the same physical bus.
diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt
index 0328088..24c5cda 100644
--- a/Documentation/devicetree/bindings/net/davinci_emac.txt
+++ b/Documentation/devicetree/bindings/net/davinci_emac.txt
@@ -4,7 +4,8 @@
 for the davinci_emac interface contains.
 
 Required properties:
-- compatible: "ti,davinci-dm6467-emac" or "ti,am3517-emac"
+- compatible: "ti,davinci-dm6467-emac", "ti,am3517-emac" or
+  "ti,dm816-emac"
 - reg: Offset and length of the register set for the device
 - ti,davinci-ctrl-reg-offset: offset to control register
 - ti,davinci-ctrl-mod-reg-offset: offset to control module register
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index b1df0ad..d443279 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -9,7 +9,6 @@
 adapteva	Adapteva, Inc.
 adi	Analog Devices, Inc.
 aeroflexgaisler	Aeroflex Gaisler AB
-ak	Asahi Kasei Corp.
 allwinner	Allwinner Technology Co., Ltd.
 altr	Altera Corp.
 amcc	Applied Micro Circuits Corporation (APM, formally AMCC)
@@ -20,6 +19,7 @@
 apm	Applied Micro Circuits Corporation (APM)
 arm	ARM Ltd.
 armadeus	ARMadeus Systems SARL
+asahi-kasei	Asahi Kasei Corp.
 atmel	Atmel Corporation
 auo	AU Optronics Corporation
 avago	Avago Technologies
@@ -127,6 +127,7 @@
 powervr	PowerVR (deprecated, use img)
 qca	Qualcomm Atheros, Inc.
 qcom	Qualcomm Technologies, Inc
+qemu	QEMU, a generic and open source machine emulator and virtualizer
 qnap	QNAP Systems, Inc.
 radxa	Radxa
 raidsonic	RaidSonic Technology GmbH
@@ -168,6 +169,7 @@
 v3	V3 Semiconductor
 variscite	Variscite Ltd.
 via	VIA Technologies, Inc.
+virtio	Virtual I/O Device Specification, developed by the OASIS consortium
 voipac	Voipac Technologies s.r.o.
 winbond Winbond Electronics corp.
 wlf	Wolfson Microelectronics
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4df73da..176d4fe 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1277,6 +1277,7 @@
 	i8042.notimeout	[HW] Ignore timeout condition signalled by controller
 	i8042.reset	[HW] Reset the controller during init and cleanup
 	i8042.unlock	[HW] Unlock (ignore) the keylock
+	i8042.kbdreset  [HW] Reset device connected to KBD port
 
 	i810=		[HW,DRM]
 
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 9bffdfc..85b0221 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -66,6 +66,8 @@
 route/max_size - INTEGER
 	Maximum number of routes allowed in the kernel.  Increase
 	this when using large numbers of interfaces and/or routes.
+	From linux kernel 3.6 onwards, this is deprecated for ipv4
+	as route cache is no longer used.
 
 neigh/default/gc_thresh1 - INTEGER
 	Minimum number of entries to keep.  Garbage collector will not
diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py
index 230ce71..2b47704 100755
--- a/Documentation/target/tcm_mod_builder.py
+++ b/Documentation/target/tcm_mod_builder.py
@@ -389,9 +389,6 @@
 	buf += "	.release_cmd			= " + fabric_mod_name + "_release_cmd,\n"
 	buf += "	.shutdown_session		= " + fabric_mod_name + "_shutdown_session,\n"
 	buf += "	.close_session			= " + fabric_mod_name + "_close_session,\n"
-	buf += "	.stop_session			= " + fabric_mod_name + "_stop_session,\n"
-	buf += "	.fall_back_to_erl0		= " + fabric_mod_name + "_reset_nexus,\n"
-	buf += "	.sess_logged_in			= " + fabric_mod_name + "_sess_logged_in,\n"
 	buf += "	.sess_get_index			= " + fabric_mod_name + "_sess_get_index,\n"
 	buf += "	.sess_get_initiator_sid		= NULL,\n"
 	buf += "	.write_pending			= " + fabric_mod_name + "_write_pending,\n"
@@ -402,7 +399,7 @@
 	buf += "	.queue_data_in			= " + fabric_mod_name + "_queue_data_in,\n"
 	buf += "	.queue_status			= " + fabric_mod_name + "_queue_status,\n"
 	buf += "	.queue_tm_rsp			= " + fabric_mod_name + "_queue_tm_rsp,\n"
-	buf += "	.is_state_remove		= " + fabric_mod_name + "_is_state_remove,\n"
+	buf += "	.aborted_task			= " + fabric_mod_name + "_aborted_task,\n"
 	buf += "	/*\n"
 	buf += "	 * Setup function pointers for generic logic in target_core_fabric_configfs.c\n"
 	buf += "	 */\n"
@@ -428,7 +425,7 @@
 	buf += "	/*\n"
 	buf += "	 * Register the top level struct config_item_type with TCM core\n"
 	buf += "	 */\n"
-	buf += "	fabric = target_fabric_configfs_init(THIS_MODULE, \"" + fabric_mod_name[4:] + "\");\n"
+	buf += "	fabric = target_fabric_configfs_init(THIS_MODULE, \"" + fabric_mod_name + "\");\n"
 	buf += "	if (IS_ERR(fabric)) {\n"
 	buf += "		printk(KERN_ERR \"target_fabric_configfs_init() failed\\n\");\n"
 	buf += "		return PTR_ERR(fabric);\n"
@@ -595,7 +592,7 @@
 		if re.search('get_fabric_name', fo):
 			buf += "char *" + fabric_mod_name + "_get_fabric_name(void)\n"
 			buf += "{\n"
-			buf += "	return \"" + fabric_mod_name[4:] + "\";\n"
+			buf += "	return \"" + fabric_mod_name + "\";\n"
 			buf += "}\n\n"
 			bufi += "char *" + fabric_mod_name + "_get_fabric_name(void);\n"
 			continue
@@ -820,27 +817,6 @@
 			buf += "}\n\n"
 			bufi += "void " + fabric_mod_name + "_close_session(struct se_session *);\n"
 
-		if re.search('stop_session\)\(', fo):
-			buf += "void " + fabric_mod_name + "_stop_session(struct se_session *se_sess, int sess_sleep , int conn_sleep)\n"
-			buf += "{\n"
-			buf += "	return;\n"
-			buf += "}\n\n"
-			bufi += "void " + fabric_mod_name + "_stop_session(struct se_session *, int, int);\n"
-
-		if re.search('fall_back_to_erl0\)\(', fo):
-			buf += "void " + fabric_mod_name + "_reset_nexus(struct se_session *se_sess)\n"
-			buf += "{\n"
-			buf += "	return;\n"
-			buf += "}\n\n"
-			bufi += "void " + fabric_mod_name + "_reset_nexus(struct se_session *);\n"
-
-		if re.search('sess_logged_in\)\(', fo):
-			buf += "int " + fabric_mod_name + "_sess_logged_in(struct se_session *se_sess)\n"
-			buf += "{\n"
-			buf += "	return 0;\n"
-			buf += "}\n\n"
-			bufi += "int " + fabric_mod_name + "_sess_logged_in(struct se_session *);\n"
-
 		if re.search('sess_get_index\)\(', fo):
 			buf += "u32 " + fabric_mod_name + "_sess_get_index(struct se_session *se_sess)\n"
 			buf += "{\n"
@@ -898,19 +874,18 @@
 			bufi += "int " + fabric_mod_name + "_queue_status(struct se_cmd *);\n"
 
 		if re.search('queue_tm_rsp\)\(', fo):
-			buf += "int " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *se_cmd)\n"
+			buf += "void " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *se_cmd)\n"
 			buf += "{\n"
-			buf += "	return 0;\n"
+			buf += "	return;\n"
 			buf += "}\n\n"
-			bufi += "int " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *);\n"
+			bufi += "void " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *);\n"
 
-		if re.search('is_state_remove\)\(', fo):
-			buf += "int " + fabric_mod_name + "_is_state_remove(struct se_cmd *se_cmd)\n"
+		if re.search('aborted_task\)\(', fo):
+			buf += "void " + fabric_mod_name + "_aborted_task(struct se_cmd *se_cmd)\n"
 			buf += "{\n"
-			buf += "	return 0;\n"
+			buf += "	return;\n"
 			buf += "}\n\n"
-			bufi += "int " + fabric_mod_name + "_is_state_remove(struct se_cmd *);\n"
-
+			bufi += "void " + fabric_mod_name + "_aborted_task(struct se_cmd *);\n"
 
 	ret = p.write(buf)
 	if ret:
@@ -1018,11 +993,11 @@
 	tcm_mod_build_kbuild(fabric_mod_dir, fabric_mod_name)
 	tcm_mod_build_kconfig(fabric_mod_dir, fabric_mod_name)
 
-	input = raw_input("Would you like to add " + fabric_mod_name + "to drivers/target/Makefile..? [yes,no]: ")
+	input = raw_input("Would you like to add " + fabric_mod_name + " to drivers/target/Makefile..? [yes,no]: ")
 	if input == "yes" or input == "y":
 		tcm_mod_add_kbuild(tcm_dir, fabric_mod_name)
 
-	input = raw_input("Would you like to add " + fabric_mod_name + "to drivers/target/Kconfig..? [yes,no]: ")
+	input = raw_input("Would you like to add " + fabric_mod_name + " to drivers/target/Kconfig..? [yes,no]: ")
 	if input == "yes" or input == "y":
 		tcm_mod_add_kconfig(tcm_dir, fabric_mod_name)
 
diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt
index fca24c9..753e47c 100644
--- a/Documentation/thermal/cpu-cooling-api.txt
+++ b/Documentation/thermal/cpu-cooling-api.txt
@@ -3,7 +3,7 @@
 
 Written by Amit Daniel Kachhap <amit.kachhap@linaro.org>
 
-Updated: 12 May 2012
+Updated: 6 Jan 2015
 
 Copyright (c)  2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
 
@@ -25,7 +25,18 @@
 
    clip_cpus: cpumask of cpus where the frequency constraints will happen.
 
-1.1.2 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register(
+	struct device_node *np, const struct cpumask *clip_cpus)
+
+    This interface function registers the cpufreq cooling device with
+    the name "thermal-cpufreq-%x" linking it with a device tree node, in
+    order to bind it via the thermal DT code. This api can support multiple
+    instances of cpufreq cooling devices.
+
+    np: pointer to the cooling device device tree node
+    clip_cpus: cpumask of cpus where the frequency constraints will happen.
+
+1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 
     This interface function unregisters the "thermal-cpufreq-%x" cooling device.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 3589d67..aaa039d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -696,7 +696,7 @@
 W:	http://blackfin.uclinux.org/
 S:	Supported
 F:	sound/soc/blackfin/*
- 
+
 ANALOG DEVICES INC IIO DRIVERS
 M:	Lars-Peter Clausen <lars@metafoo.de>
 M:	Michael Hennerich <Michael.Hennerich@analog.com>
@@ -708,6 +708,16 @@
 F:	drivers/staging/iio/*/ad*
 F:	staging/iio/trigger/iio-trig-bfin-timer.c
 
+ANDROID DRIVERS
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+M:	Arve Hjønnevåg <arve@android.com>
+M:	Riley Andrews <riandrews@android.com>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/gregkh/staging.git
+L:	devel@driverdev.osuosl.org
+S:	Supported
+F:	drivers/android/
+F:	drivers/staging/android/
+
 AOA (Apple Onboard Audio) ALSA DRIVER
 M:	Johannes Berg <johannes@sipsolutions.net>
 L:	linuxppc-dev@lists.ozlabs.org
@@ -754,13 +764,6 @@
 S:	Maintained
 F:	drivers/media/i2c/aptina-pll.*
 
-ARASAN COMPACT FLASH PATA CONTROLLER
-M:	Viresh Kumar <viresh.linux@gmail.com>
-L:	linux-ide@vger.kernel.org
-S:	Maintained
-F:	include/linux/pata_arasan_cf_data.h
-F:	drivers/ata/pata_arasan_cf.c
-
 ARC FRAMEBUFFER DRIVER
 M:	Jaya Kumar <jayalk@intworks.biz>
 S:	Maintained
@@ -2346,7 +2349,8 @@
 M:	Oliver Hartkopp <socketcan@hartkopp.net>
 L:	linux-can@vger.kernel.org
 W:	http://gitorious.org/linux-can
-T:	git git://gitorious.org/linux-can/linux-can-next.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
 S:	Maintained
 F:	Documentation/networking/can.txt
 F:	net/can/
@@ -2361,7 +2365,8 @@
 M:	Marc Kleine-Budde <mkl@pengutronix.de>
 L:	linux-can@vger.kernel.org
 W:	http://gitorious.org/linux-can
-T:	git git://gitorious.org/linux-can/linux-can-next.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
 S:	Maintained
 F:	drivers/net/can/
 F:	include/linux/can/dev.h
@@ -3183,7 +3188,7 @@
 Q:	https://patchwork.kernel.org/project/linux-dmaengine/list/
 S:	Maintained
 F:	drivers/dma/
-F:	include/linux/dma*
+F:	include/linux/dmaengine.h
 F:	Documentation/dmaengine/
 T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
@@ -4749,20 +4754,20 @@
 F:	drivers/scsi/ipr.*
 
 IBM Power Virtual Ethernet Device Driver
-M:	Santiago Leon <santil@linux.vnet.ibm.com>
+M:	Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/ibm/ibmveth.*
 
 IBM Power Virtual SCSI Device Drivers
-M:	Nathan Fontenot <nfont@linux.vnet.ibm.com>
+M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/ibmvscsi/ibmvscsi*
 F:	drivers/scsi/ibmvscsi/viosrp.h
 
 IBM Power Virtual FC Device Drivers
-M:	Brian King <brking@linux.vnet.ibm.com>
+M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/ibmvscsi/ibmvfc*
@@ -4930,7 +4935,6 @@
 
 INPUT (KEYBOARD, MOUSE, JOYSTICK, TOUCHSCREEN) DRIVERS
 M:	Dmitry Torokhov <dmitry.torokhov@gmail.com>
-M:	Dmitry Torokhov <dtor@mail.ru>
 L:	linux-input@vger.kernel.org
 Q:	http://patchwork.kernel.org/project/linux-input/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git
@@ -4952,7 +4956,6 @@
 INTEL C600 SERIES SAS CONTROLLER DRIVER
 M:	Intel SCU Linux support <intel-linux-scu@intel.com>
 M:	Artur Paszkiewicz <artur.paszkiewicz@intel.com>
-M:	Dave Jiang <dave.jiang@intel.com>
 L:	linux-scsi@vger.kernel.org
 T:	git git://git.code.sf.net/p/intel-sas/isci
 S:	Supported
@@ -5280,6 +5283,15 @@
 Q:	http://patchwork.kernel.org/project/linux-rdma/list/
 F:	drivers/infiniband/ulp/iser/
 
+ISCSI EXTENSIONS FOR RDMA (ISER) TARGET
+M:	Sagi Grimberg <sagig@mellanox.com>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master
+L:	linux-rdma@vger.kernel.org
+L:	target-devel@vger.kernel.org
+S:	Supported
+W:	http://www.linux-iscsi.org
+F:	drivers/infiniband/ulp/isert
+
 ISDN SUBSYSTEM
 M:	Karsten Keil <isdn@linux-pingi.de>
 L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
@@ -5694,6 +5706,49 @@
 F:	include/linux/lguest*.h
 F:	tools/lguest/
 
+LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
+M:	Tejun Heo <tj@kernel.org>
+L:	linux-ide@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+S:	Maintained
+F:	drivers/ata/
+F:	include/linux/ata.h
+F:	include/linux/libata.h
+
+LIBATA PATA ARASAN COMPACT FLASH CONTROLLER
+M:	Viresh Kumar <viresh.linux@gmail.com>
+L:	linux-ide@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+S:	Maintained
+F:	include/linux/pata_arasan_cf_data.h
+F:	drivers/ata/pata_arasan_cf.c
+
+LIBATA PATA DRIVERS
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+M:	Tejun Heo <tj@kernel.org>
+L:	linux-ide@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+S:	Maintained
+F:	drivers/ata/pata_*.c
+F:	drivers/ata/ata_generic.c
+
+LIBATA SATA AHCI PLATFORM devices support
+M:	Hans de Goede <hdegoede@redhat.com>
+M:	Tejun Heo <tj@kernel.org>
+L:	linux-ide@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+S:	Maintained
+F:	drivers/ata/ahci_platform.c
+F:	drivers/ata/libahci_platform.c
+F:	include/linux/ahci_platform.h
+
+LIBATA SATA PROMISE TX2/TX4 CONTROLLER DRIVER
+M:	Mikael Pettersson <mikpelinux@gmail.com>
+L:	linux-ide@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+S:	Maintained
+F:	drivers/ata/sata_promise.*
+
 LIBLOCKDEP
 M:	Sasha Levin <sasha.levin@oracle.com>
 S:	Maintained
@@ -6978,14 +7033,12 @@
 M:	Grant Likely <grant.likely@linaro.org>
 M:	Rob Herring <robh+dt@kernel.org>
 L:	devicetree@vger.kernel.org
-W:	http://fdt.secretlab.ca
-T:	git git://git.secretlab.ca/git/linux-2.6.git
+W:	http://www.devicetree.org/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/glikely/linux.git
 S:	Maintained
 F:	drivers/of/
 F:	include/linux/of*.h
 F:	scripts/dtc/
-K:	of_get_property
-K:	of_match_table
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
 M:	Rob Herring <robh+dt@kernel.org>
@@ -7230,7 +7283,7 @@
 F:	drivers/pci/host/*layerscape*
 
 PCI DRIVER FOR IMX6
-M:	Richard Zhu <r65037@freescale.com>
+M:	Richard Zhu <Richard.Zhu@freescale.com>
 M:	Lucas Stach <l.stach@pengutronix.de>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -7400,6 +7453,7 @@
 PIN CONTROL SUBSYSTEM
 M:	Linus Walleij <linus.walleij@linaro.org>
 L:	linux-gpio@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git
 S:	Maintained
 F:	drivers/pinctrl/
 F:	include/linux/pinctrl/
@@ -7567,12 +7621,6 @@
 S:	Obsolete
 F:	drivers/net/wireless/prism54/
 
-PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER
-M:	Mikael Pettersson <mikpelinux@gmail.com>
-L:	linux-ide@vger.kernel.org
-S:	Maintained
-F:	drivers/ata/sata_promise.*
-
 PS3 NETWORK SUPPORT
 M:	Geoff Levand <geoff@infradead.org>
 L:	netdev@vger.kernel.org
@@ -7738,8 +7786,7 @@
 F:	drivers/scsi/qla2xxx/
 
 QLOGIC QLA4XXX iSCSI DRIVER
-M:	Vikas Chaudhary <vikas.chaudhary@qlogic.com>
-M:	iscsi-driver@qlogic.com
+M:	QLogic-Storage-Upstream@qlogic.com
 L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	Documentation/scsi/LICENSE.qla4xxx
@@ -8547,25 +8594,6 @@
 F:	drivers/misc/phantom.c
 F:	include/uapi/linux/phantom.h
 
-SERIAL ATA (SATA) SUBSYSTEM
-M:	Tejun Heo <tj@kernel.org>
-L:	linux-ide@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
-S:	Supported
-F:	drivers/ata/
-F:	include/linux/ata.h
-F:	include/linux/libata.h
-
-SERIAL ATA AHCI PLATFORM devices support
-M:	Hans de Goede <hdegoede@redhat.com>
-M:	Tejun Heo <tj@kernel.org>
-L:	linux-ide@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
-S:	Supported
-F:	drivers/ata/ahci_platform.c
-F:	drivers/ata/libahci_platform.c
-F:	include/linux/ahci_platform.h
-
 SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
 M:	Jayamohan Kallickal <jayamohan.kallickal@emulex.com>
 L:	linux-scsi@vger.kernel.org
@@ -9534,7 +9562,8 @@
 TI BANDGAP AND THERMAL DRIVER
 M:	Eduardo Valentin <edubezval@gmail.com>
 L:	linux-pm@vger.kernel.org
-S:	Supported
+L:	linux-omap@vger.kernel.org
+S:	Maintained
 F:	drivers/thermal/ti-soc-thermal/
 
 TI CLOCK DRIVER
@@ -10147,6 +10176,7 @@
 M:	"Hans J. Koch" <hjk@hansjkoch.de>
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
 F:	Documentation/DocBook/uio-howto.tmpl
 F:	drivers/uio/
 F:	include/linux/uio*.h
diff --git a/Makefile b/Makefile
index e41a335..95a0e82 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc6
 NAME = Diseased Newt
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 076c35c..98a1525 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -285,8 +285,12 @@
 			if (r->parent || !r->start || !r->flags)
 				continue;
 			if (pci_has_flag(PCI_PROBE_ONLY) ||
-			    (r->flags & IORESOURCE_PCI_FIXED))
-				pci_claim_resource(dev, i);
+			    (r->flags & IORESOURCE_PCI_FIXED)) {
+				if (pci_claim_resource(dev, i) == 0)
+					continue;
+
+				pci_claim_bridge_resource(dev, i);
+			}
 		}
 	}
 
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 98838a0..9d0ac09 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -156,6 +156,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 6f7e3a6..563cb27 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -161,6 +161,8 @@
 
 	if (fault & VM_FAULT_OOM)
 		goto out_of_memory;
+	else if (fault & VM_FAULT_SIGSEGV)
+		goto bad_area;
 	else if (fault & VM_FAULT_SIGBUS)
 		goto do_sigbus;
 
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 1467750..e8c6c60 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -953,6 +953,8 @@
 			interrupts = <26 IRQ_TYPE_LEVEL_HIGH 3>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_fb>;
+			clocks = <&lcd_clk>, <&lcd_clk>;
+			clock-names = "lcdc_clk", "hclk";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/berlin2q-marvell-dmp.dts b/arch/arm/boot/dts/berlin2q-marvell-dmp.dts
index 28e7e20..a98ac1b 100644
--- a/arch/arm/boot/dts/berlin2q-marvell-dmp.dts
+++ b/arch/arm/boot/dts/berlin2q-marvell-dmp.dts
@@ -65,6 +65,8 @@
 };
 
 &sdhci2 {
+	broken-cd;
+	bus-width = <8>;
 	non-removable;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi
index 35253c9..e2f61f2 100644
--- a/arch/arm/boot/dts/berlin2q.dtsi
+++ b/arch/arm/boot/dts/berlin2q.dtsi
@@ -83,7 +83,8 @@
 			compatible = "mrvl,pxav3-mmc";
 			reg = <0xab1000 0x200>;
 			interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
-			clocks = <&chip CLKID_SDIO1XIN>;
+			clocks = <&chip CLKID_NFC_ECC>, <&chip CLKID_NFC>;
+			clock-names = "io", "core";
 			status = "disabled";
 		};
 
@@ -348,36 +349,6 @@
 				interrupt-parent = <&gic>;
 				interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
 			};
-
-			gpio4: gpio@5000 {
-				compatible = "snps,dw-apb-gpio";
-				reg = <0x5000 0x400>;
-				#address-cells = <1>;
-				#size-cells = <0>;
-
-				porte: gpio-port@4 {
-					compatible = "snps,dw-apb-gpio-port";
-					gpio-controller;
-					#gpio-cells = <2>;
-					snps,nr-gpios = <32>;
-					reg = <0>;
-				};
-			};
-
-			gpio5: gpio@c000 {
-				compatible = "snps,dw-apb-gpio";
-				reg = <0xc000 0x400>;
-				#address-cells = <1>;
-				#size-cells = <0>;
-
-				portf: gpio-port@5 {
-					compatible = "snps,dw-apb-gpio-port";
-					gpio-controller;
-					#gpio-cells = <2>;
-					snps,nr-gpios = <32>;
-					reg = <0>;
-				};
-			};
 		};
 
 		chip: chip-control@ea0000 {
@@ -466,6 +437,21 @@
 			ranges = <0 0xfc0000 0x10000>;
 			interrupt-parent = <&sic>;
 
+			sm_gpio1: gpio@5000 {
+				compatible = "snps,dw-apb-gpio";
+				reg = <0x5000 0x400>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				portf: gpio-port@5 {
+					compatible = "snps,dw-apb-gpio-port";
+					gpio-controller;
+					#gpio-cells = <2>;
+					snps,nr-gpios = <32>;
+					reg = <0>;
+				};
+			};
+
 			i2c2: i2c@7000 {
 				compatible = "snps,designware-i2c";
 				#address-cells = <1>;
@@ -516,6 +502,21 @@
 				status = "disabled";
 			};
 
+			sm_gpio0: gpio@c000 {
+				compatible = "snps,dw-apb-gpio";
+				reg = <0xc000 0x400>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				porte: gpio-port@4 {
+					compatible = "snps,dw-apb-gpio-port";
+					gpio-controller;
+					#gpio-cells = <2>;
+					snps,nr-gpios = <32>;
+					reg = <0>;
+				};
+			};
+
 			sysctrl: pin-controller@d000 {
 				compatible = "marvell,berlin2q-system-ctrl";
 				reg = <0xd000 0x100>;
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 10b725c..ad4118f 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -499,23 +499,23 @@
 		};
 		partition@5 {
 			label = "QSPI.u-boot-spl-os";
-			reg = <0x00140000 0x00010000>;
+			reg = <0x00140000 0x00080000>;
 		};
 		partition@6 {
 			label = "QSPI.u-boot-env";
-			reg = <0x00150000 0x00010000>;
+			reg = <0x001c0000 0x00010000>;
 		};
 		partition@7 {
 			label = "QSPI.u-boot-env.backup1";
-			reg = <0x00160000 0x0010000>;
+			reg = <0x001d0000 0x0010000>;
 		};
 		partition@8 {
 			label = "QSPI.kernel";
-			reg = <0x00170000 0x0800000>;
+			reg = <0x001e0000 0x0800000>;
 		};
 		partition@9 {
 			label = "QSPI.file-system";
-			reg = <0x00970000 0x01690000>;
+			reg = <0x009e0000 0x01620000>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 22771bc..63f8b00 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1257,6 +1257,8 @@
 				tx-fifo-resize;
 				maximum-speed = "super-speed";
 				dr_mode = "otg";
+				snps,dis_u3_susphy_quirk;
+				snps,dis_u2_susphy_quirk;
 			};
 		};
 
@@ -1278,6 +1280,8 @@
 				tx-fifo-resize;
 				maximum-speed = "high-speed";
 				dr_mode = "otg";
+				snps,dis_u3_susphy_quirk;
+				snps,dis_u2_susphy_quirk;
 			};
 		};
 
@@ -1299,6 +1303,8 @@
 				tx-fifo-resize;
 				maximum-speed = "high-speed";
 				dr_mode = "otg";
+				snps,dis_u3_susphy_quirk;
+				snps,dis_u2_susphy_quirk;
 			};
 		};
 
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 0a229fc..d75c89d 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -736,7 +736,7 @@
 
 	dp_phy: video-phy@10040720 {
 		compatible = "samsung,exynos5250-dp-video-phy";
-		reg = <0x10040720 4>;
+		samsung,pmu-syscon = <&pmu_system_controller>;
 		#phy-cells = <0>;
 	};
 
diff --git a/arch/arm/boot/dts/exynos5420-arndale-octa.dts b/arch/arm/boot/dts/exynos5420-arndale-octa.dts
index aa7a7d7..db2c1c4 100644
--- a/arch/arm/boot/dts/exynos5420-arndale-octa.dts
+++ b/arch/arm/boot/dts/exynos5420-arndale-octa.dts
@@ -372,3 +372,7 @@
 &usbdrd_dwc3_1 {
 	dr_mode = "host";
 };
+
+&cci {
+	status = "disabled";
+};
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index 517e50f..6d38f8b 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -120,7 +120,7 @@
 		};
 	};
 
-	cci@10d20000 {
+	cci: cci@10d20000 {
 		compatible = "arm,cci-400";
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -503,8 +503,8 @@
 	};
 
 	dp_phy: video-phy@10040728 {
-		compatible = "samsung,exynos5250-dp-video-phy";
-		reg = <0x10040728 4>;
+		compatible = "samsung,exynos5420-dp-video-phy";
+		samsung,pmu-syscon = <&pmu_system_controller>;
 		#phy-cells = <0>;
 	};
 
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index 58d3c3c..e4d3aec 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -162,7 +162,7 @@
 				#size-cells = <0>;
 				compatible = "fsl,imx25-cspi", "fsl,imx35-cspi";
 				reg = <0x43fa4000 0x4000>;
-				clocks = <&clks 62>, <&clks 62>;
+				clocks = <&clks 78>, <&clks 78>;
 				clock-names = "ipg", "per";
 				interrupts = <14>;
 				status = "disabled";
@@ -369,7 +369,7 @@
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				#pwm-cells = <2>;
 				reg = <0x53fa0000 0x4000>;
-				clocks = <&clks 106>, <&clks 36>;
+				clocks = <&clks 106>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <36>;
 			};
@@ -388,7 +388,7 @@
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				#pwm-cells = <2>;
 				reg = <0x53fa8000 0x4000>;
-				clocks = <&clks 107>, <&clks 36>;
+				clocks = <&clks 107>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <41>;
 			};
@@ -429,7 +429,7 @@
 			pwm4: pwm@53fc8000 {
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				reg = <0x53fc8000 0x4000>;
-				clocks = <&clks 108>, <&clks 36>;
+				clocks = <&clks 108>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <42>;
 			};
@@ -476,7 +476,7 @@
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
 				#pwm-cells = <2>;
 				reg = <0x53fe0000 0x4000>;
-				clocks = <&clks 105>, <&clks 36>;
+				clocks = <&clks 105>, <&clks 52>;
 				clock-names = "ipg", "per";
 				interrupts = <26>;
 			};
diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts
index 56569ce..649befe 100644
--- a/arch/arm/boot/dts/imx51-babbage.dts
+++ b/arch/arm/boot/dts/imx51-babbage.dts
@@ -127,24 +127,12 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		reg_usbh1_vbus: regulator@0 {
-			compatible = "regulator-fixed";
-			pinctrl-names = "default";
-			pinctrl-0 = <&pinctrl_usbh1reg>;
-			reg = <0>;
-			regulator-name = "usbh1_vbus";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			gpio = <&gpio2 5 GPIO_ACTIVE_HIGH>;
-			enable-active-high;
-		};
-
-		reg_usbotg_vbus: regulator@1 {
+		reg_hub_reset: regulator@0 {
 			compatible = "regulator-fixed";
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_usbotgreg>;
-			reg = <1>;
-			regulator-name = "usbotg_vbus";
+			reg = <0>;
+			regulator-name = "hub_reset";
 			regulator-min-microvolt = <5000000>;
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio1 7 GPIO_ACTIVE_HIGH>;
@@ -176,6 +164,7 @@
 			reg = <0>;
 			clocks = <&clks IMX5_CLK_DUMMY>;
 			clock-names = "main_clk";
+			reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
 		};
 	};
 };
@@ -419,7 +408,7 @@
 &usbh1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usbh1>;
-	vbus-supply = <&reg_usbh1_vbus>;
+	vbus-supply = <&reg_hub_reset>;
 	fsl,usbphy = <&usbh1phy>;
 	phy_type = "ulpi";
 	status = "okay";
@@ -429,7 +418,6 @@
 	dr_mode = "otg";
 	disable-over-current;
 	phy_type = "utmi_wide";
-	vbus-supply = <&reg_usbotg_vbus>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index 4fc03b7..2109d07 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -335,8 +335,8 @@
 			vpu: vpu@02040000 {
 				compatible = "cnm,coda960";
 				reg = <0x02040000 0x3c000>;
-				interrupts = <0 3 IRQ_TYPE_LEVEL_HIGH>,
-				             <0 12 IRQ_TYPE_LEVEL_HIGH>;
+				interrupts = <0 12 IRQ_TYPE_LEVEL_HIGH>,
+					     <0 3 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-names = "bit", "jpeg";
 				clocks = <&clks IMX6QDL_CLK_VPU_AXI>,
 					 <&clks IMX6QDL_CLK_MMDC_CH0_AXI>,
diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts
index 1e6e5cc..c108bb4 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dts
+++ b/arch/arm/boot/dts/imx6sx-sdb.dts
@@ -159,13 +159,28 @@
 	pinctrl-0 = <&pinctrl_enet1>;
 	phy-supply = <&reg_enet_3v3>;
 	phy-mode = "rgmii";
+	phy-handle = <&ethphy1>;
 	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy1: ethernet-phy@1 {
+			reg = <1>;
+		};
+
+		ethphy2: ethernet-phy@2 {
+			reg = <2>;
+		};
+	};
 };
 
 &fec2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet2>;
 	phy-mode = "rgmii";
+	phy-handle = <&ethphy2>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index 657da14..c70bb27 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -142,6 +142,7 @@
 		scfg: scfg@1570000 {
 			compatible = "fsl,ls1021a-scfg", "syscon";
 			reg = <0x0 0x1570000 0x0 0x10000>;
+			big-endian;
 		};
 
 		clockgen: clocking@1ee1000 {
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 53f3ca0..b550c41 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -700,11 +700,9 @@
 		};
 	};
 
+	/* Ethernet is on some early development boards and qemu */
 	ethernet@gpmc {
 		compatible = "smsc,lan91c94";
-
-		status = "disabled";
-
 		interrupt-parent = <&gpio2>;
 		interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;	/* gpio54 */
 		reg = <1 0x300 0xf>;		/* 16 byte IO range at offset 0x300 */
diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi
index 3e067dd..6194d67 100644
--- a/arch/arm/boot/dts/rk3288-evb.dtsi
+++ b/arch/arm/boot/dts/rk3288-evb.dtsi
@@ -155,6 +155,15 @@
 };
 
 &pinctrl {
+	pcfg_pull_none_drv_8ma: pcfg-pull-none-drv-8ma {
+		drive-strength = <8>;
+	};
+
+	pcfg_pull_up_drv_8ma: pcfg-pull-up-drv-8ma {
+		bias-pull-up;
+		drive-strength = <8>;
+	};
+
 	backlight {
 		bl_en: bl-en {
 			rockchip,pins = <7 2 RK_FUNC_GPIO &pcfg_pull_none>;
@@ -173,6 +182,27 @@
 		};
 	};
 
+	sdmmc {
+		/*
+		 * Default drive strength isn't enough to achieve even
+		 * high-speed mode on EVB board so bump up to 8ma.
+		 */
+		sdmmc_bus4: sdmmc-bus4 {
+			rockchip,pins = <6 16 RK_FUNC_1 &pcfg_pull_up_drv_8ma>,
+					<6 17 RK_FUNC_1 &pcfg_pull_up_drv_8ma>,
+					<6 18 RK_FUNC_1 &pcfg_pull_up_drv_8ma>,
+					<6 19 RK_FUNC_1 &pcfg_pull_up_drv_8ma>;
+		};
+
+		sdmmc_clk: sdmmc-clk {
+			rockchip,pins = <6 20 RK_FUNC_1 &pcfg_pull_none_drv_8ma>;
+		};
+
+		sdmmc_cmd: sdmmc-cmd {
+			rockchip,pins = <6 21 RK_FUNC_1 &pcfg_pull_up_drv_8ma>;
+		};
+	};
+
 	usb {
 		host_vbus_drv: host-vbus-drv {
 			rockchip,pins = <0 14 RK_FUNC_GPIO &pcfg_pull_none>;
diff --git a/arch/arm/boot/dts/sama5d3xmb.dtsi b/arch/arm/boot/dts/sama5d3xmb.dtsi
index 49c10d3..77e0365 100644
--- a/arch/arm/boot/dts/sama5d3xmb.dtsi
+++ b/arch/arm/boot/dts/sama5d3xmb.dtsi
@@ -176,7 +176,7 @@
 			"Headphone Jack", "HPOUTR",
 			"IN2L", "Line In Jack",
 			"IN2R", "Line In Jack",
-			"MICBIAS", "IN1L",
+			"Mic", "MICBIAS",
 			"IN1L", "Mic";
 
 		atmel,ssc-controller = <&ssc0>;
diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index 1b0f30c..b94995d 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -1008,7 +1008,7 @@
 
 			pit: timer@fc068630 {
 				compatible = "atmel,at91sam9260-pit";
-				reg = <0xfc068630 0xf>;
+				reg = <0xfc068630 0x10>;
 				interrupts = <3 IRQ_TYPE_LEVEL_HIGH 5>;
 				clocks = <&h32ck>;
 			};
diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
index a8c00ee..3d0b875 100644
--- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts
+++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
@@ -25,11 +25,11 @@
 		stmpe2401_1 {
 			stmpe2401_1_nhk_mode: stmpe2401_1_nhk {
 				nhk_cfg1 {
-					ste,pins = "GPIO76_B20"; // IRQ line
+					pins = "GPIO76_B20"; // IRQ line
 					ste,input = <0>;
 				};
 				nhk_cfg2 {
-					ste,pins = "GPIO77_B8"; // reset line
+					pins = "GPIO77_B8"; // reset line
 					ste,output = <1>;
 				};
 			};
@@ -37,11 +37,11 @@
 		stmpe2401_2 {
 			stmpe2401_2_nhk_mode: stmpe2401_2_nhk {
 				nhk_cfg1 {
-					ste,pins = "GPIO78_A8"; // IRQ line
+					pins = "GPIO78_A8"; // IRQ line
 					ste,input = <0>;
 				};
 				nhk_cfg2 {
-					ste,pins = "GPIO79_C9"; // reset line
+					pins = "GPIO79_C9"; // reset line
 					ste,output = <1>;
 				};
 			};
diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts
index ea282c7..e2fed27 100644
--- a/arch/arm/boot/dts/tegra20-seaboard.dts
+++ b/arch/arm/boot/dts/tegra20-seaboard.dts
@@ -406,7 +406,7 @@
 		clock-frequency = <400000>;
 
 		magnetometer@c {
-			compatible = "ak,ak8975";
+			compatible = "asahi-kasei,ak8975";
 			reg = <0xc>;
 			interrupt-parent = <&gpio>;
 			interrupts = <TEGRA_GPIO(N, 5) IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/vf610-twr.dts b/arch/arm/boot/dts/vf610-twr.dts
index a0f7621..f2b64b1 100644
--- a/arch/arm/boot/dts/vf610-twr.dts
+++ b/arch/arm/boot/dts/vf610-twr.dts
@@ -129,13 +129,28 @@
 
 &fec0 {
 	phy-mode = "rmii";
+	phy-handle = <&ethphy0>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_fec0>;
 	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy0: ethernet-phy@0 {
+			reg = <0>;
+		};
+
+		ethphy1: ethernet-phy@1 {
+			reg = <1>;
+		};
+	};
 };
 
 &fec1 {
 	phy-mode = "rmii";
+	phy-handle = <&ethphy1>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_fec1>;
 	status = "okay";
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 5ef14de..3d0c5d6 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -84,7 +84,8 @@
 CONFIG_POWER_SUPPLY=y
 CONFIG_BATTERY_SBS=y
 CONFIG_CHARGER_TPS65090=y
-# CONFIG_HWMON is not set
+CONFIG_HWMON=y
+CONFIG_SENSORS_LM90=y
 CONFIG_THERMAL=y
 CONFIG_EXYNOS_THERMAL=y
 CONFIG_EXYNOS_THERMAL_CORE=y
@@ -109,11 +110,26 @@
 CONFIG_REGULATOR_S2MPS11=y
 CONFIG_REGULATOR_S5M8767=y
 CONFIG_REGULATOR_TPS65090=y
+CONFIG_DRM=y
+CONFIG_DRM_BRIDGE=y
+CONFIG_DRM_PTN3460=y
+CONFIG_DRM_PS8622=y
+CONFIG_DRM_EXYNOS=y
+CONFIG_DRM_EXYNOS_FIMD=y
+CONFIG_DRM_EXYNOS_DP=y
+CONFIG_DRM_PANEL=y
+CONFIG_DRM_PANEL_SIMPLE=y
 CONFIG_FB=y
 CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_SIMPLE=y
 CONFIG_EXYNOS_VIDEO=y
 CONFIG_EXYNOS_MIPI_DSI=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+CONFIG_LCD_PLATFORM=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_GENERIC=y
+CONFIG_BACKLIGHT_PWM=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FONTS=y
 CONFIG_FONT_7x14=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index c2c3a85..667d9d5 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -68,7 +68,7 @@
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-CONFIG_GENERIC_CPUFREQ_CPU0=y
+CONFIG_CPUFREQ_DT=y
 # CONFIG_ARM_OMAP2PLUS_CPUFREQ is not set
 CONFIG_CPU_IDLE=y
 CONFIG_BINFMT_MISC=y
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 66ce176..7b01523 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -38,6 +38,16 @@
 	vcpu->arch.hcr = HCR_GUEST_MASK;
 }
 
+static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hcr;
+}
+
+static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+{
+	vcpu->arch.hcr = hcr;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 254e065..04b4ea0 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -125,9 +125,6 @@
 	 * Anything that is not used directly from assembly code goes
 	 * here.
 	 */
-	/* dcache set/way operation pending */
-	int last_pcpu;
-	cpumask_t require_dcache_flush;
 
 	/* Don't run the guest on this vcpu */
 	bool pause;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 63e0ecc..1bca8f8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -44,6 +44,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 
@@ -161,13 +162,10 @@
 	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
 }
 
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size,
-					     bool ipa_uncached)
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+					       unsigned long size,
+					       bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
-		kvm_flush_dcache_to_poc((void *)hva, size);
-	
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -179,18 +177,77 @@
 	 *
 	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+	 *
+	 * We need to do this through a kernel mapping (using the
+	 * user-space mapping has proved to be the wrong
+	 * solution). For that, we need to kmap one page at a time,
+	 * and iterate over the range.
 	 */
-	if (icache_is_pipt()) {
-		__cpuc_coherent_user_range(hva, hva + size);
-	} else if (!icache_is_vivt_asid_tagged()) {
+
+	bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
+
+	VM_BUG_ON(size & PAGE_MASK);
+
+	if (!need_flush && !icache_is_pipt())
+		goto vipt_cache;
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		if (need_flush)
+			kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		if (icache_is_pipt())
+			__cpuc_coherent_user_range((unsigned long)va,
+						   (unsigned long)va + PAGE_SIZE);
+
+		size -= PAGE_SIZE;
+		pfn++;
+
+		kunmap_atomic(va);
+	}
+
+vipt_cache:
+	if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	}
 }
 
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+	void *va = kmap_atomic(pte_page(pte));
+
+	kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+	kunmap_atomic(va);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	unsigned long size = PMD_SIZE;
+	pfn_t pfn = pmd_pfn(pmd);
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		pfn++;
+		size -= PAGE_SIZE;
+
+		kunmap_atomic(va);
+	}
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+}
+
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
-void stage2_flush_vm(struct kvm *kvm);
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 4176df7..1a0045a 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -253,21 +253,22 @@
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
-	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
-	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
+	mov	r2, sp
+	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	strex	r1, r2, [sp]			@ clear the exclusive monitor
+	strex	r1, r2, [r2]			@ clear the exclusive monitor
 #endif
 	.if	\fast
-	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
+	ldmdb	r2, {r1 - lr}^			@ get calling r1 - lr
 	.else
-	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
+	ldmdb	r2, {r0 - lr}^			@ get calling r0 - lr
 	.endif
 	mov	r0, r0				@ ARMv5T and earlier require a nop
 						@ after ldm {}^
-	add	sp, sp, #S_FRAME_SIZE - S_PC
+	add	sp, sp, #\offset + S_FRAME_SIZE
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index f7c65ad..557e128 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -116,8 +116,14 @@
 		ret = 1;
 	}
 
-	if (left > (s64)armpmu->max_period)
-		left = armpmu->max_period;
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 715ae19..e55408e 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -657,10 +657,13 @@
 
 	/*
 	 * Ensure that start/size are aligned to a page boundary.
-	 * Size is appropriately rounded down, start is rounded up.
+	 * Size is rounded down, start is rounded up.
 	 */
-	size -= start & ~PAGE_MASK;
 	aligned_start = PAGE_ALIGN(start);
+	if (aligned_start > start + size)
+		size = 0;
+	else
+		size -= aligned_start - start;
 
 #ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT
 	if (aligned_start > ULONG_MAX) {
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2d6d910..0b0d58a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -281,15 +281,6 @@
 	vcpu->cpu = cpu;
 	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
-	/*
-	 * Check whether this vcpu requires the cache to be flushed on
-	 * this physical CPU. This is a consequence of doing dcache
-	 * operations by set/way on this vcpu. We do it here to be in
-	 * a non-preemptible section.
-	 */
-	if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
-		flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
-
 	kvm_arm_set_running_vcpu(vcpu);
 }
 
@@ -541,7 +532,6 @@
 		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
-		vcpu->arch.last_pcpu = smp_processor_id();
 		kvm_guest_exit();
 		trace_kvm_exit(*vcpu_pc(vcpu));
 		/*
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 7928dbd..f3d88dc 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -189,82 +189,40 @@
 	return true;
 }
 
-/* See note at ARM ARM B1.14.4 */
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct coproc_params *p,
 			const struct coproc_reg *r)
 {
-	unsigned long val;
-	int cpu;
-
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p);
 
-	cpu = get_cpu();
-
-	cpumask_setall(&vcpu->arch.require_dcache_flush);
-	cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
-
-	/* If we were already preempted, take the long way around */
-	if (cpu != vcpu->arch.last_pcpu) {
-		flush_cache_all();
-		goto done;
-	}
-
-	val = *vcpu_reg(vcpu, p->Rt1);
-
-	switch (p->CRm) {
-	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
-	case 14:		/* DCCISW */
-		asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
-		break;
-
-	case 10:		/* DCCSW */
-		asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
-		break;
-	}
-
-done:
-	put_cpu();
-
+	kvm_set_way_flush(vcpu);
 	return true;
 }
 
 /*
  * Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set.
+ * is set.  If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
  */
-static bool access_vm_reg(struct kvm_vcpu *vcpu,
-			  const struct coproc_params *p,
-			  const struct coproc_reg *r)
+bool access_vm_reg(struct kvm_vcpu *vcpu,
+		   const struct coproc_params *p,
+		   const struct coproc_reg *r)
 {
+	bool was_enabled = vcpu_has_cache_enabled(vcpu);
+
 	BUG_ON(!p->is_write);
 
 	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
 	if (p->is_64bit)
 		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
 
-	return true;
-}
-
-/*
- * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
- * guest enables the MMU, we stop trapping the VM sys_regs and leave
- * it in complete control of the caches.
- *
- * Used by the cpu-specific code.
- */
-bool access_sctlr(struct kvm_vcpu *vcpu,
-		  const struct coproc_params *p,
-		  const struct coproc_reg *r)
-{
-	access_vm_reg(vcpu, p, r);
-
-	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
-		vcpu->arch.hcr &= ~HCR_TVM;
-		stage2_flush_vm(vcpu->kvm);
-	}
-
+	kvm_toggle_cache(vcpu, was_enabled);
 	return true;
 }
 
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 1a44bbe..88d24a3 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,8 +153,8 @@
 #define is64		.is_64 = true
 #define is32		.is_64 = false
 
-bool access_sctlr(struct kvm_vcpu *vcpu,
-		  const struct coproc_params *p,
-		  const struct coproc_reg *r);
+bool access_vm_reg(struct kvm_vcpu *vcpu,
+		   const struct coproc_params *p,
+		   const struct coproc_reg *r);
 
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index e6f4ae4..a713675 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
 static const struct coproc_reg a15_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
+			access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 17fc7cd..b19e46d 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
 static const struct coproc_reg a7_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+			access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
 };
 
 static struct kvm_coproc_target_table a7_target_table = {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778..1366625 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+	__kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	__kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+	__kvm_flush_dcache_pud(pud);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
@@ -119,6 +139,26 @@
 	put_page(virt_to_page(pmd));
 }
 
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM.  However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
@@ -128,9 +168,16 @@
 	start_pte = pte = pte_offset_kernel(pmd, addr);
 	do {
 		if (!pte_none(*pte)) {
+			pte_t old_pte = *pte;
+
 			kvm_set_pte(pte, __pte(0));
-			put_page(virt_to_page(pte));
 			kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+			/* No need to invalidate the cache for device mappings */
+			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+				kvm_flush_dcache_pte(old_pte);
+
+			put_page(virt_to_page(pte));
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
@@ -149,8 +196,13 @@
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
 			if (kvm_pmd_huge(*pmd)) {
+				pmd_t old_pmd = *pmd;
+
 				pmd_clear(pmd);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pmd(old_pmd);
+
 				put_page(virt_to_page(pmd));
 			} else {
 				unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
 			if (pud_huge(*pud)) {
+				pud_t old_pud = *pud;
+
 				pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pud(old_pud);
+
 				put_page(virt_to_page(pud));
 			} else {
 				unmap_pmds(kvm, pud, addr, next);
@@ -209,10 +266,9 @@
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if (!pte_none(*pte)) {
-			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
-		}
+		if (!pte_none(*pte) &&
+		    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+			kvm_flush_dcache_pte(*pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
@@ -226,12 +282,10 @@
 	do {
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
-			} else {
+			if (kvm_pmd_huge(*pmd))
+				kvm_flush_dcache_pmd(*pmd);
+			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
-			}
 		}
 	} while (pmd++, addr = next, addr != end);
 }
@@ -246,12 +300,10 @@
 	do {
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
-			} else {
+			if (pud_huge(*pud))
+				kvm_flush_dcache_pud(*pud);
+			else
 				stage2_flush_pmds(kvm, pud, addr, next);
-			}
 		}
 	} while (pud++, addr = next, addr != end);
 }
@@ -278,7 +330,7 @@
  * Go through the stage 2 page tables and invalidate any cache lines
  * backing memory already mapped to the VM.
  */
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
@@ -905,6 +957,12 @@
 	return !pfn_valid(pfn);
 }
 
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+				      unsigned long size, bool uncached)
+{
+	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -994,8 +1052,7 @@
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1003,8 +1060,7 @@
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
-					  fault_ipa_uncached);
+		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
@@ -1411,3 +1467,71 @@
 	unmap_stage2_range(kvm, gpa, size);
 	spin_unlock(&kvm->mmu_lock);
 }
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ *   caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+	unsigned long hcr = vcpu_get_hcr(vcpu);
+
+	/*
+	 * If this is the first time we do a S/W operation
+	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
+	 * VM trapping.
+	 *
+	 * Otherwise, rely on the VM trapping to wait for the MMU +
+	 * Caches to be turned off. At that point, we'll be able to
+	 * clean the caches again.
+	 */
+	if (!(hcr & HCR_TVM)) {
+		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+					vcpu_has_cache_enabled(vcpu));
+		stage2_flush_vm(vcpu->kvm);
+		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+	}
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+	bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+	/*
+	 * If switching the MMU+caches on, need to invalidate the caches.
+	 * If switching it off, need to clean the caches.
+	 * Clean + invalidate does the trick always.
+	 */
+	if (now_enabled != was_enabled)
+		stage2_flush_vm(vcpu->kvm);
+
+	/* Caches are now on, stop trapping VM ops (until a S/W op) */
+	if (now_enabled)
+		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b1d640f..b6a6e71 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -223,6 +223,45 @@
 		  __entry->vcpu_pc, __entry->r0, __entry->imm)
 );
 
+TRACE_EVENT(kvm_set_way_flush,
+	    TP_PROTO(unsigned long vcpu_pc, bool cache),
+	    TP_ARGS(vcpu_pc, cache),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		cache		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->cache		= cache;
+	    ),
+
+	    TP_printk("S/W flush at 0x%016lx (cache %s)",
+		      __entry->vcpu_pc, __entry->cache ? "on" : "off")
+);
+
+TRACE_EVENT(kvm_toggle_cache,
+	    TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
+	    TP_ARGS(vcpu_pc, was, now),
+
+	    TP_STRUCT__entry(
+		    __field(	unsigned long,	vcpu_pc		)
+		    __field(	bool,		was		)
+		    __field(	bool,		now		)
+	    ),
+
+	    TP_fast_assign(
+		    __entry->vcpu_pc		= vcpu_pc;
+		    __entry->was		= was;
+		    __entry->now		= now;
+	    ),
+
+	    TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
+		      __entry->vcpu_pc, __entry->was ? "on" : "off",
+		      __entry->now ? "on" : "off")
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/arm/mach-at91/board-dt-sama5.c b/arch/arm/mach-at91/board-dt-sama5.c
index 8fb9ef5..97f7367 100644
--- a/arch/arm/mach-at91/board-dt-sama5.c
+++ b/arch/arm/mach-at91/board-dt-sama5.c
@@ -17,6 +17,7 @@
 #include <linux/of_platform.h>
 #include <linux/phy.h>
 #include <linux/clk-provider.h>
+#include <linux/phy.h>
 
 #include <asm/setup.h>
 #include <asm/irq.h>
@@ -26,8 +27,25 @@
 
 #include "generic.h"
 
+static int ksz8081_phy_fixup(struct phy_device *phy)
+{
+	int value;
+
+	value = phy_read(phy, 0x16);
+	value &= ~0x20;
+	phy_write(phy, 0x16, value);
+
+	return 0;
+}
+
 static void __init sama5_dt_device_init(void)
 {
+	if (of_machine_is_compatible("atmel,sama5d4ek") &&
+	   IS_ENABLED(CONFIG_PHYLIB)) {
+		phy_register_fixup_for_id("fc028000.etherne:00",
+						ksz8081_phy_fixup);
+	}
+
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index 5951660..2daef61 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -144,7 +144,7 @@
 		post_div_table[1].div = 1;
 		post_div_table[2].div = 1;
 		video_div_table[1].div = 1;
-		video_div_table[2].div = 1;
+		video_div_table[3].div = 1;
 	}
 
 	clk[IMX6QDL_PLL1_BYPASS_SRC] = imx_clk_mux("pll1_bypass_src", base + 0x00, 14, 2, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
diff --git a/arch/arm/mach-imx/clk-imx6sx.c b/arch/arm/mach-imx/clk-imx6sx.c
index 17354a1..5a3e5a1 100644
--- a/arch/arm/mach-imx/clk-imx6sx.c
+++ b/arch/arm/mach-imx/clk-imx6sx.c
@@ -558,6 +558,9 @@
 	clk_set_parent(clks[IMX6SX_CLK_GPU_CORE_SEL], clks[IMX6SX_CLK_PLL3_PFD0]);
 	clk_set_parent(clks[IMX6SX_CLK_GPU_AXI_SEL], clks[IMX6SX_CLK_PLL3_PFD0]);
 
+	clk_set_parent(clks[IMX6SX_CLK_QSPI1_SEL], clks[IMX6SX_CLK_PLL2_BUS]);
+	clk_set_parent(clks[IMX6SX_CLK_QSPI2_SEL], clks[IMX6SX_CLK_PLL2_BUS]);
+
 	/* Set initial power mode */
 	imx6q_set_lpm(WAIT_CLOCKED);
 }
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
index 3585cb3..caa21e9 100644
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@@ -246,9 +246,14 @@
 	return type;
 }
 
+/*
+ * As a precaution, we currently completely disable hardware I/O
+ * coherency, until enough testing is done with automatic I/O
+ * synchronization barriers to validate that it is a proper solution.
+ */
 int coherency_available(void)
 {
-	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
+	return false;
 }
 
 int __init coherency_init(void)
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 608079a..b61c049 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -77,6 +77,24 @@
 #endif
 
 #ifdef CONFIG_ARCH_OMAP3
+/* Some boards need board name for legacy userspace in /proc/cpuinfo */
+static const char *const n900_boards_compat[] __initconst = {
+	"nokia,omap3-n900",
+	NULL,
+};
+
+DT_MACHINE_START(OMAP3_N900_DT, "Nokia RX-51 board")
+	.reserve	= omap_reserve,
+	.map_io		= omap3_map_io,
+	.init_early	= omap3430_init_early,
+	.init_machine	= omap_generic_init,
+	.init_late	= omap3_init_late,
+	.init_time	= omap3_sync32k_timer_init,
+	.dt_compat	= n900_boards_compat,
+	.restart	= omap3xxx_restart,
+MACHINE_END
+
+/* Generic omap3 boards, most boards can use these */
 static const char *const omap3_boards_compat[] __initconst = {
 	"ti,omap3430",
 	"ti,omap3",
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 377eea8..64e44d6 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -211,6 +211,7 @@
 extern struct device *omap2_get_l3_device(void);
 extern struct device *omap4_get_dsp_device(void);
 
+unsigned int omap4_xlate_irq(unsigned int hwirq);
 void omap_gic_of_init(void);
 
 #ifdef CONFIG_CACHE_L2X0
@@ -249,6 +250,7 @@
 extern struct smp_operations omap4_smp_ops;
 
 extern void omap5_secondary_startup(void);
+extern void omap5_secondary_hyp_startup(void);
 #endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PM)
diff --git a/arch/arm/mach-omap2/control.h b/arch/arm/mach-omap2/control.h
index a3c0133..a80ac2d 100644
--- a/arch/arm/mach-omap2/control.h
+++ b/arch/arm/mach-omap2/control.h
@@ -286,6 +286,10 @@
 #define OMAP5XXX_CONTROL_STATUS                0x134
 #define OMAP5_DEVICETYPE_MASK          (0x7 << 6)
 
+/* DRA7XX CONTROL CORE BOOTSTRAP */
+#define DRA7_CTRL_CORE_BOOTSTRAP	0x6c4
+#define DRA7_SPEEDSELECT_MASK		(0x3 << 8)
+
 /*
  * REVISIT: This list of registers is not comprehensive - there are more
  * that should be added.
diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S
index 4993d4b..6d1dffc 100644
--- a/arch/arm/mach-omap2/omap-headsmp.S
+++ b/arch/arm/mach-omap2/omap-headsmp.S
@@ -22,6 +22,7 @@
 
 /* Physical address needed since MMU not enabled yet on secondary core */
 #define AUX_CORE_BOOT0_PA			0x48281800
+#define API_HYP_ENTRY				0x102
 
 /*
  * OMAP5 specific entry point for secondary CPU to jump from ROM
@@ -41,6 +42,26 @@
 	b	secondary_startup
 ENDPROC(omap5_secondary_startup)
 /*
+ * Same as omap5_secondary_startup except we call into the ROM to
+ * enable HYP mode first.  This is called instead of
+ * omap5_secondary_startup if the primary CPU was put into HYP mode by
+ * the boot loader.
+ */
+ENTRY(omap5_secondary_hyp_startup)
+wait_2:	ldr	r2, =AUX_CORE_BOOT0_PA	@ read from AuxCoreBoot0
+	ldr	r0, [r2]
+	mov	r0, r0, lsr #5
+	mrc	p15, 0, r4, c0, c0, 5
+	and	r4, r4, #0x0f
+	cmp	r0, r4
+	bne	wait_2
+	ldr	r12, =API_HYP_ENTRY
+	adr	r0, hyp_boot
+	smc	#0
+hyp_boot:
+	b	secondary_startup
+ENDPROC(omap5_secondary_hyp_startup)
+/*
  * OMAP4 specific entry point for secondary CPU to jump from ROM
  * code.  This routine also provides a holding flag into which
  * secondary core is held until we're ready for it to initialise.
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 256e84e..5305ec7 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -22,6 +22,7 @@
 #include <linux/irqchip/arm-gic.h>
 
 #include <asm/smp_scu.h>
+#include <asm/virt.h>
 
 #include "omap-secure.h"
 #include "omap-wakeupgen.h"
@@ -227,8 +228,16 @@
 	if (omap_secure_apis_support())
 		omap_auxcoreboot_addr(virt_to_phys(startup_addr));
 	else
-		writel_relaxed(virt_to_phys(omap5_secondary_startup),
-			       base + OMAP_AUX_CORE_BOOT_1);
+		/*
+		 * If the boot CPU is in HYP mode then start secondary
+		 * CPU in HYP mode as well.
+		 */
+		if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
+			writel_relaxed(virt_to_phys(omap5_secondary_hyp_startup),
+				       base + OMAP_AUX_CORE_BOOT_1);
+		else
+			writel_relaxed(virt_to_phys(omap5_secondary_startup),
+				       base + OMAP_AUX_CORE_BOOT_1);
 
 }
 
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index b7cb44a..cc30e49 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -256,6 +256,38 @@
 }
 omap_early_initcall(omap4_sar_ram_init);
 
+static struct of_device_id gic_match[] = {
+	{ .compatible = "arm,cortex-a9-gic", },
+	{ .compatible = "arm,cortex-a15-gic", },
+	{ },
+};
+
+static struct device_node *gic_node;
+
+unsigned int omap4_xlate_irq(unsigned int hwirq)
+{
+	struct of_phandle_args irq_data;
+	unsigned int irq;
+
+	if (!gic_node)
+		gic_node = of_find_matching_node(NULL, gic_match);
+
+	if (WARN_ON(!gic_node))
+		return hwirq;
+
+	irq_data.np = gic_node;
+	irq_data.args_count = 3;
+	irq_data.args[0] = 0;
+	irq_data.args[1] = hwirq - OMAP44XX_IRQ_GIC_START;
+	irq_data.args[2] = IRQ_TYPE_LEVEL_HIGH;
+
+	irq = irq_create_of_mapping(&irq_data);
+	if (WARN_ON(!irq))
+		irq = hwirq;
+
+	return irq;
+}
+
 void __init omap_gic_of_init(void)
 {
 	struct device_node *np;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index cbb908d..9025fff 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -3534,9 +3534,15 @@
 
 	mpu_irqs_cnt = _count_mpu_irqs(oh);
 	for (i = 0; i < mpu_irqs_cnt; i++) {
+		unsigned int irq;
+
+		if (oh->xlate_irq)
+			irq = oh->xlate_irq((oh->mpu_irqs + i)->irq);
+		else
+			irq = (oh->mpu_irqs + i)->irq;
 		(res + r)->name = (oh->mpu_irqs + i)->name;
-		(res + r)->start = (oh->mpu_irqs + i)->irq;
-		(res + r)->end = (oh->mpu_irqs + i)->irq;
+		(res + r)->start = irq;
+		(res + r)->end = irq;
 		(res + r)->flags = IORESOURCE_IRQ;
 		r++;
 	}
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 35ca6ef..5b42faf 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -676,6 +676,7 @@
 	spinlock_t			_lock;
 	struct list_head		node;
 	struct omap_hwmod_ocp_if	*_mpu_port;
+	unsigned int			(*xlate_irq)(unsigned int);
 	u16				flags;
 	u8				mpu_rt_idx;
 	u8				response_lat;
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index c314b3c..f5e68a7 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -479,6 +479,7 @@
 	.class		= &omap44xx_dma_hwmod_class,
 	.clkdm_name	= "l3_dma_clkdm",
 	.mpu_irqs	= omap44xx_dma_system_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.main_clk	= "l3_div_ck",
 	.prcm = {
 		.omap4 = {
@@ -640,6 +641,7 @@
 	.class		= &omap44xx_dispc_hwmod_class,
 	.clkdm_name	= "l3_dss_clkdm",
 	.mpu_irqs	= omap44xx_dss_dispc_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_dispc_sdma_reqs,
 	.main_clk	= "dss_dss_clk",
 	.prcm = {
@@ -693,6 +695,7 @@
 	.class		= &omap44xx_dsi_hwmod_class,
 	.clkdm_name	= "l3_dss_clkdm",
 	.mpu_irqs	= omap44xx_dss_dsi1_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_dsi1_sdma_reqs,
 	.main_clk	= "dss_dss_clk",
 	.prcm = {
@@ -726,6 +729,7 @@
 	.class		= &omap44xx_dsi_hwmod_class,
 	.clkdm_name	= "l3_dss_clkdm",
 	.mpu_irqs	= omap44xx_dss_dsi2_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_dsi2_sdma_reqs,
 	.main_clk	= "dss_dss_clk",
 	.prcm = {
@@ -784,6 +788,7 @@
 	 */
 	.flags		= HWMOD_SWSUP_SIDLE,
 	.mpu_irqs	= omap44xx_dss_hdmi_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.sdma_reqs	= omap44xx_dss_hdmi_sdma_reqs,
 	.main_clk	= "dss_48mhz_clk",
 	.prcm = {
diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
index 3e95230..7c3fac0 100644
--- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
@@ -288,6 +288,7 @@
 	.class		= &omap54xx_dma_hwmod_class,
 	.clkdm_name	= "dma_clkdm",
 	.mpu_irqs	= omap54xx_dma_system_irqs,
+	.xlate_irq	= omap4_xlate_irq,
 	.main_clk	= "l3_iclk_div",
 	.prcm = {
 		.omap4 = {
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index a8e4b58..6163d66 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -498,6 +498,7 @@
 	u8 nr_irqs;
 	const struct omap_prcm_irq *irqs;
 	int irq;
+	unsigned int (*xlate_irq)(unsigned int);
 	void (*read_pending_irqs)(unsigned long *events);
 	void (*ocp_barrier)(void);
 	void (*save_and_clear_irqen)(u32 *saved_mask);
diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index cc170fb..408c64e 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -49,6 +49,7 @@
 	.irqs			= omap4_prcm_irqs,
 	.nr_irqs		= ARRAY_SIZE(omap4_prcm_irqs),
 	.irq			= 11 + OMAP44XX_IRQ_GIC_START,
+	.xlate_irq		= omap4_xlate_irq,
 	.read_pending_irqs	= &omap44xx_prm_read_pending_irqs,
 	.ocp_barrier		= &omap44xx_prm_ocp_barrier,
 	.save_and_clear_irqen	= &omap44xx_prm_save_and_clear_irqen,
@@ -751,8 +752,10 @@
 		}
 
 		/* Once OMAP4 DT is filled as well */
-		if (irq_num >= 0)
+		if (irq_num >= 0) {
 			omap4_prcm_irq_setup.irq = irq_num;
+			omap4_prcm_irq_setup.xlate_irq = NULL;
+		}
 	}
 
 	omap44xx_prm_enable_io_wakeup();
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 779940c..dea2833 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -187,6 +187,7 @@
  */
 void omap_prcm_irq_cleanup(void)
 {
+	unsigned int irq;
 	int i;
 
 	if (!prcm_irq_setup) {
@@ -211,7 +212,11 @@
 	kfree(prcm_irq_setup->priority_mask);
 	prcm_irq_setup->priority_mask = NULL;
 
-	irq_set_chained_handler(prcm_irq_setup->irq, NULL);
+	if (prcm_irq_setup->xlate_irq)
+		irq = prcm_irq_setup->xlate_irq(prcm_irq_setup->irq);
+	else
+		irq = prcm_irq_setup->irq;
+	irq_set_chained_handler(irq, NULL);
 
 	if (prcm_irq_setup->base_irq > 0)
 		irq_free_descs(prcm_irq_setup->base_irq,
@@ -259,6 +264,7 @@
 	int offset, i;
 	struct irq_chip_generic *gc;
 	struct irq_chip_type *ct;
+	unsigned int irq;
 
 	if (!irq_setup)
 		return -EINVAL;
@@ -298,7 +304,11 @@
 				1 << (offset & 0x1f);
 	}
 
-	irq_set_chained_handler(irq_setup->irq, omap_prcm_irq_handler);
+	if (irq_setup->xlate_irq)
+		irq = irq_setup->xlate_irq(irq_setup->irq);
+	else
+		irq = irq_setup->irq;
+	irq_set_chained_handler(irq, omap_prcm_irq_handler);
 
 	irq_setup->base_irq = irq_alloc_descs(-1, 0, irq_setup->nr_regs * 32,
 		0);
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 4f61148..7d45c84 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -54,6 +54,7 @@
 
 #include "soc.h"
 #include "common.h"
+#include "control.h"
 #include "powerdomain.h"
 #include "omap-secure.h"
 
@@ -496,7 +497,8 @@
 	void __iomem *base;
 	static struct clk *sys_clk;
 	unsigned long rate;
-	unsigned int reg, num, den;
+	unsigned int reg;
+	unsigned long long num, den;
 
 	base = ioremap(REALTIME_COUNTER_BASE, SZ_32);
 	if (!base) {
@@ -511,13 +513,42 @@
 	}
 
 	rate = clk_get_rate(sys_clk);
+
+	if (soc_is_dra7xx()) {
+		/*
+		 * Errata i856 says the 32.768KHz crystal does not start at
+		 * power on, so the CPU falls back to an emulated 32KHz clock
+		 * based on sysclk / 610 instead. This causes the master counter
+		 * frequency to not be 6.144MHz but at sysclk / 610 * 375 / 2
+		 * (OR sysclk * 75 / 244)
+		 *
+		 * This affects at least the DRA7/AM572x 1.0, 1.1 revisions.
+		 * Of course any board built without a populated 32.768KHz
+		 * crystal would also need this fix even if the CPU is fixed
+		 * later.
+		 *
+		 * Either case can be detected by using the two speedselect bits
+		 * If they are not 0, then the 32.768KHz clock driving the
+		 * coarse counter that corrects the fine counter every time it
+		 * ticks is actually rate/610 rather than 32.768KHz and we
+		 * should compensate to avoid the 570ppm (at 20MHz, much worse
+		 * at other rates) too fast system time.
+		 */
+		reg = omap_ctrl_readl(DRA7_CTRL_CORE_BOOTSTRAP);
+		if (reg & DRA7_SPEEDSELECT_MASK) {
+			num = 75;
+			den = 244;
+			goto sysclk1_based;
+		}
+	}
+
 	/* Numerator/denumerator values refer TRM Realtime Counter section */
 	switch (rate) {
-	case 1200000:
+	case 12000000:
 		num = 64;
 		den = 125;
 		break;
-	case 1300000:
+	case 13000000:
 		num = 768;
 		den = 1625;
 		break;
@@ -529,11 +560,11 @@
 		num = 192;
 		den = 625;
 		break;
-	case 2600000:
+	case 26000000:
 		num = 384;
 		den = 1625;
 		break;
-	case 2700000:
+	case 27000000:
 		num = 256;
 		den = 1125;
 		break;
@@ -545,6 +576,7 @@
 		break;
 	}
 
+sysclk1_based:
 	/* Program numerator and denumerator registers */
 	reg = readl_relaxed(base + INCREMENTER_NUMERATOR_OFFSET) &
 			NUMERATOR_DENUMERATOR_MASK;
@@ -556,7 +588,7 @@
 	reg |= den;
 	writel_relaxed(reg, base + INCREMENTER_DENUMERATOR_RELOAD_OFFSET);
 
-	arch_timer_freq = (rate / den) * num;
+	arch_timer_freq = DIV_ROUND_UP_ULL(rate * num, den);
 	set_cntfreq();
 
 	iounmap(base);
diff --git a/arch/arm/mach-omap2/twl-common.c b/arch/arm/mach-omap2/twl-common.c
index 4457e73..292eca0 100644
--- a/arch/arm/mach-omap2/twl-common.c
+++ b/arch/arm/mach-omap2/twl-common.c
@@ -66,19 +66,24 @@
 	omap_register_i2c_bus(bus, clkrate, &pmic_i2c_board_info, 1);
 }
 
+#ifdef CONFIG_ARCH_OMAP4
 void __init omap4_pmic_init(const char *pmic_type,
 		    struct twl4030_platform_data *pmic_data,
 		    struct i2c_board_info *devices, int nr_devices)
 {
 	/* PMIC part*/
+	unsigned int irq;
+
 	omap_mux_init_signal("sys_nirq1", OMAP_PIN_INPUT_PULLUP | OMAP_PIN_OFF_WAKEUPENABLE);
 	omap_mux_init_signal("fref_clk0_out.sys_drm_msecure", OMAP_PIN_OUTPUT);
-	omap_pmic_init(1, 400, pmic_type, 7 + OMAP44XX_IRQ_GIC_START, pmic_data);
+	irq = omap4_xlate_irq(7 + OMAP44XX_IRQ_GIC_START);
+	omap_pmic_init(1, 400, pmic_type, irq, pmic_data);
 
 	/* Register additional devices on i2c1 bus if needed */
 	if (devices)
 		i2c_register_board_info(1, devices, nr_devices);
 }
+#endif
 
 void __init omap_pmic_late_init(void)
 {
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index d226b71..a611f48 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -19,11 +19,37 @@
 #include <linux/init.h>
 #include <linux/of_platform.h>
 #include <linux/irqchip.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/hardware/cache-l2x0.h>
 #include "core.h"
 
+#define RK3288_GRF_SOC_CON0 0x244
+
+static void __init rockchip_timer_init(void)
+{
+	if (of_machine_is_compatible("rockchip,rk3288")) {
+		struct regmap *grf;
+
+		/*
+		 * Disable auto jtag/sdmmc switching that causes issues
+		 * with the mmc controllers making them unreliable
+		 */
+		grf = syscon_regmap_lookup_by_compatible("rockchip,rk3288-grf");
+		if (!IS_ERR(grf))
+			regmap_write(grf, RK3288_GRF_SOC_CON0, 0x10000000);
+		else
+			pr_err("rockchip: could not get grf syscon\n");
+	}
+
+	of_clk_init(NULL);
+	clocksource_of_init();
+}
+
 static void __init rockchip_dt_init(void)
 {
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
@@ -42,6 +68,7 @@
 DT_MACHINE_START(ROCKCHIP_DT, "Rockchip Cortex-A9 (Device Tree)")
 	.l2c_aux_val	= 0,
 	.l2c_aux_mask	= ~0,
+	.init_time	= rockchip_timer_init,
 	.dt_compat	= rockchip_board_dt_compat,
 	.init_machine	= rockchip_dt_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c
index 79ad93d..d191cf4 100644
--- a/arch/arm/mach-shmobile/setup-r8a7740.c
+++ b/arch/arm/mach-shmobile/setup-r8a7740.c
@@ -800,7 +800,14 @@
 	void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10);
 	void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4);
 
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	void __iomem *gic_dist_base = ioremap_nocache(0xc2800000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xc2000000, 0x1000);
+
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+#else
 	irqchip_init();
+#endif
 
 	/* route signals to GIC */
 	iowrite32(0x0, pfc_inta_ctrl);
diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c
index 170bd14..cef8895 100644
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c
@@ -576,11 +576,18 @@
 void __init r8a7778_init_irq_dt(void)
 {
 	void __iomem *base = ioremap_nocache(0xfe700000, 0x00100000);
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	void __iomem *gic_dist_base = ioremap_nocache(0xfe438000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xfe430000, 0x1000);
+#endif
 
 	BUG_ON(!base);
 
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+#else
 	irqchip_init();
-
+#endif
 	/* route all interrupts to ARM */
 	__raw_writel(0x73ffffff, base + INT2NTSR0);
 	__raw_writel(0xffffffff, base + INT2NTSR1);
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 6156d17..27dceaf9 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -720,10 +720,17 @@
 
 void __init r8a7779_init_irq_dt(void)
 {
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000);
+	void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000);
+#endif
 	gic_arch_extn.irq_set_wake = r8a7779_set_wake;
 
+#ifdef CONFIG_ARCH_SHMOBILE_LEGACY
+	gic_init(0, 29, gic_dist_base, gic_cpu_base);
+#else
 	irqchip_init();
-
+#endif
 	/* route all interrupts to ARM */
 	__raw_writel(0xffffffff, INT2NTSR0);
 	__raw_writel(0x3fffffff, INT2NTSR1);
diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c
index 93ebe34..fb5e1bb 100644
--- a/arch/arm/mach-shmobile/setup-sh73a0.c
+++ b/arch/arm/mach-shmobile/setup-sh73a0.c
@@ -595,6 +595,7 @@
 
 static struct renesas_intc_irqpin_config irqpin0_platform_data = {
 	.irq_base = irq_pin(0), /* IRQ0 -> IRQ7 */
+	.control_parent = true,
 };
 
 static struct resource irqpin0_resources[] = {
@@ -656,6 +657,7 @@
 
 static struct renesas_intc_irqpin_config irqpin2_platform_data = {
 	.irq_base = irq_pin(16), /* IRQ16 -> IRQ23 */
+	.control_parent = true,
 };
 
 static struct resource irqpin2_resources[] = {
@@ -686,6 +688,7 @@
 
 static struct renesas_intc_irqpin_config irqpin3_platform_data = {
 	.irq_base = irq_pin(24), /* IRQ24 -> IRQ31 */
+	.control_parent = true,
 };
 
 static struct resource irqpin3_resources[] = {
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 1c43cec..0666888 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -85,6 +85,7 @@
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=$(boot)/dts
 
 define archhelp
   echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index 3b8d427..c62b0f4 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -3,6 +3,4 @@
 dts-dirs += arm
 dts-dirs += cavium
 
-always		:= $(dtb-y)
 subdir-y	:= $(dts-dirs)
-clean-files	:= *.dtb
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
index cb3073e..d429129 100644
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -22,7 +22,7 @@
 	};
 
 	chosen {
-		stdout-path = &soc_uart0;
+		stdout-path = "serial0:115200n8";
 	};
 
 	psci {
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 8127e45..3cb4c85 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -41,6 +41,18 @@
 static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
+		vcpu->arch.hcr_el2 &= ~HCR_RW;
+}
+
+static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hcr_el2;
+}
+
+static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+{
+	vcpu->arch.hcr_el2 = hcr;
 }
 
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0b7dfdb..acd101a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -116,9 +116,6 @@
 	 * Anything that is not used directly from assembly code goes
 	 * here.
 	 */
-	/* dcache set/way operation pending */
-	int last_pcpu;
-	cpumask_t require_dcache_flush;
 
 	/* Don't run the guest */
 	bool pause;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 14a74f1..adcf495 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -243,24 +243,46 @@
 	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size,
-					     bool ipa_uncached)
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+					       unsigned long size,
+					       bool ipa_uncached)
 {
+	void *va = page_address(pfn_to_page(pfn));
+
 	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
-		kvm_flush_dcache_to_poc((void *)hva, size);
+		kvm_flush_dcache_to_poc(va, size);
 
 	if (!icache_is_aliasing()) {		/* PIPT */
-		flush_icache_range(hva, hva + size);
+		flush_icache_range((unsigned long)va,
+				   (unsigned long)va + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	}
 }
 
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+	struct page *page = pte_page(pte);
+	kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+	struct page *page = pud_page(pud);
+	kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+}
+
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
-void stage2_flush_vm(struct kvm *kvm);
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index b780c6c..23e9432 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
 
-#define __NR_compat_syscalls		387
+#define __NR_compat_syscalls		388
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 8893ceb..2722442 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -795,3 +795,5 @@
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 386
 __SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_execveat 387
+__SYSCALL(__NR_execveat, compat_sys_execveat)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index fbe909f..c3ca89c 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -1014,6 +1014,7 @@
 	 * Instead, we invalidate Stage-2 for this IPA, and the
 	 * whole of Stage-1. Weep...
 	 */
+	lsr	x1, x1, #12
 	tlbi	ipas2e1is, x1
 	/*
 	 * We have to ensure completion of the invalidation at Stage-2,
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 70a7816..0b43265 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -90,7 +90,6 @@
 			if (!cpu_has_32bit_el1())
 				return -EINVAL;
 			cpu_reset = &default_regs_reset32;
-			vcpu->arch.hcr_el2 &= ~HCR_RW;
 		} else {
 			cpu_reset = &default_regs_reset;
 		}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3d7c2df..f31e8bb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -69,68 +69,31 @@
 	return ccsidr;
 }
 
-static void do_dc_cisw(u32 val)
-{
-	asm volatile("dc cisw, %x0" : : "r" (val));
-	dsb(ish);
-}
-
-static void do_dc_csw(u32 val)
-{
-	asm volatile("dc csw, %x0" : : "r" (val));
-	dsb(ish);
-}
-
-/* See note at ARM ARM B1.14.4 */
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
 {
-	unsigned long val;
-	int cpu;
-
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p);
 
-	cpu = get_cpu();
-
-	cpumask_setall(&vcpu->arch.require_dcache_flush);
-	cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
-
-	/* If we were already preempted, take the long way around */
-	if (cpu != vcpu->arch.last_pcpu) {
-		flush_cache_all();
-		goto done;
-	}
-
-	val = *vcpu_reg(vcpu, p->Rt);
-
-	switch (p->CRm) {
-	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
-	case 14:		/* DCCISW */
-		do_dc_cisw(val);
-		break;
-
-	case 10:		/* DCCSW */
-		do_dc_csw(val);
-		break;
-	}
-
-done:
-	put_cpu();
-
+	kvm_set_way_flush(vcpu);
 	return true;
 }
 
 /*
  * Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set.
+ * is set. If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
  */
 static bool access_vm_reg(struct kvm_vcpu *vcpu,
 			  const struct sys_reg_params *p,
 			  const struct sys_reg_desc *r)
 {
 	unsigned long val;
+	bool was_enabled = vcpu_has_cache_enabled(vcpu);
 
 	BUG_ON(!p->is_write);
 
@@ -143,25 +106,7 @@
 		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
 	}
 
-	return true;
-}
-
-/*
- * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set.  If the
- * guest enables the MMU, we stop trapping the VM sys_regs and leave
- * it in complete control of the caches.
- */
-static bool access_sctlr(struct kvm_vcpu *vcpu,
-			 const struct sys_reg_params *p,
-			 const struct sys_reg_desc *r)
-{
-	access_vm_reg(vcpu, p, r);
-
-	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
-		vcpu->arch.hcr_el2 &= ~HCR_TVM;
-		stage2_flush_vm(vcpu->kvm);
-	}
-
+	kvm_toggle_cache(vcpu, was_enabled);
 	return true;
 }
 
@@ -377,7 +322,7 @@
 	  NULL, reset_mpidr, MPIDR_EL1 },
 	/* SCTLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
+	  access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
 	/* CPACR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
 	  NULL, reset_val, CPACR_EL1, 0 },
@@ -657,7 +602,7 @@
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
-	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index cf33f33..d54dc9a 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -15,6 +15,7 @@
  */
 #include <linux/debugfs.h>
 #include <linux/fs.h>
+#include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index bac492c..c95464a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -335,14 +335,8 @@
 
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	if (!keep_initrd) {
-		if (start == initrd_start)
-			start = round_down(start, PAGE_SIZE);
-		if (end == initrd_end)
-			end = round_up(end, PAGE_SIZE);
-
+	if (!keep_initrd)
 		free_reserved_area((void *)start, (void *)end, 0, "initrd");
-	}
 }
 
 static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c
index 2c94129..164efa0 100644
--- a/arch/avr32/kernel/module.c
+++ b/arch/avr32/kernel/module.c
@@ -19,12 +19,10 @@
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>
 
-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
 {
 	vfree(mod->arch.syminfo);
 	mod->arch.syminfo = NULL;
-
-	vfree(module_region);
 }
 
 static inline int check_rela(Elf32_Rela *rela, struct module *module,
@@ -291,12 +289,3 @@
 
 	return ret;
 }
-
-int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-		    struct module *module)
-{
-	vfree(module->arch.syminfo);
-	module->arch.syminfo = NULL;
-
-	return 0;
-}
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 0eca933..d223a8b 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -142,6 +142,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 08a313f..f772068 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -604,7 +604,7 @@
 				  struct timespec *ts)
 {
 	unsigned long flags;
-	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int dev = MINOR(file_inode(file)->i_rdev);
 	int avail;
 	struct sync_port *port;
 	unsigned char *start;
diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c
index 51123f9..af04cb6 100644
--- a/arch/cris/kernel/module.c
+++ b/arch/cris/kernel/module.c
@@ -36,7 +36,7 @@
 }
 
 /* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_memfree(void *module_region)
 {
 	kfree(module_region);
 }
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 1790f22..2686a7a 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -176,6 +176,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index 67b1d16..0635bd6 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -94,7 +94,7 @@
 				r = &dev->resource[idx];
 				if (!r->start)
 					continue;
-				pci_claim_resource(dev, idx);
+				pci_claim_bridge_resource(dev, idx);
 			}
 		}
 		pcibios_allocate_bus_resources(&bus->children);
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
index 9a66372..ec4917d 100644
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -168,6 +168,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 24603be..29754aa 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -305,14 +305,12 @@
 #endif /* !USE_BRL */
 
 void
-module_free (struct module *mod, void *module_region)
+module_arch_freeing_init (struct module *mod)
 {
-	if (mod && mod->arch.init_unw_table &&
-	    module_region == mod->module_init) {
+	if (mod->arch.init_unw_table) {
 		unw_remove_unwind_table(mod->arch.init_unw_table);
 		mod->arch.init_unw_table = NULL;
 	}
-	vfree(module_region);
 }
 
 /* Have we already seen one of these relocations? */
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 7225dad..ba5ba7a 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -172,6 +172,8 @@
 		 */
 		if (fault & VM_FAULT_OOM) {
 			goto out_of_memory;
+		} else if (fault & VM_FAULT_SIGSEGV) {
+			goto bad_area;
 		} else if (fault & VM_FAULT_SIGBUS) {
 			signal = SIGBUS;
 			goto bad_area;
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 291a582..900cc93 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -487,45 +487,39 @@
 	return 0;
 }
 
-static int is_valid_resource(struct pci_dev *dev, int idx)
-{
-	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx], *busr;
-
-	if (!dev->bus)
-		return 0;
-
-	pci_bus_for_each_resource(dev->bus, busr, i) {
-		if (!busr || ((busr->flags ^ devr->flags) & type_mask))
-			continue;
-		if ((devr->start) && (devr->start >= busr->start) &&
-				(devr->end <= busr->end))
-			return 1;
-	}
-	return 0;
-}
-
-static void pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
-{
-	int i;
-
-	for (i = start; i < limit; i++) {
-		if (!dev->resource[i].flags)
-			continue;
-		if ((is_valid_resource(dev, i)))
-			pci_claim_resource(dev, i);
-	}
-}
-
 void pcibios_fixup_device_resources(struct pci_dev *dev)
 {
-	pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES);
+	int idx;
+
+	if (!dev->bus)
+		return;
+
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
+
+		if (!r->flags || r->parent || !r->start)
+			continue;
+
+		pci_claim_resource(dev, idx);
+	}
 }
 EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources);
 
 static void pcibios_fixup_bridge_resources(struct pci_dev *dev)
 {
-	pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES);
+	int idx;
+
+	if (!dev->bus)
+		return;
+
+	for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
+
+		if (!r->flags || r->parent || !r->start)
+			continue;
+
+		pci_claim_bridge_resource(dev, idx);
+	}
 }
 
 /*
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index e9c6a80..e3d4d48901 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -200,6 +200,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 75e75d7..244e0db 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		355
+#define NR_syscalls		356
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h
index 2c1bec9..61fb6cb 100644
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -360,5 +360,6 @@
 #define __NR_getrandom		352
 #define __NR_memfd_create	353
 #define __NR_bpf		354
+#define __NR_execveat		355
 
 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 2ca219e..a0ec430 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -375,4 +375,5 @@
 	.long sys_getrandom
 	.long sys_memfd_create
 	.long sys_bpf
+	.long sys_execveat		/* 355 */
 
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 2bd7487..b2f04ae 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -145,6 +145,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto map_err;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto bus_err;
 		BUG();
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
index 332680e..2de5dc6 100644
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -141,6 +141,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index fa4cf52..d46a5eb 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -224,6 +224,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index b30e41c..48528fb 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -1026,6 +1026,8 @@
 			 pr, (pr && pr->name) ? pr->name : "nil");
 
 		if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+			struct pci_dev *dev = bus->self;
+
 			if (request_resource(pr, res) == 0)
 				continue;
 			/*
@@ -1035,6 +1037,12 @@
 			 */
 			if (reparent_resources(pr, res) == 0)
 				continue;
+
+			if (dev && i < PCI_BRIDGE_RESOURCE_NUM &&
+			    pci_claim_bridge_resource(dev,
+						 i + PCI_BRIDGE_RESOURCES) == 0)
+				continue;
+
 		}
 		pr_warn("PCI: Cannot allocate resource region ");
 		pr_cont("%d of PCI bridge %d, will remap\n", i, bus->number);
@@ -1227,7 +1235,10 @@
 				 (unsigned long long)r->end,
 				 (unsigned int)r->flags);
 
-			pci_claim_resource(dev, i);
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);
 		}
 	}
 
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index becc42b..70ab5d6 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -158,6 +158,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 9fd6834..5d61393 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1388,7 +1388,7 @@
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
-		module_free(NULL, fp->bpf_func);
+		module_memfree(fp->bpf_func);
 
 	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index 3516cbd..0c2cc5d 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -262,6 +262,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
index febb9cd..b5b036f 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -106,7 +106,7 @@
 				if (!r->flags)
 					continue;
 				if (!r->start ||
-				    pci_claim_resource(dev, idx) < 0) {
+				    pci_claim_bridge_resource(dev, idx) < 0) {
 					printk(KERN_ERR "PCI:"
 					       " Cannot allocate resource"
 					       " region %d of bridge %s\n",
diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c
index 6b4339f..471ff39 100644
--- a/arch/mn10300/unit-asb2305/pci.c
+++ b/arch/mn10300/unit-asb2305/pci.c
@@ -281,42 +281,37 @@
 	return -ENODEV;
 }
 
-static int is_valid_resource(struct pci_dev *dev, int idx)
-{
-	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx], *busr;
-
-	if (dev->bus) {
-		pci_bus_for_each_resource(dev->bus, busr, i) {
-			if (!busr || (busr->flags ^ devr->flags) & type_mask)
-				continue;
-
-			if (devr->start &&
-			    devr->start >= busr->start &&
-			    devr->end <= busr->end)
-				return 1;
-		}
-	}
-
-	return 0;
-}
-
 static void pcibios_fixup_device_resources(struct pci_dev *dev)
 {
-	int limit, i;
+	int idx;
 
-	if (dev->bus->number != 0)
+	if (!dev->bus)
 		return;
 
-	limit = (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) ?
-		PCI_BRIDGE_RESOURCES : PCI_NUM_RESOURCES;
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
 
-	for (i = 0; i < limit; i++) {
-		if (!dev->resource[i].flags)
+		if (!r->flags || r->parent || !r->start)
 			continue;
 
-		if (is_valid_resource(dev, i))
-			pci_claim_resource(dev, i);
+		pci_claim_resource(dev, idx);
+	}
+}
+
+static void pcibios_fixup_bridge_resources(struct pci_dev *dev)
+{
+	int idx;
+
+	if (!dev->bus)
+		return;
+
+	for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+		struct resource *r = &dev->resource[idx];
+
+		if (!r->flags || r->parent || !r->start)
+			continue;
+
+		pci_claim_bridge_resource(dev, idx);
 	}
 }
 
@@ -330,7 +325,7 @@
 
 	if (bus->self) {
 		pci_read_bridge_bases(bus);
-		pcibios_fixup_device_resources(bus->self);
+		pcibios_fixup_bridge_resources(bus->self);
 	}
 
 	list_for_each_entry(dev, &bus->devices, bus_list)
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index cc924a3..e2e3f13 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -36,7 +36,7 @@
 }
 
 /* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_memfree(void *module_region)
 {
 	kfree(module_region);
 }
diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
index f9d2788..2d0ea25 100644
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@@ -200,7 +200,7 @@
 
 	/* Set up to return from userspace; jump to fixed address sigreturn
 	   trampoline on kuser page.  */
-	regs->ra = (unsigned long) (0x1040);
+	regs->ra = (unsigned long) (0x1044);
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long) frame;
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 15a0bb5..34429d5 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -135,6 +135,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 0703acf..230ac20 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -171,6 +171,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 50dfafc..5822e8e 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -298,14 +298,10 @@
 }
 #endif
 
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
 {
 	kfree(mod->arch.section);
 	mod->arch.section = NULL;
-
-	vfree(module_region);
 }
 
 /* Additional bytes needed in front of individual sections */
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 3ca9c11..e5120e6 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -256,6 +256,8 @@
 		 */
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto bad_area;
 		BUG();
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c
index d3feba5a2..c154ceb 100644
--- a/arch/powerpc/crypto/sha1.c
+++ b/arch/powerpc/crypto/sha1.c
@@ -154,4 +154,5 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
 
+MODULE_ALIAS_CRYPTO("sha1");
 MODULE_ALIAS_CRYPTO("sha1-powerpc");
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index ebc4f16..0be6c68 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -23,9 +23,9 @@
 #define THREAD_SIZE		(1 << THREAD_SHIFT)
 
 #ifdef CONFIG_PPC64
-#define CURRENT_THREAD_INFO(dest, sp)	clrrdi dest, sp, THREAD_SHIFT
+#define CURRENT_THREAD_INFO(dest, sp)	stringify_in_c(clrrdi dest, sp, THREAD_SHIFT)
 #else
-#define CURRENT_THREAD_INFO(dest, sp)	rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT
+#define CURRENT_THREAD_INFO(dest, sp)	stringify_in_c(rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT)
 #endif
 
 #ifndef __ASSEMBLY__
@@ -71,12 +71,13 @@
 #define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
 
 /* how to get the thread information struct from C */
-register unsigned long __current_r1 asm("r1");
 static inline struct thread_info *current_thread_info(void)
 {
-	/* gcc4, at least, is smart enough to turn this into a single
-	 * rlwinm for ppc32 and clrrdi for ppc64 */
-	return (struct thread_info *)(__current_r1 & ~(THREAD_SIZE-1));
+	unsigned long val;
+
+	asm (CURRENT_THREAD_INFO(%0,1) : "=r" (val));
+
+	return (struct thread_info *)val;
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 37d512d..2a525c9 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1184,6 +1184,8 @@
 			 pr, (pr && pr->name) ? pr->name : "nil");
 
 		if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+			struct pci_dev *dev = bus->self;
+
 			if (request_resource(pr, res) == 0)
 				continue;
 			/*
@@ -1193,6 +1195,11 @@
 			 */
 			if (reparent_resources(pr, res) == 0)
 				continue;
+
+			if (dev && i < PCI_BRIDGE_RESOURCE_NUM &&
+			    pci_claim_bridge_resource(dev,
+						i + PCI_BRIDGE_RESOURCES) == 0)
+				continue;
 		}
 		pr_warning("PCI: Cannot allocate resource region "
 			   "%d of PCI bridge %d, will remap\n", i, bus->number);
@@ -1401,7 +1408,10 @@
 				 (unsigned long long)r->end,
 				 (unsigned int)r->flags);
 
-			pci_claim_resource(dev, i);
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);
 		}
 	}
 
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 5a236f0..1b5305d 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -76,7 +76,7 @@
 		if (*flt & VM_FAULT_OOM) {
 			ret = -ENOMEM;
 			goto out_unlock;
-		} else if (*flt & VM_FAULT_SIGBUS) {
+		} else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
 			ret = -EFAULT;
 			goto out_unlock;
 		}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index eb79907..6154b0a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -437,6 +437,8 @@
 	 */
 	fault = handle_mm_fault(mm, vma, address, flags);
 	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+		if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		rc = mm_fault_error(regs, address, fault);
 		if (rc >= MM_FAULT_RETURN)
 			goto bail;
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 1ca125b..d1916b5 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -699,7 +699,7 @@
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
-		module_free(NULL, fp->bpf_func);
+		module_memfree(fp->bpf_func);
 
 	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 54eca8b..0509bca 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -40,7 +40,6 @@
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
 	ld	r12,opal_tracepoint_refcount@toc(r2);		\
-	std	r12,32(r1);					\
 	cmpdi	r12,0;						\
 	bne-	LABEL;						\
 1:
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index b700a32..d2de7d5 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -304,7 +304,7 @@
 	 * all cpus at boot. Get these reg values of current cpu and use the
 	 * same accross all cpus.
 	 */
-	uint64_t lpcr_val = mfspr(SPRN_LPCR);
+	uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
 	uint64_t hid0_val = mfspr(SPRN_HID0);
 	uint64_t hid1_val = mfspr(SPRN_HID1);
 	uint64_t hid4_val = mfspr(SPRN_HID4);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 5b150f0..13c6e20 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -337,6 +337,7 @@
 	args.token = rtas_token("set-indicator");
 	if (args.token == RTAS_UNKNOWN_SERVICE)
 		return;
+	args.token = cpu_to_be32(args.token);
 	args.nargs = cpu_to_be32(3);
 	args.nret = cpu_to_be32(1);
 	args.rets = &args.args[3];
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 32040ac..afbe079 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -231,7 +231,7 @@
 struct dbfs_d2fc_hdr {
 	u64	len;		/* Length of d2fc buffer without header */
 	u16	version;	/* Version of header */
-	char	tod_ext[16];	/* TOD clock for d2fc */
+	char	tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */
 	u64	count;		/* Number of VM guests in d2fc buffer */
 	char	reserved[30];
 } __attribute__ ((packed));
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 37b9091..16aa0c7 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -36,7 +36,7 @@
 
 static inline notrace unsigned long arch_local_save_flags(void)
 {
-	return __arch_local_irq_stosm(0x00);
+	return __arch_local_irq_stnsm(0xff);
 }
 
 static inline notrace unsigned long arch_local_irq_save(void)
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 8beee1c..98eb2a5 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -67,20 +67,22 @@
 	set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
+#define CLOCK_TICK_RATE		1193180 /* Underlying HZ */
+#define STORE_CLOCK_EXT_SIZE	16	/* stcke writes 16 bytes */
 
 typedef unsigned long long cycles_t;
 
-static inline void get_tod_clock_ext(char clk[16])
+static inline void get_tod_clock_ext(char *clk)
 {
-	typedef struct { char _[sizeof(clk)]; } addrtype;
+	typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype;
 
 	asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc");
 }
 
 static inline unsigned long long get_tod_clock(void)
 {
-	unsigned char clk[16];
+	unsigned char clk[STORE_CLOCK_EXT_SIZE];
+
 	get_tod_clock_ext(clk);
 	return *((unsigned long long *)&clk[1]);
 }
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 2b446cf..67878af 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -289,7 +289,8 @@
 #define __NR_bpf		351
 #define __NR_s390_pci_mmio_write	352
 #define __NR_s390_pci_mmio_read		353
-#define NR_syscalls 354
+#define __NR_execveat		354
+#define NR_syscalls 355
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index b89b591..409d152 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -55,14 +55,10 @@
 }
 #endif
 
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_arch_freeing_init(struct module *mod)
 {
-	if (mod) {
-		vfree(mod->arch.syminfo);
-		mod->arch.syminfo = NULL;
-	}
-	vfree(module_region);
+	vfree(mod->arch.syminfo);
+	mod->arch.syminfo = NULL;
 }
 
 static void check_rela(Elf_Rela *rela, struct module *me)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index a298724..939ec47 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -362,3 +362,4 @@
 SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
 SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
 SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
+SYSCALL(sys_execveat,sys_execveat,compat_sys_execveat)
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index f6b3cd0..cc73280 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -48,6 +48,30 @@
 	return false;
 }
 
+static int check_per_event(unsigned short cause, unsigned long control,
+			   struct pt_regs *regs)
+{
+	if (!(regs->psw.mask & PSW_MASK_PER))
+		return 0;
+	/* user space single step */
+	if (control == 0)
+		return 1;
+	/* over indication for storage alteration */
+	if ((control & 0x20200000) && (cause & 0x2000))
+		return 1;
+	if (cause & 0x8000) {
+		/* all branches */
+		if ((control & 0x80800000) == 0x80000000)
+			return 1;
+		/* branch into selected range */
+		if (((control & 0x80800000) == 0x80800000) &&
+		    regs->psw.addr >= current->thread.per_user.start &&
+		    regs->psw.addr <= current->thread.per_user.end)
+			return 1;
+	}
+	return 0;
+}
+
 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	int fixup = probe_get_fixup_type(auprobe->insn);
@@ -71,9 +95,13 @@
 		if (regs->psw.addr - utask->xol_vaddr == ilen)
 			regs->psw.addr = utask->vaddr + ilen;
 	}
-	/* If per tracing was active generate trap */
-	if (regs->psw.mask & PSW_MASK_PER)
-		do_per_trap(regs);
+	if (check_per_event(current->thread.per_event.cause,
+			    current->thread.per_user.control, regs)) {
+		/* fix per address */
+		current->thread.per_event.address = utask->vaddr;
+		/* trigger per event */
+		set_pt_regs_flag(regs, PIF_PER_TRAP);
+	}
 	return 0;
 }
 
@@ -106,6 +134,7 @@
 	clear_thread_flag(TIF_UPROBE_SINGLESTEP);
 	regs->int_code = auprobe->saved_int_code;
 	regs->psw.addr = current->utask->vaddr;
+	current->thread.per_event.address = current->utask->vaddr;
 }
 
 unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
@@ -146,17 +175,20 @@
 	__rc;						\
 })
 
-#define emu_store_ril(ptr, input)			\
+#define emu_store_ril(regs, ptr, input)			\
 ({							\
 	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(ptr) __ptr = (ptr);			\
 	int __rc = 0;					\
 							\
 	if (!test_facility(34))				\
 		__rc = EMU_ILLEGAL_OP;			\
-	else if ((u64 __force)ptr & mask)		\
+	else if ((u64 __force)__ptr & mask)		\
 		__rc = EMU_SPECIFICATION;		\
-	else if (put_user(*(input), ptr))		\
+	else if (put_user(*(input), __ptr))		\
 		__rc = EMU_ADDRESSING;			\
+	if (__rc == 0)					\
+		sim_stor_event(regs, __ptr, mask + 1);	\
 	__rc;						\
 })
 
@@ -198,6 +230,25 @@
 };
 
 /*
+ * If user per registers are setup to trace storage alterations and an
+ * emulated store took place on a fitting address a user trap is generated.
+ */
+static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
+{
+	if (!(regs->psw.mask & PSW_MASK_PER))
+		return;
+	if (!(current->thread.per_user.control & PER_EVENT_STORE))
+		return;
+	if ((void *)current->thread.per_user.start > (addr + len))
+		return;
+	if ((void *)current->thread.per_user.end < addr)
+		return;
+	current->thread.per_event.address = regs->psw.addr;
+	current->thread.per_event.cause = PER_EVENT_STORE >> 16;
+	set_pt_regs_flag(regs, PIF_PER_TRAP);
+}
+
+/*
  * pc relative instructions are emulated, since parameters may not be
  * accessible from the xol area due to range limitations.
  */
@@ -249,13 +300,13 @@
 			rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
 			break;
 		case 0x07: /* sthrl */
-			rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]);
+			rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]);
 			break;
 		case 0x0b: /* stgrl */
-			rc = emu_store_ril((u64 __user *)uptr, &rx->u64);
+			rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64);
 			break;
 		case 0x0f: /* strl */
-			rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]);
+			rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
 			break;
 		}
 		break;
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 7f0089d..e34122e5 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -128,8 +128,6 @@
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
 
-	WARN_ON_ONCE(!irqs_disabled());
-
 	timer = S390_lowcore.last_update_timer;
 	S390_lowcore.last_update_timer = get_vtimer();
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 811937b..9065d5a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -374,6 +374,12 @@
 				do_no_context(regs);
 			else
 				pagefault_out_of_memory();
+		} else if (fault & VM_FAULT_SIGSEGV) {
+			/* Kernel mode? Handle exceptions or die */
+			if (!user_mode(regs))
+				do_no_context(regs);
+			else
+				do_sigsegv(regs, SEGV_MAPERR);
 		} else if (fault & VM_FAULT_SIGBUS) {
 			/* Kernel mode? Handle exceptions or die */
 			if (!user_mode(regs))
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index be99357..3cf8cc0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -322,11 +322,12 @@
 static unsigned long __gmap_segment_gaddr(unsigned long *entry)
 {
 	struct page *page;
-	unsigned long offset;
+	unsigned long offset, mask;
 
 	offset = (unsigned long) entry / sizeof(unsigned long);
 	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
-	page = pmd_to_page((pmd_t *) entry);
+	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+	page = virt_to_page((void *)((unsigned long) entry & mask));
 	return page->index + offset;
 }
 
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
index 7e45d13..ba44c9f 100644
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -22,8 +22,8 @@
  * skb_copy_bits takes 4 parameters:
  *   %r2 = skb pointer
  *   %r3 = offset into skb data
- *   %r4 = length to copy
- *   %r5 = pointer to temp buffer
+ *   %r4 = pointer to temp buffer
+ *   %r5 = length to copy
  */
 #define SKBDATA	%r8
 
@@ -44,8 +44,9 @@
 
 sk_load_word_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,4			# 4 bytes
-	la	%r5,160(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,160(%r15)		# pointer to temp buffer
+	lghi	%r5,4			# 4 bytes
 	brasl	%r14,skb_copy_bits	# get data from skb
 	l	%r5,160(%r15)		# load result from temp buffer
 	ltgr	%r2,%r2			# set cc to (%r2 != 0)
@@ -69,8 +70,9 @@
 
 sk_load_half_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,2			# 2 bytes
-	la	%r5,162(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,162(%r15)		# pointer to temp buffer
+	lghi	%r5,2			# 2 bytes
 	brasl	%r14,skb_copy_bits	# get data from skb
 	xc	160(2,%r15),160(%r15)
 	l	%r5,160(%r15)		# load result from temp buffer
@@ -95,8 +97,9 @@
 
 sk_load_byte_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,1			# 1 bytes
-	la	%r5,163(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,163(%r15)		# pointer to temp buffer
+	lghi	%r5,1			# 1 byte
 	brasl	%r14,skb_copy_bits	# get data from skb
 	xc	160(3,%r15),160(%r15)
 	l	%r5,160(%r15)		# load result from temp buffer
@@ -104,11 +107,11 @@
 	lgr	%r2,%r9			# restore %r2
 	br	%r8
 
-	/* A = (*(u8 *)(skb->data+K) & 0xf) << 2 */
+	/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
 ENTRY(sk_load_byte_msh)
 	llgfr	%r1,%r3			# extend offset
 	clr	%r11,%r3		# hlen < offset ?
-	jle	sk_load_byte_slow
+	jle	sk_load_byte_msh_slow
 	lhi	%r12,0
 	ic	%r12,0(%r1,%r10)	# get byte from skb
 	nill	%r12,0x0f
@@ -118,8 +121,9 @@
 
 sk_load_byte_msh_slow:
 	lgr	%r9,%r2			# save %r2
-	lhi	%r4,2			# 2 bytes
-	la	%r5,162(%r15)		# pointer to temp buffer
+	lgr	%r3,%r1			# offset
+	la	%r4,163(%r15)		# pointer to temp buffer
+	lghi	%r5,1			# 1 byte
 	brasl	%r14,skb_copy_bits	# get data from skb
 	xc	160(3,%r15),160(%r15)
 	l	%r12,160(%r15)		# load result from temp buffer
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index c52ac77..bbd1981 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -431,8 +431,8 @@
 		EMIT4_DISP(0x88500000, K);
 		break;
 	case BPF_ALU | BPF_NEG: /* A = -A */
-		/* lnr %r5,%r5 */
-		EMIT2(0x1155);
+		/* lcr %r5,%r5 */
+		EMIT2(0x1355);
 		break;
 	case BPF_JMP | BPF_JA: /* ip += K */
 		offset = addrs[i + K] + jit->start - jit->prg;
@@ -448,15 +448,12 @@
 		mask = 0x800000; /* je */
 kbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
-			if (K <= 16383)
-				/* chi %r5,<K> */
-				EMIT4_IMM(0xa75e0000, K);
-			else if (test_facility(21))
+			if (test_facility(21))
 				/* clfi %r5,<K> */
 				EMIT6_IMM(0xc25f0000, K);
 			else
-				/* c %r5,<d(K)>(%r13) */
-				EMIT4_DISP(0x5950d000, EMIT_CONST(K));
+				/* cl %r5,<d(K)>(%r13) */
+				EMIT4_DISP(0x5550d000, EMIT_CONST(K));
 		}
 branch:		if (filter->jt == filter->jf) {
 			if (filter->jt == 0)
@@ -502,8 +499,8 @@
 xbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
 			jit->seen |= SEEN_XREG;
-			/* cr %r5,%r12 */
-			EMIT2(0x195c);
+			/* clr %r5,%r12 */
+			EMIT2(0x155c);
 		}
 		goto branch;
 	case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c
index 52238983..6860beb 100644
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -114,6 +114,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 541dc61..a58fec9 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -353,6 +353,8 @@
 	} else {
 		if (fault & VM_FAULT_SIGBUS)
 			do_sigbus(regs, error_code, address);
+		else if (fault & VM_FAULT_SIGSEGV)
+			bad_area(regs, error_code, address);
 		else
 			BUG();
 	}
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index b36365f..9ce5afe 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -639,7 +639,10 @@
 				       (unsigned long long)r->end,
 				       (unsigned int)r->flags);
 
-			pci_claim_resource(dev, i);
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);
 		}
 	}
 
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 908e8c1..70d8171 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -249,6 +249,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 18fcd71..4798232 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -446,6 +446,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index f33e7c7..7931eee 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -776,7 +776,7 @@
 				if (unlikely(proglen + ilen > oldproglen)) {
 					pr_err("bpb_jit_compile fatal error\n");
 					kfree(addrs);
-					module_free(NULL, image);
+					module_memfree(image);
 					return;
 				}
 				memcpy(image + proglen, temp, ilen);
@@ -822,7 +822,7 @@
 void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
-		module_free(NULL, fp->bpf_func);
+		module_memfree(fp->bpf_func);
 
 	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 96447c9..2305084 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -74,7 +74,7 @@
 
 
 /* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+void module_memfree(void *module_region)
 {
 	vfree(module_region);
 
@@ -83,7 +83,7 @@
 		     0, 0, 0, NULL, NULL, 0);
 
 	/*
-	 * FIXME: If module_region == mod->module_init, trim exception
+	 * FIXME: Add module_arch_freeing_init to trim exception
 	 * table entries.
 	 */
 }
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 565e25a..0f61a73 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -442,6 +442,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 5678c35..2096173 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -80,6 +80,8 @@
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
 				goto out_of_memory;
+			} else if (fault & VM_FAULT_SIGSEGV) {
+				goto out;
 			} else if (fault & VM_FAULT_SIGBUS) {
 				err = -EACCES;
 				goto out;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ba397bd..0dc9d01 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -857,7 +857,7 @@
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
+	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -868,6 +868,10 @@
 	  performance counters), and the NMI watchdog which detects hard
 	  lockups.
 
+config X86_UP_APIC_MSI
+	def_bool y
+	select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
+
 config X86_UP_IOAPIC
 	bool "IO-APIC support on uniprocessors"
 	depends on X86_UP_APIC
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index d999398..ad754b4 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -90,7 +90,7 @@
 suffix-$(CONFIG_KERNEL_LZ4) 	:= lz4
 
 RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
-	     perl $(srctree)/arch/x86/tools/calc_run_size.pl)
+	     $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh)
 quiet_cmd_mkpiggy = MKPIGGY $@
       cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
 
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dcc1c53..a950864 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -373,6 +373,8 @@
 				  unsigned long output_len,
 				  unsigned long run_size)
 {
+	unsigned char *output_orig = output;
+
 	real_mode = rmode;
 
 	sanitize_boot_params(real_mode);
@@ -421,7 +423,12 @@
 	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
-	handle_relocations(output, output_len);
+	/*
+	 * 32-bit always performs relocations. 64-bit relocations are only
+	 * needed if kASLR has chosen a different load address.
+	 */
+	if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
+		handle_relocations(output, output_len);
 	debug_putstr("done.\nBooting the kernel.\n");
 	return output;
 }
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a225a5ca..fd9f6b0 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -931,4 +931,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
 
-MODULE_ALIAS("sha1");
+MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0ab4f9f..3a45668 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -50,6 +50,7 @@
 
 extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 				  int trigger, int polarity);
+extern void (*__acpi_unregister_gsi)(u32 gsi);
 
 static inline void disable_acpi(void)
 {
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 50d033a..a94b82e 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -251,7 +251,8 @@
 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
 }
 
-#define _LDT_empty(info)				\
+/* This intentionally ignores lm, since 32-bit apps don't have that field. */
+#define LDT_empty(info)					\
 	((info)->base_addr		== 0	&&	\
 	 (info)->limit			== 0	&&	\
 	 (info)->contents		== 0	&&	\
@@ -261,11 +262,18 @@
 	 (info)->seg_not_present	== 1	&&	\
 	 (info)->useable		== 0)
 
-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
+/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
+static inline bool LDT_zero(const struct user_desc *info)
+{
+	return (info->base_addr		== 0 &&
+		info->limit		== 0 &&
+		info->contents		== 0 &&
+		info->read_exec_only	== 0 &&
+		info->seg_32bit		== 0 &&
+		info->limit_in_pages	== 0 &&
+		info->seg_not_present	== 0 &&
+		info->useable		== 0);
+}
 
 static inline void clear_LDT(void)
 {
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 40269a2..4b75d59 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -130,7 +130,25 @@
 static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
 			      unsigned long start, unsigned long end)
 {
-	mpx_notify_unmap(mm, vma, start, end);
+	/*
+	 * mpx_notify_unmap() goes and reads a rarely-hot
+	 * cacheline in the mm_struct.  That can be expensive
+	 * enough to be seen in profiles.
+	 *
+	 * The mpx_notify_unmap() call and its contents have been
+	 * observed to affect munmap() performance on hardware
+	 * where MPX is not present.
+	 *
+	 * The unlikely() optimizes for the fast case: no MPX
+	 * in the CPU, or no MPX use in the process.  Even if
+	 * we get this wrong (in the unlikely event that MPX
+	 * is widely enabled on some system) the overhead of
+	 * MPX itself (reading bounds tables) is expected to
+	 * overwhelm the overhead of getting this unlikely()
+	 * consistently wrong.
+	 */
+	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
+		mpx_notify_unmap(mm, vma, start, end);
 }
 
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d162636..b9e30da 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -611,20 +611,20 @@
 
 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
-	int irq;
+	int rc, irq, trigger, polarity;
 
-	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		*irqp = gsi;
-	} else {
-		mutex_lock(&acpi_ioapic_lock);
-		irq = mp_map_gsi_to_irq(gsi,
-					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
-		mutex_unlock(&acpi_ioapic_lock);
-		if (irq < 0)
-			return -1;
-		*irqp = irq;
+	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
+	if (rc == 0) {
+		trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+		polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+		irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
+		if (irq >= 0) {
+			*irqp = irq;
+			return 0;
+		}
 	}
-	return 0;
+
+	return -1;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index a450373..939155f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -107,6 +107,7 @@
 	.rating		= 400, /* use this when running on Hyperv*/
 	.read		= read_hv_clock,
 	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static void __init ms_hyperv_init_platform(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 944bf01..498b6d9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2431,6 +2431,7 @@
 		break;
 
 	case 55: /* 22nm Atom "Silvermont"                */
+	case 76: /* 14nm Atom "Airmont"                   */
 	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
 		memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
 			sizeof(hw_cache_event_ids));
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3c895d4..0739833 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -568,8 +568,8 @@
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
 	/* Allow all events as PEBS with no flags */
 	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 	EVENT_CONSTRAINT_END
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 673f930..c4bb8b8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -103,6 +103,13 @@
 
 #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
 
+#define RAPL_EVENT_ATTR_STR(_name, v, str)				\
+static struct perf_pmu_events_attr event_attr_##v = {			\
+	.attr		= __ATTR(_name, 0444, rapl_sysfs_show, NULL),	\
+	.id		= 0,						\
+	.event_str	= str,						\
+};
+
 struct rapl_pmu {
 	spinlock_t	 lock;
 	int		 hw_unit;  /* 1/2^hw_unit Joule */
@@ -135,7 +142,7 @@
 	 * or use ldexp(count, -32).
 	 * Watts = Joules/Time delta
 	 */
-	return v << (32 - __this_cpu_read(rapl_pmu->hw_unit));
+	return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
 }
 
 static u64 rapl_event_update(struct perf_event *event)
@@ -379,23 +386,36 @@
 	.attrs = rapl_pmu_attrs,
 };
 
-EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
-EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
-EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+static ssize_t rapl_sysfs_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr = \
+		container_of(attr, struct perf_pmu_events_attr, attr);
 
-EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
-EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
-EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+	if (pmu_attr->event_str)
+		return sprintf(page, "%s", pmu_attr->event_str);
+
+	return 0;
+}
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
+RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
+RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
 
 /*
  * we compute in 0.23 nJ increments regardless of MSR
  */
-EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
 
 static struct attribute *rapl_events_srv_attr[] = {
 	EVENT_PTR(rapl_cores),
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 10b8d3e..c635b8b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -840,7 +840,6 @@
 	box->phys_id = phys_id;
 	box->pci_dev = pdev;
 	box->pmu = pmu;
-	uncore_box_init(box);
 	pci_set_drvdata(pdev, box);
 
 	raw_spin_lock(&uncore_box_lock);
@@ -1004,10 +1003,8 @@
 			pmu = &type->pmus[j];
 			box = *per_cpu_ptr(pmu->box, cpu);
 			/* called by uncore_cpu_init? */
-			if (box && box->phys_id >= 0) {
-				uncore_box_init(box);
+			if (box && box->phys_id >= 0)
 				continue;
-			}
 
 			for_each_online_cpu(k) {
 				exist = *per_cpu_ptr(pmu->box, k);
@@ -1023,10 +1020,8 @@
 				}
 			}
 
-			if (box) {
+			if (box)
 				box->phys_id = phys_id;
-				uncore_box_init(box);
-			}
 		}
 	}
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 863d9b0..6c8c1e7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -257,6 +257,14 @@
 	return box->pmu->type->num_counters;
 }
 
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+		if (box->pmu->type->ops->init_box)
+			box->pmu->type->ops->init_box(box);
+	}
+}
+
 static inline void uncore_disable_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->type->ops->disable_box)
@@ -265,6 +273,8 @@
 
 static inline void uncore_enable_box(struct intel_uncore_box *box)
 {
+	uncore_box_init(box);
+
 	if (box->pmu->type->ops->enable_box)
 		box->pmu->type->ops->enable_box(box);
 }
@@ -287,14 +297,6 @@
 	return box->pmu->type->ops->read_counter(box, event);
 }
 
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
-	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
-		if (box->pmu->type->ops->init_box)
-			box->pmu->type->ops->init_box(box);
-	}
-}
-
 static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
 {
 	return (box->phys_id < 0);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 2142376..8b7b0a5 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -674,7 +674,7 @@
 }
 static inline void tramp_free(void *tramp)
 {
-	module_free(NULL, tramp);
+	module_memfree(tramp);
 }
 #else
 /* Trampolines can only be created if modules are supported */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6307a0f..705ef8d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -127,7 +127,7 @@
 	seq_puts(p, "  Machine check polls\n");
 #endif
 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
-	seq_printf(p, "%*s: ", prec, "THR");
+	seq_printf(p, "%*s: ", prec, "HYP");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
 	seq_puts(p, "  Hypervisor callback interrupts\n");
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f7e3cd5..98f654d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1020,6 +1020,15 @@
 	regs->flags &= ~X86_EFLAGS_IF;
 	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
+
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer to get messed up.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -1048,24 +1057,25 @@
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	u8 *addr = (u8 *) (regs->ip - 1);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	void *saved_sp = kcb->jprobe_saved_sp;
 
 	if ((addr > (u8 *) jprobe_return) &&
 	    (addr < (u8 *) jprobe_return_end)) {
-		if (stack_addr(regs) != kcb->jprobe_saved_sp) {
+		if (stack_addr(regs) != saved_sp) {
 			struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
 			printk(KERN_ERR
 			       "current sp %p does not match saved sp %p\n",
-			       stack_addr(regs), kcb->jprobe_saved_sp);
+			       stack_addr(regs), saved_sp);
 			printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
 			show_regs(saved_regs);
 			printk(KERN_ERR "Current registers\n");
 			show_regs(regs);
 			BUG();
 		}
+		/* It's OK to start function graph tracing again */
+		unpause_graph_tracing();
 		*regs = kcb->jprobe_saved_regs;
-		memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
-		       kcb->jprobes_stack,
-		       MIN_STACK_SIZE(kcb->jprobe_saved_sp));
+		memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
 		preempt_enable_no_resched();
 		return 1;
 	}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 4e942f3..7fc5e84 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -29,7 +29,28 @@
 
 static bool tls_desc_okay(const struct user_desc *info)
 {
-	if (LDT_empty(info))
+	/*
+	 * For historical reasons (i.e. no one ever documented how any
+	 * of the segmentation APIs work), user programs can and do
+	 * assume that a struct user_desc that's all zeros except for
+	 * entry_number means "no segment at all".  This never actually
+	 * worked.  In fact, up to Linux 3.19, a struct user_desc like
+	 * this would create a 16-bit read-write segment with base and
+	 * limit both equal to zero.
+	 *
+	 * That was close enough to "no segment at all" until we
+	 * hardened this function to disallow 16-bit TLS segments.  Fix
+	 * it up by interpreting these zeroed segments the way that they
+	 * were almost certainly intended to be interpreted.
+	 *
+	 * The correct way to ask for "no segment at all" is to specify
+	 * a user_desc that satisfies LDT_empty.  To keep everything
+	 * working, we accept both.
+	 *
+	 * Note that there's a similar kludge in modify_ldt -- look at
+	 * the distinction between modes 1 and 0x11.
+	 */
+	if (LDT_empty(info) || LDT_zero(info))
 		return true;
 
 	/*
@@ -71,7 +92,7 @@
 	cpu = get_cpu();
 
 	while (n-- > 0) {
-		if (LDT_empty(info))
+		if (LDT_empty(info) || LDT_zero(info))
 			desc->a = desc->b = 0;
 		else
 			fill_ldt(desc, info);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b7e50bb..5054497 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -617,7 +617,7 @@
 			goto success;
 		}
 	}
-	pr_err("Fast TSC calibration failed\n");
+	pr_info("Fast TSC calibration failed\n");
 	return 0;
 
 success:
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 169b09d..de12c1d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2348,7 +2348,7 @@
 	 * Not recognized on AMD in compat mode (but is recognized in legacy
 	 * mode).
 	 */
-	if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
+	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
 	    && !vendor_intel(ctxt))
 		return emulate_ud(ctxt);
 
@@ -2359,25 +2359,13 @@
 	setup_syscalls_segments(ctxt, &cs, &ss);
 
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
-	switch (ctxt->mode) {
-	case X86EMUL_MODE_PROT32:
-		if ((msr_data & 0xfffc) == 0x0)
-			return emulate_gp(ctxt, 0);
-		break;
-	case X86EMUL_MODE_PROT64:
-		if (msr_data == 0x0)
-			return emulate_gp(ctxt, 0);
-		break;
-	default:
-		break;
-	}
+	if ((msr_data & 0xfffc) == 0x0)
+		return emulate_gp(ctxt, 0);
 
 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
-	cs_sel = (u16)msr_data;
-	cs_sel &= ~SELECTOR_RPL_MASK;
+	cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
 	ss_sel = cs_sel + 8;
-	ss_sel &= ~SELECTOR_RPL_MASK;
-	if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
+	if (efer & EFER_LMA) {
 		cs.d = 0;
 		cs.l = 1;
 	}
@@ -2386,10 +2374,11 @@
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
-	ctxt->_eip = msr_data;
+	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
 
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
+	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
+							      (u32)msr_data;
 
 	return X86EMUL_CONTINUE;
 }
@@ -3791,8 +3780,8 @@
 };
 
 static const struct opcode group6[] = {
-	DI(Prot,	sldt),
-	DI(Prot,	str),
+	DI(Prot | DstMem,	sldt),
+	DI(Prot | DstMem,	str),
 	II(Prot | Priv | SrcMem16, em_lldt, lldt),
 	II(Prot | Priv | SrcMem16, em_ltr, ltr),
 	N, N, N, N,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4f0c0b9..d52dcf0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -192,6 +192,9 @@
 		u16 cid, lid;
 		u32 ldr, aid;
 
+		if (!kvm_apic_present(vcpu))
+			continue;
+
 		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
 		cid = apic_cluster_id(new, ldr);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 38dcec4..e3ff27a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -898,6 +898,8 @@
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
 			     VM_FAULT_HWPOISON_LARGE))
 			do_sigbus(regs, error_code, address, fault);
+		else if (fault & VM_FAULT_SIGSEGV)
+			bad_area_nosemaphore(regs, error_code, address);
 		else
 			BUG();
 	}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 08a7d31..079c3b6 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -43,7 +43,7 @@
 	[_PAGE_CACHE_MODE_WT]		= _PAGE_PCD,
 	[_PAGE_CACHE_MODE_WP]		= _PAGE_PCD,
 };
-EXPORT_SYMBOL_GPL(__cachemode2pte_tbl);
+EXPORT_SYMBOL(__cachemode2pte_tbl);
 uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
 	[__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
@@ -54,7 +54,7 @@
 	[__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
-EXPORT_SYMBOL_GPL(__pte2cachemode_tbl);
+EXPORT_SYMBOL(__pte2cachemode_tbl);
 
 static unsigned long __initdata pgt_buf_start;
 static unsigned long __initdata pgt_buf_end;
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 67ebf57..c439ec4 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -349,6 +349,12 @@
 		return MPX_INVALID_BOUNDS_DIR;
 
 	/*
+	 * 32-bit binaries on 64-bit kernels are currently
+	 * unsupported.
+	 */
+	if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
+		return MPX_INVALID_BOUNDS_DIR;
+	/*
 	 * The bounds directory pointer is stored in a register
 	 * only accessible if we first do an xsave.
 	 */
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index edf299c..7ac6869 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -234,8 +234,13 @@
 	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
 
 	/* Boot CPU check */
-	if (!boot_pat_state)
+	if (!boot_pat_state) {
 		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+		if (!boot_pat_state) {
+			pat_disable("PAT read returns always zero, disabled.");
+			return;
+		}
+	}
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 9b18ef3..349c0d3 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -216,7 +216,7 @@
 			continue;
 		if (r->parent)	/* Already allocated */
 			continue;
-		if (!r->start || pci_claim_resource(dev, idx) < 0) {
+		if (!r->start || pci_claim_bridge_resource(dev, idx) < 0) {
 			/*
 			 * Something is wrong with the region.
 			 * Invalidate the resource to prevent
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c489ef2..9098d88 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -458,6 +458,7 @@
 	 * just how GSIs get registered.
 	 */
 	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
+	__acpi_unregister_gsi = NULL;
 #endif
 
 #ifdef CONFIG_PCI_MSI
@@ -471,52 +472,6 @@
 }
 
 #ifdef CONFIG_XEN_DOM0
-static __init void xen_setup_acpi_sci(void)
-{
-	int rc;
-	int trigger, polarity;
-	int gsi = acpi_sci_override_gsi;
-	int irq = -1;
-	int gsi_override = -1;
-
-	if (!gsi)
-		return;
-
-	rc = acpi_get_override_irq(gsi, &trigger, &polarity);
-	if (rc) {
-		printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
-				" sci, rc=%d\n", rc);
-		return;
-	}
-	trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
-	polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-
-	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
-			"polarity=%d\n", gsi, trigger, polarity);
-
-	/* Before we bind the GSI to a Linux IRQ, check whether
-	 * we need to override it with bus_irq (IRQ) value. Usually for
-	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
-	 * but there are oddballs where the IRQ != GSI:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
-	 * which ends up being: gsi_to_irq[9] == 20
-	 * (which is what acpi_gsi_to_irq ends up calling when starting the
-	 * the ACPI interpreter and keels over since IRQ 9 has not been
-	 * setup as we had setup IRQ 20 for it).
-	 */
-	if (acpi_gsi_to_irq(gsi, &irq) == 0) {
-		/* Use the provided value if it's valid. */
-		if (irq >= 0)
-			gsi_override = irq;
-	}
-
-	gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
-	printk(KERN_INFO "xen: acpi sci %d\n", gsi);
-
-	return;
-}
-
 int __init pci_xen_initial_domain(void)
 {
 	int irq;
@@ -527,8 +482,8 @@
 	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
 	pci_msi_ignore_mask = 1;
 #endif
-	xen_setup_acpi_sci();
 	__acpi_register_gsi = acpi_register_gsi_xen;
+	__acpi_unregister_gsi = NULL;
 	/* Pre-allocate legacy irqs */
 	for (irq = 0; irq < nr_legacy_irqs(); irq++) {
 		int trigger, polarity;
diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl
deleted file mode 100644
index 23210ba..0000000
--- a/arch/x86/tools/calc_run_size.pl
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/perl
-#
-# Calculate the amount of space needed to run the kernel, including room for
-# the .bss and .brk sections.
-#
-# Usage:
-# objdump -h a.out | perl calc_run_size.pl
-use strict;
-
-my $mem_size = 0;
-my $file_offset = 0;
-
-my $sections=" *[0-9]+ \.(?:bss|brk) +";
-while (<>) {
-	if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) {
-		my $size = hex($1);
-		my $offset = hex($2);
-		$mem_size += $size;
-		if ($file_offset == 0) {
-			$file_offset = $offset;
-		} elsif ($file_offset != $offset) {
-			# BFD linker shows the same file offset in ELF.
-			# Gold linker shows them as consecutive.
-			next if ($file_offset + $mem_size == $offset + $size);
-
-			printf STDERR "file_offset: 0x%lx\n", $file_offset;
-			printf STDERR "mem_size: 0x%lx\n", $mem_size;
-			printf STDERR "offset: 0x%lx\n", $offset;
-			printf STDERR "size: 0x%lx\n", $size;
-
-			die ".bss and .brk are non-contiguous\n";
-		}
-	}
-}
-
-if ($file_offset == 0) {
-	die "Never found .bss or .brk file offset\n";
-}
-printf("%d\n", $mem_size + $file_offset);
diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh
new file mode 100644
index 0000000..1a4c17b
--- /dev/null
+++ b/arch/x86/tools/calc_run_size.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+#
+# Calculate the amount of space needed to run the kernel, including room for
+# the .bss and .brk sections.
+#
+# Usage:
+# objdump -h a.out | sh calc_run_size.sh
+
+NUM='\([0-9a-fA-F]*[ \t]*\)'
+OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p')
+if [ -z "$OUT" ] ; then
+	echo "Never found .bss or .brk file offset" >&2
+	exit 1
+fi
+
+OUT=$(echo ${OUT# })
+sizeA=$(printf "%d" 0x${OUT%% *})
+OUT=${OUT#* }
+offsetA=$(printf "%d" 0x${OUT%% *})
+OUT=${OUT#* }
+sizeB=$(printf "%d" 0x${OUT%% *})
+OUT=${OUT#* }
+offsetB=$(printf "%d" 0x${OUT%% *})
+
+run_size=$(( $offsetA + $sizeA + $sizeB ))
+
+# BFD linker shows the same file offset in ELF.
+if [ "$offsetA" -ne "$offsetB" ] ; then
+	# Gold linker shows them as consecutive.
+	endB=$(( $offsetB + $sizeB ))
+	if [ "$endB" != "$run_size" ] ; then
+		printf "sizeA: 0x%x\n" $sizeA >&2
+		printf "offsetA: 0x%x\n" $offsetA >&2
+		printf "sizeB: 0x%x\n" $sizeB >&2
+		printf "offsetB: 0x%x\n" $offsetB >&2
+		echo ".bss and .brk are non-contiguous" >&2
+		exit 1
+	fi
+fi
+
+printf "%d\n" $run_size
+exit 0
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 6bf3a13..78a881b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -40,6 +40,7 @@
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/interface/memory.h>
+#include <xen/interface/nmi.h>
 #include <xen/interface/xen-mca.h>
 #include <xen/features.h>
 #include <xen/page.h>
@@ -66,6 +67,7 @@
 #include <asm/reboot.h>
 #include <asm/stackprotector.h>
 #include <asm/hypervisor.h>
+#include <asm/mach_traps.h>
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/pat.h>
@@ -1351,6 +1353,21 @@
 	.emergency_restart = xen_emergency_restart,
 };
 
+static unsigned char xen_get_nmi_reason(void)
+{
+	unsigned char reason = 0;
+
+	/* Construct a value which looks like it came from port 0x61. */
+	if (test_bit(_XEN_NMIREASON_io_error,
+		     &HYPERVISOR_shared_info->arch.nmi_reason))
+		reason |= NMI_REASON_IOCHK;
+	if (test_bit(_XEN_NMIREASON_pci_serr,
+		     &HYPERVISOR_shared_info->arch.nmi_reason))
+		reason |= NMI_REASON_SERR;
+
+	return reason;
+}
+
 static void __init xen_boot_params_init_edd(void)
 {
 #if IS_ENABLED(CONFIG_EDD)
@@ -1535,9 +1552,12 @@
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
 	pv_apic_ops = xen_apic_ops;
-	if (!xen_pvh_domain())
+	if (!xen_pvh_domain()) {
 		pv_cpu_ops = xen_cpu_ops;
 
+		x86_platform.get_nmi_reason = xen_get_nmi_reason;
+	}
+
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
 	else
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index edbc7a6..70fb507 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -167,10 +167,13 @@
 	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
 }
 
-/* Only to be called in case of a race for a page just allocated! */
-static void free_p2m_page(void *p)
+static void __ref free_p2m_page(void *p)
 {
-	BUG_ON(!slab_is_available());
+	if (unlikely(!slab_is_available())) {
+		free_bootmem((unsigned long)p, PAGE_SIZE);
+		return;
+	}
+
 	free_page((unsigned long)p);
 }
 
@@ -375,7 +378,7 @@
 			p2m_missing_pte : p2m_identity_pte;
 		for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
 			pmdp = populate_extra_pmd(
-				(unsigned long)(p2m + pfn + i * PTRS_PER_PTE));
+				(unsigned long)(p2m + pfn) + i * PMD_SIZE);
 			set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
 		}
 	}
@@ -436,10 +439,9 @@
  * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
  * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
  */
-static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg)
+static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
 {
 	pte_t *ptechk;
-	pte_t *pteret = ptep;
 	pte_t *pte_newpg[PMDS_PER_MID_PAGE];
 	pmd_t *pmdp;
 	unsigned int level;
@@ -473,8 +475,6 @@
 		if (ptechk == pte_pg) {
 			set_pmd(pmdp,
 				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
-			if (vaddr == (addr & ~(PMD_SIZE - 1)))
-				pteret = pte_offset_kernel(pmdp, addr);
 			pte_newpg[i] = NULL;
 		}
 
@@ -488,7 +488,7 @@
 		vaddr += PMD_SIZE;
 	}
 
-	return pteret;
+	return lookup_address(addr, &level);
 }
 
 /*
@@ -517,7 +517,7 @@
 
 	if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
 		/* PMD level is missing, allocate a new one */
-		ptep = alloc_p2m_pmd(addr, ptep, pte_pg);
+		ptep = alloc_p2m_pmd(addr, pte_pg);
 		if (!ptep)
 			return false;
 	}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index dfd77de..865e56c 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -140,7 +140,7 @@
 unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
 {
 	int i;
-	unsigned long addr = PFN_PHYS(pfn);
+	phys_addr_t addr = PFN_PHYS(pfn);
 
 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
 		if (addr >= xen_extra_mem[i].start &&
@@ -160,6 +160,8 @@
 	int i;
 
 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		if (!xen_extra_mem[i].size)
+			continue;
 		pfn_s = PFN_DOWN(xen_extra_mem[i].start);
 		pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size);
 		for (pfn = pfn_s; pfn < pfn_e; pfn++)
@@ -229,15 +231,14 @@
  * as a fallback if the remapping fails.
  */
 static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
-	unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
-	unsigned long *released)
+	unsigned long end_pfn, unsigned long nr_pages, unsigned long *released)
 {
-	unsigned long len = 0;
 	unsigned long pfn, end;
 	int ret;
 
 	WARN_ON(start_pfn > end_pfn);
 
+	/* Release pages first. */
 	end = min(end_pfn, nr_pages);
 	for (pfn = start_pfn; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
@@ -250,16 +251,14 @@
 		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
 
 		if (ret == 1) {
+			(*released)++;
 			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
 				break;
-			len++;
 		} else
 			break;
 	}
 
-	/* Need to release pages first */
-	*released += len;
-	*identity += set_phys_range_identity(start_pfn, end_pfn);
+	set_phys_range_identity(start_pfn, end_pfn);
 }
 
 /*
@@ -287,7 +286,7 @@
 	}
 
 	/* Update kernel mapping, but not for highmem. */
-	if ((pfn << PAGE_SHIFT) >= __pa(high_memory))
+	if (pfn >= PFN_UP(__pa(high_memory - 1)))
 		return;
 
 	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
@@ -318,7 +317,6 @@
 	unsigned long ident_pfn_iter, remap_pfn_iter;
 	unsigned long ident_end_pfn = start_pfn + size;
 	unsigned long left = size;
-	unsigned long ident_cnt = 0;
 	unsigned int i, chunk;
 
 	WARN_ON(size == 0);
@@ -347,8 +345,7 @@
 		xen_remap_mfn = mfn;
 
 		/* Set identity map */
-		ident_cnt += set_phys_range_identity(ident_pfn_iter,
-			ident_pfn_iter + chunk);
+		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);
 
 		left -= chunk;
 	}
@@ -371,7 +368,7 @@
 static unsigned long __init xen_set_identity_and_remap_chunk(
         const struct e820entry *list, size_t map_size, unsigned long start_pfn,
 	unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
-	unsigned long *identity, unsigned long *released)
+	unsigned long *released, unsigned long *remapped)
 {
 	unsigned long pfn;
 	unsigned long i = 0;
@@ -386,8 +383,7 @@
 		/* Do not remap pages beyond the current allocation */
 		if (cur_pfn >= nr_pages) {
 			/* Identity map remaining pages */
-			*identity += set_phys_range_identity(cur_pfn,
-				cur_pfn + size);
+			set_phys_range_identity(cur_pfn, cur_pfn + size);
 			break;
 		}
 		if (cur_pfn + size > nr_pages)
@@ -398,7 +394,7 @@
 		if (!remap_range_size) {
 			pr_warning("Unable to find available pfn range, not remapping identity pages\n");
 			xen_set_identity_and_release_chunk(cur_pfn,
-				cur_pfn + left, nr_pages, identity, released);
+				cur_pfn + left, nr_pages, released);
 			break;
 		}
 		/* Adjust size to fit in current e820 RAM region */
@@ -410,7 +406,7 @@
 		/* Update variables to reflect new mappings. */
 		i += size;
 		remap_pfn += size;
-		*identity += size;
+		*remapped += size;
 	}
 
 	/*
@@ -427,13 +423,13 @@
 
 static void __init xen_set_identity_and_remap(
 	const struct e820entry *list, size_t map_size, unsigned long nr_pages,
-	unsigned long *released)
+	unsigned long *released, unsigned long *remapped)
 {
 	phys_addr_t start = 0;
-	unsigned long identity = 0;
 	unsigned long last_pfn = nr_pages;
 	const struct e820entry *entry;
 	unsigned long num_released = 0;
+	unsigned long num_remapped = 0;
 	int i;
 
 	/*
@@ -460,14 +456,14 @@
 				last_pfn = xen_set_identity_and_remap_chunk(
 						list, map_size, start_pfn,
 						end_pfn, nr_pages, last_pfn,
-						&identity, &num_released);
+						&num_released, &num_remapped);
 			start = end;
 		}
 	}
 
 	*released = num_released;
+	*remapped = num_remapped;
 
-	pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
 	pr_info("Released %ld page(s)\n", num_released);
 }
 
@@ -586,6 +582,7 @@
 	struct xen_memory_map memmap;
 	unsigned long max_pages;
 	unsigned long extra_pages = 0;
+	unsigned long remapped_pages;
 	int i;
 	int op;
 
@@ -635,9 +632,10 @@
 	 * underlying RAM.
 	 */
 	xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
-				   &xen_released_pages);
+				   &xen_released_pages, &remapped_pages);
 
 	extra_pages += xen_released_pages;
+	extra_pages += remapped_pages;
 
 	/*
 	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index f473d26..6908734 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -391,7 +391,7 @@
 
 struct xen_clock_event_device {
 	struct clock_event_device evt;
-	char *name;
+	char name[16];
 };
 static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
 
@@ -420,46 +420,38 @@
 	if (evt->irq >= 0) {
 		unbind_from_irqhandler(evt->irq, NULL);
 		evt->irq = -1;
-		kfree(per_cpu(xen_clock_events, cpu).name);
-		per_cpu(xen_clock_events, cpu).name = NULL;
 	}
 }
 
 void xen_setup_timer(int cpu)
 {
-	char *name;
-	struct clock_event_device *evt;
+	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
+	struct clock_event_device *evt = &xevt->evt;
 	int irq;
 
-	evt = &per_cpu(xen_clock_events, cpu).evt;
 	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
 	if (evt->irq >= 0)
 		xen_teardown_timer(cpu);
 
 	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
 
-	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
-	if (!name)
-		name = "<timer kasprintf failed>";
+	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
 
 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
 				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
-				      name, NULL);
+				      xevt->name, NULL);
 	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
 	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
-	per_cpu(xen_clock_events, cpu).name = name;
 }
 
 
 void xen_setup_cpu_clockevents(void)
 {
-	BUG_ON(preemptible());
-
 	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
 }
 
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index b57c4f9..9e3571a 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -117,6 +117,8 @@
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
 		else if (fault & VM_FAULT_SIGBUS)
 			goto do_sigbus;
 		BUG();
diff --git a/block/blk-core.c b/block/blk-core.c
index 30f6153..3ad4055 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,6 +473,25 @@
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
+void blk_set_queue_dying(struct request_queue *q)
+{
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+
+	if (q->mq_ops)
+		blk_mq_wake_waiters(q);
+	else {
+		struct request_list *rl;
+
+		blk_queue_for_each_rl(rl, q) {
+			if (rl->rq_pool) {
+				wake_up(&rl->wait[BLK_RW_SYNC]);
+				wake_up(&rl->wait[BLK_RW_ASYNC]);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -486,7 +505,7 @@
 
 	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+	blk_set_queue_dying(q);
 	spin_lock_irq(lock);
 
 	/*
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1630a20..6774a0e 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -15,6 +15,26 @@
 
 static void blk_mq_sysfs_release(struct kobject *kobj)
 {
+	struct request_queue *q;
+
+	q = container_of(kobj, struct request_queue, mq_kobj);
+	free_percpu(q->queue_ctx);
+}
+
+static void blk_mq_ctx_release(struct kobject *kobj)
+{
+	struct blk_mq_ctx *ctx;
+
+	ctx = container_of(kobj, struct blk_mq_ctx, kobj);
+	kobject_put(&ctx->queue->mq_kobj);
+}
+
+static void blk_mq_hctx_release(struct kobject *kobj)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
+	kfree(hctx);
 }
 
 struct blk_mq_ctx_sysfs_entry {
@@ -318,13 +338,13 @@
 static struct kobj_type blk_mq_ctx_ktype = {
 	.sysfs_ops	= &blk_mq_sysfs_ops,
 	.default_attrs	= default_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_ctx_release,
 };
 
 static struct kobj_type blk_mq_hw_ktype = {
 	.sysfs_ops	= &blk_mq_hw_sysfs_ops,
 	.default_attrs	= default_hw_ctx_attrs,
-	.release	= blk_mq_sysfs_release,
+	.release	= blk_mq_hctx_release,
 };
 
 static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
@@ -355,6 +375,7 @@
 		return ret;
 
 	hctx_for_each_ctx(hctx, ctx, i) {
+		kobject_get(&q->mq_kobj);
 		ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
 		if (ret)
 			break;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32e8dbb..60c9d4a 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -68,9 +68,9 @@
 }
 
 /*
- * Wakeup all potentially sleeping on normal (non-reserved) tags
+ * Wakeup all potentially sleeping on tags
  */
-static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
+void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 {
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
@@ -85,6 +85,12 @@
 
 		wake_index = bt_index_inc(wake_index);
 	}
+
+	if (include_reserve) {
+		bt = &tags->breserved_tags;
+		if (waitqueue_active(&bt->bs[0].wait))
+			wake_up(&bt->bs[0].wait);
+	}
 }
 
 /*
@@ -100,7 +106,7 @@
 
 	atomic_dec(&tags->active_queues);
 
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 }
 
 /*
@@ -584,7 +590,7 @@
 	 * static and should never need resizing.
 	 */
 	bt_update_count(&tags->bitmap_tags, tdepth);
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 	return 0;
 }
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 6206ed1..a6fa0fc 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -54,6 +54,7 @@
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
 extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
+extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da1ab56..9ee3b87 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -107,7 +107,7 @@
 	wake_up_all(&q->mq_freeze_wq);
 }
 
-static void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_mq_freeze_queue_start(struct request_queue *q)
 {
 	bool freeze;
 
@@ -120,6 +120,7 @@
 		blk_mq_run_queues(q, false);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
 static void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
@@ -136,7 +137,7 @@
 	blk_mq_freeze_queue_wait(q);
 }
 
-static void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue(struct request_queue *q)
 {
 	bool wake;
 
@@ -149,6 +150,24 @@
 		wake_up_all(&q->mq_freeze_wq);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+
+void blk_mq_wake_waiters(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (blk_mq_hw_queue_mapped(hctx))
+			blk_mq_tag_wakeup_all(hctx->tags, true);
+
+	/*
+	 * If we are called because the queue has now been marked as
+	 * dying, we need to ensure that processes currently waiting on
+	 * the queue are notified as well.
+	 */
+	wake_up_all(&q->mq_freeze_wq);
+}
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -258,8 +277,10 @@
 		ctx = alloc_data.ctx;
 	}
 	blk_mq_put_ctx(ctx);
-	if (!rq)
+	if (!rq) {
+		blk_mq_queue_exit(q);
 		return ERR_PTR(-EWOULDBLOCK);
+	}
 	return rq;
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -383,6 +404,12 @@
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+int blk_mq_request_started(struct request *rq)
+{
+	return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_request_started);
+
 void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -500,12 +527,38 @@
 }
 EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
+void blk_mq_cancel_requeue_work(struct request_queue *q)
+{
+	cancel_work_sync(&q->requeue_work);
+}
+EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
+
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
 	kblockd_schedule_work(&q->requeue_work);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_abort_requeue_list(struct request_queue *q)
+{
+	unsigned long flags;
+	LIST_HEAD(rq_list);
+
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	list_splice_init(&q->requeue_list, &rq_list);
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+	while (!list_empty(&rq_list)) {
+		struct request *rq;
+
+		rq = list_first_entry(&rq_list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		rq->errors = -EIO;
+		blk_mq_end_request(rq, rq->errors);
+	}
+}
+EXPORT_SYMBOL(blk_mq_abort_requeue_list);
+
 static inline bool is_flush_request(struct request *rq,
 		struct blk_flush_queue *fq, unsigned int tag)
 {
@@ -566,13 +619,24 @@
 		break;
 	}
 }
-		
+
 static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
 	struct blk_mq_timeout_data *data = priv;
 
-	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+		/*
+		 * If a request wasn't started before the queue was
+		 * marked dying, kill it here or it'll go unnoticed.
+		 */
+		if (unlikely(blk_queue_dying(rq->q))) {
+			rq->errors = -EIO;
+			blk_mq_complete_request(rq);
+		}
+		return;
+	}
+	if (rq->cmd_flags & REQ_NO_TIMEOUT)
 		return;
 
 	if (time_after_eq(jiffies, rq->deadline)) {
@@ -1577,10 +1641,8 @@
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
+	queue_for_each_hw_ctx(q, hctx, i)
 		free_cpumask_var(hctx->cpumask);
-		kfree(hctx);
-	}
 }
 
 static int blk_mq_init_hctx(struct request_queue *q,
@@ -1601,7 +1663,6 @@
 	hctx->queue = q;
 	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags;
-	hctx->cmd_size = set->cmd_size;
 
 	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
@@ -1939,11 +2000,9 @@
 
 	percpu_ref_exit(&q->mq_usage_counter);
 
-	free_percpu(q->queue_ctx);
 	kfree(q->queue_hw_ctx);
 	kfree(q->mq_map);
 
-	q->queue_ctx = NULL;
 	q->queue_hw_ctx = NULL;
 	q->mq_map = NULL;
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 206230e..4f4f943 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
+void blk_mq_wake_waiters(struct request_queue *q);
 
 /*
  * CPU hotplug helpers
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 56c0258..246dfb1 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -190,6 +190,9 @@
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
+	if (req->cmd_flags & REQ_NO_TIMEOUT)
+		return;
+
 	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
 	if (!q->mq_ops && !q->rq_timed_out_fn)
 		return;
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 9b3c54c..3dd1011 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -1475,3 +1475,4 @@
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("aes-generic");
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index b4485a1..6f5bebc 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -477,3 +477,4 @@
 module_init(prng_mod_init);
 module_exit(prng_mod_fini);
 MODULE_ALIAS_CRYPTO("stdrng");
+MODULE_ALIAS_CRYPTO("ansi_cprng");
diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c
index 7bd71f0..87b392a 100644
--- a/crypto/blowfish_generic.c
+++ b/crypto/blowfish_generic.c
@@ -139,3 +139,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Blowfish Cipher Algorithm");
 MODULE_ALIAS_CRYPTO("blowfish");
+MODULE_ALIAS_CRYPTO("blowfish-generic");
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index 1b74c5a..a02286b 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -1099,3 +1099,4 @@
 MODULE_DESCRIPTION("Camellia Cipher Algorithm");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("camellia");
+MODULE_ALIAS_CRYPTO("camellia-generic");
diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c
index 84c86db..df5c726 100644
--- a/crypto/cast5_generic.c
+++ b/crypto/cast5_generic.c
@@ -550,3 +550,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cast5 Cipher Algorithm");
 MODULE_ALIAS_CRYPTO("cast5");
+MODULE_ALIAS_CRYPTO("cast5-generic");
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index f408f0b..058c8d7 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -292,3 +292,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cast6 Cipher Algorithm");
 MODULE_ALIAS_CRYPTO("cast6");
+MODULE_ALIAS_CRYPTO("cast6-generic");
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 2a06202..06f1b60 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -171,4 +171,5 @@
 MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("crc32c");
+MODULE_ALIAS_CRYPTO("crc32c-generic");
 MODULE_SOFTDEP("pre: crc32c");
diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c
index 08bb4f5..c1229614 100644
--- a/crypto/crct10dif_generic.c
+++ b/crypto/crct10dif_generic.c
@@ -125,3 +125,4 @@
 MODULE_DESCRIPTION("T10 DIF CRC calculation.");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-generic");
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index 4291294..a717205 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -983,8 +983,6 @@
 	.cia_decrypt		=	des3_ede_decrypt } }
 } };
 
-MODULE_ALIAS_CRYPTO("des3_ede");
-
 static int __init des_generic_mod_init(void)
 {
 	return crypto_register_algs(des_algs, ARRAY_SIZE(des_algs));
@@ -1001,4 +999,7 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms");
 MODULE_AUTHOR("Dag Arne Osvik <da@osvik.no>");
-MODULE_ALIAS("des");
+MODULE_ALIAS_CRYPTO("des");
+MODULE_ALIAS_CRYPTO("des-generic");
+MODULE_ALIAS_CRYPTO("des3_ede");
+MODULE_ALIAS_CRYPTO("des3_ede-generic");
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index 4e97fae..bac7099 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -173,3 +173,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("GHASH Message Digest Algorithm");
 MODULE_ALIAS_CRYPTO("ghash");
+MODULE_ALIAS_CRYPTO("ghash-generic");
diff --git a/crypto/krng.c b/crypto/krng.c
index 67c88b3..0224841 100644
--- a/crypto/krng.c
+++ b/crypto/krng.c
@@ -63,3 +63,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Kernel Random Number Generator");
 MODULE_ALIAS_CRYPTO("stdrng");
+MODULE_ALIAS_CRYPTO("krng");
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index 3d0f9df..f550b5d 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -249,3 +249,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm");
 MODULE_ALIAS_CRYPTO("salsa20");
+MODULE_ALIAS_CRYPTO("salsa20-generic");
diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index a53b5e2..94970a7 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -667,3 +667,4 @@
 MODULE_AUTHOR("Dag Arne Osvik <osvik@ii.uib.no>");
 MODULE_ALIAS_CRYPTO("tnepres");
 MODULE_ALIAS_CRYPTO("serpent");
+MODULE_ALIAS_CRYPTO("serpent-generic");
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index 039e58c..a3e50c3 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -154,3 +154,4 @@
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
 
 MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-generic");
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index 5eb21b1..b001ff5 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -385,4 +385,6 @@
 MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm");
 
 MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-generic");
 MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-generic");
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 8d0b19e..1c3c376 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -289,4 +289,6 @@
 MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms");
 
 MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha384-generic");
 MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha512-generic");
diff --git a/crypto/tea.c b/crypto/tea.c
index 495be2d..b70b441 100644
--- a/crypto/tea.c
+++ b/crypto/tea.c
@@ -270,6 +270,7 @@
 	crypto_unregister_algs(tea_algs, ARRAY_SIZE(tea_algs));
 }
 
+MODULE_ALIAS_CRYPTO("tea");
 MODULE_ALIAS_CRYPTO("xtea");
 MODULE_ALIAS_CRYPTO("xeta");
 
diff --git a/crypto/tgr192.c b/crypto/tgr192.c
index 6e5651c..321bc6f 100644
--- a/crypto/tgr192.c
+++ b/crypto/tgr192.c
@@ -676,6 +676,7 @@
 	crypto_unregister_shashes(tgr_algs, ARRAY_SIZE(tgr_algs));
 }
 
+MODULE_ALIAS_CRYPTO("tgr192");
 MODULE_ALIAS_CRYPTO("tgr160");
 MODULE_ALIAS_CRYPTO("tgr128");
 
diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c
index 523ad8c..ebf7a3e 100644
--- a/crypto/twofish_generic.c
+++ b/crypto/twofish_generic.c
@@ -212,3 +212,4 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION ("Twofish Cipher Algorithm");
 MODULE_ALIAS_CRYPTO("twofish");
+MODULE_ALIAS_CRYPTO("twofish-generic");
diff --git a/crypto/wp512.c b/crypto/wp512.c
index 0de42eb..7ee5a04 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1167,6 +1167,7 @@
 	crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs));
 }
 
+MODULE_ALIAS_CRYPTO("wp512");
 MODULE_ALIAS_CRYPTO("wp384");
 MODULE_ALIAS_CRYPTO("wp256");
 
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 694d5a7..c70d6e4 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -134,8 +134,6 @@
 
 source "drivers/platform/Kconfig"
 
-source "drivers/soc/Kconfig"
-
 source "drivers/clk/Kconfig"
 
 source "drivers/hwspinlock/Kconfig"
diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c
index a27d31d..9dcf836 100644
--- a/drivers/acpi/int340x_thermal.c
+++ b/drivers/acpi/int340x_thermal.c
@@ -14,10 +14,10 @@
 
 #include "internal.h"
 
-#define DO_ENUMERATION 0x01
+#define INT3401_DEVICE 0X01
 static const struct acpi_device_id int340x_thermal_device_ids[] = {
-	{"INT3400", DO_ENUMERATION },
-	{"INT3401"},
+	{"INT3400"},
+	{"INT3401", INT3401_DEVICE},
 	{"INT3402"},
 	{"INT3403"},
 	{"INT3404"},
@@ -34,7 +34,10 @@
 					const struct acpi_device_id *id)
 {
 #if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE)
-	if (id->driver_data == DO_ENUMERATION)
+	acpi_create_platform_device(adev);
+#elif defined(INTEL_SOC_DTS_THERMAL) || defined(INTEL_SOC_DTS_THERMAL_MODULE)
+	/* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */
+	if (id->driver_data == INT3401_DEVICE)
 		acpi_create_platform_device(adev);
 #endif
 	return 1;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 5277a0e..b1def41 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -512,7 +512,6 @@
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		dev->irq = 0;
 		dev->irq_managed = 0;
 	}
 }
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index a3a1360..5f60155 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -835,6 +835,7 @@
 config PATA_AT91
 	tristate "PATA support for AT91SAM9260"
 	depends on ARM && SOC_AT91SAM9
+	depends on !ARCH_MULTIPLATFORM
 	help
 	  This option enables support for IDE devices on the Atmel AT91SAM9260 SoC.
 
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 49f1e68..33bb06e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -325,7 +325,6 @@
 	{ PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */
 	{ PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */
 	{ PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */
-	{ PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index feeb8f1..cbcd208 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -125,10 +125,11 @@
  * xgene_ahci_qc_issue - Issue commands to the device
  * @qc: Command to issue
  *
- * Due to Hardware errata for IDENTIFY DEVICE command, the controller cannot
- * clear the BSY bit after receiving the PIO setup FIS. This results in the dma
- * state machine goes into the CMFatalErrorUpdate state and locks up. By
- * restarting the dma engine, it removes the controller out of lock up state.
+ * Due to Hardware errata for IDENTIFY DEVICE command and PACKET
+ * command of ATAPI protocol set, the controller cannot clear the BSY bit
+ * after receiving the PIO setup FIS. This results in the DMA state machine
+ * going into the CMFatalErrorUpdate state and locks up. By restarting the
+ * DMA engine, it removes the controller out of lock up state.
  */
 static unsigned int xgene_ahci_qc_issue(struct ata_queued_cmd *qc)
 {
@@ -137,7 +138,8 @@
 	struct xgene_ahci_context *ctx = hpriv->plat_data;
 	int rc = 0;
 
-	if (unlikely(ctx->last_cmd[ap->port_no] == ATA_CMD_ID_ATA))
+	if (unlikely((ctx->last_cmd[ap->port_no] == ATA_CMD_ID_ATA) ||
+	    (ctx->last_cmd[ap->port_no] == ATA_CMD_PACKET)))
 		xgene_ahci_restart_engine(ap);
 
 	rc = ahci_qc_issue(qc);
@@ -188,7 +190,7 @@
 	 *
 	 * Clear reserved bit 8 (DEVSLP bit) as we don't support DEVSLP
 	 */
-	id[ATA_ID_FEATURE_SUPP] &= ~(1 << 8);
+	id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8));
 
 	return 0;
 }
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 97683e4..61a9c07 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -2003,7 +2003,7 @@
 
 	devslp = readl(port_mmio + PORT_DEVSLP);
 	if (!(devslp & PORT_DEVSLP_DSP)) {
-		dev_err(ap->host->dev, "port does not support device sleep\n");
+		dev_info(ap->host->dev, "port does not support device sleep\n");
 		return;
 	}
 
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 5c84fb5..d1a05f9 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4233,10 +4233,33 @@
 	{ "PIONEER DVD-RW  DVR-216D",	NULL,	ATA_HORKAGE_NOSETXFER },
 
 	/* devices that don't properly handle queued TRIM commands */
-	{ "Micron_M500*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
-	{ "Crucial_CT???M500SSD*",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
-	{ "Micron_M550*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
-	{ "Crucial_CT*M550SSD*",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+	{ "Micron_M[56]*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM |
+						ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "Crucial_CT*SSD*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+
+	/*
+	 * As defined, the DRAT (Deterministic Read After Trim) and RZAT
+	 * (Return Zero After Trim) flags in the ATA Command Set are
+	 * unreliable in the sense that they only define what happens if
+	 * the device successfully executed the DSM TRIM command. TRIM
+	 * is only advisory, however, and the device is free to silently
+	 * ignore all or parts of the request.
+	 *
+	 * Whitelist drives that are known to reliably return zeroes
+	 * after TRIM.
+	 */
+
+	/*
+	 * The intel 510 drive has buggy DRAT/RZAT. Explicitly exclude
+	 * that model before whitelisting all other intel SSDs.
+	 */
+	{ "INTEL*SSDSC2MH*",		NULL,	0, },
+
+	{ "INTEL*SSD*", 		NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "SSD*INTEL*",			NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "Samsung*SSD*",		NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "SAMSUNG*SSD*",		NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "ST[1248][0248]0[FH]*",	NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
 
 	/*
 	 * Some WD SATA-I drives spin up and down erratically when the link
@@ -4748,7 +4771,10 @@
 		return NULL;
 
 	for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) {
-		tag = tag < max_queue ? tag : 0;
+		if (ap->flags & ATA_FLAG_LOWTAG)
+			tag = i;
+		else
+			tag = tag < max_queue ? tag : 0;
 
 		/* the last tag is reserved for internal command. */
 		if (tag == ATA_TAG_INTERNAL)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 3dbec89..8d00c26 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2389,6 +2389,7 @@
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(ata_get_cmd_descript);
 
 /**
  *	ata_eh_link_report - report error handling to user
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e364e86..6abd17a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2532,13 +2532,15 @@
 		rbuf[15] = lowest_aligned;
 
 		if (ata_id_has_trim(args->id)) {
-			rbuf[14] |= 0x80; /* TPE */
+			rbuf[14] |= 0x80; /* LBPME */
 
-			if (ata_id_has_zero_after_trim(args->id))
-				rbuf[14] |= 0x40; /* TPRZ */
+			if (ata_id_has_zero_after_trim(args->id) &&
+			    dev->horkage & ATA_HORKAGE_ZERO_AFTER_TRIM) {
+				ata_dev_info(dev, "Enabling discard_zeroes_data\n");
+				rbuf[14] |= 0x40; /* LBPRZ */
+			}
 		}
 	}
-
 	return 0;
 }
 
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index db90aa3..2e86e3b 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1333,7 +1333,19 @@
 	DPRINTK("ENTER\n");
 
 	cancel_delayed_work_sync(&ap->sff_pio_task);
+
+	/*
+	 * We wanna reset the HSM state to IDLE.  If we do so without
+	 * grabbing the port lock, critical sections protected by it which
+	 * expect the HSM state to stay stable may get surprised.  For
+	 * example, we may set IDLE in between the time
+	 * __ata_sff_port_intr() checks for HSM_ST_IDLE and before it calls
+	 * ata_sff_hsm_move() causing ata_sff_hsm_move() to BUG().
+	 */
+	spin_lock_irq(ap->lock);
 	ap->hsm_task_state = HSM_ST_IDLE;
+	spin_unlock_irq(ap->lock);
+
 	ap->sff_pio_task_link = NULL;
 
 	if (ata_msg_ctl(ap))
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index c7ddef8..8e824817 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -797,7 +797,7 @@
 	if (err) {
 		dev_err(host_pvt.dwc_dev, "%s: dma_request_interrupts returns"
 			" %d\n", __func__, err);
-		goto error_out;
+		return err;
 	}
 
 	/* Enabe DMA */
@@ -808,11 +808,6 @@
 		sata_dma_regs);
 
 	return 0;
-
-error_out:
-	dma_dwc_exit(hsdev);
-
-	return err;
 }
 
 static int sata_dwc_scr_read(struct ata_link *link, unsigned int scr, u32 *val)
@@ -1662,7 +1657,7 @@
 	char *ver = (char *)&versionr;
 	u8 *base = NULL;
 	int err = 0;
-	int irq, rc;
+	int irq;
 	struct ata_host *host;
 	struct ata_port_info pi = sata_dwc_port_info[0];
 	const struct ata_port_info *ppi[] = { &pi, NULL };
@@ -1725,7 +1720,7 @@
 	if (irq == NO_IRQ) {
 		dev_err(&ofdev->dev, "no SATA DMA irq\n");
 		err = -ENODEV;
-		goto error_out;
+		goto error_iomap;
 	}
 
 	/* Get physical SATA DMA register base address */
@@ -1734,14 +1729,16 @@
 		dev_err(&ofdev->dev, "ioremap failed for AHBDMA register"
 			" address\n");
 		err = -ENODEV;
-		goto error_out;
+		goto error_iomap;
 	}
 
 	/* Save dev for later use in dev_xxx() routines */
 	host_pvt.dwc_dev = &ofdev->dev;
 
 	/* Initialize AHB DMAC */
-	dma_dwc_init(hsdev, irq);
+	err = dma_dwc_init(hsdev, irq);
+	if (err)
+		goto error_dma_iomap;
 
 	/* Enable SATA Interrupts */
 	sata_dwc_enable_interrupts(hsdev);
@@ -1759,9 +1756,8 @@
 	 * device discovery process, invoking our port_start() handler &
 	 * error_handler() to execute a dummy Softreset EH session
 	 */
-	rc = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht);
-
-	if (rc != 0)
+	err = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht);
+	if (err)
 		dev_err(&ofdev->dev, "failed to activate host");
 
 	dev_set_drvdata(&ofdev->dev, host);
@@ -1770,7 +1766,8 @@
 error_out:
 	/* Free SATA DMA resources */
 	dma_dwc_exit(hsdev);
-
+error_dma_iomap:
+	iounmap((void __iomem *)host_pvt.sata_dma_regs);
 error_iomap:
 	iounmap(base);
 error_kmalloc:
@@ -1791,6 +1788,7 @@
 	/* Free SATA DMA resources */
 	dma_dwc_exit(hsdev);
 
+	iounmap((void __iomem *)host_pvt.sata_dma_regs);
 	iounmap(hsdev->reg_base);
 	kfree(hsdev);
 	kfree(host);
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index d81b20d..ea65594 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -246,7 +246,7 @@
 	/* host flags */
 	SIL24_COMMON_FLAGS	= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
 				  ATA_FLAG_NCQ | ATA_FLAG_ACPI_SATA |
-				  ATA_FLAG_AN | ATA_FLAG_PMP,
+				  ATA_FLAG_AN | ATA_FLAG_PMP | ATA_FLAG_LOWTAG,
 	SIL24_FLAG_PCIX_IRQ_WOC	= (1 << 24), /* IRQ loss errata on PCI-X */
 
 	IRQ_STAT_4PORTS		= 0xf,
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ae9f615..aa2224a 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -530,7 +530,7 @@
 			goto out_cleanup_queues;
 
 		nullb->q = blk_mq_init_queue(&nullb->tag_set);
-		if (!nullb->q) {
+		if (IS_ERR(nullb->q)) {
 			rv = -ENOMEM;
 			goto out_cleanup_tags;
 		}
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b1d5d87..d826bf3 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -106,7 +106,7 @@
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
 	u16 q_depth;
-	u16 cq_vector;
+	s16 cq_vector;
 	u16 sq_head;
 	u16 sq_tail;
 	u16 cq_head;
@@ -215,6 +215,7 @@
 	cmd->fn = handler;
 	cmd->ctx = ctx;
 	cmd->aborted = 0;
+	blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
 }
 
 /* Special values must be less than 0x1000 */
@@ -431,8 +432,13 @@
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR || blk_noretry_request(req))
 		    && (jiffies - req->start_time) < req->timeout) {
+			unsigned long flags;
+
 			blk_mq_requeue_request(req);
-			blk_mq_kick_requeue_list(req->q);
+			spin_lock_irqsave(req->q->queue_lock, flags);
+			if (!blk_queue_stopped(req->q))
+				blk_mq_kick_requeue_list(req->q);
+			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
 		req->errors = nvme_error_status(status);
@@ -664,8 +670,6 @@
 		}
 	}
 
-	blk_mq_start_request(req);
-
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
 	if (req->cmd_flags & REQ_DISCARD)
@@ -835,6 +839,7 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
 	nvme_set_info(cmd_info, req, async_req_completion);
 
@@ -1016,14 +1021,19 @@
 	struct nvme_command cmd;
 
 	if (!nvmeq->qid || cmd_rq->aborted) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev_list_lock, flags);
 		if (work_busy(&dev->reset_work))
-			return;
+			goto out;
 		list_del_init(&dev->node);
 		dev_warn(&dev->pci_dev->dev,
 			"I/O %d QID %d timeout, reset controller\n",
 							req->tag, nvmeq->qid);
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
+ out:
+		spin_unlock_irqrestore(&dev_list_lock, flags);
 		return;
 	}
 
@@ -1064,15 +1074,22 @@
 	void *ctx;
 	nvme_completion_fn fn;
 	struct nvme_cmd_info *cmd;
-	static struct nvme_completion cqe = {
-		.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
-	};
+	struct nvme_completion cqe;
+
+	if (!blk_mq_request_started(req))
+		return;
 
 	cmd = blk_mq_rq_to_pdu(req);
 
 	if (cmd->ctx == CMD_CTX_CANCELLED)
 		return;
 
+	if (blk_queue_dying(req->q))
+		cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+	else
+		cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
+
+
 	dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
 						req->tag, nvmeq->qid);
 	ctx = cancel_cmd_info(cmd, &fn);
@@ -1084,17 +1101,29 @@
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = cmd->nvmeq;
 
-	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
-							nvmeq->qid);
-	if (nvmeq->dev->initialized)
-		nvme_abort_req(req);
-
 	/*
 	 * The aborted req will be completed on receiving the abort req.
 	 * We enable the timer again. If hit twice, it'll cause a device reset,
 	 * as the device then is in a faulty state.
 	 */
-	return BLK_EH_RESET_TIMER;
+	int ret = BLK_EH_RESET_TIMER;
+
+	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
+							nvmeq->qid);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	if (!nvmeq->dev->initialized) {
+		/*
+		 * Force cancelled command frees the request, which requires we
+		 * return BLK_EH_NOT_HANDLED.
+		 */
+		nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
+		ret = BLK_EH_NOT_HANDLED;
+	} else
+		nvme_abort_req(req);
+	spin_unlock_irq(&nvmeq->q_lock);
+
+	return ret;
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1131,10 +1160,16 @@
  */
 static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
-	int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
+	int vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
+	if (nvmeq->cq_vector == -1) {
+		spin_unlock_irq(&nvmeq->q_lock);
+		return 1;
+	}
+	vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
 	nvmeq->dev->online_queues--;
+	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	irq_set_affinity_hint(vector, NULL);
@@ -1169,11 +1204,13 @@
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
+	if (!qid && dev->admin_q)
+		blk_mq_freeze_queue_start(dev->admin_q);
 	nvme_clear_queue(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-							int depth, int vector)
+							int depth)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
@@ -1199,7 +1236,6 @@
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
-	nvmeq->cq_vector = vector;
 	nvmeq->qid = qid;
 	dev->queue_count++;
 	dev->queues[qid] = nvmeq;
@@ -1244,6 +1280,7 @@
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
 		return result;
@@ -1355,6 +1392,14 @@
 	.timeout	= nvme_timeout,
 };
 
+static void nvme_dev_remove_admin(struct nvme_dev *dev)
+{
+	if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
+		blk_cleanup_queue(dev->admin_q);
+		blk_mq_free_tag_set(&dev->admin_tagset);
+	}
+}
+
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
 	if (!dev->admin_q) {
@@ -1370,21 +1415,20 @@
 			return -ENOMEM;
 
 		dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-		if (!dev->admin_q) {
+		if (IS_ERR(dev->admin_q)) {
 			blk_mq_free_tag_set(&dev->admin_tagset);
 			return -ENOMEM;
 		}
-	}
+		if (!blk_get_queue(dev->admin_q)) {
+			nvme_dev_remove_admin(dev);
+			return -ENODEV;
+		}
+	} else
+		blk_mq_unfreeze_queue(dev->admin_q);
 
 	return 0;
 }
 
-static void nvme_free_admin_tags(struct nvme_dev *dev)
-{
-	if (dev->admin_q)
-		blk_mq_free_tag_set(&dev->admin_tagset);
-}
-
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1416,7 +1460,7 @@
 
 	nvmeq = dev->queues[0];
 	if (!nvmeq) {
-		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, 0);
+		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
 		if (!nvmeq)
 			return -ENOMEM;
 	}
@@ -1439,18 +1483,13 @@
 	if (result)
 		goto free_nvmeq;
 
-	result = nvme_alloc_admin_tags(dev);
+	nvmeq->cq_vector = 0;
+	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
 	if (result)
 		goto free_nvmeq;
 
-	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
-	if (result)
-		goto free_tags;
-
 	return result;
 
- free_tags:
-	nvme_free_admin_tags(dev);
  free_nvmeq:
 	nvme_free_queues(dev, 0);
 	return result;
@@ -1944,7 +1983,7 @@
 	unsigned i;
 
 	for (i = dev->queue_count; i <= dev->max_qid; i++)
-		if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1))
+		if (!nvme_alloc_queue(dev, i, dev->q_depth))
 			break;
 
 	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
@@ -2235,13 +2274,18 @@
 			break;
 		if (!schedule_timeout(ADMIN_TIMEOUT) ||
 					fatal_signal_pending(current)) {
+			/*
+			 * Disable the controller first since we can't trust it
+			 * at this point, but leave the admin queue enabled
+			 * until all queue deletion requests are flushed.
+			 * FIXME: This may take a while if there are more h/w
+			 * queues than admin tags.
+			 */
 			set_current_state(TASK_RUNNING);
-
 			nvme_disable_ctrl(dev, readq(&dev->bar->cap));
-			nvme_disable_queue(dev, 0);
-
-			send_sig(SIGKILL, dq->worker->task, 1);
+			nvme_clear_queue(dev->queues[0]);
 			flush_kthread_worker(dq->worker);
+			nvme_disable_queue(dev, 0);
 			return;
 		}
 	}
@@ -2318,7 +2362,6 @@
 {
 	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
 							cmdinfo.work);
-	allow_signal(SIGKILL);
 	if (nvme_delete_sq(nvmeq))
 		nvme_del_queue_end(nvmeq);
 }
@@ -2376,6 +2419,34 @@
 		kthread_stop(tmp);
 }
 
+static void nvme_freeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		blk_mq_freeze_queue_start(ns->queue);
+
+		spin_lock(ns->queue->queue_lock);
+		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+		spin_unlock(ns->queue->queue_lock);
+
+		blk_mq_cancel_requeue_work(ns->queue);
+		blk_mq_stop_hw_queues(ns->queue);
+	}
+}
+
+static void nvme_unfreeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+		blk_mq_unfreeze_queue(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+		blk_mq_kick_requeue_list(ns->queue);
+	}
+}
+
 static void nvme_dev_shutdown(struct nvme_dev *dev)
 {
 	int i;
@@ -2384,8 +2455,10 @@
 	dev->initialized = 0;
 	nvme_dev_list_remove(dev);
 
-	if (dev->bar)
+	if (dev->bar) {
+		nvme_freeze_queues(dev);
 		csts = readl(&dev->bar->csts);
+	}
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
 		for (i = dev->queue_count - 1; i >= 0; i--) {
 			struct nvme_queue *nvmeq = dev->queues[i];
@@ -2400,12 +2473,6 @@
 	nvme_dev_unmap(dev);
 }
 
-static void nvme_dev_remove_admin(struct nvme_dev *dev)
-{
-	if (dev->admin_q && !blk_queue_dying(dev->admin_q))
-		blk_cleanup_queue(dev->admin_q);
-}
-
 static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
@@ -2413,8 +2480,10 @@
 	list_for_each_entry(ns, &dev->namespaces, list) {
 		if (ns->disk->flags & GENHD_FL_UP)
 			del_gendisk(ns->disk);
-		if (!blk_queue_dying(ns->queue))
+		if (!blk_queue_dying(ns->queue)) {
+			blk_mq_abort_requeue_list(ns->queue);
 			blk_cleanup_queue(ns->queue);
+		}
 	}
 }
 
@@ -2495,6 +2564,7 @@
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
 	blk_mq_free_tag_set(&dev->tagset);
+	blk_put_queue(dev->admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -2591,15 +2661,20 @@
 	}
 
 	nvme_init_queue(dev->queues[0], 0);
+	result = nvme_alloc_admin_tags(dev);
+	if (result)
+		goto disable;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto disable;
+		goto free_tags;
 
 	nvme_set_irq_hints(dev);
 
 	return result;
 
+ free_tags:
+	nvme_dev_remove_admin(dev);
  disable:
 	nvme_disable_queue(dev, 0);
 	nvme_dev_list_remove(dev);
@@ -2639,6 +2714,9 @@
 		dev->reset_workfn = nvme_remove_disks;
 		queue_work(nvme_workq, &dev->reset_work);
 		spin_unlock(&dev_list_lock);
+	} else {
+		nvme_unfreeze_queues(dev);
+		nvme_set_irq_hints(dev);
 	}
 	dev->initialized = 1;
 	return 0;
@@ -2776,11 +2854,10 @@
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
 	misc_deregister(&dev->miscdev);
-	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
+	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-	nvme_free_admin_tags(dev);
 	nvme_release_prp_pools(dev);
 	kref_put(&dev->kref, nvme_free_dev);
 }
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3ec85df..8a86b62 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2098,32 +2098,26 @@
  * If an image has a non-zero parent overlap, get a reference to its
  * parent.
  *
- * We must get the reference before checking for the overlap to
- * coordinate properly with zeroing the parent overlap in
- * rbd_dev_v2_parent_info() when an image gets flattened.  We
- * drop it again if there is no overlap.
- *
  * Returns true if the rbd device has a parent with a non-zero
  * overlap and a reference for it was successfully taken, or
  * false otherwise.
  */
 static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
 {
-	int counter;
+	int counter = 0;
 
 	if (!rbd_dev->parent_spec)
 		return false;
 
-	counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
-	if (counter > 0 && rbd_dev->parent_overlap)
-		return true;
-
-	/* Image was flattened, but parent is not yet torn down */
+	down_read(&rbd_dev->header_rwsem);
+	if (rbd_dev->parent_overlap)
+		counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
+	up_read(&rbd_dev->header_rwsem);
 
 	if (counter < 0)
 		rbd_warn(rbd_dev, "parent reference overflow");
 
-	return false;
+	return counter > 0;
 }
 
 /*
@@ -4239,7 +4233,6 @@
 		 */
 		if (rbd_dev->parent_overlap) {
 			rbd_dev->parent_overlap = 0;
-			smp_mb();
 			rbd_dev_parent_put(rbd_dev);
 			pr_info("%s: clone image has been flattened\n",
 				rbd_dev->disk->disk_name);
@@ -4285,7 +4278,6 @@
 	 * treat it specially.
 	 */
 	rbd_dev->parent_overlap = overlap;
-	smp_mb();
 	if (!overlap) {
 
 		/* A null parent_spec indicates it's the initial probe */
@@ -5114,10 +5106,7 @@
 {
 	struct rbd_image_header	*header;
 
-	/* Drop parent reference unless it's already been done (or none) */
-
-	if (rbd_dev->parent_overlap)
-		rbd_dev_parent_put(rbd_dev);
+	rbd_dev_parent_put(rbd_dev);
 
 	/* Free dynamic fields from the header, then zero it out */
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7ef7c09..cdfbd21 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -638,7 +638,7 @@
 		goto out_put_disk;
 
 	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
-	if (!q) {
+	if (IS_ERR(q)) {
 		err = -ENOMEM;
 		goto out_free_tags;
 	}
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 860da40..0ce5e2d 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -1312,6 +1312,9 @@
 	if (!np)
 		return -ENODEV;
 
+	if (!of_device_is_available(np))
+		return -ENODEV;
+
 	cci_config = of_match_node(arm_cci_matches, np)->data;
 	if (!cci_config)
 		return -ENODEV;
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index eb7682d..81bf297 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -210,12 +210,25 @@
 }
 
 /* Checks whether the given window number is available */
+
+/* On Armada XP, 375 and 38x the MBus window 13 has the remap
+ * capability, like windows 0 to 7. However, the mvebu-mbus driver
+ * isn't currently taking into account this special case, which means
+ * that when window 13 is actually used, the remap registers are left
+ * to 0, making the device using this MBus window unavailable. The
+ * quick fix for stable is to not use window 13. A follow up patch
+ * will correctly handle this window.
+*/
 static int mvebu_mbus_window_is_free(struct mvebu_mbus_state *mbus,
 				     const int win)
 {
 	void __iomem *addr = mbus->mbuswins_base +
 		mbus->soc->win_cfg_offset(win);
 	u32 ctrl = readl(addr + WIN_CTRL_OFF);
+
+	if (win == 13)
+		return false;
+
 	return !(ctrl & WIN_CTRL_ENABLE);
 }
 
diff --git a/drivers/clk/at91/clk-slow.c b/drivers/clk/at91/clk-slow.c
index 32f7c1b..2f13bd5 100644
--- a/drivers/clk/at91/clk-slow.c
+++ b/drivers/clk/at91/clk-slow.c
@@ -70,6 +70,7 @@
 
 #define to_clk_sam9x5_slow(hw) container_of(hw, struct clk_sam9x5_slow, hw)
 
+static struct clk *slow_clk;
 
 static int clk_slow_osc_prepare(struct clk_hw *hw)
 {
@@ -357,6 +358,8 @@
 	clk = clk_register(NULL, &slowck->hw);
 	if (IS_ERR(clk))
 		kfree(slowck);
+	else
+		slow_clk = clk;
 
 	return clk;
 }
@@ -433,6 +436,8 @@
 	clk = clk_register(NULL, &slowck->hw);
 	if (IS_ERR(clk))
 		kfree(slowck);
+	else
+		slow_clk = clk;
 
 	return clk;
 }
@@ -465,3 +470,25 @@
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+
+/*
+ * FIXME: All slow clk users are not properly claiming it (get + prepare +
+ * enable) before using it.
+ * If all users properly claiming this clock decide that they don't need it
+ * anymore (or are removed), it is disabled while faulty users are still
+ * requiring it, and the system hangs.
+ * Prevent this clock from being disabled until all users are properly
+ * requesting it.
+ * Once this is done we should remove this function and the slow_clk variable.
+ */
+static int __init of_at91_clk_slow_retain(void)
+{
+	if (!slow_clk)
+		return 0;
+
+	__clk_get(slow_clk);
+	clk_prepare_enable(slow_clk);
+
+	return 0;
+}
+arch_initcall(of_at91_clk_slow_retain);
diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c
index 21784e4..440ef81 100644
--- a/drivers/clk/berlin/bg2q.c
+++ b/drivers/clk/berlin/bg2q.c
@@ -285,7 +285,6 @@
 	{ "pbridge",	"perif",	15, CLK_IGNORE_UNUSED },
 	{ "sdio",	"perif",	16, CLK_IGNORE_UNUSED },
 	{ "nfc",	"perif",	18 },
-	{ "smemc",	"perif",	19 },
 	{ "pcie",	"perif",	22 },
 };
 
diff --git a/drivers/clk/clk-ppc-corenet.c b/drivers/clk/clk-ppc-corenet.c
index b6e6c85..0a47d6f 100644
--- a/drivers/clk/clk-ppc-corenet.c
+++ b/drivers/clk/clk-ppc-corenet.c
@@ -291,7 +291,7 @@
 	{}
 };
 
-static struct platform_driver ppc_corenet_clk_driver __initdata = {
+static struct platform_driver ppc_corenet_clk_driver = {
 	.driver = {
 		.name = "ppc_corenet_clock",
 		.of_match_table = ppc_clk_ids,
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index f4963b7..d48ac71 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1366,7 +1366,7 @@
 		new_rate = clk->ops->determine_rate(clk->hw, rate,
 						    &best_parent_rate,
 						    &parent_hw);
-		parent = parent_hw->clk;
+		parent = parent_hw ? parent_hw->clk : NULL;
 	} else if (clk->ops->round_rate) {
 		new_rate = clk->ops->round_rate(clk->hw, rate,
 						&best_parent_rate);
diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c
index 75c8c45..8539c4f 100644
--- a/drivers/clk/rockchip/clk-cpu.c
+++ b/drivers/clk/rockchip/clk-cpu.c
@@ -124,10 +124,11 @@
 {
 	const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data;
 	unsigned long alt_prate, alt_div;
+	unsigned long flags;
 
 	alt_prate = clk_get_rate(cpuclk->alt_parent);
 
-	spin_lock(cpuclk->lock);
+	spin_lock_irqsave(cpuclk->lock, flags);
 
 	/*
 	 * If the old parent clock speed is less than the clock speed
@@ -164,7 +165,7 @@
 			cpuclk->reg_base + reg_data->core_reg);
 	}
 
-	spin_unlock(cpuclk->lock);
+	spin_unlock_irqrestore(cpuclk->lock, flags);
 	return 0;
 }
 
@@ -173,6 +174,7 @@
 {
 	const struct rockchip_cpuclk_reg_data *reg_data = cpuclk->reg_data;
 	const struct rockchip_cpuclk_rate_table *rate;
+	unsigned long flags;
 
 	rate = rockchip_get_cpuclk_settings(cpuclk, ndata->new_rate);
 	if (!rate) {
@@ -181,7 +183,7 @@
 		return -EINVAL;
 	}
 
-	spin_lock(cpuclk->lock);
+	spin_lock_irqsave(cpuclk->lock, flags);
 
 	if (ndata->old_rate < ndata->new_rate)
 		rockchip_cpuclk_set_dividers(cpuclk, rate);
@@ -201,7 +203,7 @@
 	if (ndata->old_rate > ndata->new_rate)
 		rockchip_cpuclk_set_dividers(cpuclk, rate);
 
-	spin_unlock(cpuclk->lock);
+	spin_unlock_irqrestore(cpuclk->lock, flags);
 	return 0;
 }
 
diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c
index c540789..7eb684c 100644
--- a/drivers/clk/rockchip/clk-rk3188.c
+++ b/drivers/clk/rockchip/clk-rk3188.c
@@ -210,6 +210,17 @@
 PNAME(mux_mac_p)		= { "gpll", "dpll" };
 PNAME(mux_sclk_macref_p)	= { "mac_src", "ext_rmii" };
 
+static struct rockchip_pll_clock rk3066_pll_clks[] __initdata = {
+	[apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0),
+		     RK2928_MODE_CON, 0, 5, 0, rk3188_pll_rates),
+	[dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4),
+		     RK2928_MODE_CON, 4, 4, 0, NULL),
+	[cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8),
+		     RK2928_MODE_CON, 8, 6, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates),
+	[gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12),
+		     RK2928_MODE_CON, 12, 7, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates),
+};
+
 static struct rockchip_pll_clock rk3188_pll_clks[] __initdata = {
 	[apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0),
 		     RK2928_MODE_CON, 0, 6, 0, rk3188_pll_rates),
@@ -427,11 +438,11 @@
 	/* hclk_peri gates */
 	GATE(0, "hclk_peri_axi_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS),
 	GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 6, GFLAGS),
-	GATE(0, "hclk_emem_peri", "hclk_peri", 0, RK2928_CLKGATE_CON(4), 7, GFLAGS),
+	GATE(0, "hclk_emem_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 7, GFLAGS),
 	GATE(HCLK_EMAC, "hclk_emac", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS),
 	GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS),
-	GATE(0, "hclk_usb_peri", "hclk_peri", 0, RK2928_CLKGATE_CON(4), 5, GFLAGS),
-	GATE(HCLK_OTG0, "hclk_usbotg0", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 13, GFLAGS),
+	GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 5, GFLAGS),
+	GATE(HCLK_OTG0, "hclk_usbotg0", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 13, GFLAGS),
 	GATE(HCLK_HSADC, "hclk_hsadc", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 5, GFLAGS),
 	GATE(HCLK_PIDF, "hclk_pidfilter", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 6, GFLAGS),
 	GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 10, GFLAGS),
@@ -592,7 +603,8 @@
 	GATE(0, "hclk_cif1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 6, GFLAGS),
 	GATE(0, "hclk_hdmi", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS),
 
-	GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 14, GFLAGS),
+	GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", CLK_IGNORE_UNUSED,
+			RK2928_CLKGATE_CON(5), 14, GFLAGS),
 
 	GATE(0, "aclk_cif1", "aclk_vio1", 0, RK2928_CLKGATE_CON(6), 7, GFLAGS),
 
@@ -680,7 +692,8 @@
 	GATE(0, "hclk_imem0", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS),
 	GATE(0, "hclk_imem1", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 15, GFLAGS),
 
-	GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS),
+	GATE(HCLK_OTG1, "hclk_usbotg1", "hclk_peri", CLK_IGNORE_UNUSED,
+			RK2928_CLKGATE_CON(7), 3, GFLAGS),
 	GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS),
 
 	GATE(PCLK_TIMER3, "pclk_timer3", "pclk_cpu", 0, RK2928_CLKGATE_CON(7), 9, GFLAGS),
@@ -735,8 +748,8 @@
 static void __init rk3066a_clk_init(struct device_node *np)
 {
 	rk3188_common_clk_init(np);
-	rockchip_clk_register_plls(rk3188_pll_clks,
-				   ARRAY_SIZE(rk3188_pll_clks),
+	rockchip_clk_register_plls(rk3066_pll_clks,
+				   ARRAY_SIZE(rk3066_pll_clks),
 				   RK3066_GRF_SOC_STATUS);
 	rockchip_clk_register_branches(rk3066a_clk_branches,
 				  ARRAY_SIZE(rk3066a_clk_branches));
diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c
index ac6be7c..11194b8 100644
--- a/drivers/clk/rockchip/clk-rk3288.c
+++ b/drivers/clk/rockchip/clk-rk3288.c
@@ -145,20 +145,20 @@
 	}
 
 static struct rockchip_cpuclk_rate_table rk3288_cpuclk_rates[] __initdata = {
-	RK3288_CPUCLK_RATE(1800000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE(1704000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE(1608000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE(1512000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE(1416000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE(1200000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE(1008000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 816000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 696000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 600000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 408000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 312000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 216000000, 2, 4, 2, 4, 4),
-	RK3288_CPUCLK_RATE( 126000000, 2, 4, 2, 4, 4),
+	RK3288_CPUCLK_RATE(1800000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE(1704000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE(1608000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE(1512000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE(1416000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE(1200000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE(1008000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 816000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 696000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 600000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 408000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 312000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 216000000, 1, 3, 1, 3, 3),
+	RK3288_CPUCLK_RATE( 126000000, 1, 3, 1, 3, 3),
 };
 
 static const struct rockchip_cpuclk_reg_data rk3288_cpuclk_data = {
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
index 0595dc6..f1e33d0 100644
--- a/drivers/clocksource/bcm_kona_timer.c
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -68,9 +68,8 @@
 }
 
 static void
-kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
+kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
 {
-	void __iomem *base = IOMEM(timer_base);
 	int loop_limit = 4;
 
 	/*
@@ -86,9 +85,9 @@
 	 */
 
 	while (--loop_limit) {
-		*msw = readl(base + KONA_GPTIMER_STCHI_OFFSET);
-		*lsw = readl(base + KONA_GPTIMER_STCLO_OFFSET);
-		if (*msw == readl(base + KONA_GPTIMER_STCHI_OFFSET))
+		*msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET);
+		*lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET);
+		if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET))
 			break;
 	}
 	if (!loop_limit) {
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 9403061..83564c9 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -97,8 +97,8 @@
 	writel_relaxed(value, reg_base + offset);
 
 	if (likely(offset >= EXYNOS4_MCT_L_BASE(0))) {
-		stat_addr = (offset & ~EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET;
-		switch (offset & EXYNOS4_MCT_L_MASK) {
+		stat_addr = (offset & EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET;
+		switch (offset & ~EXYNOS4_MCT_L_MASK) {
 		case MCT_L_TCON_OFFSET:
 			mask = 1 << 3;		/* L_TCON write status */
 			break;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 0f665b8..f150ca82 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -428,7 +428,7 @@
 	ced->features = CLOCK_EVT_FEAT_PERIODIC;
 	ced->features |= CLOCK_EVT_FEAT_ONESHOT;
 	ced->rating = 200;
-	ced->cpumask = cpumask_of(0);
+	ced->cpumask = cpu_possible_mask;
 	ced->set_next_event = sh_tmu_clock_event_next;
 	ced->set_mode = sh_tmu_clock_event_mode;
 	ced->suspend = sh_tmu_clock_event_suspend;
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 3804785..5c06254 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1505,7 +1505,6 @@
 	dw->regs = chip->regs;
 	chip->dw = dw;
 
-	pm_runtime_enable(chip->dev);
 	pm_runtime_get_sync(chip->dev);
 
 	dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
@@ -1703,7 +1702,6 @@
 	}
 
 	pm_runtime_put_sync_suspend(chip->dev);
-	pm_runtime_disable(chip->dev);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dw_dma_remove);
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index a630161..32ea1ac 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/clk.h>
+#include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
@@ -185,6 +186,8 @@
 	if (err)
 		return err;
 
+	pm_runtime_enable(&pdev->dev);
+
 	err = dw_dma_probe(chip, pdata);
 	if (err)
 		goto err_dw_dma_probe;
@@ -205,6 +208,7 @@
 	return 0;
 
 err_dw_dma_probe:
+	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(chip->clk);
 	return err;
 }
@@ -217,6 +221,7 @@
 		of_dma_controller_free(pdev->dev.of_node);
 
 	dw_dma_remove(chip);
+	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(chip->clk);
 
 	return 0;
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 55d4803..3d9e08f 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -272,7 +272,7 @@
 	for (gpio = 0; gpio < CRYSTALCOVE_GPIO_NUM; gpio++) {
 		if (pending & BIT(gpio)) {
 			virq = irq_find_mapping(cg->chip.irqdomain, gpio);
-			generic_handle_irq(virq);
+			handle_nested_irq(virq);
 		}
 	}
 
diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c
index 978b51e..ce3c155 100644
--- a/drivers/gpio/gpio-dln2.c
+++ b/drivers/gpio/gpio-dln2.c
@@ -47,13 +47,6 @@
 
 #define DLN2_GPIO_MAX_PINS 32
 
-struct dln2_irq_work {
-	struct work_struct work;
-	struct dln2_gpio *dln2;
-	int pin;
-	int type;
-};
-
 struct dln2_gpio {
 	struct platform_device *pdev;
 	struct gpio_chip gpio;
@@ -64,10 +57,12 @@
 	 */
 	DECLARE_BITMAP(output_enabled, DLN2_GPIO_MAX_PINS);
 
-	DECLARE_BITMAP(irqs_masked, DLN2_GPIO_MAX_PINS);
-	DECLARE_BITMAP(irqs_enabled, DLN2_GPIO_MAX_PINS);
-	DECLARE_BITMAP(irqs_pending, DLN2_GPIO_MAX_PINS);
-	struct dln2_irq_work *irq_work;
+	/* active IRQs - not synced to hardware */
+	DECLARE_BITMAP(unmasked_irqs, DLN2_GPIO_MAX_PINS);
+	/* active IRQS - synced to hardware */
+	DECLARE_BITMAP(enabled_irqs, DLN2_GPIO_MAX_PINS);
+	int irq_type[DLN2_GPIO_MAX_PINS];
+	struct mutex irq_lock;
 };
 
 struct dln2_gpio_pin {
@@ -141,16 +136,16 @@
 	return !!ret;
 }
 
-static void dln2_gpio_pin_set_out_val(struct dln2_gpio *dln2,
-				      unsigned int pin, int value)
+static int dln2_gpio_pin_set_out_val(struct dln2_gpio *dln2,
+				     unsigned int pin, int value)
 {
 	struct dln2_gpio_pin_val req = {
 		.pin = cpu_to_le16(pin),
 		.value = value,
 	};
 
-	dln2_transfer_tx(dln2->pdev, DLN2_GPIO_PIN_SET_OUT_VAL, &req,
-			 sizeof(req));
+	return dln2_transfer_tx(dln2->pdev, DLN2_GPIO_PIN_SET_OUT_VAL, &req,
+				sizeof(req));
 }
 
 #define DLN2_GPIO_DIRECTION_IN		0
@@ -267,6 +262,13 @@
 static int dln2_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
 				      int value)
 {
+	struct dln2_gpio *dln2 = container_of(chip, struct dln2_gpio, gpio);
+	int ret;
+
+	ret = dln2_gpio_pin_set_out_val(dln2, offset, value);
+	if (ret < 0)
+		return ret;
+
 	return dln2_gpio_set_direction(chip, offset, DLN2_GPIO_DIRECTION_OUT);
 }
 
@@ -297,36 +299,13 @@
 				&req, sizeof(req));
 }
 
-static void dln2_irq_work(struct work_struct *w)
-{
-	struct dln2_irq_work *iw = container_of(w, struct dln2_irq_work, work);
-	struct dln2_gpio *dln2 = iw->dln2;
-	u8 type = iw->type & DLN2_GPIO_EVENT_MASK;
-
-	if (test_bit(iw->pin, dln2->irqs_enabled))
-		dln2_gpio_set_event_cfg(dln2, iw->pin, type, 0);
-	else
-		dln2_gpio_set_event_cfg(dln2, iw->pin, DLN2_GPIO_EVENT_NONE, 0);
-}
-
-static void dln2_irq_enable(struct irq_data *irqd)
+static void dln2_irq_unmask(struct irq_data *irqd)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
 	struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio);
 	int pin = irqd_to_hwirq(irqd);
 
-	set_bit(pin, dln2->irqs_enabled);
-	schedule_work(&dln2->irq_work[pin].work);
-}
-
-static void dln2_irq_disable(struct irq_data *irqd)
-{
-	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
-	struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio);
-	int pin = irqd_to_hwirq(irqd);
-
-	clear_bit(pin, dln2->irqs_enabled);
-	schedule_work(&dln2->irq_work[pin].work);
+	set_bit(pin, dln2->unmasked_irqs);
 }
 
 static void dln2_irq_mask(struct irq_data *irqd)
@@ -335,27 +314,7 @@
 	struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio);
 	int pin = irqd_to_hwirq(irqd);
 
-	set_bit(pin, dln2->irqs_masked);
-}
-
-static void dln2_irq_unmask(struct irq_data *irqd)
-{
-	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
-	struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio);
-	struct device *dev = dln2->gpio.dev;
-	int pin = irqd_to_hwirq(irqd);
-
-	if (test_and_clear_bit(pin, dln2->irqs_pending)) {
-		int irq;
-
-		irq = irq_find_mapping(dln2->gpio.irqdomain, pin);
-		if (!irq) {
-			dev_err(dev, "pin %d not mapped to IRQ\n", pin);
-			return;
-		}
-
-		generic_handle_irq(irq);
-	}
+	clear_bit(pin, dln2->unmasked_irqs);
 }
 
 static int dln2_irq_set_type(struct irq_data *irqd, unsigned type)
@@ -366,19 +325,19 @@
 
 	switch (type) {
 	case IRQ_TYPE_LEVEL_HIGH:
-		dln2->irq_work[pin].type = DLN2_GPIO_EVENT_LVL_HIGH;
+		dln2->irq_type[pin] = DLN2_GPIO_EVENT_LVL_HIGH;
 		break;
 	case IRQ_TYPE_LEVEL_LOW:
-		dln2->irq_work[pin].type = DLN2_GPIO_EVENT_LVL_LOW;
+		dln2->irq_type[pin] = DLN2_GPIO_EVENT_LVL_LOW;
 		break;
 	case IRQ_TYPE_EDGE_BOTH:
-		dln2->irq_work[pin].type = DLN2_GPIO_EVENT_CHANGE;
+		dln2->irq_type[pin] = DLN2_GPIO_EVENT_CHANGE;
 		break;
 	case IRQ_TYPE_EDGE_RISING:
-		dln2->irq_work[pin].type = DLN2_GPIO_EVENT_CHANGE_RISING;
+		dln2->irq_type[pin] = DLN2_GPIO_EVENT_CHANGE_RISING;
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
-		dln2->irq_work[pin].type = DLN2_GPIO_EVENT_CHANGE_FALLING;
+		dln2->irq_type[pin] = DLN2_GPIO_EVENT_CHANGE_FALLING;
 		break;
 	default:
 		return -EINVAL;
@@ -387,13 +346,50 @@
 	return 0;
 }
 
+static void dln2_irq_bus_lock(struct irq_data *irqd)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
+	struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio);
+
+	mutex_lock(&dln2->irq_lock);
+}
+
+static void dln2_irq_bus_unlock(struct irq_data *irqd)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd);
+	struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio);
+	int pin = irqd_to_hwirq(irqd);
+	int enabled, unmasked;
+	unsigned type;
+	int ret;
+
+	enabled = test_bit(pin, dln2->enabled_irqs);
+	unmasked = test_bit(pin, dln2->unmasked_irqs);
+
+	if (enabled != unmasked) {
+		if (unmasked) {
+			type = dln2->irq_type[pin] & DLN2_GPIO_EVENT_MASK;
+			set_bit(pin, dln2->enabled_irqs);
+		} else {
+			type = DLN2_GPIO_EVENT_NONE;
+			clear_bit(pin, dln2->enabled_irqs);
+		}
+
+		ret = dln2_gpio_set_event_cfg(dln2, pin, type, 0);
+		if (ret)
+			dev_err(dln2->gpio.dev, "failed to set event\n");
+	}
+
+	mutex_unlock(&dln2->irq_lock);
+}
+
 static struct irq_chip dln2_gpio_irqchip = {
 	.name = "dln2-irq",
-	.irq_enable = dln2_irq_enable,
-	.irq_disable = dln2_irq_disable,
 	.irq_mask = dln2_irq_mask,
 	.irq_unmask = dln2_irq_unmask,
 	.irq_set_type = dln2_irq_set_type,
+	.irq_bus_lock = dln2_irq_bus_lock,
+	.irq_bus_sync_unlock = dln2_irq_bus_unlock,
 };
 
 static void dln2_gpio_event(struct platform_device *pdev, u16 echo,
@@ -425,14 +421,7 @@
 		return;
 	}
 
-	if (!test_bit(pin, dln2->irqs_enabled))
-		return;
-	if (test_bit(pin, dln2->irqs_masked)) {
-		set_bit(pin, dln2->irqs_pending);
-		return;
-	}
-
-	switch (dln2->irq_work[pin].type) {
+	switch (dln2->irq_type[pin]) {
 	case DLN2_GPIO_EVENT_CHANGE_RISING:
 		if (event->value)
 			generic_handle_irq(irq);
@@ -451,7 +440,7 @@
 	struct dln2_gpio *dln2;
 	struct device *dev = &pdev->dev;
 	int pins;
-	int i, ret;
+	int ret;
 
 	pins = dln2_gpio_get_pin_count(pdev);
 	if (pins < 0) {
@@ -467,15 +456,7 @@
 	if (!dln2)
 		return -ENOMEM;
 
-	dln2->irq_work = devm_kcalloc(&pdev->dev, pins,
-				      sizeof(struct dln2_irq_work), GFP_KERNEL);
-	if (!dln2->irq_work)
-		return -ENOMEM;
-	for (i = 0; i < pins; i++) {
-		INIT_WORK(&dln2->irq_work[i].work, dln2_irq_work);
-		dln2->irq_work[i].pin = i;
-		dln2->irq_work[i].dln2 = dln2;
-	}
+	mutex_init(&dln2->irq_lock);
 
 	dln2->pdev = pdev;
 
@@ -529,11 +510,8 @@
 static int dln2_gpio_remove(struct platform_device *pdev)
 {
 	struct dln2_gpio *dln2 = platform_get_drvdata(pdev);
-	int i;
 
 	dln2_unregister_event_cb(pdev, DLN2_GPIO_CONDITION_MET_EV);
-	for (i = 0; i < dln2->gpio.ngpio; i++)
-		flush_work(&dln2->irq_work[i].work);
 	gpiochip_remove(&dln2->gpio);
 
 	return 0;
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
index 09daaf2..3a5a710 100644
--- a/drivers/gpio/gpio-grgpio.c
+++ b/drivers/gpio/gpio-grgpio.c
@@ -441,7 +441,8 @@
 	err = gpiochip_add(gc);
 	if (err) {
 		dev_err(&ofdev->dev, "Could not add gpiochip\n");
-		irq_domain_remove(priv->domain);
+		if (priv->domain)
+			irq_domain_remove(priv->domain);
 		return err;
 	}
 
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 604dbe6..08261f2 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -45,8 +45,14 @@
 		return false;
 
 	ret = gc->of_xlate(gc, &gg_data->gpiospec, gg_data->flags);
-	if (ret < 0)
-		return false;
+	if (ret < 0) {
+		/* We've found the gpio chip, but the translation failed.
+		 * Return true to stop looking and return the translation
+		 * error via out_gpio
+		 */
+		gg_data->out_gpio = ERR_PTR(ret);
+		return true;
+	 }
 
 	gg_data->out_gpio = gpiochip_get_desc(gc, ret);
 	return true;
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index 2ac1800..f62aa11 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -128,7 +128,7 @@
 	return status;
 }
 
-static const DEVICE_ATTR(value, 0644,
+static DEVICE_ATTR(value, 0644,
 		gpio_value_show, gpio_value_store);
 
 static irqreturn_t gpio_sysfs_irq(int irq, void *priv)
@@ -353,17 +353,46 @@
 	return status ? : size;
 }
 
-static const DEVICE_ATTR(active_low, 0644,
+static DEVICE_ATTR(active_low, 0644,
 		gpio_active_low_show, gpio_active_low_store);
 
-static const struct attribute *gpio_attrs[] = {
+static umode_t gpio_is_visible(struct kobject *kobj, struct attribute *attr,
+			       int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct gpio_desc *desc = dev_get_drvdata(dev);
+	umode_t mode = attr->mode;
+	bool show_direction = test_bit(FLAG_SYSFS_DIR, &desc->flags);
+
+	if (attr == &dev_attr_direction.attr) {
+		if (!show_direction)
+			mode = 0;
+	} else if (attr == &dev_attr_edge.attr) {
+		if (gpiod_to_irq(desc) < 0)
+			mode = 0;
+		if (!show_direction && test_bit(FLAG_IS_OUT, &desc->flags))
+			mode = 0;
+	}
+
+	return mode;
+}
+
+static struct attribute *gpio_attrs[] = {
+	&dev_attr_direction.attr,
+	&dev_attr_edge.attr,
 	&dev_attr_value.attr,
 	&dev_attr_active_low.attr,
 	NULL,
 };
 
-static const struct attribute_group gpio_attr_group = {
-	.attrs = (struct attribute **) gpio_attrs,
+static const struct attribute_group gpio_group = {
+	.attrs = gpio_attrs,
+	.is_visible = gpio_is_visible,
+};
+
+static const struct attribute_group *gpio_groups[] = {
+	&gpio_group,
+	NULL
 };
 
 /*
@@ -400,16 +429,13 @@
 }
 static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
 
-static const struct attribute *gpiochip_attrs[] = {
+static struct attribute *gpiochip_attrs[] = {
 	&dev_attr_base.attr,
 	&dev_attr_label.attr,
 	&dev_attr_ngpio.attr,
 	NULL,
 };
-
-static const struct attribute_group gpiochip_attr_group = {
-	.attrs = (struct attribute **) gpiochip_attrs,
-};
+ATTRIBUTE_GROUPS(gpiochip);
 
 /*
  * /sys/class/gpio/export ... write-only
@@ -556,45 +582,30 @@
 		goto fail_unlock;
 	}
 
-	if (!desc->chip->direction_input || !desc->chip->direction_output)
-		direction_may_change = false;
+	if (desc->chip->direction_input && desc->chip->direction_output &&
+			direction_may_change) {
+		set_bit(FLAG_SYSFS_DIR, &desc->flags);
+	}
+
 	spin_unlock_irqrestore(&gpio_lock, flags);
 
 	offset = gpio_chip_hwgpio(desc);
 	if (desc->chip->names && desc->chip->names[offset])
 		ioname = desc->chip->names[offset];
 
-	dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
-			    desc, ioname ? ioname : "gpio%u",
-			    desc_to_gpio(desc));
+	dev = device_create_with_groups(&gpio_class, desc->chip->dev,
+					MKDEV(0, 0), desc, gpio_groups,
+					ioname ? ioname : "gpio%u",
+					desc_to_gpio(desc));
 	if (IS_ERR(dev)) {
 		status = PTR_ERR(dev);
 		goto fail_unlock;
 	}
 
-	status = sysfs_create_group(&dev->kobj, &gpio_attr_group);
-	if (status)
-		goto fail_unregister_device;
-
-	if (direction_may_change) {
-		status = device_create_file(dev, &dev_attr_direction);
-		if (status)
-			goto fail_unregister_device;
-	}
-
-	if (gpiod_to_irq(desc) >= 0 && (direction_may_change ||
-				       !test_bit(FLAG_IS_OUT, &desc->flags))) {
-		status = device_create_file(dev, &dev_attr_edge);
-		if (status)
-			goto fail_unregister_device;
-	}
-
 	set_bit(FLAG_EXPORT, &desc->flags);
 	mutex_unlock(&sysfs_lock);
 	return 0;
 
-fail_unregister_device:
-	device_unregister(dev);
 fail_unlock:
 	mutex_unlock(&sysfs_lock);
 	gpiod_dbg(desc, "%s: status %d\n", __func__, status);
@@ -718,6 +729,7 @@
 		dev = class_find_device(&gpio_class, NULL, desc, match_export);
 		if (dev) {
 			gpio_setup_irq(desc, dev, 0);
+			clear_bit(FLAG_SYSFS_DIR, &desc->flags);
 			clear_bit(FLAG_EXPORT, &desc->flags);
 		} else
 			status = -ENODEV;
@@ -750,13 +762,13 @@
 
 	/* use chip->base for the ID; it's already known to be unique */
 	mutex_lock(&sysfs_lock);
-	dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
-				"gpiochip%d", chip->base);
-	if (!IS_ERR(dev)) {
-		status = sysfs_create_group(&dev->kobj,
-				&gpiochip_attr_group);
-	} else
+	dev = device_create_with_groups(&gpio_class, chip->dev, MKDEV(0, 0),
+					chip, gpiochip_groups,
+					"gpiochip%d", chip->base);
+	if (IS_ERR(dev))
 		status = PTR_ERR(dev);
+	else
+		status = 0;
 	chip->exported = (status == 0);
 	mutex_unlock(&sysfs_lock);
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 487afe6..568aa2b 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -248,29 +248,30 @@
 		base = gpiochip_find_base(chip->ngpio);
 		if (base < 0) {
 			status = base;
-			goto unlock;
+			spin_unlock_irqrestore(&gpio_lock, flags);
+			goto err_free_descs;
 		}
 		chip->base = base;
 	}
 
 	status = gpiochip_add_to_list(chip);
+	if (status) {
+		spin_unlock_irqrestore(&gpio_lock, flags);
+		goto err_free_descs;
+	}
 
-	if (status == 0) {
-		for (id = 0; id < chip->ngpio; id++) {
-			struct gpio_desc *desc = &descs[id];
-			desc->chip = chip;
+	for (id = 0; id < chip->ngpio; id++) {
+		struct gpio_desc *desc = &descs[id];
 
-			/* REVISIT:  most hardware initializes GPIOs as
-			 * inputs (often with pullups enabled) so power
-			 * usage is minimized.  Linux code should set the
-			 * gpio direction first thing; but until it does,
-			 * and in case chip->get_direction is not set,
-			 * we may expose the wrong direction in sysfs.
-			 */
-			desc->flags = !chip->direction_input
-				? (1 << FLAG_IS_OUT)
-				: 0;
-		}
+		desc->chip = chip;
+
+		/* REVISIT: most hardware initializes GPIOs as inputs (often
+		 * with pullups enabled) so power usage is minimized. Linux
+		 * code should set the gpio direction first thing; but until
+		 * it does, and in case chip->get_direction is not set, we may
+		 * expose the wrong direction in sysfs.
+		 */
+		desc->flags = !chip->direction_input ? (1 << FLAG_IS_OUT) : 0;
 	}
 
 	chip->desc = descs;
@@ -284,12 +285,9 @@
 	of_gpiochip_add(chip);
 	acpi_gpiochip_add(chip);
 
-	if (status)
-		goto fail;
-
 	status = gpiochip_export(chip);
 	if (status)
-		goto fail;
+		goto err_remove_chip;
 
 	pr_debug("%s: registered GPIOs %d to %d on device: %s\n", __func__,
 		chip->base, chip->base + chip->ngpio - 1,
@@ -297,11 +295,15 @@
 
 	return 0;
 
-unlock:
+err_remove_chip:
+	acpi_gpiochip_remove(chip);
+	of_gpiochip_remove(chip);
+	spin_lock_irqsave(&gpio_lock, flags);
+	list_del(&chip->list);
 	spin_unlock_irqrestore(&gpio_lock, flags);
-fail:
-	kfree(descs);
 	chip->desc = NULL;
+err_free_descs:
+	kfree(descs);
 
 	/* failures here can mean systems won't boot... */
 	pr_err("%s: GPIOs %d..%d (%s) failed to register\n", __func__,
@@ -325,14 +327,15 @@
 	unsigned long	flags;
 	unsigned	id;
 
-	acpi_gpiochip_remove(chip);
-
-	spin_lock_irqsave(&gpio_lock, flags);
+	gpiochip_unexport(chip);
 
 	gpiochip_irqchip_remove(chip);
+
+	acpi_gpiochip_remove(chip);
 	gpiochip_remove_pin_ranges(chip);
 	of_gpiochip_remove(chip);
 
+	spin_lock_irqsave(&gpio_lock, flags);
 	for (id = 0; id < chip->ngpio; id++) {
 		if (test_bit(FLAG_REQUESTED, &chip->desc[id].flags))
 			dev_crit(chip->dev, "REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED\n");
@@ -342,7 +345,6 @@
 
 	list_del(&chip->list);
 	spin_unlock_irqrestore(&gpio_lock, flags);
-	gpiochip_unexport(chip);
 
 	kfree(chip->desc);
 	chip->desc = NULL;
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index e3a5211..550a5ea 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -77,6 +77,7 @@
 #define FLAG_OPEN_DRAIN	7	/* Gpio is open drain type */
 #define FLAG_OPEN_SOURCE 8	/* Gpio is open source type */
 #define FLAG_USED_AS_IRQ 9	/* GPIO is connected to an IRQ */
+#define FLAG_SYSFS_DIR	10	/* show sysfs direction attribute */
 
 #define ID_SHIFT	16	/* add new flags before this one */
 
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index be6246d..307a309 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -8,7 +8,6 @@
 		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
 		kfd_process.o kfd_queue.o kfd_mqd_manager.o \
 		kfd_kernel_queue.o kfd_packet_manager.o \
-		kfd_process_queue_manager.o kfd_device_queue_manager.o \
-		kfd_interrupt.o
+		kfd_process_queue_manager.o kfd_device_queue_manager.o
 
 obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 43884eb..25bc47f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -169,9 +170,8 @@
 	kfd->shared_resources = *gpu_resources;
 
 	/* calculate max size of mqds needed for queues */
-	size = max_num_of_processes *
-		max_num_of_queues_per_process *
-		kfd->device_info->mqd_size_aligned;
+	size = max_num_of_queues_per_device *
+			kfd->device_info->mqd_size_aligned;
 
 	/* add another 512KB for all other allocations on gart */
 	size += 512 * 1024;
@@ -192,13 +192,6 @@
 		goto kfd_topology_add_device_error;
 	}
 
-	if (kfd_interrupt_init(kfd)) {
-		dev_err(kfd_device,
-			"Error initializing interrupts for device (%x:%x)\n",
-			kfd->pdev->vendor, kfd->pdev->device);
-		goto kfd_interrupt_error;
-	}
-
 	if (!device_iommu_pasid_init(kfd)) {
 		dev_err(kfd_device,
 			"Error initializing iommuv2 for device (%x:%x)\n",
@@ -237,8 +230,6 @@
 device_queue_manager_error:
 	amd_iommu_free_device(kfd->pdev);
 device_iommu_pasid_error:
-	kfd_interrupt_exit(kfd);
-kfd_interrupt_error:
 	kfd_topology_remove_device(kfd);
 kfd_topology_add_device_error:
 	kfd2kgd->fini_sa_manager(kfd->kgd);
@@ -254,7 +245,6 @@
 	if (kfd->init_complete) {
 		device_queue_manager_uninit(kfd->dqm);
 		amd_iommu_free_device(kfd->pdev);
-		kfd_interrupt_exit(kfd);
 		kfd_topology_remove_device(kfd);
 	}
 
@@ -296,13 +286,5 @@
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
-	if (kfd->init_complete) {
-		spin_lock(&kfd->interrupt_lock);
-
-		if (kfd->interrupts_active
-		    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
-			schedule_work(&kfd->interrupt_work);
-
-		spin_unlock(&kfd->interrupt_lock);
-	}
+	/* Process interrupts / schedule work as necessary */
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 9c8961d..0d8694f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -183,6 +183,13 @@
 
 	mutex_lock(&dqm->lock);
 
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
 	if (list_empty(&qpd->queues_list)) {
 		retval = allocate_vmid(dqm, qpd, q);
 		if (retval != 0) {
@@ -207,6 +214,14 @@
 	list_add(&q->list, &qpd->queues_list);
 	dqm->queue_count++;
 
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	mutex_unlock(&dqm->lock);
 	return 0;
 }
@@ -280,7 +295,7 @@
 			q->queue);
 
 	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
-			q->queue, q->properties.write_ptr);
+			q->queue, (uint32_t __user *) q->properties.write_ptr);
 	if (retval != 0) {
 		deallocate_hqd(dqm, q);
 		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -326,6 +341,15 @@
 	if (list_empty(&qpd->queues_list))
 		deallocate_vmid(dqm, qpd, q);
 	dqm->queue_count--;
+
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -541,10 +565,14 @@
 
 	for (i = 0; i < pipes_num; i++) {
 		inx = i + first_pipe;
+		/*
+		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
+		 * space in GTT for pipelines we don't initialize
+		 */
 		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
 		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
 		/* = log2(bytes/4)-1 */
-		kfd2kgd->init_pipeline(dqm->dev->kgd, i,
+		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
 				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
 	}
 
@@ -560,7 +588,7 @@
 
 	pr_debug("kfd: In %s\n", __func__);
 
-	retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
+	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
 	if (retval != 0)
 		return retval;
 
@@ -752,6 +780,21 @@
 	pr_debug("kfd: In func %s\n", __func__);
 
 	mutex_lock(&dqm->lock);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	list_add(&kq->list, &qpd->priv_queue_list);
 	dqm->queue_count++;
 	qpd->is_debug = true;
@@ -775,6 +818,13 @@
 	dqm->queue_count--;
 	qpd->is_debug = false;
 	execute_queues_cpsch(dqm, false);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
 	mutex_unlock(&dqm->lock);
 }
 
@@ -793,6 +843,13 @@
 
 	mutex_lock(&dqm->lock);
 
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out;
+	}
+
 	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
 	if (mqd == NULL) {
 		mutex_unlock(&dqm->lock);
@@ -810,6 +867,15 @@
 		retval = execute_queues_cpsch(dqm, false);
 	}
 
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
@@ -930,6 +996,14 @@
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
 	mutex_unlock(&dqm->lock);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index c3f189e8..52035bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -130,6 +130,7 @@
 	struct list_head	queues;
 	unsigned int		processes_count;
 	unsigned int		queue_count;
+	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
 	unsigned int		vmid_bitmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
deleted file mode 100644
index 5b99909..0000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * KFD Interrupts.
- *
- * AMD GPUs deliver interrupts by pushing an interrupt description onto the
- * interrupt ring and then sending an interrupt. KGD receives the interrupt
- * in ISR and sends us a pointer to each new entry on the interrupt ring.
- *
- * We generally can't process interrupt-signaled events from ISR, so we call
- * out to each interrupt client module (currently only the scheduler) to ask if
- * each interrupt is interesting. If they return true, then it requires further
- * processing so we copy it to an internal interrupt ring and call each
- * interrupt client again from a work-queue.
- *
- * There's no acknowledgment for the interrupts we use. The hardware simply
- * queues a new interrupt each time without waiting.
- *
- * The fixed-size internal queue means that it's possible for us to lose
- * interrupts because we have no back-pressure to the hardware.
- */
-
-#include <linux/slab.h>
-#include <linux/device.h>
-#include "kfd_priv.h"
-
-#define KFD_INTERRUPT_RING_SIZE 256
-
-static void interrupt_wq(struct work_struct *);
-
-int kfd_interrupt_init(struct kfd_dev *kfd)
-{
-	void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
-					kfd->device_info->ih_ring_entry_size,
-					GFP_KERNEL);
-	if (!interrupt_ring)
-		return -ENOMEM;
-
-	kfd->interrupt_ring = interrupt_ring;
-	kfd->interrupt_ring_size =
-		KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
-	atomic_set(&kfd->interrupt_ring_wptr, 0);
-	atomic_set(&kfd->interrupt_ring_rptr, 0);
-
-	spin_lock_init(&kfd->interrupt_lock);
-
-	INIT_WORK(&kfd->interrupt_work, interrupt_wq);
-
-	kfd->interrupts_active = true;
-
-	/*
-	 * After this function returns, the interrupt will be enabled. This
-	 * barrier ensures that the interrupt running on a different processor
-	 * sees all the above writes.
-	 */
-	smp_wmb();
-
-	return 0;
-}
-
-void kfd_interrupt_exit(struct kfd_dev *kfd)
-{
-	/*
-	 * Stop the interrupt handler from writing to the ring and scheduling
-	 * workqueue items. The spinlock ensures that any interrupt running
-	 * after we have unlocked sees interrupts_active = false.
-	 */
-	unsigned long flags;
-
-	spin_lock_irqsave(&kfd->interrupt_lock, flags);
-	kfd->interrupts_active = false;
-	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
-
-	/*
-	 * Flush_scheduled_work ensures that there are no outstanding
-	 * work-queue items that will access interrupt_ring. New work items
-	 * can't be created because we stopped interrupt handling above.
-	 */
-	flush_scheduled_work();
-
-	kfree(kfd->interrupt_ring);
-}
-
-/*
- * This assumes that it can't be called concurrently with itself
- * but only with dequeue_ih_ring_entry.
- */
-bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry)
-{
-	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
-	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
-
-	if ((rptr - wptr) % kfd->interrupt_ring_size ==
-					kfd->device_info->ih_ring_entry_size) {
-		/* This is very bad, the system is likely to hang. */
-		dev_err_ratelimited(kfd_chardev(),
-			"Interrupt ring overflow, dropping interrupt.\n");
-		return false;
-	}
-
-	memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
-			kfd->device_info->ih_ring_entry_size);
-
-	wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
-			kfd->interrupt_ring_size;
-	smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
-	atomic_set(&kfd->interrupt_ring_wptr, wptr);
-
-	return true;
-}
-
-/*
- * This assumes that it can't be called concurrently with itself
- * but only with enqueue_ih_ring_entry.
- */
-static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
-{
-	/*
-	 * Assume that wait queues have an implicit barrier, i.e. anything that
-	 * happened in the ISR before it queued work is visible.
-	 */
-
-	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
-	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
-
-	if (rptr == wptr)
-		return false;
-
-	memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
-			kfd->device_info->ih_ring_entry_size);
-
-	rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
-			kfd->interrupt_ring_size;
-
-	/*
-	 * Ensure the rptr write update is not visible until
-	 * memcpy has finished reading.
-	 */
-	smp_mb();
-	atomic_set(&kfd->interrupt_ring_rptr, rptr);
-
-	return true;
-}
-
-static void interrupt_wq(struct work_struct *work)
-{
-	struct kfd_dev *dev = container_of(work, struct kfd_dev,
-						interrupt_work);
-
-	uint32_t ih_ring_entry[DIV_ROUND_UP(
-				dev->device_info->ih_ring_entry_size,
-				sizeof(uint32_t))];
-
-	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
-		;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 95d5af1..a8be6df 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -50,15 +50,10 @@
 MODULE_PARM_DESC(sched_policy,
 	"Kernel cmdline parameter that defines the amdkfd scheduling policy");
 
-int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
-module_param(max_num_of_processes, int, 0444);
-MODULE_PARM_DESC(max_num_of_processes,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
-
-int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
-module_param(max_num_of_queues_per_process, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_process,
-	"Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
 
 bool kgd2kfd_init(unsigned interface_version,
 		  const struct kfd2kgd_calls *f2g,
@@ -100,16 +95,10 @@
 	}
 
 	/* Verify module parameters */
-	if ((max_num_of_processes < 0) ||
-		(max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
-		pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
-		return -1;
-	}
-
-	if ((max_num_of_queues_per_process < 0) ||
-		(max_num_of_queues_per_process >
-			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
-		pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
+	if ((max_num_of_queues_per_device < 0) ||
+		(max_num_of_queues_per_device >
+			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+		pr_err("kfd: max_num_of_queues_per_device must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
 		return -1;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index 4c25ef5..6cfe7f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -30,7 +30,7 @@
 
 int kfd_pasid_init(void)
 {
-	pasid_limit = max_num_of_processes;
+	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
 
 	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
 	if (!pasid_bitmap)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a5edb29..96dc10e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -52,20 +52,19 @@
 #define kfd_alloc_struct(ptr_to_struct)	\
 	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
 
-/* Kernel module parameter to specify maximum number of supported processes */
-extern int max_num_of_processes;
-
-#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
 #define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
 
 /*
- * Kernel module parameter to specify maximum number of supported queues
- * per process
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
  */
-extern int max_num_of_queues_per_process;
+extern int max_num_of_queues_per_device;
 
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
+	(KFD_MAX_NUM_OF_PROCESSES *			\
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 
 #define KFD_KERNEL_QUEUE_SIZE 2048
 
@@ -135,22 +134,10 @@
 
 	struct kgd2kfd_shared_resources shared_resources;
 
-	void *interrupt_ring;
-	size_t interrupt_ring_size;
-	atomic_t interrupt_ring_rptr;
-	atomic_t interrupt_ring_wptr;
-	struct work_struct interrupt_work;
-	spinlock_t interrupt_lock;
-
 	/* QCM Device instance */
 	struct device_queue_manager *dqm;
 
 	bool init_complete;
-	/*
-	 * Interrupts of interest to KFD are copied
-	 * from the HW ring into a SW ring.
-	 */
-	bool interrupts_active;
 };
 
 /* KGD2KFD callbacks */
@@ -531,10 +518,7 @@
 struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
 
 /* Interrupts */
-int kfd_interrupt_init(struct kfd_dev *dev);
-void kfd_interrupt_exit(struct kfd_dev *dev);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
-bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry);
 
 /* Power Management */
 void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 4752678..f37cf5e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -54,11 +54,11 @@
 	pr_debug("kfd: in %s\n", __func__);
 
 	found = find_first_zero_bit(pqm->queue_slot_bitmap,
-			max_num_of_queues_per_process);
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 
 	pr_debug("kfd: the new slot id %lu\n", found);
 
-	if (found >= max_num_of_queues_per_process) {
+	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
 		pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
 				pqm->process->pasid);
 		return -ENOMEM;
@@ -76,7 +76,7 @@
 
 	INIT_LIST_HEAD(&pqm->queues);
 	pqm->queue_slot_bitmap =
-			kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process,
+			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 					BITS_PER_BYTE), GFP_KERNEL);
 	if (pqm->queue_slot_bitmap == NULL)
 		return -ENOMEM;
@@ -203,6 +203,7 @@
 		pqn->kq = NULL;
 		retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
 						&q->properties.vmid);
+		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
@@ -222,7 +223,7 @@
 	}
 
 	if (retval != 0) {
-		pr_err("kfd: error dqm create queue\n");
+		pr_debug("Error dqm create queue\n");
 		goto err_create_queue;
 	}
 
@@ -241,7 +242,10 @@
 err_create_queue:
 	kfree(pqn);
 err_allocate_pqn:
+	/* check if queues list is empty unregister process from device */
 	clear_bit(*qid, pqm->queue_slot_bitmap);
+	if (list_empty(&pqm->queues))
+		dev->dqm->unregister_process(dev->dqm, &pdd->qpd);
 	return retval;
 }
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 52ce26d..dc386eb 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -145,6 +145,31 @@
 }
 EXPORT_SYMBOL(drm_fb_helper_add_one_connector);
 
+static void remove_from_modeset(struct drm_mode_set *set,
+		struct drm_connector *connector)
+{
+	int i, j;
+
+	for (i = 0; i < set->num_connectors; i++) {
+		if (set->connectors[i] == connector)
+			break;
+	}
+
+	if (i == set->num_connectors)
+		return;
+
+	for (j = i + 1; j < set->num_connectors; j++) {
+		set->connectors[j - 1] = set->connectors[j];
+	}
+	set->num_connectors--;
+
+	/* because i915 is pissy about this..
+	 * TODO maybe need to makes sure we set it back to !=NULL somewhere?
+	 */
+	if (set->num_connectors == 0)
+		set->fb = NULL;
+}
+
 int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
 				       struct drm_connector *connector)
 {
@@ -167,6 +192,11 @@
 	}
 	fb_helper->connector_count--;
 	kfree(fb_helper_connector);
+
+	/* also cleanup dangling references to the connector: */
+	for (i = 0; i < fb_helper->crtc_count; i++)
+		remove_from_modeset(&fb_helper->crtc_info[i].mode_set, connector);
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_fb_helper_remove_one_connector);
@@ -741,7 +771,9 @@
 	int i, j, rc = 0;
 	int start;
 
-	drm_modeset_lock_all(dev);
+	if (__drm_modeset_lock_all(dev, !!oops_in_progress)) {
+		return -EBUSY;
+	}
 	if (!drm_fb_helper_is_bound(fb_helper)) {
 		drm_modeset_unlock_all(dev);
 		return -EBUSY;
@@ -915,7 +947,9 @@
 	int ret = 0;
 	int i;
 
-	drm_modeset_lock_all(dev);
+	if (__drm_modeset_lock_all(dev, !!oops_in_progress)) {
+		return -EBUSY;
+	}
 	if (!drm_fb_helper_is_bound(fb_helper)) {
 		drm_modeset_unlock_all(dev);
 		return -EBUSY;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 121470a..1bcbe07 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -645,18 +645,6 @@
 	if (!is_exynos)
 		return -ENODEV;
 
-	/*
-	 * Register device object only in case of Exynos SoC.
-	 *
-	 * Below codes resolves temporarily infinite loop issue incurred
-	 * by Exynos drm driver when using multi-platform kernel.
-	 * So these codes will be replaced with more generic way later.
-	 */
-	if (!of_machine_is_compatible("samsung,exynos3") &&
-			!of_machine_is_compatible("samsung,exynos4") &&
-			!of_machine_is_compatible("samsung,exynos5"))
-		return -ENODEV;
-
 	exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1,
 								NULL, 0);
 	if (IS_ERR(exynos_drm_pdev))
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 5765a16..98051e8 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1669,7 +1669,6 @@
 
 static void hdmiphy_conf_reset(struct hdmi_context *hdata)
 {
-	u8 buffer[2];
 	u32 reg;
 
 	clk_disable_unprepare(hdata->res.sclk_hdmi);
@@ -1677,11 +1676,8 @@
 	clk_prepare_enable(hdata->res.sclk_hdmi);
 
 	/* operation mode */
-	buffer[0] = 0x1f;
-	buffer[1] = 0x00;
-
-	if (hdata->hdmiphy_port)
-		i2c_master_send(hdata->hdmiphy_port, buffer, 2);
+	hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE,
+				HDMI_PHY_ENABLE_MODE_SET);
 
 	if (hdata->type == HDMI_TYPE13)
 		reg = HDMI_V13_PHY_RSTOUT;
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 820b762..064ed65 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -1026,6 +1026,7 @@
 static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr)
 {
 	struct mixer_context *mixer_ctx = mgr_to_mixer(mgr);
+	int err;
 
 	mutex_lock(&mixer_ctx->mixer_mutex);
 	if (!mixer_ctx->powered) {
@@ -1034,7 +1035,11 @@
 	}
 	mutex_unlock(&mixer_ctx->mixer_mutex);
 
-	drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe);
+	err = drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe);
+	if (err < 0) {
+		DRM_DEBUG_KMS("failed to acquire vblank counter\n");
+		return;
+	}
 
 	atomic_set(&mixer_ctx->wait_vsync_event, 1);
 
@@ -1262,8 +1267,6 @@
 		return ret;
 	}
 
-	pm_runtime_enable(dev);
-
 	return 0;
 }
 
@@ -1272,8 +1275,6 @@
 	struct mixer_context *ctx = dev_get_drvdata(dev);
 
 	mixer_mgr_remove(&ctx->manager);
-
-	pm_runtime_disable(dev);
 }
 
 static const struct component_ops mixer_component_ops = {
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index d476279..a9041d1 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -32,6 +32,8 @@
 struct tda998x_priv {
 	struct i2c_client *cec;
 	struct i2c_client *hdmi;
+	struct mutex mutex;
+	struct delayed_work dwork;
 	uint16_t rev;
 	uint8_t current_page;
 	int dpms;
@@ -402,9 +404,10 @@
 	uint8_t addr = REG2ADDR(reg);
 	int ret;
 
+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	ret = i2c_master_send(client, &addr, sizeof(addr));
 	if (ret < 0)
@@ -414,10 +417,12 @@
 	if (ret < 0)
 		goto fail;
 
-	return ret;
+	goto out;
 
 fail:
 	dev_err(&client->dev, "Error %d reading from 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 	return ret;
 }
 
@@ -431,13 +436,16 @@
 	buf[0] = REG2ADDR(reg);
 	memcpy(&buf[1], p, cnt);
 
+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return;
+		goto out;
 
 	ret = i2c_master_send(client, buf, cnt + 1);
 	if (ret < 0)
 		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 }
 
 static int
@@ -459,13 +467,16 @@
 	uint8_t buf[] = {REG2ADDR(reg), val};
 	int ret;
 
+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return;
+		goto out;
 
 	ret = i2c_master_send(client, buf, sizeof(buf));
 	if (ret < 0)
 		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 }
 
 static void
@@ -475,13 +486,16 @@
 	uint8_t buf[] = {REG2ADDR(reg), val >> 8, val};
 	int ret;
 
+	mutex_lock(&priv->mutex);
 	ret = set_page(priv, reg);
 	if (ret < 0)
-		return;
+		goto out;
 
 	ret = i2c_master_send(client, buf, sizeof(buf));
 	if (ret < 0)
 		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+out:
+	mutex_unlock(&priv->mutex);
 }
 
 static void
@@ -536,6 +550,17 @@
 	reg_write(priv, REG_MUX_VP_VIP_OUT, 0x24);
 }
 
+/* handle HDMI connect/disconnect */
+static void tda998x_hpd(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tda998x_priv *priv =
+			container_of(dwork, struct tda998x_priv, dwork);
+
+	if (priv->encoder && priv->encoder->dev)
+		drm_kms_helper_hotplug_event(priv->encoder->dev);
+}
+
 /*
  * only 2 interrupts may occur: screen plug/unplug and EDID read
  */
@@ -559,8 +584,7 @@
 		priv->wq_edid_wait = 0;
 		wake_up(&priv->wq_edid);
 	} else if (cec != 0) {			/* HPD change */
-		if (priv->encoder && priv->encoder->dev)
-			drm_helper_hpd_irq_event(priv->encoder->dev);
+		schedule_delayed_work(&priv->dwork, HZ/10);
 	}
 	return IRQ_HANDLED;
 }
@@ -1170,8 +1194,10 @@
 	/* disable all IRQs and free the IRQ handler */
 	cec_write(priv, REG_CEC_RXSHPDINTENA, 0);
 	reg_clear(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
-	if (priv->hdmi->irq)
+	if (priv->hdmi->irq) {
 		free_irq(priv->hdmi->irq, priv);
+		cancel_delayed_work_sync(&priv->dwork);
+	}
 
 	i2c_unregister_device(priv->cec);
 }
@@ -1255,6 +1281,7 @@
 	struct device_node *np = client->dev.of_node;
 	u32 video;
 	int rev_lo, rev_hi, ret;
+	unsigned short cec_addr;
 
 	priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3);
 	priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1);
@@ -1262,12 +1289,16 @@
 
 	priv->current_page = 0xff;
 	priv->hdmi = client;
-	priv->cec = i2c_new_dummy(client->adapter, 0x34);
+	/* CEC I2C address bound to TDA998x I2C addr by configuration pins */
+	cec_addr = 0x34 + (client->addr & 0x03);
+	priv->cec = i2c_new_dummy(client->adapter, cec_addr);
 	if (!priv->cec)
 		return -ENODEV;
 
 	priv->dpms = DRM_MODE_DPMS_OFF;
 
+	mutex_init(&priv->mutex);	/* protect the page access */
+
 	/* wake up the device: */
 	cec_write(priv, REG_CEC_ENAMODS,
 			CEC_ENAMODS_EN_RXSENS | CEC_ENAMODS_EN_HDMI);
@@ -1323,8 +1354,9 @@
 	if (client->irq) {
 		int irqf_trigger;
 
-		/* init read EDID waitqueue */
+		/* init read EDID waitqueue and HDP work */
 		init_waitqueue_head(&priv->wq_edid);
+		INIT_DELAYED_WORK(&priv->dwork, tda998x_hpd);
 
 		/* clear pending interrupts */
 		reg_read(priv, REG_INT_FLAGS_0);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 574057c..7643300 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -462,19 +462,13 @@
 			} else if (id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_LPT;
 				DRM_DEBUG_KMS("Found LynxPoint PCH\n");
-				WARN_ON(!IS_HASWELL(dev));
-				WARN_ON(IS_HSW_ULT(dev));
-			} else if (IS_BROADWELL(dev)) {
-				dev_priv->pch_type = PCH_LPT;
-				dev_priv->pch_id =
-					INTEL_PCH_LPT_LP_DEVICE_ID_TYPE;
-				DRM_DEBUG_KMS("This is Broadwell, assuming "
-					      "LynxPoint LP PCH\n");
+				WARN_ON(!IS_HASWELL(dev) && !IS_BROADWELL(dev));
+				WARN_ON(IS_HSW_ULT(dev) || IS_BDW_ULT(dev));
 			} else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_LPT;
 				DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
-				WARN_ON(!IS_HASWELL(dev));
-				WARN_ON(!IS_HSW_ULT(dev));
+				WARN_ON(!IS_HASWELL(dev) && !IS_BROADWELL(dev));
+				WARN_ON(!IS_HSW_ULT(dev) && !IS_BDW_ULT(dev));
 			} else if (id == INTEL_PCH_SPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_SPT;
 				DRM_DEBUG_KMS("Found SunrisePoint PCH\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e9f891c..9d7a715 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2159,8 +2159,7 @@
 #define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
 				 (INTEL_DEVID(dev) & 0xFF00) == 0x0C00)
 #define IS_BDW_ULT(dev)		(IS_BROADWELL(dev) && \
-				 ((INTEL_DEVID(dev) & 0xf) == 0x2  || \
-				 (INTEL_DEVID(dev) & 0xf) == 0x6 || \
+				 ((INTEL_DEVID(dev) & 0xf) == 0x6 ||	\
 				 (INTEL_DEVID(dev) & 0xf) == 0xe))
 #define IS_BDW_GT3(dev)		(IS_BROADWELL(dev) && \
 				 (INTEL_DEVID(dev) & 0x00F0) == 0x0020)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c11603b..5f61482 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3148,6 +3148,13 @@
 		u32 size = i915_gem_obj_ggtt_size(obj);
 		uint64_t val;
 
+		/* Adjust fence size to match tiled area */
+		if (obj->tiling_mode != I915_TILING_NONE) {
+			uint32_t row_size = obj->stride *
+				(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
+			size = (size / row_size) * row_size;
+		}
+
 		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
 				 0xfffff000) << 32;
 		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
@@ -4884,13 +4891,12 @@
 	for (i = 0; i < NUM_L3_SLICES(dev); i++)
 		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
 
-	/*
-	 * XXX: Contexts should only be initialized once. Doing a switch to the
-	 * default context switch however is something we'd like to do after
-	 * reset or thaw (the latter may not actually be necessary for HW, but
-	 * goes with our code better). Context switching requires rings (for
-	 * the do_switch), but before enabling PPGTT. So don't move this.
-	 */
+	ret = i915_ppgtt_init_hw(dev);
+	if (ret && ret != -EIO) {
+		DRM_ERROR("PPGTT enable failed %d\n", ret);
+		i915_gem_cleanup_ringbuffer(dev);
+	}
+
 	ret = i915_gem_context_enable(dev_priv);
 	if (ret && ret != -EIO) {
 		DRM_ERROR("Context enable failed %d\n", ret);
@@ -4899,12 +4905,6 @@
 		return ret;
 	}
 
-	ret = i915_ppgtt_init_hw(dev);
-	if (ret && ret != -EIO) {
-		DRM_ERROR("PPGTT enable failed %d\n", ret);
-		i915_gem_cleanup_ringbuffer(dev);
-	}
-
 	return ret;
 }
 
@@ -5155,7 +5155,7 @@
 	if (!mutex_is_locked(mutex))
 		return false;
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
+#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
 	return mutex->owner == task;
 #else
 	/* Since UP may be pre-empted, we cannot assume that we own the lock */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d0d3dfb..b051a23 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -292,6 +292,23 @@
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
+u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask)
+{
+	/*
+	 * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer
+	 * if GEN6_PM_UP_EI_EXPIRED is masked.
+	 *
+	 * TODO: verify if this can be reproduced on VLV,CHV.
+	 */
+	if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv))
+		mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
+
+	if (INTEL_INFO(dev_priv)->gen >= 8)
+		mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP;
+
+	return mask;
+}
+
 void gen6_disable_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -304,8 +321,7 @@
 
 	spin_lock_irq(&dev_priv->irq_lock);
 
-	I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ?
-		   ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0);
+	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 
 	__gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
 	I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) &
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e2af138..e7a16f1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9815,7 +9815,7 @@
 		if (obj->tiling_mode != work->old_fb_obj->tiling_mode)
 			/* vlv: DISPLAY_FLIP fails to change tiling */
 			ring = NULL;
-	} else if (IS_IVYBRIDGE(dev)) {
+	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
 		ring = &dev_priv->ring[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
 		ring = obj->ring;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 25fdbb1..3b40a17 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -794,6 +794,7 @@
 void gen6_reset_rps_interrupts(struct drm_device *dev);
 void gen6_enable_rps_interrupts(struct drm_device *dev);
 void gen6_disable_rps_interrupts(struct drm_device *dev);
+u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask);
 void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
 static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 4d63839..dfb783a 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -962,7 +962,7 @@
 
 	WARN_ON(panel->backlight.max == 0);
 
-	if (panel->backlight.level == 0) {
+	if (panel->backlight.level <= panel->backlight.min) {
 		panel->backlight.level = panel->backlight.max;
 		if (panel->backlight.device)
 			panel->backlight.device->props.brightness =
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 964b28e..bf814a6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4363,16 +4363,7 @@
 	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
 	mask &= dev_priv->pm_rps_events;
 
-	/* IVB and SNB hard hangs on looping batchbuffer
-	 * if GEN6_PM_UP_EI_EXPIRED is masked.
-	 */
-	if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED;
-
-	if (IS_GEN8(dev_priv->dev))
-		mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
-
-	return ~mask;
+	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
 }
 
 /* gen6_set_rps is called to update the frequency request, but should also be
@@ -4441,7 +4432,8 @@
 		return;
 
 	/* Mask turbo interrupt so that they will not come in between */
-	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+	I915_WRITE(GEN6_PMINTRMSK,
+		   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 
 	vlv_force_gfx_clock(dev_priv, true);
 
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 6dcde37..64fdae5 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -6033,6 +6033,17 @@
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 1 << vm_id);
 
+	/* wait for the invalidate to complete */
+	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
+				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
+				 WAIT_REG_MEM_ENGINE(0))); /* me */
+	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0); /* ref */
+	radeon_ring_write(ring, 0); /* mask */
+	radeon_ring_write(ring, 0x20); /* poll interval */
+
 	/* compute doesn't have PFP */
 	if (usepfp) {
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index dde5c7e..42cd0cf 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -816,7 +816,6 @@
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
 			} else if (flags & R600_PTE_VALID) {
 				value = addr;
 			} else {
@@ -903,6 +902,9 @@
 void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
 		      unsigned vm_id, uint64_t pd_addr)
 {
+	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
+
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	if (vm_id < 8) {
 		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
@@ -943,5 +945,12 @@
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
 	radeon_ring_write(ring, 1 << vm_id);
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0); /* reference */
+	radeon_ring_write(ring, 0); /* mask */
+	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 360de9f..aea48c8 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -2516,6 +2516,16 @@
 	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
 	radeon_ring_write(ring, 1 << vm_id);
 
+	/* wait for the invalidate to complete */
+	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
+				 WAIT_REG_MEM_ENGINE(0))); /* me */
+	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0); /* ref */
+	radeon_ring_write(ring, 0); /* mask */
+	radeon_ring_write(ring, 0x20); /* poll interval */
+
 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 	radeon_ring_write(ring, 0x0);
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 50f8861..ce787a9 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -372,7 +372,6 @@
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
 			} else if (flags & R600_PTE_VALID) {
 				value = addr;
 			} else {
@@ -463,5 +462,11 @@
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
 	radeon_ring_write(ring, 1 << vm_id);
+
+	/* wait for invalidate to complete */
+	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
+	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 0); /* mask */
+	radeon_ring_write(ring, 0); /* value */
 }
 
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 2e12e4d..ad71254 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -1133,6 +1133,23 @@
 #define	PACKET3_MEM_SEMAPHORE				0x39
 #define	PACKET3_MPEG_INDEX				0x3A
 #define	PACKET3_WAIT_REG_MEM				0x3C
+#define		WAIT_REG_MEM_FUNCTION(x)                ((x) << 0)
+                /* 0 - always
+		 * 1 - <
+		 * 2 - <=
+		 * 3 - ==
+		 * 4 - !=
+		 * 5 - >=
+		 * 6 - >
+		 */
+#define		WAIT_REG_MEM_MEM_SPACE(x)               ((x) << 4)
+                /* 0 - reg
+		 * 1 - mem
+		 */
+#define		WAIT_REG_MEM_ENGINE(x)                  ((x) << 8)
+                /* 0 - me
+		 * 1 - pfp
+		 */
 #define	PACKET3_MEM_WRITE				0x3D
 #define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_SURFACE_SYNC				0x43
@@ -1272,6 +1289,13 @@
 					 (1 << 21) |			\
 					 (((n) & 0xFFFFF) << 0))
 
+#define DMA_SRBM_POLL_PACKET		((9 << 28) |			\
+					 (1 << 27) |			\
+					 (1 << 26))
+
+#define DMA_SRBM_READ_PACKET		((9 << 28) |			\
+					 (1 << 27))
+
 /* async DMA Packet types */
 #define	DMA_PACKET_WRITE				  0x2
 #define	DMA_PACKET_COPY					  0x3
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 74f06d5..279801c 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -644,6 +644,7 @@
 		return r;
 	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
+	rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 	return radeon_gart_table_ram_alloc(rdev);
 }
@@ -681,11 +682,16 @@
 	WREG32(RADEON_AIC_HI_ADDR, 0);
 }
 
+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
+{
+	return addr;
+}
+
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr, uint32_t flags)
+			    uint64_t entry)
 {
 	u32 *gtt = rdev->gart.ptr;
-	gtt[i] = cpu_to_le32(lower_32_bits(addr));
+	gtt[i] = cpu_to_le32(lower_32_bits(entry));
 }
 
 void r100_pci_gart_fini(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 064ad55..08d68f3 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -73,11 +73,8 @@
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)
 
-void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-			      uint64_t addr, uint32_t flags)
+uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags)
 {
-	void __iomem *ptr = rdev->gart.ptr;
-
 	addr = (lower_32_bits(addr) >> 8) |
 		((upper_32_bits(addr) & 0xff) << 24);
 	if (flags & RADEON_GART_PAGE_READ)
@@ -86,10 +83,18 @@
 		addr |= R300_PTE_WRITEABLE;
 	if (!(flags & RADEON_GART_PAGE_SNOOP))
 		addr |= R300_PTE_UNSNOOPED;
+	return addr;
+}
+
+void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+			      uint64_t entry)
+{
+	void __iomem *ptr = rdev->gart.ptr;
+
 	/* on x86 we want this to be CPU endian, on powerpc
 	 * on powerpc without HW swappers, it'll get swapped on way
 	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
-	writel(addr, ((void __iomem *)ptr) + (i * 4));
+	writel(entry, ((void __iomem *)ptr) + (i * 4));
 }
 
 int rv370_pcie_gart_init(struct radeon_device *rdev)
@@ -109,6 +114,7 @@
 		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
 	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
 	rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush;
+	rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry;
 	rdev->asic->gart.set_page = &rv370_pcie_gart_set_page;
 	return radeon_gart_table_vram_alloc(rdev);
 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 54529b8..3f2a8d3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -242,6 +242,7 @@
  * Dummy page
  */
 struct radeon_dummy_page {
+	uint64_t	entry;
 	struct page	*page;
 	dma_addr_t	addr;
 };
@@ -645,7 +646,7 @@
 	unsigned			num_cpu_pages;
 	unsigned			table_size;
 	struct page			**pages;
-	dma_addr_t			*pages_addr;
+	uint64_t			*pages_entry;
 	bool				ready;
 };
 
@@ -1847,8 +1848,9 @@
 	/* gart */
 	struct {
 		void (*tlb_flush)(struct radeon_device *rdev);
+		uint64_t (*get_page_entry)(uint64_t addr, uint32_t flags);
 		void (*set_page)(struct radeon_device *rdev, unsigned i,
-				 uint64_t addr, uint32_t flags);
+				 uint64_t entry);
 	} gart;
 	struct {
 		int (*init)(struct radeon_device *rdev);
@@ -2852,7 +2854,8 @@
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
-#define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f))
+#define radeon_gart_get_page_entry(a, f) (rdev)->asic->gart.get_page_entry((a), (f))
+#define radeon_gart_set_page(rdev, i, e) (rdev)->asic->gart.set_page((rdev), (i), (e))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count)))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 850de57..ed0e10e 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -159,11 +159,13 @@
 		DRM_INFO("Forcing AGP to PCIE mode\n");
 		rdev->flags |= RADEON_IS_PCIE;
 		rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush;
+		rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry;
 		rdev->asic->gart.set_page = &rv370_pcie_gart_set_page;
 	} else {
 		DRM_INFO("Forcing AGP to PCI mode\n");
 		rdev->flags |= RADEON_IS_PCI;
 		rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
+		rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 		rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 	}
 	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
@@ -199,6 +201,7 @@
 	.mc_wait_for_idle = &r100_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@@ -265,6 +268,7 @@
 	.mc_wait_for_idle = &r100_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@@ -333,6 +337,20 @@
 	.set_wptr = &r100_gfx_set_wptr,
 };
 
+static struct radeon_asic_ring rv515_gfx_ring = {
+	.ib_execute = &r100_ring_ib_execute,
+	.emit_fence = &r300_fence_ring_emit,
+	.emit_semaphore = &r100_semaphore_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.ring_start = &rv515_ring_start,
+	.ring_test = &r100_ring_test,
+	.ib_test = &r100_ib_test,
+	.is_lockup = &r100_gpu_is_lockup,
+	.get_rptr = &r100_gfx_get_rptr,
+	.get_wptr = &r100_gfx_get_wptr,
+	.set_wptr = &r100_gfx_set_wptr,
+};
+
 static struct radeon_asic r300_asic = {
 	.init = &r300_init,
 	.fini = &r300_fini,
@@ -345,6 +363,7 @@
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@@ -411,6 +430,7 @@
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@@ -477,6 +497,7 @@
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@@ -543,6 +564,7 @@
 	.mc_wait_for_idle = &rs400_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs400_gart_tlb_flush,
+		.get_page_entry = &rs400_gart_get_page_entry,
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
@@ -609,6 +631,7 @@
 	.mc_wait_for_idle = &rs600_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs600_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -677,6 +700,7 @@
 	.mc_wait_for_idle = &rs690_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs400_gart_tlb_flush,
+		.get_page_entry = &rs400_gart_get_page_entry,
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
@@ -745,10 +769,11 @@
 	.mc_wait_for_idle = &rv515_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
+		[RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring
 	},
 	.irq = {
 		.set = &rs600_irq_set,
@@ -811,10 +836,11 @@
 	.mc_wait_for_idle = &r520_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
+		[RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring
 	},
 	.irq = {
 		.set = &rs600_irq_set,
@@ -905,6 +931,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -990,6 +1017,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1081,6 +1109,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1185,6 +1214,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1303,6 +1333,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1395,6 +1426,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1486,6 +1518,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1621,6 +1654,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -1724,6 +1758,7 @@
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -1857,6 +1892,7 @@
 	.get_gpu_clock_counter = &si_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &si_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -2018,6 +2054,7 @@
 	.get_gpu_clock_counter = &cik_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cik_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -2125,6 +2162,7 @@
 	.get_gpu_clock_counter = &cik_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cik_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 2a45d54..8d787d1 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -67,8 +67,9 @@
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr, uint32_t flags);
+			    uint64_t entry);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -172,8 +173,9 @@
 				struct radeon_fence *fence);
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
+extern uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags);
 extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-				     uint64_t addr, uint32_t flags);
+				     uint64_t entry);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@@ -208,8 +210,9 @@
 extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags);
+			 uint64_t entry);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@@ -232,8 +235,9 @@
 void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags);
+			 uint64_t entry);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 0ec6516..bd7519f 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -774,6 +774,8 @@
 		rdev->dummy_page.page = NULL;
 		return -ENOMEM;
 	}
+	rdev->dummy_page.entry = radeon_gart_get_page_entry(rdev->dummy_page.addr,
+							    RADEON_GART_PAGE_DUMMY);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 84146d5..5450fa9 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -165,6 +165,19 @@
 		radeon_bo_unpin(rdev->gart.robj);
 	radeon_bo_unreserve(rdev->gart.robj);
 	rdev->gart.table_addr = gpu_addr;
+
+	if (!r) {
+		int i;
+
+		/* We might have dropped some GART table updates while it wasn't
+		 * mapped, restore all entries
+		 */
+		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
+		mb();
+		radeon_gart_tlb_flush(rdev);
+	}
+
 	return r;
 }
 
@@ -228,7 +241,6 @@
 	unsigned t;
 	unsigned p;
 	int i, j;
-	u64 page_base;
 
 	if (!rdev->gart.ready) {
 		WARN(1, "trying to unbind memory from uninitialized GART !\n");
@@ -239,14 +251,12 @@
 	for (i = 0; i < pages; i++, p++) {
 		if (rdev->gart.pages[p]) {
 			rdev->gart.pages[p] = NULL;
-			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
-			page_base = rdev->gart.pages_addr[p];
 			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+				rdev->gart.pages_entry[t] = rdev->dummy_page.entry;
 				if (rdev->gart.ptr) {
-					radeon_gart_set_page(rdev, t, page_base,
-							     RADEON_GART_PAGE_DUMMY);
+					radeon_gart_set_page(rdev, t,
+							     rdev->dummy_page.entry);
 				}
-				page_base += RADEON_GPU_PAGE_SIZE;
 			}
 		}
 	}
@@ -274,7 +284,7 @@
 {
 	unsigned t;
 	unsigned p;
-	uint64_t page_base;
+	uint64_t page_base, page_entry;
 	int i, j;
 
 	if (!rdev->gart.ready) {
@@ -285,14 +295,15 @@
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
 
 	for (i = 0; i < pages; i++, p++) {
-		rdev->gart.pages_addr[p] = dma_addr[i];
 		rdev->gart.pages[p] = pagelist[i];
-		if (rdev->gart.ptr) {
-			page_base = rdev->gart.pages_addr[p];
-			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
-				radeon_gart_set_page(rdev, t, page_base, flags);
-				page_base += RADEON_GPU_PAGE_SIZE;
+		page_base = dma_addr[i];
+		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+			page_entry = radeon_gart_get_page_entry(page_base, flags);
+			rdev->gart.pages_entry[t] = page_entry;
+			if (rdev->gart.ptr) {
+				radeon_gart_set_page(rdev, t, page_entry);
 			}
+			page_base += RADEON_GPU_PAGE_SIZE;
 		}
 	}
 	mb();
@@ -334,16 +345,15 @@
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
-	rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
-					rdev->gart.num_cpu_pages);
-	if (rdev->gart.pages_addr == NULL) {
+	rdev->gart.pages_entry = vmalloc(sizeof(uint64_t) *
+					 rdev->gart.num_gpu_pages);
+	if (rdev->gart.pages_entry == NULL) {
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
 	/* set GART entry to point to the dummy page by default */
-	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
-		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
-	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		rdev->gart.pages_entry[i] = rdev->dummy_page.entry;
 	return 0;
 }
 
@@ -356,15 +366,15 @@
  */
 void radeon_gart_fini(struct radeon_device *rdev)
 {
-	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
+	if (rdev->gart.ready) {
 		/* unbind pages */
 		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
 	}
 	rdev->gart.ready = false;
 	vfree(rdev->gart.pages);
-	vfree(rdev->gart.pages_addr);
+	vfree(rdev->gart.pages_entry);
 	rdev->gart.pages = NULL;
-	rdev->gart.pages_addr = NULL;
+	rdev->gart.pages_entry = NULL;
 
 	radeon_dummy_page_fini(rdev);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index a46f737..d0b4f7d 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -576,7 +576,7 @@
 error_free:
 	drm_free_large(vm_bos);
 
-	if (r)
+	if (r && r != -ERESTARTSYS)
 		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 8bf87f1..bef9a09 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -436,7 +436,7 @@
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
+	uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
 	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 32522cc..f7da8fe 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1287,8 +1287,39 @@
 	return ret;
 }
 
+struct radeon_dpm_quirk {
+	u32 chip_vendor;
+	u32 chip_device;
+	u32 subsys_vendor;
+	u32 subsys_device;
+};
+
+/* cards with dpm stability problems */
+static struct radeon_dpm_quirk radeon_dpm_quirk_list[] = {
+	/* TURKS - https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 */
+	{ PCI_VENDOR_ID_ATI, 0x6759, 0x1682, 0x3195 },
+	/* TURKS - https://bugzilla.kernel.org/show_bug.cgi?id=83731 */
+	{ PCI_VENDOR_ID_ATI, 0x6840, 0x1179, 0xfb81 },
+	{ 0, 0, 0, 0 },
+};
+
 int radeon_pm_init(struct radeon_device *rdev)
 {
+	struct radeon_dpm_quirk *p = radeon_dpm_quirk_list;
+	bool disable_dpm = false;
+
+	/* Apply dpm quirks */
+	while (p && p->chip_device != 0) {
+		if (rdev->pdev->vendor == p->chip_vendor &&
+		    rdev->pdev->device == p->chip_device &&
+		    rdev->pdev->subsystem_vendor == p->subsys_vendor &&
+		    rdev->pdev->subsystem_device == p->subsys_device) {
+			disable_dpm = true;
+			break;
+		}
+		++p;
+	}
+
 	/* enable dpm on rv6xx+ */
 	switch (rdev->family) {
 	case CHIP_RV610:
@@ -1344,6 +1375,8 @@
 			 (!(rdev->flags & RADEON_IS_IGP)) &&
 			 (!rdev->smc_fw))
 			rdev->pm.pm_method = PM_METHOD_PROFILE;
+		else if (disable_dpm && (radeon_dpm == -1))
+			rdev->pm.pm_method = PM_METHOD_PROFILE;
 		else if (radeon_dpm == 0)
 			rdev->pm.pm_method = PM_METHOD_PROFILE;
 		else
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index cde48c4..06d2246 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -587,10 +587,8 @@
 	uint64_t result;
 
 	/* page table offset */
-	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
-
-	/* in case cpu page size != gpu page size*/
-	result |= addr & (~PAGE_MASK);
+	result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
+	result &= ~RADEON_GPU_PAGE_MASK;
 
 	return result;
 }
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index c5799f16..34e3235 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -212,11 +212,9 @@
 #define RS400_PTE_WRITEABLE (1 << 2)
 #define RS400_PTE_READABLE  (1 << 3)
 
-void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags)
+uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags)
 {
 	uint32_t entry;
-	u32 *gtt = rdev->gart.ptr;
 
 	entry = (lower_32_bits(addr) & PAGE_MASK) |
 		((upper_32_bits(addr) & 0xff) << 4);
@@ -226,8 +224,14 @@
 		entry |= RS400_PTE_WRITEABLE;
 	if (!(flags & RADEON_GART_PAGE_SNOOP))
 		entry |= RS400_PTE_UNSNOOPED;
-	entry = cpu_to_le32(entry);
-	gtt[i] = entry;
+	return entry;
+}
+
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t entry)
+{
+	u32 *gtt = rdev->gart.ptr;
+	gtt[i] = cpu_to_le32(lower_32_bits(entry));
 }
 
 int rs400_mc_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 9acb1c3..74bce91 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -625,11 +625,8 @@
 	radeon_gart_table_vram_free(rdev);
 }
 
-void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags)
+uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags)
 {
-	void __iomem *ptr = (void *)rdev->gart.ptr;
-
 	addr = addr & 0xFFFFFFFFFFFFF000ULL;
 	addr |= R600_PTE_SYSTEM;
 	if (flags & RADEON_GART_PAGE_VALID)
@@ -640,7 +637,14 @@
 		addr |= R600_PTE_WRITEABLE;
 	if (flags & RADEON_GART_PAGE_SNOOP)
 		addr |= R600_PTE_SNOOPED;
-	writeq(addr, ptr + (i * 8));
+	return addr;
+}
+
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t entry)
+{
+	void __iomem *ptr = (void *)rdev->gart.ptr;
+	writeq(entry, ptr + (i * 8));
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 60df444..5d89b87 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -5057,6 +5057,16 @@
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 1 << vm_id);
 
+	/* wait for the invalidate to complete */
+	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
+				 WAIT_REG_MEM_ENGINE(0))); /* me */
+	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0); /* ref */
+	radeon_ring_write(ring, 0); /* mask */
+	radeon_ring_write(ring, 0x20); /* poll interval */
+
 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 	radeon_ring_write(ring, 0x0);
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index f5cc777..8320792 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -123,7 +123,6 @@
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
 			if (flags & R600_PTE_SYSTEM) {
 				value = radeon_vm_map_gart(rdev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
 			} else if (flags & R600_PTE_VALID) {
 				value = addr;
 			} else {
@@ -206,6 +205,14 @@
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
 	radeon_ring_write(ring, 1 << vm_id);
+
+	/* wait for invalidate to complete */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
+	radeon_ring_write(ring, VM_INVALIDATE_REQUEST);
+	radeon_ring_write(ring, 0xff << 16); /* retry */
+	radeon_ring_write(ring, 1 << vm_id); /* mask */
+	radeon_ring_write(ring, 0); /* value */
+	radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 32e354b..eff8a64 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2908,6 +2908,22 @@
 	return ret;
 }
 
+struct si_dpm_quirk {
+	u32 chip_vendor;
+	u32 chip_device;
+	u32 subsys_vendor;
+	u32 subsys_device;
+	u32 max_sclk;
+	u32 max_mclk;
+};
+
+/* cards with dpm stability problems */
+static struct si_dpm_quirk si_dpm_quirk_list[] = {
+	/* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */
+	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
+	{ 0, 0, 0, 0 },
+};
+
 static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 					struct radeon_ps *rps)
 {
@@ -2918,7 +2934,22 @@
 	u32 mclk, sclk;
 	u16 vddc, vddci;
 	u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
+	u32 max_sclk = 0, max_mclk = 0;
 	int i;
+	struct si_dpm_quirk *p = si_dpm_quirk_list;
+
+	/* Apply dpm quirks */
+	while (p && p->chip_device != 0) {
+		if (rdev->pdev->vendor == p->chip_vendor &&
+		    rdev->pdev->device == p->chip_device &&
+		    rdev->pdev->subsystem_vendor == p->subsys_vendor &&
+		    rdev->pdev->subsystem_device == p->subsys_device) {
+			max_sclk = p->max_sclk;
+			max_mclk = p->max_mclk;
+			break;
+		}
+		++p;
+	}
 
 	if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
 	    ni_dpm_vblank_too_short(rdev))
@@ -2972,6 +3003,14 @@
 			if (ps->performance_levels[i].mclk > max_mclk_vddc)
 				ps->performance_levels[i].mclk = max_mclk_vddc;
 		}
+		if (max_mclk) {
+			if (ps->performance_levels[i].mclk > max_mclk)
+				ps->performance_levels[i].mclk = max_mclk;
+		}
+		if (max_sclk) {
+			if (ps->performance_levels[i].sclk > max_sclk)
+				ps->performance_levels[i].sclk = max_sclk;
+		}
 	}
 
 	/* XXX validate the min clocks required for display */
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 4069be89..8499924 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -1632,6 +1632,23 @@
 #define	PACKET3_MPEG_INDEX				0x3A
 #define	PACKET3_COPY_DW					0x3B
 #define	PACKET3_WAIT_REG_MEM				0x3C
+#define		WAIT_REG_MEM_FUNCTION(x)                ((x) << 0)
+                /* 0 - always
+		 * 1 - <
+		 * 2 - <=
+		 * 3 - ==
+		 * 4 - !=
+		 * 5 - >=
+		 * 6 - >
+		 */
+#define		WAIT_REG_MEM_MEM_SPACE(x)               ((x) << 4)
+                /* 0 - reg
+		 * 1 - mem
+		 */
+#define		WAIT_REG_MEM_ENGINE(x)                  ((x) << 8)
+                /* 0 - me
+		 * 1 - pfp
+		 */
 #define	PACKET3_MEM_WRITE				0x3D
 #define	PACKET3_COPY_DATA				0x40
 #define	PACKET3_CP_DMA					0x41
@@ -1835,6 +1852,7 @@
 #define	DMA_PACKET_TRAP					  0x7
 #define	DMA_PACKET_SRBM_WRITE				  0x9
 #define	DMA_PACKET_CONSTANT_FILL			  0xd
+#define	DMA_PACKET_POLL_REG_MEM				  0xe
 #define	DMA_PACKET_NOP					  0xf
 
 #define VCE_STATUS					0x20004
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 7b5d221..6c6b655 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -406,11 +406,9 @@
 		if (unlikely(ret != 0))
 			--dev_priv->num_3d_resources;
 	} else if (unhide_svga) {
-		mutex_lock(&dev_priv->hw_mutex);
 		vmw_write(dev_priv, SVGA_REG_ENABLE,
 			  vmw_read(dev_priv, SVGA_REG_ENABLE) &
 			  ~SVGA_REG_ENABLE_HIDE);
-		mutex_unlock(&dev_priv->hw_mutex);
 	}
 
 	mutex_unlock(&dev_priv->release_mutex);
@@ -433,13 +431,10 @@
 	mutex_lock(&dev_priv->release_mutex);
 	if (unlikely(--dev_priv->num_3d_resources == 0))
 		vmw_release_device(dev_priv);
-	else if (hide_svga) {
-		mutex_lock(&dev_priv->hw_mutex);
+	else if (hide_svga)
 		vmw_write(dev_priv, SVGA_REG_ENABLE,
 			  vmw_read(dev_priv, SVGA_REG_ENABLE) |
 			  SVGA_REG_ENABLE_HIDE);
-		mutex_unlock(&dev_priv->hw_mutex);
-	}
 
 	n3d = (int32_t) dev_priv->num_3d_resources;
 	mutex_unlock(&dev_priv->release_mutex);
@@ -600,12 +595,14 @@
 	dev_priv->dev = dev;
 	dev_priv->vmw_chipset = chipset;
 	dev_priv->last_read_seqno = (uint32_t) -100;
-	mutex_init(&dev_priv->hw_mutex);
 	mutex_init(&dev_priv->cmdbuf_mutex);
 	mutex_init(&dev_priv->release_mutex);
 	mutex_init(&dev_priv->binding_mutex);
 	rwlock_init(&dev_priv->resource_lock);
 	ttm_lock_init(&dev_priv->reservation_sem);
+	spin_lock_init(&dev_priv->hw_lock);
+	spin_lock_init(&dev_priv->waiter_lock);
+	spin_lock_init(&dev_priv->cap_lock);
 
 	for (i = vmw_res_context; i < vmw_res_max; ++i) {
 		idr_init(&dev_priv->res_idr[i]);
@@ -626,14 +623,11 @@
 
 	dev_priv->enable_fb = enable_fbdev;
 
-	mutex_lock(&dev_priv->hw_mutex);
-
 	vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
 	svga_id = vmw_read(dev_priv, SVGA_REG_ID);
 	if (svga_id != SVGA_ID_2) {
 		ret = -ENOSYS;
 		DRM_ERROR("Unsupported SVGA ID 0x%x\n", svga_id);
-		mutex_unlock(&dev_priv->hw_mutex);
 		goto out_err0;
 	}
 
@@ -683,10 +677,8 @@
 		dev_priv->prim_bb_mem = dev_priv->vram_size;
 
 	ret = vmw_dma_masks(dev_priv);
-	if (unlikely(ret != 0)) {
-		mutex_unlock(&dev_priv->hw_mutex);
+	if (unlikely(ret != 0))
 		goto out_err0;
-	}
 
 	/*
 	 * Limit back buffer size to VRAM size.  Remove this once
@@ -695,8 +687,6 @@
 	if (dev_priv->prim_bb_mem > dev_priv->vram_size)
 		dev_priv->prim_bb_mem = dev_priv->vram_size;
 
-	mutex_unlock(&dev_priv->hw_mutex);
-
 	vmw_print_capabilities(dev_priv->capabilities);
 
 	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
@@ -1160,9 +1150,7 @@
 		if (unlikely(ret != 0))
 			return ret;
 		vmw_kms_save_vga(dev_priv);
-		mutex_lock(&dev_priv->hw_mutex);
 		vmw_write(dev_priv, SVGA_REG_TRACES, 0);
-		mutex_unlock(&dev_priv->hw_mutex);
 	}
 
 	if (active) {
@@ -1196,9 +1184,7 @@
 	if (!dev_priv->enable_fb) {
 		vmw_kms_restore_vga(dev_priv);
 		vmw_3d_resource_dec(dev_priv, true);
-		mutex_lock(&dev_priv->hw_mutex);
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
-		mutex_unlock(&dev_priv->hw_mutex);
 	}
 	return ret;
 }
@@ -1233,9 +1219,7 @@
 			DRM_ERROR("Unable to clean VRAM on master drop.\n");
 		vmw_kms_restore_vga(dev_priv);
 		vmw_3d_resource_dec(dev_priv, true);
-		mutex_lock(&dev_priv->hw_mutex);
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
-		mutex_unlock(&dev_priv->hw_mutex);
 	}
 
 	dev_priv->active_master = &dev_priv->fbdev_master;
@@ -1367,10 +1351,8 @@
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct vmw_private *dev_priv = vmw_priv(dev);
 
-	mutex_lock(&dev_priv->hw_mutex);
 	vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
 	(void) vmw_read(dev_priv, SVGA_REG_ID);
-	mutex_unlock(&dev_priv->hw_mutex);
 
 	/**
 	 * Reclaim 3d reference held by fbdev and potentially
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 4ee799b..d26a6da 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -399,7 +399,8 @@
 	uint32_t memory_size;
 	bool has_gmr;
 	bool has_mob;
-	struct mutex hw_mutex;
+	spinlock_t hw_lock;
+	spinlock_t cap_lock;
 
 	/*
 	 * VGA registers.
@@ -449,8 +450,9 @@
 	atomic_t marker_seq;
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
-	int fence_queue_waiters; /* Protected by hw_mutex */
-	int goal_queue_waiters; /* Protected by hw_mutex */
+	spinlock_t waiter_lock;
+	int fence_queue_waiters; /* Protected by waiter_lock */
+	int goal_queue_waiters; /* Protected by waiter_lock */
 	atomic_t fifo_queue_waiters;
 	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
@@ -553,20 +555,35 @@
 	return (struct vmw_master *) master->driver_priv;
 }
 
+/*
+ * The locking here is fine-grained, so that it is performed once
+ * for every read- and write operation. This is of course costly, but we
+ * don't perform much register access in the timing critical paths anyway.
+ * Instead we have the extra benefit of being sure that we don't forget
+ * the hw lock around register accesses.
+ */
 static inline void vmw_write(struct vmw_private *dev_priv,
 			     unsigned int offset, uint32_t value)
 {
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
 	outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
 	outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT);
+	spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
 }
 
 static inline uint32_t vmw_read(struct vmw_private *dev_priv,
 				unsigned int offset)
 {
-	uint32_t val;
+	unsigned long irq_flags;
+	u32 val;
 
+	spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
 	outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
 	val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT);
+	spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
+
 	return val;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index b7594cb..945f1e0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -35,7 +35,7 @@
 	struct vmw_private *dev_priv;
 	spinlock_t lock;
 	struct list_head fence_list;
-	struct work_struct work, ping_work;
+	struct work_struct work;
 	u32 user_fence_size;
 	u32 fence_size;
 	u32 event_fence_action_size;
@@ -134,14 +134,6 @@
 	return "svga";
 }
 
-static void vmw_fence_ping_func(struct work_struct *work)
-{
-	struct vmw_fence_manager *fman =
-		container_of(work, struct vmw_fence_manager, ping_work);
-
-	vmw_fifo_ping_host(fman->dev_priv, SVGA_SYNC_GENERIC);
-}
-
 static bool vmw_fence_enable_signaling(struct fence *f)
 {
 	struct vmw_fence_obj *fence =
@@ -155,11 +147,7 @@
 	if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
 		return false;
 
-	if (mutex_trylock(&dev_priv->hw_mutex)) {
-		vmw_fifo_ping_host_locked(dev_priv, SVGA_SYNC_GENERIC);
-		mutex_unlock(&dev_priv->hw_mutex);
-	} else
-		schedule_work(&fman->ping_work);
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
 
 	return true;
 }
@@ -305,7 +293,6 @@
 	INIT_LIST_HEAD(&fman->fence_list);
 	INIT_LIST_HEAD(&fman->cleanup_list);
 	INIT_WORK(&fman->work, &vmw_fence_work_func);
-	INIT_WORK(&fman->ping_work, &vmw_fence_ping_func);
 	fman->fifo_down = true;
 	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
 	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
@@ -323,7 +310,6 @@
 	bool lists_empty;
 
 	(void) cancel_work_sync(&fman->work);
-	(void) cancel_work_sync(&fman->ping_work);
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
 	lists_empty = list_empty(&fman->fence_list) &&
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 09e10ae..39f2b03 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -44,10 +44,10 @@
 		if (!dev_priv->has_mob)
 			return false;
 
-		mutex_lock(&dev_priv->hw_mutex);
+		spin_lock(&dev_priv->cap_lock);
 		vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
 		result = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
-		mutex_unlock(&dev_priv->hw_mutex);
+		spin_unlock(&dev_priv->cap_lock);
 
 		return (result != 0);
 	}
@@ -120,7 +120,6 @@
 	DRM_INFO("height %d\n", vmw_read(dev_priv, SVGA_REG_HEIGHT));
 	DRM_INFO("bpp %d\n", vmw_read(dev_priv, SVGA_REG_BITS_PER_PIXEL));
 
-	mutex_lock(&dev_priv->hw_mutex);
 	dev_priv->enable_state = vmw_read(dev_priv, SVGA_REG_ENABLE);
 	dev_priv->config_done_state = vmw_read(dev_priv, SVGA_REG_CONFIG_DONE);
 	dev_priv->traces_state = vmw_read(dev_priv, SVGA_REG_TRACES);
@@ -143,7 +142,6 @@
 	mb();
 
 	vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, 1);
-	mutex_unlock(&dev_priv->hw_mutex);
 
 	max = ioread32(fifo_mem + SVGA_FIFO_MAX);
 	min = ioread32(fifo_mem  + SVGA_FIFO_MIN);
@@ -160,31 +158,28 @@
 	return vmw_fifo_send_fence(dev_priv, &dummy);
 }
 
-void vmw_fifo_ping_host_locked(struct vmw_private *dev_priv, uint32_t reason)
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	static DEFINE_SPINLOCK(ping_lock);
+	unsigned long irq_flags;
 
+	/*
+	 * The ping_lock is needed because we don't have an atomic
+	 * test-and-set of the SVGA_FIFO_BUSY register.
+	 */
+	spin_lock_irqsave(&ping_lock, irq_flags);
 	if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
 		iowrite32(1, fifo_mem + SVGA_FIFO_BUSY);
 		vmw_write(dev_priv, SVGA_REG_SYNC, reason);
 	}
-}
-
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
-{
-	mutex_lock(&dev_priv->hw_mutex);
-
-	vmw_fifo_ping_host_locked(dev_priv, reason);
-
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock_irqrestore(&ping_lock, irq_flags);
 }
 
 void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
 
-	mutex_lock(&dev_priv->hw_mutex);
-
 	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
 	while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
 		;
@@ -198,7 +193,6 @@
 	vmw_write(dev_priv, SVGA_REG_TRACES,
 		  dev_priv->traces_state);
 
-	mutex_unlock(&dev_priv->hw_mutex);
 	vmw_marker_queue_takedown(&fifo->marker_queue);
 
 	if (likely(fifo->static_buffer != NULL)) {
@@ -271,7 +265,7 @@
 		return vmw_fifo_wait_noirq(dev_priv, bytes,
 					   interruptible, timeout);
 
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->waiter_lock);
 	if (atomic_add_return(1, &dev_priv->fifo_queue_waiters) > 0) {
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_FIFO_PROGRESS,
@@ -280,7 +274,7 @@
 		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->waiter_lock);
 
 	if (interruptible)
 		ret = wait_event_interruptible_timeout
@@ -296,14 +290,14 @@
 	else if (likely(ret > 0))
 		ret = 0;
 
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->waiter_lock);
 	if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) {
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS;
 		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->waiter_lock);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 37881ec..69c8ce2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -135,13 +135,13 @@
 		(pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32);
 	compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;
 
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->cap_lock);
 	for (i = 0; i < max_size; ++i) {
 		vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
 		compat_cap->pairs[i][0] = i;
 		compat_cap->pairs[i][1] = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->cap_lock);
 
 	return 0;
 }
@@ -191,12 +191,12 @@
 		if (num > SVGA3D_DEVCAP_MAX)
 			num = SVGA3D_DEVCAP_MAX;
 
-		mutex_lock(&dev_priv->hw_mutex);
+		spin_lock(&dev_priv->cap_lock);
 		for (i = 0; i < num; ++i) {
 			vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
 			*bounce32++ = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
 		}
-		mutex_unlock(&dev_priv->hw_mutex);
+		spin_unlock(&dev_priv->cap_lock);
 	} else if (gb_objects) {
 		ret = vmw_fill_compat_cap(dev_priv, bounce, size);
 		if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 0c42376..9fe9827 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -62,13 +62,8 @@
 
 static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
 {
-	uint32_t busy;
 
-	mutex_lock(&dev_priv->hw_mutex);
-	busy = vmw_read(dev_priv, SVGA_REG_BUSY);
-	mutex_unlock(&dev_priv->hw_mutex);
-
-	return (busy == 0);
+	return (vmw_read(dev_priv, SVGA_REG_BUSY) == 0);
 }
 
 void vmw_update_seqno(struct vmw_private *dev_priv,
@@ -184,7 +179,7 @@
 
 void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
 {
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->waiter_lock);
 	if (dev_priv->fence_queue_waiters++ == 0) {
 		unsigned long irq_flags;
 
@@ -195,12 +190,12 @@
 		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->waiter_lock);
 }
 
 void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
 {
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->waiter_lock);
 	if (--dev_priv->fence_queue_waiters == 0) {
 		unsigned long irq_flags;
 
@@ -209,13 +204,13 @@
 		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->waiter_lock);
 }
 
 
 void vmw_goal_waiter_add(struct vmw_private *dev_priv)
 {
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->waiter_lock);
 	if (dev_priv->goal_queue_waiters++ == 0) {
 		unsigned long irq_flags;
 
@@ -226,12 +221,12 @@
 		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->waiter_lock);
 }
 
 void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
 {
-	mutex_lock(&dev_priv->hw_mutex);
+	spin_lock(&dev_priv->waiter_lock);
 	if (--dev_priv->goal_queue_waiters == 0) {
 		unsigned long irq_flags;
 
@@ -240,7 +235,7 @@
 		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	spin_unlock(&dev_priv->waiter_lock);
 }
 
 int vmw_wait_seqno(struct vmw_private *dev_priv,
@@ -315,9 +310,7 @@
 	if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
 		return;
 
-	mutex_lock(&dev_priv->hw_mutex);
 	vmw_write(dev_priv, SVGA_REG_IRQMASK, 0);
-	mutex_unlock(&dev_priv->hw_mutex);
 
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 	outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 3725b52..8725b79 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1828,9 +1828,7 @@
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	struct vmw_display_unit *du = vmw_connector_to_du(connector);
 
-	mutex_lock(&dev_priv->hw_mutex);
 	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
-	mutex_unlock(&dev_priv->hw_mutex);
 
 	return ((vmw_connector_to_du(connector)->unit < num_displays &&
 		 du->pref_active) ?
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 6529c09..a7de26d 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -574,6 +574,16 @@
 	  for those channels specified in the map.  This map can be provided
 	  either via platform data or the device tree bindings.
 
+config SENSORS_I5500
+	tristate "Intel 5500/5520/X58 temperature sensor"
+	depends on X86 && PCI
+	help
+	  If you say yes here you get support for the temperature
+	  sensor inside the Intel 5500, 5520 and X58 chipsets.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called i5500_temp.
+
 config SENSORS_CORETEMP
 	tristate "Intel Core/Core2/Atom temperature sensor"
 	depends on X86
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 6728064..6c94147 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -68,6 +68,7 @@
 obj-$(CONFIG_SENSORS_HIH6130)	+= hih6130.o
 obj-$(CONFIG_SENSORS_HTU21)	+= htu21.o
 obj-$(CONFIG_SENSORS_ULTRA45)	+= ultra45_env.o
+obj-$(CONFIG_SENSORS_I5500)	+= i5500_temp.o
 obj-$(CONFIG_SENSORS_I5K_AMB)	+= i5k_amb.o
 obj-$(CONFIG_SENSORS_IBMAEM)	+= ibmaem.o
 obj-$(CONFIG_SENSORS_IBMPEX)	+= ibmpex.o
diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c
new file mode 100644
index 0000000..3e3ccbf
--- /dev/null
+++ b/drivers/hwmon/i5500_temp.c
@@ -0,0 +1,149 @@
+/*
+ * i5500_temp - Driver for Intel 5500/5520/X58 chipset thermal sensor
+ *
+ * Copyright (C) 2012, 2014 Jean Delvare <jdelvare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+
+/* Register definitions from datasheet */
+#define REG_TSTHRCATA	0xE2
+#define REG_TSCTRL	0xE8
+#define REG_TSTHRRPEX	0xEB
+#define REG_TSTHRLO	0xEC
+#define REG_TSTHRHI	0xEE
+#define REG_CTHINT	0xF0
+#define REG_TSFSC	0xF3
+#define REG_CTSTS	0xF4
+#define REG_TSTHRRQPI	0xF5
+#define REG_CTCTRL	0xF7
+#define REG_TSTIMER	0xF8
+
+/*
+ * Sysfs stuff
+ */
+
+/* Sensor resolution : 0.5 degree C */
+static ssize_t show_temp(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->parent);
+	long temp;
+	u16 tsthrhi;
+	s8 tsfsc;
+
+	pci_read_config_word(pdev, REG_TSTHRHI, &tsthrhi);
+	pci_read_config_byte(pdev, REG_TSFSC, &tsfsc);
+	temp = ((long)tsthrhi - tsfsc) * 500;
+
+	return sprintf(buf, "%ld\n", temp);
+}
+
+static ssize_t show_thresh(struct device *dev,
+			   struct device_attribute *devattr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->parent);
+	int reg = to_sensor_dev_attr(devattr)->index;
+	long temp;
+	u16 tsthr;
+
+	pci_read_config_word(pdev, reg, &tsthr);
+	temp = tsthr * 500;
+
+	return sprintf(buf, "%ld\n", temp);
+}
+
+static ssize_t show_alarm(struct device *dev,
+			  struct device_attribute *devattr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->parent);
+	int nr = to_sensor_dev_attr(devattr)->index;
+	u8 ctsts;
+
+	pci_read_config_byte(pdev, REG_CTSTS, &ctsts);
+	return sprintf(buf, "%u\n", (unsigned int)ctsts & (1 << nr));
+}
+
+static DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL);
+static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_thresh, NULL, 0xE2);
+static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IRUGO, show_thresh, NULL, 0xEC);
+static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_thresh, NULL, 0xEE);
+static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 1);
+
+static struct attribute *i5500_temp_attrs[] = {
+	&dev_attr_temp1_input.attr,
+	&sensor_dev_attr_temp1_crit.dev_attr.attr,
+	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp1_max.dev_attr.attr,
+	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
+	&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(i5500_temp);
+
+static const struct pci_device_id i5500_temp_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3438) },
+	{ 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, i5500_temp_ids);
+
+static int i5500_temp_probe(struct pci_dev *pdev,
+			    const struct pci_device_id *id)
+{
+	int err;
+	struct device *hwmon_dev;
+	u32 tstimer;
+	s8 tsfsc;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to enable device\n");
+		return err;
+	}
+
+	pci_read_config_byte(pdev, REG_TSFSC, &tsfsc);
+	pci_read_config_dword(pdev, REG_TSTIMER, &tstimer);
+	if (tsfsc == 0x7F && tstimer == 0x07D30D40) {
+		dev_notice(&pdev->dev, "Sensor seems to be disabled\n");
+		return -ENODEV;
+	}
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(&pdev->dev,
+							   "intel5500", NULL,
+							   i5500_temp_groups);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static struct pci_driver i5500_temp_driver = {
+	.name = "i5500_temp",
+	.id_table = i5500_temp_ids,
+	.probe = i5500_temp_probe,
+};
+
+module_pci_driver(i5500_temp_driver);
+
+MODULE_AUTHOR("Jean Delvare <jdelvare@suse.de>");
+MODULE_DESCRIPTION("Intel 5500/5520/X58 chipset thermal sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c
index e37412d..b99de00 100644
--- a/drivers/iio/adc/ad799x.c
+++ b/drivers/iio/adc/ad799x.c
@@ -143,9 +143,15 @@
 	case ad7998:
 		return i2c_smbus_write_word_swapped(st->client, AD7998_CONF_REG,
 			val);
-	default:
+	case ad7992:
+	case ad7993:
+	case ad7994:
 		return i2c_smbus_write_byte_data(st->client, AD7998_CONF_REG,
 			val);
+	default:
+		/* Will be written when doing a conversion */
+		st->config = val;
+		return 0;
 	}
 }
 
@@ -155,8 +161,13 @@
 	case ad7997:
 	case ad7998:
 		return i2c_smbus_read_word_swapped(st->client, AD7998_CONF_REG);
-	default:
+	case ad7992:
+	case ad7993:
+	case ad7994:
 		return i2c_smbus_read_byte_data(st->client, AD7998_CONF_REG);
+	default:
+		/* No readback support */
+		return st->config;
 	}
 }
 
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 866fe90..90c8cb7 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -449,6 +449,9 @@
 	if (val2 == NULL)
 		val2 = &unused;
 
+	if(!iio_channel_has_info(chan->channel, info))
+		return -EINVAL;
+
 	if (chan->indio_dev->info->read_raw_multi) {
 		ret = chan->indio_dev->info->read_raw_multi(chan->indio_dev,
 					chan->channel, INDIO_MAX_RAW_ELEMENTS,
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 57ecc5b..9117b7a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1114,7 +1114,8 @@
 	struct mlx4_dev	*dev = to_mdev(qp->device)->dev;
 	int err = 0;
 
-	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
+	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
 		return 0; /* do nothing */
 
 	ib_flow = flow_attr + 1;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index f2b9780..77ecf6d 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1520,6 +1520,8 @@
 		case 7:
 		case 8:
 		case 9:
+		case 10:
+		case 13:
 			etd->hw_version = 4;
 			break;
 		default:
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index c66d1b5..764857b 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -415,6 +415,13 @@
 		},
 	},
 	{
+		/* Acer Aspire 7738 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7738"),
+		},
+	},
+	{
 		/* Gericom Bellagio */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Gericom"),
@@ -745,6 +752,35 @@
 	{ }
 };
 
+/*
+ * Some laptops need keyboard reset before probing for the trackpad to get
+ * it detected, initialised & finally work.
+ */
+static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = {
+	{
+		/* Gigabyte P35 v2 - Elantech touchpad */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "P35V2"),
+		},
+	},
+		{
+		/* Aorus branded Gigabyte X3 Plus - Elantech touchpad */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X3"),
+		},
+	},
+	{
+		/* Gigabyte P34 - Elantech touchpad */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "P34"),
+		},
+	},
+	{ }
+};
+
 #endif /* CONFIG_X86 */
 
 #ifdef CONFIG_PNP
@@ -1040,6 +1076,9 @@
 	if (dmi_check_system(i8042_dmi_dritek_table))
 		i8042_dritek = true;
 
+	if (dmi_check_system(i8042_dmi_kbdreset_table))
+		i8042_kbdreset = true;
+
 	/*
 	 * A20 was already enabled during early kernel init. But some buggy
 	 * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 924e4bf..986a71c 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -67,6 +67,10 @@
 module_param_named(notimeout, i8042_notimeout, bool, 0);
 MODULE_PARM_DESC(notimeout, "Ignore timeouts signalled by i8042");
 
+static bool i8042_kbdreset;
+module_param_named(kbdreset, i8042_kbdreset, bool, 0);
+MODULE_PARM_DESC(kbdreset, "Reset device connected to KBD port");
+
 #ifdef CONFIG_X86
 static bool i8042_dritek;
 module_param_named(dritek, i8042_dritek, bool, 0);
@@ -790,6 +794,16 @@
 		return -1;
 
 /*
+ * Reset keyboard (needed on some laptops to successfully detect
+ * touchpad, e.g., some Gigabyte laptop models with Elantech
+ * touchpads).
+ */
+	if (i8042_kbdreset) {
+		pr_warn("Attempting to reset device connected to KBD port\n");
+		i8042_kbd_write(NULL, (unsigned char) 0xff);
+	}
+
+/*
  * Test AUX IRQ delivery to make sure BIOS did not grab the IRQ and
  * used it for a PCI card or somethig else.
  */
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index f722a0c..c48da05 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -315,6 +315,7 @@
 	.attach_dev	= gart_iommu_attach_dev,
 	.detach_dev	= gart_iommu_detach_dev,
 	.map		= gart_iommu_map,
+	.map_sg		= default_iommu_map_sg,
 	.unmap		= gart_iommu_unmap,
 	.iova_to_phys	= gart_iommu_iova_to_phys,
 	.pgsize_bitmap	= GART_IOMMU_PGSIZES,
@@ -395,7 +396,7 @@
 	do_gart_setup(gart, NULL);
 
 	gart_handle = gart;
-	bus_set_iommu(&platform_bus_type, &gart_iommu_ops);
+
 	return 0;
 }
 
diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c
index d111ac7..63cd031 100644
--- a/drivers/irqchip/irq-atmel-aic-common.c
+++ b/drivers/irqchip/irq-atmel-aic-common.c
@@ -28,7 +28,7 @@
 #define AT91_AIC_IRQ_MIN_PRIORITY	0
 #define AT91_AIC_IRQ_MAX_PRIORITY	7
 
-#define AT91_AIC_SRCTYPE		GENMASK(7, 6)
+#define AT91_AIC_SRCTYPE		GENMASK(6, 5)
 #define AT91_AIC_SRCTYPE_LOW		(0 << 5)
 #define AT91_AIC_SRCTYPE_FALLING	(1 << 5)
 #define AT91_AIC_SRCTYPE_HIGH		(2 << 5)
@@ -74,7 +74,7 @@
 		return -EINVAL;
 	}
 
-	*val &= AT91_AIC_SRCTYPE;
+	*val &= ~AT91_AIC_SRCTYPE;
 	*val |= aic_type;
 
 	return 0;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 86e4684..d8996bd 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1053,7 +1053,7 @@
 	 * of two entries. No, the architecture doesn't let you
 	 * express an ITT with a single entry.
 	 */
-	nr_ites = max(2, roundup_pow_of_two(nvecs));
+	nr_ites = max(2UL, roundup_pow_of_two(nvecs));
 	sz = nr_ites * its->ite_size;
 	sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
 	itt = kmalloc(sz, GFP_KERNEL);
diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c
index 29b8f21..6bc2deb 100644
--- a/drivers/irqchip/irq-hip04.c
+++ b/drivers/irqchip/irq-hip04.c
@@ -381,7 +381,7 @@
 	 * It will be refined as each CPU probes its ID.
 	 */
 	for (i = 0; i < NR_HIP04_CPU_IF; i++)
-		hip04_cpu_map[i] = 0xff;
+		hip04_cpu_map[i] = 0xffff;
 
 	/*
 	 * Find out how many interrupts are supported.
diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c
index 7e342df..0b0d2c0 100644
--- a/drivers/irqchip/irq-mtk-sysirq.c
+++ b/drivers/irqchip/irq-mtk-sysirq.c
@@ -137,9 +137,9 @@
 		return -ENOMEM;
 
 	chip_data->intpol_base = of_io_request_and_map(node, 0, "intpol");
-	if (!chip_data->intpol_base) {
+	if (IS_ERR(chip_data->intpol_base)) {
 		pr_err("mtk_sysirq: unable to map sysirq register\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(chip_data->intpol_base);
 		goto out_free;
 	}
 
diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c
index 28718d3..c03f140 100644
--- a/drivers/irqchip/irq-omap-intc.c
+++ b/drivers/irqchip/irq-omap-intc.c
@@ -263,7 +263,7 @@
 	return ret;
 }
 
-static int __init omap_init_irq_legacy(u32 base)
+static int __init omap_init_irq_legacy(u32 base, struct device_node *node)
 {
 	int j, irq_base;
 
@@ -277,7 +277,7 @@
 		irq_base = 0;
 	}
 
-	domain = irq_domain_add_legacy(NULL, omap_nr_irqs, irq_base, 0,
+	domain = irq_domain_add_legacy(node, omap_nr_irqs, irq_base, 0,
 			&irq_domain_simple_ops, NULL);
 
 	omap_irq_soft_reset();
@@ -301,10 +301,26 @@
 {
 	int ret;
 
-	if (node)
+	/*
+	 * FIXME legacy OMAP DMA driver sitting under arch/arm/plat-omap/dma.c
+	 * depends is still not ready for linear IRQ domains; because of that
+	 * we need to temporarily "blacklist" OMAP2 and OMAP3 devices from using
+	 * linear IRQ Domain until that driver is finally fixed.
+	 */
+	if (of_device_is_compatible(node, "ti,omap2-intc") ||
+			of_device_is_compatible(node, "ti,omap3-intc")) {
+		struct resource res;
+
+		if (of_address_to_resource(node, 0, &res))
+			return -ENOMEM;
+
+		base = res.start;
+		ret = omap_init_irq_legacy(base, node);
+	} else if (node) {
 		ret = omap_init_irq_of(node);
-	else
-		ret = omap_init_irq_legacy(base);
+	} else {
+		ret = omap_init_irq_legacy(base, NULL);
+	}
 
 	if (ret == 0)
 		omap_irq_enable_protection();
diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c
index a82e542..0b38060 100644
--- a/drivers/isdn/hardware/eicon/message.c
+++ b/drivers/isdn/hardware/eicon/message.c
@@ -4880,7 +4880,7 @@
 	byte SS_Ind[] = "\x05\x02\x00\x02\x00\x00"; /* Hold_Ind struct*/
 	byte CF_Ind[] = "\x09\x02\x00\x06\x00\x00\x00\x00\x00\x00";
 	byte Interr_Err_Ind[] = "\x0a\x02\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
-	byte CONF_Ind[] = "\x09\x16\x00\x06\x00\x00\0x00\0x00\0x00\0x00";
+	byte CONF_Ind[] = "\x09\x16\x00\x06\x00\x00\x00\x00\x00\x00";
 	byte force_mt_info = false;
 	byte dir;
 	dword d;
diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c
index 26515c2..25e4197 100644
--- a/drivers/leds/leds-netxbig.c
+++ b/drivers/leds/leds-netxbig.c
@@ -330,18 +330,18 @@
 	led_dat->sata = 0;
 	led_dat->cdev.brightness = LED_OFF;
 	led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
-	/*
-	 * If available, expose the SATA activity blink capability through
-	 * a "sata" sysfs attribute.
-	 */
-	if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE)
-		led_dat->cdev.groups = netxbig_led_groups;
 	led_dat->mode_addr = template->mode_addr;
 	led_dat->mode_val = template->mode_val;
 	led_dat->bright_addr = template->bright_addr;
 	led_dat->bright_max = (1 << pdata->gpio_ext->num_data) - 1;
 	led_dat->timer = pdata->timer;
 	led_dat->num_timer = pdata->num_timer;
+	/*
+	 * If available, expose the SATA activity blink capability through
+	 * a "sata" sysfs attribute.
+	 */
+	if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE)
+		led_dat->cdev.groups = netxbig_led_groups;
 
 	return led_classdev_register(&pdev->dev, &led_dat->cdev);
 }
diff --git a/drivers/mcb/mcb-internal.h b/drivers/mcb/mcb-internal.h
index f956ef2..fb7493d 100644
--- a/drivers/mcb/mcb-internal.h
+++ b/drivers/mcb/mcb-internal.h
@@ -7,6 +7,7 @@
 #define PCI_DEVICE_ID_MEN_CHAMELEON	0x4d45
 #define CHAMELEON_FILENAME_LEN		12
 #define CHAMELEONV2_MAGIC		0xabce
+#define CHAM_HEADER_SIZE		0x200
 
 enum chameleon_descriptor_type {
 	CHAMELEON_DTYPE_GENERAL = 0x0,
diff --git a/drivers/mcb/mcb-pci.c b/drivers/mcb/mcb-pci.c
index b591819..5e1bd5d 100644
--- a/drivers/mcb/mcb-pci.c
+++ b/drivers/mcb/mcb-pci.c
@@ -17,6 +17,7 @@
 
 struct priv {
 	struct mcb_bus *bus;
+	phys_addr_t mapbase;
 	void __iomem *base;
 };
 
@@ -31,8 +32,8 @@
 
 static int mcb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct resource *res;
 	struct priv *priv;
-	phys_addr_t mapbase;
 	int ret;
 	int num_cells;
 	unsigned long flags;
@@ -47,19 +48,21 @@
 		return -ENODEV;
 	}
 
-	mapbase = pci_resource_start(pdev, 0);
-	if (!mapbase) {
+	priv->mapbase = pci_resource_start(pdev, 0);
+	if (!priv->mapbase) {
 		dev_err(&pdev->dev, "No PCI resource\n");
 		goto err_start;
 	}
 
-	ret = pci_request_region(pdev, 0, KBUILD_MODNAME);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to request PCI BARs\n");
+	res = request_mem_region(priv->mapbase, CHAM_HEADER_SIZE,
+				 KBUILD_MODNAME);
+	if (IS_ERR(res)) {
+		dev_err(&pdev->dev, "Failed to request PCI memory\n");
+		ret = PTR_ERR(res);
 		goto err_start;
 	}
 
-	priv->base = pci_iomap(pdev, 0, 0);
+	priv->base = ioremap(priv->mapbase, CHAM_HEADER_SIZE);
 	if (!priv->base) {
 		dev_err(&pdev->dev, "Cannot ioremap\n");
 		ret = -ENOMEM;
@@ -84,7 +87,7 @@
 
 	priv->bus->get_irq = mcb_pci_get_irq;
 
-	ret = chameleon_parse_cells(priv->bus, mapbase, priv->base);
+	ret = chameleon_parse_cells(priv->bus, priv->mapbase, priv->base);
 	if (ret < 0)
 		goto err_drvdata;
 	num_cells = ret;
@@ -93,8 +96,10 @@
 
 	mcb_bus_add_devices(priv->bus);
 
+	return 0;
+
 err_drvdata:
-	pci_iounmap(pdev, priv->base);
+	iounmap(priv->base);
 err_ioremap:
 	pci_release_region(pdev, 0);
 err_start:
@@ -107,6 +112,10 @@
 	struct priv *priv = pci_get_drvdata(pdev);
 
 	mcb_release_bus(priv->bus);
+
+	iounmap(priv->base);
+	release_region(priv->mapbase, CHAM_HEADER_SIZE);
+	pci_disable_device(pdev);
 }
 
 static const struct pci_device_id mcb_pci_tbl[] = {
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9fc616c..c1c0104 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -94,6 +94,9 @@
 } __packed;
 
 struct dm_cache_metadata {
+	atomic_t ref_count;
+	struct list_head list;
+
 	struct block_device *bdev;
 	struct dm_block_manager *bm;
 	struct dm_space_map *metadata_sm;
@@ -669,10 +672,10 @@
 
 /*----------------------------------------------------------------*/
 
-struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
-						 sector_t data_block_size,
-						 bool may_format_device,
-						 size_t policy_hint_size)
+static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
+					       sector_t data_block_size,
+					       bool may_format_device,
+					       size_t policy_hint_size)
 {
 	int r;
 	struct dm_cache_metadata *cmd;
@@ -680,9 +683,10 @@
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 	if (!cmd) {
 		DMERR("could not allocate metadata struct");
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 
+	atomic_set(&cmd->ref_count, 1);
 	init_rwsem(&cmd->root_lock);
 	cmd->bdev = bdev;
 	cmd->data_block_size = data_block_size;
@@ -705,10 +709,96 @@
 	return cmd;
 }
 
+/*
+ * We keep a little list of ref counted metadata objects to prevent two
+ * different target instances creating separate bufio instances.  This is
+ * an issue if a table is reloaded before the suspend.
+ */
+static DEFINE_MUTEX(table_lock);
+static LIST_HEAD(table);
+
+static struct dm_cache_metadata *lookup(struct block_device *bdev)
+{
+	struct dm_cache_metadata *cmd;
+
+	list_for_each_entry(cmd, &table, list)
+		if (cmd->bdev == bdev) {
+			atomic_inc(&cmd->ref_count);
+			return cmd;
+		}
+
+	return NULL;
+}
+
+static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
+						sector_t data_block_size,
+						bool may_format_device,
+						size_t policy_hint_size)
+{
+	struct dm_cache_metadata *cmd, *cmd2;
+
+	mutex_lock(&table_lock);
+	cmd = lookup(bdev);
+	mutex_unlock(&table_lock);
+
+	if (cmd)
+		return cmd;
+
+	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
+	if (!IS_ERR(cmd)) {
+		mutex_lock(&table_lock);
+		cmd2 = lookup(bdev);
+		if (cmd2) {
+			mutex_unlock(&table_lock);
+			__destroy_persistent_data_objects(cmd);
+			kfree(cmd);
+			return cmd2;
+		}
+		list_add(&cmd->list, &table);
+		mutex_unlock(&table_lock);
+	}
+
+	return cmd;
+}
+
+static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
+{
+	if (cmd->data_block_size != data_block_size) {
+		DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
+		      (unsigned long long) data_block_size,
+		      (unsigned long long) cmd->data_block_size);
+		return false;
+	}
+
+	return true;
+}
+
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+						 sector_t data_block_size,
+						 bool may_format_device,
+						 size_t policy_hint_size)
+{
+	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
+						       may_format_device, policy_hint_size);
+
+	if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
+		dm_cache_metadata_close(cmd);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return cmd;
+}
+
 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 {
-	__destroy_persistent_data_objects(cmd);
-	kfree(cmd);
+	if (atomic_dec_and_test(&cmd->ref_count)) {
+		mutex_lock(&table_lock);
+		list_del(&cmd->list);
+		mutex_unlock(&table_lock);
+
+		__destroy_persistent_data_objects(cmd);
+		kfree(cmd);
+	}
 }
 
 /*
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1e96d78..e165053 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -221,7 +221,13 @@
 	struct list_head need_commit_migrations;
 	sector_t migration_threshold;
 	wait_queue_head_t migration_wait;
-	atomic_t nr_migrations;
+	atomic_t nr_allocated_migrations;
+
+	/*
+	 * The number of in flight migrations that are performing
+	 * background io. eg, promotion, writeback.
+	 */
+	atomic_t nr_io_migrations;
 
 	wait_queue_head_t quiescing_wait;
 	atomic_t quiescing;
@@ -258,7 +264,6 @@
 	struct dm_deferred_set *all_io_ds;
 
 	mempool_t *migration_pool;
-	struct dm_cache_migration *next_migration;
 
 	struct dm_cache_policy *policy;
 	unsigned policy_nr_args;
@@ -350,10 +355,31 @@
 	dm_bio_prison_free_cell(cache->prison, cell);
 }
 
+static struct dm_cache_migration *alloc_migration(struct cache *cache)
+{
+	struct dm_cache_migration *mg;
+
+	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+	if (mg) {
+		mg->cache = cache;
+		atomic_inc(&mg->cache->nr_allocated_migrations);
+	}
+
+	return mg;
+}
+
+static void free_migration(struct dm_cache_migration *mg)
+{
+	if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
+		wake_up(&mg->cache->migration_wait);
+
+	mempool_free(mg, mg->cache->migration_pool);
+}
+
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
 {
 	if (!p->mg) {
-		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+		p->mg = alloc_migration(cache);
 		if (!p->mg)
 			return -ENOMEM;
 	}
@@ -382,7 +408,7 @@
 		free_prison_cell(cache, p->cell1);
 
 	if (p->mg)
-		mempool_free(p->mg, cache->migration_pool);
+		free_migration(p->mg);
 }
 
 static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@ -854,24 +880,14 @@
  * Migration covers moving data from the origin device to the cache, or
  * vice versa.
  *--------------------------------------------------------------*/
-static void free_migration(struct dm_cache_migration *mg)
+static void inc_io_migrations(struct cache *cache)
 {
-	mempool_free(mg, mg->cache->migration_pool);
+	atomic_inc(&cache->nr_io_migrations);
 }
 
-static void inc_nr_migrations(struct cache *cache)
+static void dec_io_migrations(struct cache *cache)
 {
-	atomic_inc(&cache->nr_migrations);
-}
-
-static void dec_nr_migrations(struct cache *cache)
-{
-	atomic_dec(&cache->nr_migrations);
-
-	/*
-	 * Wake the worker in case we're suspending the target.
-	 */
-	wake_up(&cache->migration_wait);
+	atomic_dec(&cache->nr_io_migrations);
 }
 
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -894,11 +910,10 @@
 	wake_worker(cache);
 }
 
-static void cleanup_migration(struct dm_cache_migration *mg)
+static void free_io_migration(struct dm_cache_migration *mg)
 {
-	struct cache *cache = mg->cache;
+	dec_io_migrations(mg->cache);
 	free_migration(mg);
-	dec_nr_migrations(cache);
 }
 
 static void migration_failure(struct dm_cache_migration *mg)
@@ -923,7 +938,7 @@
 		cell_defer(cache, mg->new_ocell, true);
 	}
 
-	cleanup_migration(mg);
+	free_io_migration(mg);
 }
 
 static void migration_success_pre_commit(struct dm_cache_migration *mg)
@@ -934,7 +949,7 @@
 	if (mg->writeback) {
 		clear_dirty(cache, mg->old_oblock, mg->cblock);
 		cell_defer(cache, mg->old_ocell, false);
-		cleanup_migration(mg);
+		free_io_migration(mg);
 		return;
 
 	} else if (mg->demote) {
@@ -944,14 +959,14 @@
 					     mg->old_oblock);
 			if (mg->promote)
 				cell_defer(cache, mg->new_ocell, true);
-			cleanup_migration(mg);
+			free_io_migration(mg);
 			return;
 		}
 	} else {
 		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
 			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
 			policy_remove_mapping(cache->policy, mg->new_oblock);
-			cleanup_migration(mg);
+			free_io_migration(mg);
 			return;
 		}
 	}
@@ -984,7 +999,7 @@
 		} else {
 			if (mg->invalidate)
 				policy_remove_mapping(cache->policy, mg->old_oblock);
-			cleanup_migration(mg);
+			free_io_migration(mg);
 		}
 
 	} else {
@@ -999,7 +1014,7 @@
 			bio_endio(mg->new_ocell->holder, 0);
 			cell_defer(cache, mg->new_ocell, false);
 		}
-		cleanup_migration(mg);
+		free_io_migration(mg);
 	}
 }
 
@@ -1251,7 +1266,7 @@
 	mg->new_ocell = cell;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1275,7 +1290,7 @@
 	mg->new_ocell = NULL;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1302,7 +1317,7 @@
 	mg->new_ocell = new_ocell;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1330,7 +1345,7 @@
 	mg->new_ocell = NULL;
 	mg->start_jiffies = jiffies;
 
-	inc_nr_migrations(cache);
+	inc_io_migrations(cache);
 	quiesce_migration(mg);
 }
 
@@ -1412,7 +1427,7 @@
 
 static bool spare_migration_bandwidth(struct cache *cache)
 {
-	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 	return current_volume < cache->migration_threshold;
 }
@@ -1764,7 +1779,7 @@
 
 static void wait_for_migrations(struct cache *cache)
 {
-	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+	wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
 }
 
 static void stop_worker(struct cache *cache)
@@ -1876,9 +1891,6 @@
 {
 	unsigned i;
 
-	if (cache->next_migration)
-		mempool_free(cache->next_migration, cache->migration_pool);
-
 	if (cache->migration_pool)
 		mempool_destroy(cache->migration_pool);
 
@@ -2424,7 +2436,8 @@
 	INIT_LIST_HEAD(&cache->quiesced_migrations);
 	INIT_LIST_HEAD(&cache->completed_migrations);
 	INIT_LIST_HEAD(&cache->need_commit_migrations);
-	atomic_set(&cache->nr_migrations, 0);
+	atomic_set(&cache->nr_allocated_migrations, 0);
+	atomic_set(&cache->nr_io_migrations, 0);
 	init_waitqueue_head(&cache->migration_wait);
 
 	init_waitqueue_head(&cache->quiescing_wait);
@@ -2487,8 +2500,6 @@
 		goto bad;
 	}
 
-	cache->next_migration = NULL;
-
 	cache->need_tick_bio = true;
 	cache->sized = false;
 	cache->invalidate = false;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4934789..07705ee 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3385,6 +3385,12 @@
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
 
+	if (get_pool_mode(pool) >= PM_READ_ONLY) {
+		DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
+		      dm_device_name(pool->pool_md));
+		return -EINVAL;
+	}
+
 	if (!strcasecmp(argv[0], "create_thin"))
 		r = process_create_thin_mesg(argc, argv, pool);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b98cd9d..2caf5b3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -206,6 +206,9 @@
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
 
+	/* the number of internal suspends */
+	unsigned internal_suspend_count;
+
 	struct dm_stats stats;
 };
 
@@ -2928,7 +2931,7 @@
 {
 	struct dm_table *map = NULL;
 
-	if (dm_suspended_internally_md(md))
+	if (md->internal_suspend_count++)
 		return; /* nested internal suspend */
 
 	if (dm_suspended_md(md)) {
@@ -2953,7 +2956,9 @@
 
 static void __dm_internal_resume(struct mapped_device *md)
 {
-	if (!dm_suspended_internally_md(md))
+	BUG_ON(!md->internal_suspend_count);
+
+	if (--md->internal_suspend_count)
 		return; /* resume from nested internal suspend */
 
 	if (dm_suspended_md(md))
diff --git a/drivers/media/pci/cx23885/cx23885-cards.c b/drivers/media/pci/cx23885/cx23885-cards.c
index db99ca2..06931f6 100644
--- a/drivers/media/pci/cx23885/cx23885-cards.c
+++ b/drivers/media/pci/cx23885/cx23885-cards.c
@@ -614,7 +614,7 @@
 		.portb		= CX23885_MPEG_DVB,
 	},
 	[CX23885_BOARD_HAUPPAUGE_HVR4400] = {
-		.name		= "Hauppauge WinTV-HVR4400",
+		.name		= "Hauppauge WinTV-HVR4400/HVR5500",
 		.porta		= CX23885_ANALOG_VIDEO,
 		.portb		= CX23885_MPEG_DVB,
 		.portc		= CX23885_MPEG_DVB,
@@ -622,6 +622,10 @@
 		.tuner_addr	= 0x60, /* 0xc0 >> 1 */
 		.tuner_bus	= 1,
 	},
+	[CX23885_BOARD_HAUPPAUGE_STARBURST] = {
+		.name		= "Hauppauge WinTV Starburst",
+		.portb		= CX23885_MPEG_DVB,
+	},
 	[CX23885_BOARD_AVERMEDIA_HC81R] = {
 		.name		= "AVerTV Hybrid Express Slim HC81R",
 		.tuner_type	= TUNER_XC2028,
@@ -936,19 +940,19 @@
 	}, {
 		.subvendor = 0x0070,
 		.subdevice = 0xc108,
-		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400,
+		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400, /* Hauppauge WinTV HVR-4400 (Model 121xxx, Hybrid DVB-T/S2, IR) */
 	}, {
 		.subvendor = 0x0070,
 		.subdevice = 0xc138,
-		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400,
+		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400, /* Hauppauge WinTV HVR-5500 (Model 121xxx, Hybrid DVB-T/C/S2, IR) */
 	}, {
 		.subvendor = 0x0070,
 		.subdevice = 0xc12a,
-		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400,
+		.card      = CX23885_BOARD_HAUPPAUGE_STARBURST, /* Hauppauge WinTV Starburst (Model 121x00, DVB-S2, IR) */
 	}, {
 		.subvendor = 0x0070,
 		.subdevice = 0xc1f8,
-		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400,
+		.card      = CX23885_BOARD_HAUPPAUGE_HVR4400, /* Hauppauge WinTV HVR-5500 (Model 121xxx, Hybrid DVB-T/C/S2, IR) */
 	}, {
 		.subvendor = 0x1461,
 		.subdevice = 0xd939,
@@ -1545,8 +1549,9 @@
 		cx_write(GPIO_ISM, 0x00000000);/* INTERRUPTS active low*/
 		break;
 	case CX23885_BOARD_HAUPPAUGE_HVR4400:
+	case CX23885_BOARD_HAUPPAUGE_STARBURST:
 		/* GPIO-8 tda10071 demod reset */
-		/* GPIO-9 si2165 demod reset */
+		/* GPIO-9 si2165 demod reset (only HVR4400/HVR5500)*/
 
 		/* Put the parts into reset and back */
 		cx23885_gpio_enable(dev, GPIO_8 | GPIO_9, 1);
@@ -1872,6 +1877,7 @@
 	case CX23885_BOARD_HAUPPAUGE_HVR1850:
 	case CX23885_BOARD_HAUPPAUGE_HVR1290:
 	case CX23885_BOARD_HAUPPAUGE_HVR4400:
+	case CX23885_BOARD_HAUPPAUGE_STARBURST:
 	case CX23885_BOARD_HAUPPAUGE_IMPACTVCBE:
 		if (dev->i2c_bus[0].i2c_rc == 0)
 			hauppauge_eeprom(dev, eeprom+0xc0);
@@ -1980,6 +1986,11 @@
 		ts2->ts_clk_en_val = 0x1; /* Enable TS_CLK */
 		ts2->src_sel_val   = CX23885_SRC_SEL_PARALLEL_MPEG_VIDEO;
 		break;
+	case CX23885_BOARD_HAUPPAUGE_STARBURST:
+		ts1->gen_ctrl_val  = 0xc; /* Serial bus + punctured clock */
+		ts1->ts_clk_en_val = 0x1; /* Enable TS_CLK */
+		ts1->src_sel_val   = CX23885_SRC_SEL_PARALLEL_MPEG_VIDEO;
+		break;
 	case CX23885_BOARD_DVBSKY_T9580:
 	case CX23885_BOARD_DVBSKY_T982:
 		ts1->gen_ctrl_val  = 0x5; /* Parallel */
diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
index 1d9d0f8..1ad4994 100644
--- a/drivers/media/pci/cx23885/cx23885-core.c
+++ b/drivers/media/pci/cx23885/cx23885-core.c
@@ -2049,11 +2049,11 @@
 
 	cx23885_shutdown(dev);
 
-	pci_disable_device(pci_dev);
-
 	/* unregister stuff */
 	free_irq(pci_dev->irq, dev);
 
+	pci_disable_device(pci_dev);
+
 	cx23885_dev_unregister(dev);
 	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	v4l2_ctrl_handler_free(&dev->ctrl_handler);
diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c
index c47d182..a9c450d 100644
--- a/drivers/media/pci/cx23885/cx23885-dvb.c
+++ b/drivers/media/pci/cx23885/cx23885-dvb.c
@@ -1710,6 +1710,17 @@
 			break;
 		}
 		break;
+	case CX23885_BOARD_HAUPPAUGE_STARBURST:
+		i2c_bus = &dev->i2c_bus[0];
+		fe0->dvb.frontend = dvb_attach(tda10071_attach,
+						&hauppauge_tda10071_config,
+						&i2c_bus->i2c_adap);
+		if (fe0->dvb.frontend != NULL) {
+			dvb_attach(a8293_attach, fe0->dvb.frontend,
+				   &i2c_bus->i2c_adap,
+				   &hauppauge_a8293_config);
+		}
+		break;
 	case CX23885_BOARD_DVBSKY_T9580:
 	case CX23885_BOARD_DVBSKY_S950:
 		i2c_bus = &dev->i2c_bus[0];
diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
index f55cd12..36f2f96 100644
--- a/drivers/media/pci/cx23885/cx23885.h
+++ b/drivers/media/pci/cx23885/cx23885.h
@@ -99,6 +99,7 @@
 #define CX23885_BOARD_DVBSKY_S950              49
 #define CX23885_BOARD_DVBSKY_S952              50
 #define CX23885_BOARD_DVBSKY_T982              51
+#define CX23885_BOARD_HAUPPAUGE_STARBURST      52
 
 #define GPIO_0 0x00000001
 #define GPIO_1 0x00000002
diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index b463fe1..3fe9047 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c
@@ -602,10 +602,13 @@
 	strlcpy(cap->card, video->video.name, sizeof(cap->card));
 	strlcpy(cap->bus_info, "media", sizeof(cap->bus_info));
 
+	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT
+		| V4L2_CAP_STREAMING | V4L2_CAP_DEVICE_CAPS;
+
 	if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
-		cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+		cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
 	else
-		cap->capabilities = V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_STREAMING;
+		cap->device_caps = V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_STREAMING;
 
 	return 0;
 }
diff --git a/drivers/media/platform/soc_camera/atmel-isi.c b/drivers/media/platform/soc_camera/atmel-isi.c
index 8efe403..6d88523 100644
--- a/drivers/media/platform/soc_camera/atmel-isi.c
+++ b/drivers/media/platform/soc_camera/atmel-isi.c
@@ -760,8 +760,9 @@
 {
 	strcpy(cap->driver, "atmel-isi");
 	strcpy(cap->card, "Atmel Image Sensor Interface");
-	cap->capabilities = (V4L2_CAP_VIDEO_CAPTURE |
-				V4L2_CAP_STREAMING);
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+
 	return 0;
 }
 
diff --git a/drivers/media/platform/soc_camera/mx2_camera.c b/drivers/media/platform/soc_camera/mx2_camera.c
index ce72bd2..192377f 100644
--- a/drivers/media/platform/soc_camera/mx2_camera.c
+++ b/drivers/media/platform/soc_camera/mx2_camera.c
@@ -1256,7 +1256,8 @@
 {
 	/* cap->name is set by the friendly caller:-> */
 	strlcpy(cap->card, MX2_CAM_DRIVER_DESCRIPTION, sizeof(cap->card));
-	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
 
 	return 0;
 }
diff --git a/drivers/media/platform/soc_camera/mx3_camera.c b/drivers/media/platform/soc_camera/mx3_camera.c
index a60c3bb..0b3299d 100644
--- a/drivers/media/platform/soc_camera/mx3_camera.c
+++ b/drivers/media/platform/soc_camera/mx3_camera.c
@@ -967,7 +967,8 @@
 {
 	/* cap->name is set by the firendly caller:-> */
 	strlcpy(cap->card, "i.MX3x Camera", sizeof(cap->card));
-	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
 
 	return 0;
 }
diff --git a/drivers/media/platform/soc_camera/omap1_camera.c b/drivers/media/platform/soc_camera/omap1_camera.c
index e6b9328..16f65ec 100644
--- a/drivers/media/platform/soc_camera/omap1_camera.c
+++ b/drivers/media/platform/soc_camera/omap1_camera.c
@@ -1427,7 +1427,8 @@
 {
 	/* cap->name is set by the friendly caller:-> */
 	strlcpy(cap->card, "OMAP1 Camera", sizeof(cap->card));
-	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
 
 	return 0;
 }
diff --git a/drivers/media/platform/soc_camera/pxa_camera.c b/drivers/media/platform/soc_camera/pxa_camera.c
index 951226a..8d6e343 100644
--- a/drivers/media/platform/soc_camera/pxa_camera.c
+++ b/drivers/media/platform/soc_camera/pxa_camera.c
@@ -1576,7 +1576,8 @@
 {
 	/* cap->name is set by the firendly caller:-> */
 	strlcpy(cap->card, pxa_cam_driver_description, sizeof(cap->card));
-	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
 
 	return 0;
 }
diff --git a/drivers/media/platform/soc_camera/rcar_vin.c b/drivers/media/platform/soc_camera/rcar_vin.c
index 0c1f556..9f1473c 100644
--- a/drivers/media/platform/soc_camera/rcar_vin.c
+++ b/drivers/media/platform/soc_camera/rcar_vin.c
@@ -1799,7 +1799,9 @@
 			     struct v4l2_capability *cap)
 {
 	strlcpy(cap->card, "R_Car_VIN", sizeof(cap->card));
-	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+
 	return 0;
 }
 
diff --git a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
index 8b27b3e..7178770 100644
--- a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
+++ b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
@@ -1652,7 +1652,9 @@
 				  struct v4l2_capability *cap)
 {
 	strlcpy(cap->card, "SuperH_Mobile_CEU", sizeof(cap->card));
-	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+
 	return 0;
 }
 
diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c
index 0f345b1..f327c49 100644
--- a/drivers/media/usb/dvb-usb/cxusb.c
+++ b/drivers/media/usb/dvb-usb/cxusb.c
@@ -2232,7 +2232,7 @@
 		{
 			"Mygica T230 DVB-T/T2/C",
 			{ NULL },
-			{ &cxusb_table[22], NULL },
+			{ &cxusb_table[20], NULL },
 		},
 	}
 };
diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
index 1b158f1..536210b 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
@@ -89,16 +89,6 @@
 module_param_array(vbi_nr, int, NULL, 0444);
 MODULE_PARM_DESC(vbi_nr, "Offset for device's vbi dev minor");
 
-static struct v4l2_capability pvr_capability ={
-	.driver         = "pvrusb2",
-	.card           = "Hauppauge WinTV pvr-usb2",
-	.bus_info       = "usb",
-	.version        = LINUX_VERSION_CODE,
-	.capabilities   = (V4L2_CAP_VIDEO_CAPTURE |
-			   V4L2_CAP_TUNER | V4L2_CAP_AUDIO | V4L2_CAP_RADIO |
-			   V4L2_CAP_READWRITE),
-};
-
 static struct v4l2_fmtdesc pvr_fmtdesc [] = {
 	{
 		.index          = 0,
@@ -160,10 +150,22 @@
 	struct pvr2_v4l2_fh *fh = file->private_data;
 	struct pvr2_hdw *hdw = fh->channel.mc_head->hdw;
 
-	memcpy(cap, &pvr_capability, sizeof(struct v4l2_capability));
+	strlcpy(cap->driver, "pvrusb2", sizeof(cap->driver));
 	strlcpy(cap->bus_info, pvr2_hdw_get_bus_info(hdw),
 			sizeof(cap->bus_info));
 	strlcpy(cap->card, pvr2_hdw_get_desc(hdw), sizeof(cap->card));
+	cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TUNER |
+			    V4L2_CAP_AUDIO | V4L2_CAP_RADIO |
+			    V4L2_CAP_READWRITE | V4L2_CAP_DEVICE_CAPS;
+	switch (fh->pdi->devbase.vfl_type) {
+	case VFL_TYPE_GRABBER:
+		cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_AUDIO;
+		break;
+	case VFL_TYPE_RADIO:
+		cap->device_caps = V4L2_CAP_RADIO;
+		break;
+	}
+	cap->device_caps |= V4L2_CAP_TUNER | V4L2_CAP_READWRITE;
 	return 0;
 }
 
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index d09a891..bc08a82 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -3146,27 +3146,26 @@
 			prequeue--;
 		} else {
 			call_void_qop(q, wait_finish, q);
-			ret = vb2_internal_dqbuf(q, &fileio->b, 0);
+			if (!threadio->stop)
+				ret = vb2_internal_dqbuf(q, &fileio->b, 0);
 			call_void_qop(q, wait_prepare, q);
 			dprintk(5, "file io: vb2_dqbuf result: %d\n", ret);
 		}
-		if (threadio->stop)
-			break;
-		if (ret)
+		if (ret || threadio->stop)
 			break;
 		try_to_freeze();
 
 		vb = q->bufs[fileio->b.index];
 		if (!(fileio->b.flags & V4L2_BUF_FLAG_ERROR))
-			ret = threadio->fnc(vb, threadio->priv);
-		if (ret)
-			break;
+			if (threadio->fnc(vb, threadio->priv))
+				break;
 		call_void_qop(q, wait_finish, q);
 		if (set_timestamp)
 			v4l2_get_timestamp(&fileio->b.timestamp);
-		ret = vb2_internal_qbuf(q, &fileio->b);
+		if (!threadio->stop)
+			ret = vb2_internal_qbuf(q, &fileio->b);
 		call_void_qop(q, wait_prepare, q);
-		if (ret)
+		if (ret || threadio->stop)
 			break;
 	}
 
@@ -3235,11 +3234,11 @@
 	threadio->stop = true;
 	vb2_internal_streamoff(q, q->type);
 	call_void_qop(q, wait_prepare, q);
+	err = kthread_stop(threadio->thread);
 	q->fileio = NULL;
 	fileio->req.count = 0;
 	vb2_reqbufs(q, &fileio->req);
 	kfree(fileio);
-	err = kthread_stop(threadio->thread);
 	threadio->thread = NULL;
 	kfree(threadio);
 	q->fileio = NULL;
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index 52a0c2f..ae498b5 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -554,7 +554,8 @@
 		return ret;
 	}
 
-	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
+	ret = mfd_add_devices(da9052->dev, PLATFORM_DEVID_AUTO,
+			      da9052_subdev_info,
 			      ARRAY_SIZE(da9052_subdev_info), NULL, 0, NULL);
 	if (ret) {
 		dev_err(da9052->dev, "mfd_add_devices failed: %d\n", ret);
diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
index dbdd0fa..210d1f8 100644
--- a/drivers/mfd/rtsx_usb.c
+++ b/drivers/mfd/rtsx_usb.c
@@ -681,21 +681,9 @@
 #ifdef CONFIG_PM
 static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message)
 {
-	struct rtsx_ucr *ucr =
-		(struct rtsx_ucr *)usb_get_intfdata(intf);
-
 	dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n",
 			__func__, message.event);
 
-	/*
-	 * Call to make sure LED is off during suspend to save more power.
-	 * It is NOT a permanent state and could be turned on anytime later.
-	 * Thus no need to call turn_on when resunming.
-	 */
-	mutex_lock(&ucr->dev_mutex);
-	rtsx_usb_turn_off_led(ucr);
-	mutex_unlock(&ucr->dev_mutex);
-
 	return 0;
 }
 
diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c
index 0d256cb..d6b7643 100644
--- a/drivers/mfd/tps65218.c
+++ b/drivers/mfd/tps65218.c
@@ -125,10 +125,21 @@
 }
 EXPORT_SYMBOL_GPL(tps65218_clear_bits);
 
+static const struct regmap_range tps65218_yes_ranges[] = {
+	regmap_reg_range(TPS65218_REG_INT1, TPS65218_REG_INT2),
+	regmap_reg_range(TPS65218_REG_STATUS, TPS65218_REG_STATUS),
+};
+
+static const struct regmap_access_table tps65218_volatile_table = {
+	.yes_ranges = tps65218_yes_ranges,
+	.n_yes_ranges = ARRAY_SIZE(tps65218_yes_ranges),
+};
+
 static struct regmap_config tps65218_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 	.cache_type = REGCACHE_RBTREE,
+	.volatile_table = &tps65218_volatile_table,
 };
 
 static const struct regmap_irq tps65218_irqs[] = {
@@ -193,6 +204,7 @@
 
 	.num_regs = 2,
 	.mask_base = TPS65218_REG_INT_MASK1,
+	.status_base = TPS65218_REG_INT1,
 };
 
 static const struct of_device_id of_tps65218_match_table[] = {
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 51fd6b5..d1b55fe 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -100,6 +100,46 @@
 	return 0;
 }
 
+static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct cxl_context *ctx = vma->vm_file->private_data;
+	unsigned long address = (unsigned long)vmf->virtual_address;
+	u64 area, offset;
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+
+	pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n",
+			__func__, ctx->pe, address, offset);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		area = ctx->afu->psn_phys;
+		if (offset > ctx->afu->adapter->ps_size)
+			return VM_FAULT_SIGBUS;
+	} else {
+		area = ctx->psn_phys;
+		if (offset > ctx->psn_size)
+			return VM_FAULT_SIGBUS;
+	}
+
+	mutex_lock(&ctx->status_mutex);
+
+	if (ctx->status != STARTED) {
+		mutex_unlock(&ctx->status_mutex);
+		pr_devel("%s: Context not started, failing problem state access\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
+
+	mutex_unlock(&ctx->status_mutex);
+
+	return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct cxl_mmap_vmops = {
+	.fault = cxl_mmap_fault,
+};
+
 /*
  * Map a per-context mmio space into the given vma.
  */
@@ -108,26 +148,25 @@
 	u64 len = vma->vm_end - vma->vm_start;
 	len = min(len, ctx->psn_size);
 
-	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		return vm_iomap_memory(vma, ctx->afu->psn_phys, ctx->afu->adapter->ps_size);
-	}
+	if (ctx->afu->current_mode != CXL_MODE_DEDICATED) {
+		/* make sure there is a valid per process space for this AFU */
+		if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) {
+			pr_devel("AFU doesn't support mmio space\n");
+			return -EINVAL;
+		}
 
-	/* make sure there is a valid per process space for this AFU */
-	if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) {
-		pr_devel("AFU doesn't support mmio space\n");
-		return -EINVAL;
+		/* Can't mmap until the AFU is enabled */
+		if (!ctx->afu->enabled)
+			return -EBUSY;
 	}
 
-	/* Can't mmap until the AFU is enabled */
-	if (!ctx->afu->enabled)
-		return -EBUSY;
-
 	pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__,
 		 ctx->psn_phys, ctx->pe , ctx->master);
 
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	return vm_iomap_memory(vma, ctx->psn_phys, len);
+	vma->vm_ops = &cxl_mmap_vmops;
+	return 0;
 }
 
 /*
@@ -150,12 +189,6 @@
 	afu_release_irqs(ctx);
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
 	wake_up_all(&ctx->wq);
-
-	/* Release Problem State Area mapping */
-	mutex_lock(&ctx->mapping_lock);
-	if (ctx->mapping)
-		unmap_mapping_range(ctx->mapping, 0, 0, 1);
-	mutex_unlock(&ctx->mapping_lock);
 }
 
 /*
@@ -184,6 +217,17 @@
 		 * created and torn down after the IDR removed
 		 */
 		__detach_context(ctx);
+
+		/*
+		 * We are force detaching - remove any active PSA mappings so
+		 * userspace cannot interfere with the card if it comes back.
+		 * Easiest way to exercise this is to unbind and rebind the
+		 * driver via sysfs while it is in use.
+		 */
+		mutex_lock(&ctx->mapping_lock);
+		if (ctx->mapping)
+			unmap_mapping_range(ctx->mapping, 0, 0, 1);
+		mutex_unlock(&ctx->mapping_lock);
 	}
 	mutex_unlock(&afu->contexts_lock);
 }
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index e9f2f10..b15d811 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -140,18 +140,20 @@
 
 	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
 
-	mutex_lock(&ctx->status_mutex);
-	if (ctx->status != OPENED) {
-		rc = -EIO;
-		goto out;
-	}
-
+	/* Do this outside the status_mutex to avoid a circular dependency with
+	 * the locking in cxl_mmap_fault() */
 	if (copy_from_user(&work, uwork,
 			   sizeof(struct cxl_ioctl_start_work))) {
 		rc = -EFAULT;
 		goto out;
 	}
 
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != OPENED) {
+		rc = -EIO;
+		goto out;
+	}
+
 	/*
 	 * if any of the reserved fields are set or any of the unused
 	 * flags are set it's invalid
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index ff27550..06ff0a2 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -234,6 +234,18 @@
 	struct mei_me_hw *hw = to_me_hw(dev);
 	u32 hcsr = mei_hcsr_read(hw);
 
+	/* H_RST may be found lit before reset is started,
+	 * for example if preceding reset flow hasn't completed.
+	 * In that case asserting H_RST will be ignored, therefore
+	 * we need to clean H_RST bit to start a successful reset sequence.
+	 */
+	if ((hcsr & H_RST) == H_RST) {
+		dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr);
+		hcsr &= ~H_RST;
+		mei_me_reg_write(hw, H_CSR, hcsr);
+		hcsr = mei_hcsr_read(hw);
+	}
+
 	hcsr |= H_RST | H_IG | H_IS;
 
 	if (intr_enable)
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index e3e56d3..970314e 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -247,6 +247,7 @@
 	{ "INT33BB"  , "3" , &sdhci_acpi_slot_int_sd },
 	{ "INT33C6"  , NULL, &sdhci_acpi_slot_int_sdio },
 	{ "INT3436"  , NULL, &sdhci_acpi_slot_int_sdio },
+	{ "INT344D"  , NULL, &sdhci_acpi_slot_int_sdio },
 	{ "PNP0D40"  },
 	{ },
 };
@@ -257,6 +258,7 @@
 	{ "INT33BB"  },
 	{ "INT33C6"  },
 	{ "INT3436"  },
+	{ "INT344D"  },
 	{ "PNP0D40"  },
 	{ },
 };
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 0342775..4f38554 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -993,6 +993,31 @@
 		.subdevice	= PCI_ANY_ID,
 		.driver_data	= (kernel_ulong_t)&sdhci_intel_mrfl_mmc,
 	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_SPT_EMMC,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_emmc,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_SPT_SDIO,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_sdio,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_SPT_SD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_byt_sd,
+	},
+
 	{
 		.vendor		= PCI_VENDOR_ID_O2,
 		.device		= PCI_DEVICE_ID_O2_8120,
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index d57c3d1..1ec684d 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -21,6 +21,9 @@
 #define PCI_DEVICE_ID_INTEL_CLV_EMMC0	0x08e5
 #define PCI_DEVICE_ID_INTEL_CLV_EMMC1	0x08e6
 #define PCI_DEVICE_ID_INTEL_QRK_SD	0x08A7
+#define PCI_DEVICE_ID_INTEL_SPT_EMMC	0x9d2b
+#define PCI_DEVICE_ID_INTEL_SPT_SDIO	0x9d2c
+#define PCI_DEVICE_ID_INTEL_SPT_SD	0x9d2d
 
 /*
  * PCI registers
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index 4523887..ca3424e 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -300,13 +300,6 @@
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
-	if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) {
-		ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info());
-		if (ret < 0)
-			goto err_mbus_win;
-	}
-
-
 	pltfm_host = sdhci_priv(host);
 	pltfm_host->priv = pxa;
 
@@ -325,6 +318,12 @@
 	if (!IS_ERR(pxa->clk_core))
 		clk_prepare_enable(pxa->clk_core);
 
+	if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) {
+		ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info());
+		if (ret < 0)
+			goto err_mbus_win;
+	}
+
 	/* enable 1/8V DDR capable */
 	host->mmc->caps |= MMC_CAP_1_8V_DDR;
 
@@ -396,11 +395,11 @@
 	pm_runtime_disable(&pdev->dev);
 err_of_parse:
 err_cd_req:
+err_mbus_win:
 	clk_disable_unprepare(pxa->clk_io);
 	if (!IS_ERR(pxa->clk_core))
 		clk_disable_unprepare(pxa->clk_core);
 err_clk_get:
-err_mbus_win:
 	sdhci_pltfm_free(pdev);
 	return ret;
 }
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index cbb245b..f1a488e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -259,8 +259,6 @@
 
 		del_timer_sync(&host->tuning_timer);
 		host->flags &= ~SDHCI_NEEDS_RETUNING;
-		host->mmc->max_blk_count =
-			(host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535;
 	}
 	sdhci_enable_card_detection(host);
 }
@@ -1273,6 +1271,12 @@
 		spin_unlock_irq(&host->lock);
 		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
 		spin_lock_irq(&host->lock);
+
+		if (mode != MMC_POWER_OFF)
+			sdhci_writeb(host, SDHCI_POWER_ON, SDHCI_POWER_CONTROL);
+		else
+			sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
+
 		return;
 	}
 
@@ -1353,6 +1357,8 @@
 
 	sdhci_runtime_pm_get(host);
 
+	present = mmc_gpio_get_cd(host->mmc);
+
 	spin_lock_irqsave(&host->lock, flags);
 
 	WARN_ON(host->mrq != NULL);
@@ -1381,7 +1387,6 @@
 	 *     zero: cd-gpio is used, and card is removed
 	 *     one: cd-gpio is used, and card is present
 	 */
-	present = mmc_gpio_get_cd(host->mmc);
 	if (present < 0) {
 		/* If polling, assume that the card is always present. */
 		if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
@@ -1880,6 +1885,18 @@
 	return !(present_state & SDHCI_DATA_LVL_MASK);
 }
 
+static int sdhci_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+	host->flags |= SDHCI_HS400_TUNING;
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return 0;
+}
+
 static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
@@ -1887,10 +1904,18 @@
 	int tuning_loop_counter = MAX_TUNING_LOOP;
 	int err = 0;
 	unsigned long flags;
+	unsigned int tuning_count = 0;
+	bool hs400_tuning;
 
 	sdhci_runtime_pm_get(host);
 	spin_lock_irqsave(&host->lock, flags);
 
+	hs400_tuning = host->flags & SDHCI_HS400_TUNING;
+	host->flags &= ~SDHCI_HS400_TUNING;
+
+	if (host->tuning_mode == SDHCI_TUNING_MODE_1)
+		tuning_count = host->tuning_count;
+
 	/*
 	 * The Host Controller needs tuning only in case of SDR104 mode
 	 * and for SDR50 mode when Use Tuning for SDR50 is set in the
@@ -1899,8 +1924,20 @@
 	 * tuning function has to be executed.
 	 */
 	switch (host->timing) {
+	/* HS400 tuning is done in HS200 mode */
 	case MMC_TIMING_MMC_HS400:
+		err = -EINVAL;
+		goto out_unlock;
+
 	case MMC_TIMING_MMC_HS200:
+		/*
+		 * Periodic re-tuning for HS400 is not expected to be needed, so
+		 * disable it here.
+		 */
+		if (hs400_tuning)
+			tuning_count = 0;
+		break;
+
 	case MMC_TIMING_UHS_SDR104:
 		break;
 
@@ -1911,9 +1948,7 @@
 		/* FALLTHROUGH */
 
 	default:
-		spin_unlock_irqrestore(&host->lock, flags);
-		sdhci_runtime_pm_put(host);
-		return 0;
+		goto out_unlock;
 	}
 
 	if (host->ops->platform_execute_tuning) {
@@ -2037,24 +2072,11 @@
 	}
 
 out:
-	/*
-	 * If this is the very first time we are here, we start the retuning
-	 * timer. Since only during the first time, SDHCI_NEEDS_RETUNING
-	 * flag won't be set, we check this condition before actually starting
-	 * the timer.
-	 */
-	if (!(host->flags & SDHCI_NEEDS_RETUNING) && host->tuning_count &&
-	    (host->tuning_mode == SDHCI_TUNING_MODE_1)) {
+	host->flags &= ~SDHCI_NEEDS_RETUNING;
+
+	if (tuning_count) {
 		host->flags |= SDHCI_USING_RETUNING_TIMER;
-		mod_timer(&host->tuning_timer, jiffies +
-			host->tuning_count * HZ);
-		/* Tuning mode 1 limits the maximum data length to 4MB */
-		mmc->max_blk_count = (4 * 1024 * 1024) / mmc->max_blk_size;
-	} else if (host->flags & SDHCI_USING_RETUNING_TIMER) {
-		host->flags &= ~SDHCI_NEEDS_RETUNING;
-		/* Reload the new initial value for timer */
-		mod_timer(&host->tuning_timer, jiffies +
-			  host->tuning_count * HZ);
+		mod_timer(&host->tuning_timer, jiffies + tuning_count * HZ);
 	}
 
 	/*
@@ -2070,6 +2092,7 @@
 
 	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
 	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+out_unlock:
 	spin_unlock_irqrestore(&host->lock, flags);
 	sdhci_runtime_pm_put(host);
 
@@ -2110,15 +2133,18 @@
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 	unsigned long flags;
+	int present;
 
 	/* First check if client has provided their own card event */
 	if (host->ops->card_event)
 		host->ops->card_event(host);
 
+	present = sdhci_do_get_cd(host);
+
 	spin_lock_irqsave(&host->lock, flags);
 
 	/* Check host->mrq first in case we are runtime suspended */
-	if (host->mrq && !sdhci_do_get_cd(host)) {
+	if (host->mrq && !present) {
 		pr_err("%s: Card removed during transfer!\n",
 			mmc_hostname(host->mmc));
 		pr_err("%s: Resetting controller.\n",
@@ -2142,6 +2168,7 @@
 	.hw_reset	= sdhci_hw_reset,
 	.enable_sdio_irq = sdhci_enable_sdio_irq,
 	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
+	.prepare_hs400_tuning		= sdhci_prepare_hs400_tuning,
 	.execute_tuning			= sdhci_execute_tuning,
 	.card_event			= sdhci_card_event,
 	.card_busy	= sdhci_card_busy,
@@ -3260,8 +3287,9 @@
 		mmc->max_segs = SDHCI_MAX_SEGS;
 
 	/*
-	 * Maximum number of sectors in one transfer. Limited by DMA boundary
-	 * size (512KiB).
+	 * Maximum number of sectors in one transfer. Limited by SDMA boundary
+	 * size (512KiB). Note some tuning modes impose a 4MiB limit, but this
+	 * is less anyway.
 	 */
 	mmc->max_req_size = 524288;
 
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index f94a9fa..c672c4d 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -615,6 +615,9 @@
 
 	c_can_irq_control(priv, false);
 
+	/* put ctrl to init on stop to end ongoing transmission */
+	priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_INIT);
+
 	/* deactivate pins */
 	pinctrl_pm_select_sleep_state(dev->dev.parent);
 	priv->can.state = CAN_STATE_STOPPED;
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index f363972..e36d105 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -103,27 +103,34 @@
 	mask = 1 << raminit->bits.start | 1 << raminit->bits.done;
 	regmap_read(raminit->syscon, raminit->reg, &ctrl);
 
-	/* We clear the done and start bit first. The start bit is
+	/* We clear the start bit first. The start bit is
 	 * looking at the 0 -> transition, but is not self clearing;
-	 * And we clear the init done bit as well.
 	 * NOTE: DONE must be written with 1 to clear it.
+	 * We can't clear the DONE bit here using regmap_update_bits()
+	 * as it will bypass the write if initial condition is START:0 DONE:1
+	 * e.g. on DRA7 which needs START pulse.
 	 */
-	ctrl &= ~(1 << raminit->bits.start);
-	ctrl |= 1 << raminit->bits.done;
-	regmap_write(raminit->syscon, raminit->reg, ctrl);
+	ctrl &= ~mask;	/* START = 0, DONE = 0 */
+	regmap_update_bits(raminit->syscon, raminit->reg, mask, ctrl);
 
-	ctrl &= ~(1 << raminit->bits.done);
-	c_can_hw_raminit_wait_syscon(priv, mask, ctrl);
+	/* check if START bit is 0. Ignore DONE bit for now
+	 * as it can be either 0 or 1.
+	 */
+	c_can_hw_raminit_wait_syscon(priv, 1 << raminit->bits.start, ctrl);
 
 	if (enable) {
-		/* Set start bit and wait for the done bit. */
+		/* Clear DONE bit & set START bit. */
 		ctrl |= 1 << raminit->bits.start;
-		regmap_write(raminit->syscon, raminit->reg, ctrl);
-
+		/* DONE must be written with 1 to clear it */
+		ctrl |= 1 << raminit->bits.done;
+		regmap_update_bits(raminit->syscon, raminit->reg, mask, ctrl);
+		/* prevent further clearing of DONE bit */
+		ctrl &= ~(1 << raminit->bits.done);
 		/* clear START bit if start pulse is needed */
 		if (raminit->needs_pulse) {
 			ctrl &= ~(1 << raminit->bits.start);
-			regmap_write(raminit->syscon, raminit->reg, ctrl);
+			regmap_update_bits(raminit->syscon, raminit->reg,
+					   mask, ctrl);
 		}
 
 		ctrl |= 1 << raminit->bits.done;
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 3ec8f6f..847c1f8 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -807,10 +807,14 @@
 		if (dev->flags & IFF_UP)
 			return -EBUSY;
 		cm = nla_data(data[IFLA_CAN_CTRLMODE]);
-		if (cm->flags & ~priv->ctrlmode_supported)
+
+		/* check whether changed bits are allowed to be modified */
+		if (cm->mask & ~priv->ctrlmode_supported)
 			return -EOPNOTSUPP;
+
+		/* clear bits to be modified and copy the flag values */
 		priv->ctrlmode &= ~cm->mask;
-		priv->ctrlmode |= cm->flags;
+		priv->ctrlmode |= (cm->flags & cm->mask);
 
 		/* CAN_CTRLMODE_FD can only be set when driver supports FD */
 		if (priv->ctrlmode & CAN_CTRLMODE_FD)
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index d7bc462..2445298 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -955,6 +955,11 @@
 	priv->can.data_bittiming_const = &m_can_data_bittiming_const;
 	priv->can.do_set_mode = m_can_set_mode;
 	priv->can.do_get_berr_counter = m_can_get_berr_counter;
+
+	/* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.1 */
+	priv->can.ctrlmode = CAN_CTRLMODE_FD_NON_ISO;
+
+	/* CAN_CTRLMODE_FD_NON_ISO can not be changed with M_CAN IP v3.0.1 */
 	priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
 					CAN_CTRLMODE_LISTENONLY |
 					CAN_CTRLMODE_BERR_REPORTING |
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 541fb7a..7af379c 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -520,10 +520,10 @@
 		skb = alloc_can_err_skb(priv->netdev, &cf);
 		if (skb) {
 			cf->can_id |= CAN_ERR_RESTARTED;
-			netif_rx(skb);
 
 			stats->rx_packets++;
 			stats->rx_bytes += cf->can_dlc;
+			netif_rx(skb);
 		} else {
 			netdev_err(priv->netdev,
 				   "No memory left for err_skb\n");
@@ -587,7 +587,7 @@
 			  usb_sndbulkpipe(dev->udev,
 					  dev->bulk_out->bEndpointAddress),
 			  buf, msg->len,
-			  kvaser_usb_simple_msg_callback, priv);
+			  kvaser_usb_simple_msg_callback, netdev);
 	usb_anchor_urb(urb, &priv->tx_submitted);
 
 	err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -662,11 +662,6 @@
 	priv = dev->nets[channel];
 	stats = &priv->netdev->stats;
 
-	if (status & M16C_STATE_BUS_RESET) {
-		kvaser_usb_unlink_tx_urbs(priv);
-		return;
-	}
-
 	skb = alloc_can_err_skb(priv->netdev, &cf);
 	if (!skb) {
 		stats->rx_dropped++;
@@ -677,7 +672,7 @@
 
 	netdev_dbg(priv->netdev, "Error status: 0x%02x\n", status);
 
-	if (status & M16C_STATE_BUS_OFF) {
+	if (status & (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) {
 		cf->can_id |= CAN_ERR_BUSOFF;
 
 		priv->can.can_stats.bus_off++;
@@ -703,9 +698,7 @@
 		}
 
 		new_state = CAN_STATE_ERROR_PASSIVE;
-	}
-
-	if (status == M16C_STATE_BUS_ERROR) {
+	} else if (status & M16C_STATE_BUS_ERROR) {
 		if ((priv->can.state < CAN_STATE_ERROR_WARNING) &&
 		    ((txerr >= 96) || (rxerr >= 96))) {
 			cf->can_id |= CAN_ERR_CRTL;
@@ -715,7 +708,8 @@
 
 			priv->can.can_stats.error_warning++;
 			new_state = CAN_STATE_ERROR_WARNING;
-		} else if (priv->can.state > CAN_STATE_ERROR_ACTIVE) {
+		} else if ((priv->can.state > CAN_STATE_ERROR_ACTIVE) &&
+			   ((txerr < 96) && (rxerr < 96))) {
 			cf->can_id |= CAN_ERR_PROT;
 			cf->data[2] = CAN_ERR_PROT_ACTIVE;
 
@@ -770,10 +764,9 @@
 
 	priv->can.state = new_state;
 
-	netif_rx(skb);
-
 	stats->rx_packets++;
 	stats->rx_bytes += cf->can_dlc;
+	netif_rx(skb);
 }
 
 static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv,
@@ -805,10 +798,9 @@
 		stats->rx_over_errors++;
 		stats->rx_errors++;
 
-		netif_rx(skb);
-
 		stats->rx_packets++;
 		stats->rx_bytes += cf->can_dlc;
+		netif_rx(skb);
 	}
 }
 
@@ -887,10 +879,9 @@
 			       cf->can_dlc);
 	}
 
-	netif_rx(skb);
-
 	stats->rx_packets++;
 	stats->rx_bytes += cf->can_dlc;
+	netif_rx(skb);
 }
 
 static void kvaser_usb_start_chip_reply(const struct kvaser_usb *dev,
@@ -1246,6 +1237,9 @@
 	if (err)
 		netdev_warn(netdev, "Cannot stop device, error %d\n", err);
 
+	/* reset tx contexts */
+	kvaser_usb_unlink_tx_urbs(priv);
+
 	priv->can.state = CAN_STATE_STOPPED;
 	close_candev(priv->netdev);
 
@@ -1294,12 +1288,14 @@
 	if (!urb) {
 		netdev_err(netdev, "No memory left for URBs\n");
 		stats->tx_dropped++;
-		goto nourbmem;
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
 	}
 
 	buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
 	if (!buf) {
 		stats->tx_dropped++;
+		dev_kfree_skb(skb);
 		goto nobufmem;
 	}
 
@@ -1334,6 +1330,7 @@
 		}
 	}
 
+	/* This should never happen; it implies a flow control bug */
 	if (!context) {
 		netdev_warn(netdev, "cannot find free context\n");
 		ret =  NETDEV_TX_BUSY;
@@ -1364,9 +1361,6 @@
 	if (unlikely(err)) {
 		can_free_echo_skb(netdev, context->echo_index);
 
-		skb = NULL; /* set to NULL to avoid double free in
-			     * dev_kfree_skb(skb) */
-
 		atomic_dec(&priv->active_tx_urbs);
 		usb_unanchor_urb(urb);
 
@@ -1388,8 +1382,6 @@
 	kfree(buf);
 nobufmem:
 	usb_free_urb(urb);
-nourbmem:
-	dev_kfree_skb(skb);
 	return ret;
 }
 
@@ -1502,6 +1494,10 @@
 	struct kvaser_usb_net_priv *priv;
 	int i, err;
 
+	err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel);
+	if (err)
+		return err;
+
 	netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS);
 	if (!netdev) {
 		dev_err(&intf->dev, "Cannot alloc candev\n");
@@ -1588,7 +1584,7 @@
 {
 	struct kvaser_usb *dev;
 	int err = -ENOMEM;
-	int i;
+	int i, retry = 3;
 
 	dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL);
 	if (!dev)
@@ -1606,10 +1602,15 @@
 
 	usb_set_intfdata(intf, dev);
 
-	for (i = 0; i < MAX_NET_DEVICES; i++)
-		kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, i);
+	/* On some x86 laptops, plugging a Kvaser device again after
+	 * an unplug makes the firmware always ignore the very first
+	 * command. For such a case, provide some room for retries
+	 * instead of completely exiting the driver.
+	 */
+	do {
+		err = kvaser_usb_get_software_info(dev);
+	} while (--retry && err == -ETIMEDOUT);
 
-	err = kvaser_usb_get_software_info(dev);
 	if (err) {
 		dev_err(&intf->dev,
 			"Cannot get software infos, error %d\n", err);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 75b08c6..29a0927 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -767,16 +767,17 @@
 #define MTL_Q_RQOMR			0x40
 #define MTL_Q_RQMPOCR			0x44
 #define MTL_Q_RQDR			0x4c
+#define MTL_Q_RQFCR			0x50
 #define MTL_Q_IER			0x70
 #define MTL_Q_ISR			0x74
 
 /* MTL queue register entry bit positions and sizes */
+#define MTL_Q_RQFCR_RFA_INDEX		1
+#define MTL_Q_RQFCR_RFA_WIDTH		6
+#define MTL_Q_RQFCR_RFD_INDEX		17
+#define MTL_Q_RQFCR_RFD_WIDTH		6
 #define MTL_Q_RQOMR_EHFC_INDEX		7
 #define MTL_Q_RQOMR_EHFC_WIDTH		1
-#define MTL_Q_RQOMR_RFA_INDEX		8
-#define MTL_Q_RQOMR_RFA_WIDTH		3
-#define MTL_Q_RQOMR_RFD_INDEX		13
-#define MTL_Q_RQOMR_RFD_WIDTH		3
 #define MTL_Q_RQOMR_RQS_INDEX		16
 #define MTL_Q_RQOMR_RQS_WIDTH		9
 #define MTL_Q_RQOMR_RSF_INDEX		5
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 53f5f66..4c66cd1 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -2079,10 +2079,10 @@
 
 	for (i = 0; i < pdata->rx_q_count; i++) {
 		/* Activate flow control when less than 4k left in fifo */
-		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RFA, 2);
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, 2);
 
 		/* De-activate flow control when more than 6k left in fifo */
-		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RFD, 4);
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, 4);
 	}
 }
 
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index e398eda..c8af3ce 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -184,15 +184,16 @@
 	schedule_work(&alx->reset_wk);
 }
 
-static bool alx_clean_rx_irq(struct alx_priv *alx, int budget)
+static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 {
 	struct alx_rx_queue *rxq = &alx->rxq;
 	struct alx_rrd *rrd;
 	struct alx_buffer *rxb;
 	struct sk_buff *skb;
 	u16 length, rfd_cleaned = 0;
+	int work = 0;
 
-	while (budget > 0) {
+	while (work < budget) {
 		rrd = &rxq->rrd[rxq->rrd_read_idx];
 		if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
 			break;
@@ -203,7 +204,7 @@
 		    ALX_GET_FIELD(le32_to_cpu(rrd->word0),
 				  RRD_NOR) != 1) {
 			alx_schedule_reset(alx);
-			return 0;
+			return work;
 		}
 
 		rxb = &rxq->bufs[rxq->read_idx];
@@ -243,7 +244,7 @@
 		}
 
 		napi_gro_receive(&alx->napi, skb);
-		budget--;
+		work++;
 
 next_pkt:
 		if (++rxq->read_idx == alx->rx_ringsz)
@@ -258,21 +259,22 @@
 	if (rfd_cleaned)
 		alx_refill_rx_ring(alx, GFP_ATOMIC);
 
-	return budget > 0;
+	return work;
 }
 
 static int alx_poll(struct napi_struct *napi, int budget)
 {
 	struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
 	struct alx_hw *hw = &alx->hw;
-	bool complete = true;
 	unsigned long flags;
+	bool tx_complete;
+	int work;
 
-	complete = alx_clean_tx_irq(alx) &&
-		   alx_clean_rx_irq(alx, budget);
+	tx_complete = alx_clean_tx_irq(alx);
+	work = alx_clean_rx_irq(alx, budget);
 
-	if (!complete)
-		return 1;
+	if (!tx_complete || work == budget)
+		return budget;
 
 	napi_complete(&alx->napi);
 
@@ -284,7 +286,7 @@
 
 	alx_post_write(hw);
 
-	return 0;
+	return work;
 }
 
 static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 05c6af6..3007d95 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1167,10 +1167,10 @@
 		bgmac->int_status = 0;
 	}
 
-	if (handled < weight)
+	if (handled < weight) {
 		napi_complete(napi);
-
-	bgmac_chip_intrs_on(bgmac);
+		bgmac_chip_intrs_on(bgmac);
+	}
 
 	return handled;
 }
@@ -1515,6 +1515,8 @@
 	if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
 		bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n");
 
+	netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
+
 	err = bgmac_mii_register(bgmac);
 	if (err) {
 		bgmac_err(bgmac, "Cannot register MDIO\n");
@@ -1529,8 +1531,6 @@
 
 	netif_carrier_off(net_dev);
 
-	netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
-
 	return 0;
 
 err_mii_unregister:
@@ -1549,9 +1549,9 @@
 {
 	struct bgmac *bgmac = bcma_get_drvdata(core);
 
-	netif_napi_del(&bgmac->napi);
 	unregister_netdev(bgmac->net_dev);
 	bgmac_mii_unregister(bgmac);
+	netif_napi_del(&bgmac->napi);
 	bgmac_dma_free(bgmac);
 	bcma_set_drvdata(core, NULL);
 	free_netdev(bgmac->net_dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 1d1147c..e468ed3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3175,7 +3175,7 @@
 		}
 #endif
 		if (!bnx2x_fp_lock_napi(fp))
-			return work_done;
+			return budget;
 
 		for_each_cos_in_tx_queue(fp, cos)
 			if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos]))
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 553dcd8..96bf01b 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7413,6 +7413,8 @@
 }
 
 static void tg3_irq_quiesce(struct tg3 *tp)
+	__releases(tp->lock)
+	__acquires(tp->lock)
 {
 	int i;
 
@@ -7421,8 +7423,12 @@
 	tp->irq_sync = 1;
 	smp_mb();
 
+	spin_unlock_bh(&tp->lock);
+
 	for (i = 0; i < tp->irq_cnt; i++)
 		synchronize_irq(tp->napi[i].irq_vec);
+
+	spin_lock_bh(&tp->lock);
 }
 
 /* Fully shutdown all tg3 driver activity elsewhere in the system.
@@ -9018,6 +9024,8 @@
 
 /* tp->lock is held. */
 static int tg3_chip_reset(struct tg3 *tp)
+	__releases(tp->lock)
+	__acquires(tp->lock)
 {
 	u32 val;
 	void (*write_op)(struct tg3 *, u32, u32);
@@ -9073,9 +9081,13 @@
 	}
 	smp_mb();
 
+	tg3_full_unlock(tp);
+
 	for (i = 0; i < tp->irq_cnt; i++)
 		synchronize_irq(tp->napi[i].irq_vec);
 
+	tg3_full_lock(tp, 0);
+
 	if (tg3_asic_rev(tp) == ASIC_REV_57780) {
 		val = tr32(TG3_PCIE_LNKCTL) & ~TG3_PCIE_LNKCTL_L1_PLL_PD_EN;
 		tw32(TG3_PCIE_LNKCTL, val | TG3_PCIE_LNKCTL_L1_PLL_PD_DIS);
@@ -10903,11 +10915,13 @@
 {
 	struct tg3 *tp = (struct tg3 *) __opaque;
 
-	if (tp->irq_sync || tg3_flag(tp, RESET_TASK_PENDING))
-		goto restart_timer;
-
 	spin_lock(&tp->lock);
 
+	if (tp->irq_sync || tg3_flag(tp, RESET_TASK_PENDING)) {
+		spin_unlock(&tp->lock);
+		goto restart_timer;
+	}
+
 	if (tg3_asic_rev(tp) == ASIC_REV_5717 ||
 	    tg3_flag(tp, 57765_CLASS))
 		tg3_chk_missed_msi(tp);
@@ -11101,11 +11115,13 @@
 	struct tg3 *tp = container_of(work, struct tg3, reset_task);
 	int err;
 
+	rtnl_lock();
 	tg3_full_lock(tp, 0);
 
 	if (!netif_running(tp->dev)) {
 		tg3_flag_clear(tp, RESET_TASK_PENDING);
 		tg3_full_unlock(tp);
+		rtnl_unlock();
 		return;
 	}
 
@@ -11138,6 +11154,7 @@
 		tg3_phy_start(tp);
 
 	tg3_flag_clear(tp, RESET_TASK_PENDING);
+	rtnl_unlock();
 }
 
 static int tg3_request_irq(struct tg3 *tp, int irq_num)
diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
index 55eb7f2..7ef55f5 100644
--- a/drivers/net/ethernet/cadence/at91_ether.c
+++ b/drivers/net/ethernet/cadence/at91_ether.c
@@ -340,7 +340,7 @@
 		res = PTR_ERR(lp->pclk);
 		goto err_free_dev;
 	}
-	clk_enable(lp->pclk);
+	clk_prepare_enable(lp->pclk);
 
 	lp->hclk = ERR_PTR(-ENOENT);
 	lp->tx_clk = ERR_PTR(-ENOENT);
@@ -406,7 +406,7 @@
 err_out_unregister_netdev:
 	unregister_netdev(dev);
 err_disable_clock:
-	clk_disable(lp->pclk);
+	clk_disable_unprepare(lp->pclk);
 err_free_dev:
 	free_netdev(dev);
 	return res;
@@ -424,7 +424,7 @@
 	kfree(lp->mii_bus->irq);
 	mdiobus_free(lp->mii_bus);
 	unregister_netdev(dev);
-	clk_disable(lp->pclk);
+	clk_disable_unprepare(lp->pclk);
 	free_netdev(dev);
 
 	return 0;
@@ -440,7 +440,7 @@
 		netif_stop_queue(net_dev);
 		netif_device_detach(net_dev);
 
-		clk_disable(lp->pclk);
+		clk_disable_unprepare(lp->pclk);
 	}
 	return 0;
 }
@@ -451,7 +451,7 @@
 	struct macb *lp = netdev_priv(net_dev);
 
 	if (netif_running(net_dev)) {
-		clk_enable(lp->pclk);
+		clk_prepare_enable(lp->pclk);
 
 		netif_device_attach(net_dev);
 		netif_start_queue(net_dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 2215d43..a936ee8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2430,7 +2430,7 @@
 	 */
 	n10g = 0;
 	for_each_port(adapter, pidx)
-		n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
+		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
 
 	/*
 	 * We default to 1 queue per non-10G port and up to # of cores queues
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 21dc9a2..60426cf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -323,6 +323,8 @@
 		return v;
 
 	v = be32_to_cpu(port_rpl.u.info.lstatus_to_modtype);
+	pi->mdio_addr = (v & FW_PORT_CMD_MDIOCAP_F) ?
+			FW_PORT_CMD_MDIOADDR_G(v) : -1;
 	pi->port_type = FW_PORT_CMD_PTYPE_G(v);
 	pi->mod_type = FW_PORT_MOD_TYPE_NA;
 
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index b29e027..e356afa 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1335,7 +1335,7 @@
 	int err;
 
 	if (!enic_poll_lock_napi(&enic->rq[rq]))
-		return work_done;
+		return budget;
 	/* Service RQ
 	 */
 
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index a379c3e..13d00a3 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -398,13 +398,8 @@
 		 * break out of while loop if there are no more
 		 * packets waiting
 		 */
-		if (!(dnet_readl(bp, RX_FIFO_WCNT) >> 16)) {
-			napi_complete(napi);
-			int_enable = dnet_readl(bp, INTR_ENB);
-			int_enable |= DNET_INTR_SRC_RX_CMDFIFOAF;
-			dnet_writel(bp, int_enable, INTR_ENB);
-			return 0;
-		}
+		if (!(dnet_readl(bp, RX_FIFO_WCNT) >> 16))
+			break;
 
 		cmd_word = dnet_readl(bp, RX_LEN_FIFO);
 		pkt_len = cmd_word & 0xFFFF;
@@ -433,20 +428,17 @@
 			       "size %u.\n", dev->name, pkt_len);
 	}
 
-	budget -= npackets;
-
 	if (npackets < budget) {
 		/* We processed all packets available.  Tell NAPI it can
-		 * stop polling then re-enable rx interrupts */
+		 * stop polling then re-enable rx interrupts.
+		 */
 		napi_complete(napi);
 		int_enable = dnet_readl(bp, INTR_ENB);
 		int_enable |= DNET_INTR_SRC_RX_CMDFIFOAF;
 		dnet_writel(bp, int_enable, INTR_ENB);
-		return 0;
 	}
 
-	/* There are still packets waiting */
-	return 1;
+	return npackets;
 }
 
 static irqreturn_t dnet_interrupt(int irq, void *dev_id)
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 41a0a54..d48806b 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4383,8 +4383,9 @@
  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
  * is expected to work across all types of IP tunnels once exported. Skyhawk
  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
- * offloads in hw_enc_features only when a VxLAN port is added. Note this only
- * ensures that other tunnels work fine while VxLAN offloads are not enabled.
+ * offloads in hw_enc_features only when a VxLAN port is added. If other (non
+ * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
+ * those other tunnels are unexported on the fly through ndo_features_check().
  *
  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
  * adds more than one port, disable offloads and don't re-enable them again
@@ -4463,7 +4464,41 @@
 					   struct net_device *dev,
 					   netdev_features_t features)
 {
-	return vxlan_features_check(skb, features);
+	struct be_adapter *adapter = netdev_priv(dev);
+	u8 l4_hdr = 0;
+
+	/* The code below restricts offload features for some tunneled packets.
+	 * Offload features for normal (non tunnel) packets are unchanged.
+	 */
+	if (!skb->encapsulation ||
+	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
+		return features;
+
+	/* It's an encapsulated packet and VxLAN offloads are enabled. We
+	 * should disable tunnel offload features if it's not a VxLAN packet,
+	 * as tunnel offloads have been enabled only for VxLAN. This is done to
+	 * allow other tunneled traffic like GRE work fine while VxLAN
+	 * offloads are configured in Skyhawk-R.
+	 */
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		l4_hdr = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_hdr = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return features;
+	}
+
+	if (l4_hdr != IPPROTO_UDP ||
+	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	    skb->inner_protocol != htons(ETH_P_TEB) ||
+	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
+	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+		return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
+
+	return features;
 }
 #endif
 
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 469691a..4013292 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -424,6 +424,8 @@
  * (40ns * 6).
  */
 #define FEC_QUIRK_BUG_CAPTURE		(1 << 10)
+/* Controller has only one MDIO bus */
+#define FEC_QUIRK_SINGLE_MDIO		(1 << 11)
 
 struct fec_enet_priv_tx_q {
 	int index;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 5ebdf8d..bba8777 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -91,7 +91,8 @@
 		.driver_data = 0,
 	}, {
 		.name = "imx28-fec",
-		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME,
+		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
+				FEC_QUIRK_SINGLE_MDIO,
 	}, {
 		.name = "imx6q-fec",
 		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
@@ -1937,7 +1938,7 @@
 	int err = -ENXIO, i;
 
 	/*
-	 * The dual fec interfaces are not equivalent with enet-mac.
+	 * The i.MX28 dual fec interfaces are not equal.
 	 * Here are the differences:
 	 *
 	 *  - fec0 supports MII & RMII modes while fec1 only supports RMII
@@ -1952,7 +1953,7 @@
 	 * mdio interface in board design, and need to be configured by
 	 * fec0 mii_bus.
 	 */
-	if ((fep->quirks & FEC_QUIRK_ENET_MAC) && fep->dev_id > 0) {
+	if ((fep->quirks & FEC_QUIRK_SINGLE_MDIO) && fep->dev_id > 0) {
 		/* fec1 uses fec0 mii_bus */
 		if (mii_cnt && fec0_mii_bus) {
 			fep->mii_bus = fec0_mii_bus;
@@ -2015,7 +2016,7 @@
 	mii_cnt++;
 
 	/* save fec0 mii_bus */
-	if (fep->quirks & FEC_QUIRK_ENET_MAC)
+	if (fep->quirks & FEC_QUIRK_SINGLE_MDIO)
 		fec0_mii_bus = fep->mii_bus;
 
 	return 0;
@@ -3129,6 +3130,7 @@
 		pdev->id_entry = of_id->data;
 	fep->quirks = pdev->id_entry->driver_data;
 
+	fep->netdev = ndev;
 	fep->num_rx_queues = num_rx_qs;
 	fep->num_tx_queues = num_tx_qs;
 
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 5b8300a..4d61ef5 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -281,6 +281,17 @@
 
 	  If unsure, say N.
 
+config I40E_FCOE
+	bool "Fibre Channel over Ethernet (FCoE)"
+	default n
+	depends on I40E && DCB && FCOE
+	---help---
+	  Say Y here if you want to use Fibre Channel over Ethernet (FCoE)
+	  in the driver. This will create new netdev for exclusive FCoE
+	  use with XL710 FCoE offloads enabled.
+
+	  If unsure, say N.
+
 config I40EVF
 	tristate "Intel(R) XL710 X710 Virtual Function Ethernet support"
 	depends on PCI_MSI
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 4b94ddb..c405819 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -44,4 +44,4 @@
 	i40e_virtchnl_pf.o
 
 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
-i40e-$(CONFIG_FCOE:m=y) += i40e_fcoe.o
+i40e-$(CONFIG_I40E_FCOE) += i40e_fcoe.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index 045b5c4..ad802dd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -78,7 +78,7 @@
 } while (0)
 
 typedef enum i40e_status_code i40e_status;
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifdef CONFIG_I40E_FCOE
 #define I40E_FCOE
-#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
+#endif
 #endif /* _I40E_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 04b4414..cecb340 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -658,6 +658,8 @@
 	return le32_to_cpu(*(volatile __le32 *)head);
 }
 
+#define WB_STRIDE 0x3
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring:  tx ring to clean
@@ -759,6 +761,18 @@
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
+	/* check to see if there are any non-cache aligned descriptors
+	 * waiting to be written back, and kick the hardware to force
+	 * them to be written back in case of napi polling
+	 */
+	if (budget &&
+	    !((i & WB_STRIDE) == WB_STRIDE) &&
+	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+		tx_ring->arm_wb = true;
+	else
+		tx_ring->arm_wb = false;
+
 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
 		/* schedule immediate reset if we believe we hung */
 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
@@ -777,13 +791,16 @@
 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
 
 		dev_info(tx_ring->dev,
-			 "tx hang detected on queue %d, resetting adapter\n",
+			 "tx hang detected on queue %d, reset requested\n",
 			 tx_ring->queue_index);
 
-		tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
+		/* do not fire the reset immediately, wait for the stack to
+		 * decide we are truly stuck, also prevents every queue from
+		 * simultaneously requesting a reset
+		 */
 
-		/* the adapter is about to reset, no point in enabling stuff */
-		return true;
+		/* the adapter is about to reset, no point in enabling polling */
+		budget = 1;
 	}
 
 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
@@ -806,7 +823,25 @@
 		}
 	}
 
-	return budget > 0;
+	return !!budget;
+}
+
+/**
+ * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * @vsi: the VSI we care about
+ * @q_vector: the vector  on which to force writeback
+ *
+ **/
+static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK
+		  /* allow 00 to be written to the index */;
+
+	wr32(&vsi->back->hw,
+	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+	     val);
 }
 
 /**
@@ -1290,9 +1325,7 @@
 	 * so the total length of IPv4 header is IHL*4 bytes
 	 * The UDP_0 bit *may* bet set if the *inner* header is UDP
 	 */
-	if (ipv4_tunnel &&
-	    (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
-	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
+	if (ipv4_tunnel) {
 		skb->transport_header = skb->mac_header +
 					sizeof(struct ethhdr) +
 					(ip_hdr(skb)->ihl * 4);
@@ -1302,15 +1335,19 @@
 					  skb->protocol == htons(ETH_P_8021AD))
 					  ? VLAN_HLEN : 0;
 
-		rx_udp_csum = udp_csum(skb);
-		iph = ip_hdr(skb);
-		csum = csum_tcpudp_magic(
-				iph->saddr, iph->daddr,
-				(skb->len - skb_transport_offset(skb)),
-				IPPROTO_UDP, rx_udp_csum);
+		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
+		    (udp_hdr(skb)->check != 0)) {
+			rx_udp_csum = udp_csum(skb);
+			iph = ip_hdr(skb);
+			csum = csum_tcpudp_magic(
+					iph->saddr, iph->daddr,
+					(skb->len - skb_transport_offset(skb)),
+					IPPROTO_UDP, rx_udp_csum);
 
-		if (udp_hdr(skb)->check != csum)
-			goto checksum_fail;
+			if (udp_hdr(skb)->check != csum)
+				goto checksum_fail;
+
+		} /* else its GRE and so no outer UDP header */
 	}
 
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1581,6 +1618,7 @@
 	struct i40e_vsi *vsi = q_vector->vsi;
 	struct i40e_ring *ring;
 	bool clean_complete = true;
+	bool arm_wb = false;
 	int budget_per_ring;
 
 	if (test_bit(__I40E_DOWN, &vsi->state)) {
@@ -1591,8 +1629,10 @@
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	i40e_for_each_ring(ring, q_vector->tx)
+	i40e_for_each_ring(ring, q_vector->tx) {
 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+		arm_wb |= ring->arm_wb;
+	}
 
 	/* We attempt to distribute budget to each Rx queue fairly, but don't
 	 * allow the budget to go below 1 because that would exit polling early.
@@ -1603,8 +1643,11 @@
 		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
 
 	/* If work not completed, return budget and polling will return */
-	if (!clean_complete)
+	if (!clean_complete) {
+		if (arm_wb)
+			i40e_force_wb(vsi, q_vector);
 		return budget;
+	}
 
 	/* Work is done so exit the polling mode and re-enable the interrupt */
 	napi_complete(napi);
@@ -1840,17 +1883,16 @@
 	if (err < 0)
 		return err;
 
-	if (protocol == htons(ETH_P_IP)) {
-		iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+
+	if (iph->version == 4) {
 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
 		iph->tot_len = 0;
 		iph->check = 0;
 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 						 0, IPPROTO_TCP, 0);
-	} else if (skb_is_gso_v6(skb)) {
-
-		ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
-					   : ipv6_hdr(skb);
+	} else if (ipv6h->version == 6) {
 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
 		ipv6h->payload_len = 0;
 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
@@ -1946,13 +1988,9 @@
 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
 			}
 		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
-			if (tx_flags & I40E_TX_FLAGS_TSO) {
-				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+			if (tx_flags & I40E_TX_FLAGS_TSO)
 				ip_hdr(skb)->check = 0;
-			} else {
-				*cd_tunneling |=
-					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
-			}
 		}
 
 		/* Now set the ctx descriptor fields */
@@ -1962,7 +2000,10 @@
 				   ((skb_inner_network_offset(skb) -
 					skb_transport_offset(skb)) >> 1) <<
 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
-
+		if (this_ip_hdr->version == 6) {
+			tx_flags &= ~I40E_TX_FLAGS_IPV4;
+			tx_flags |= I40E_TX_FLAGS_IPV6;
+		}
 	} else {
 		network_hdr_len = skb_network_header_len(skb);
 		this_ip_hdr = ip_hdr(skb);
@@ -2198,7 +2239,6 @@
 	/* Place RS bit on last descriptor of any packet that spans across the
 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
 	 */
-#define WB_STRIDE 0x3
 	if (((i & WB_STRIDE) != WB_STRIDE) &&
 	    (first <= &tx_ring->tx_bi[i]) &&
 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e60d3ac..18b0023 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -241,6 +241,7 @@
 	unsigned long last_rx_timestamp;
 
 	bool ring_active;		/* is ring online or not */
+	bool arm_wb;		/* do something to arm write back */
 
 	/* stats structs */
 	struct i40e_queue_stats	stats;
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index a62fc38..1c75829 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -192,6 +192,10 @@
 #define IS_TSO_HEADER(txq, addr) \
 	((addr >= txq->tso_hdrs_dma) && \
 	 (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
+#define DESC_DMA_MAP_SINGLE 0
+#define DESC_DMA_MAP_PAGE 1
+
 /*
  * RX/TX descriptors.
  */
@@ -362,6 +366,7 @@
 	dma_addr_t tso_hdrs_dma;
 
 	struct tx_desc *tx_desc_area;
+	char *tx_desc_mapping; /* array to track the type of the dma mapping */
 	dma_addr_t tx_desc_dma;
 	int tx_desc_area_size;
 
@@ -750,6 +755,7 @@
 	if (txq->tx_curr_desc == txq->tx_ring_size)
 		txq->tx_curr_desc = 0;
 	desc = &txq->tx_desc_area[tx_index];
+	txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
 
 	desc->l4i_chk = 0;
 	desc->byte_cnt = length;
@@ -879,14 +885,13 @@
 		skb_frag_t *this_frag;
 		int tx_index;
 		struct tx_desc *desc;
-		void *addr;
 
 		this_frag = &skb_shinfo(skb)->frags[frag];
-		addr = page_address(this_frag->page.p) + this_frag->page_offset;
 		tx_index = txq->tx_curr_desc++;
 		if (txq->tx_curr_desc == txq->tx_ring_size)
 			txq->tx_curr_desc = 0;
 		desc = &txq->tx_desc_area[tx_index];
+		txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_PAGE;
 
 		/*
 		 * The last fragment will generate an interrupt
@@ -902,8 +907,9 @@
 
 		desc->l4i_chk = 0;
 		desc->byte_cnt = skb_frag_size(this_frag);
-		desc->buf_ptr = dma_map_single(mp->dev->dev.parent, addr,
-					       desc->byte_cnt, DMA_TO_DEVICE);
+		desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent,
+						 this_frag, 0, desc->byte_cnt,
+						 DMA_TO_DEVICE);
 	}
 }
 
@@ -936,6 +942,7 @@
 	if (txq->tx_curr_desc == txq->tx_ring_size)
 		txq->tx_curr_desc = 0;
 	desc = &txq->tx_desc_area[tx_index];
+	txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE;
 
 	if (nr_frags) {
 		txq_submit_frag_skb(txq, skb);
@@ -1047,9 +1054,12 @@
 		int tx_index;
 		struct tx_desc *desc;
 		u32 cmd_sts;
+		char desc_dma_map;
 
 		tx_index = txq->tx_used_desc;
 		desc = &txq->tx_desc_area[tx_index];
+		desc_dma_map = txq->tx_desc_mapping[tx_index];
+
 		cmd_sts = desc->cmd_sts;
 
 		if (cmd_sts & BUFFER_OWNED_BY_DMA) {
@@ -1065,9 +1075,19 @@
 		reclaimed++;
 		txq->tx_desc_count--;
 
-		if (!IS_TSO_HEADER(txq, desc->buf_ptr))
-			dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
-					 desc->byte_cnt, DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(txq, desc->buf_ptr)) {
+
+			if (desc_dma_map == DESC_DMA_MAP_PAGE)
+				dma_unmap_page(mp->dev->dev.parent,
+					       desc->buf_ptr,
+					       desc->byte_cnt,
+					       DMA_TO_DEVICE);
+			else
+				dma_unmap_single(mp->dev->dev.parent,
+						 desc->buf_ptr,
+						 desc->byte_cnt,
+						 DMA_TO_DEVICE);
+		}
 
 		if (cmd_sts & TX_ENABLE_INTERRUPT) {
 			struct sk_buff *skb = __skb_dequeue(&txq->tx_skb);
@@ -1996,6 +2016,7 @@
 	struct tx_queue *txq = mp->txq + index;
 	struct tx_desc *tx_desc;
 	int size;
+	int ret;
 	int i;
 
 	txq->index = index;
@@ -2048,18 +2069,34 @@
 					nexti * sizeof(struct tx_desc);
 	}
 
+	txq->tx_desc_mapping = kcalloc(txq->tx_ring_size, sizeof(char),
+				       GFP_KERNEL);
+	if (!txq->tx_desc_mapping) {
+		ret = -ENOMEM;
+		goto err_free_desc_area;
+	}
+
 	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
 	txq->tso_hdrs = dma_alloc_coherent(mp->dev->dev.parent,
 					   txq->tx_ring_size * TSO_HEADER_SIZE,
 					   &txq->tso_hdrs_dma, GFP_KERNEL);
 	if (txq->tso_hdrs == NULL) {
-		dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
-				  txq->tx_desc_area, txq->tx_desc_dma);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err_free_desc_mapping;
 	}
 	skb_queue_head_init(&txq->tx_skb);
 
 	return 0;
+
+err_free_desc_mapping:
+	kfree(txq->tx_desc_mapping);
+err_free_desc_area:
+	if (index == 0 && size <= mp->tx_desc_sram_size)
+		iounmap(txq->tx_desc_area);
+	else
+		dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
+				  txq->tx_desc_area, txq->tx_desc_dma);
+	return ret;
 }
 
 static void txq_deinit(struct tx_queue *txq)
@@ -2077,6 +2114,8 @@
 	else
 		dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
 				  txq->tx_desc_area, txq->tx_desc_dma);
+	kfree(txq->tx_desc_mapping);
+
 	if (txq->tso_hdrs)
 		dma_free_coherent(mp->dev->dev.parent,
 				  txq->tx_ring_size * TSO_HEADER_SIZE,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index d0d6dc1..ac6a8f1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -475,7 +475,8 @@
 {
 	int err;
 
-	if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+	if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
+	    priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
 		return 0; /* do nothing */
 
 	err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 03e9eb0..6e08352 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1744,8 +1744,7 @@
 				       struct mlx4_dev_cap *dev_cap)
 {
 	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
-	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS &&
-	    dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC)
+	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
 		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
 	else
 		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index f5e4b82..db0c7a9 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -6987,7 +6987,9 @@
 			if (sp->s2io_entries[i].in_use == MSIX_FLG) {
 				if (sp->s2io_entries[i].type ==
 				    MSIX_RING_TYPE) {
-					sprintf(sp->desc[i], "%s:MSI-X-%d-RX",
+					snprintf(sp->desc[i],
+						sizeof(sp->desc[i]),
+						"%s:MSI-X-%d-RX",
 						dev->name, i);
 					err = request_irq(sp->entries[i].vector,
 							  s2io_msix_ring_handle,
@@ -6996,7 +6998,9 @@
 							  sp->s2io_entries[i].arg);
 				} else if (sp->s2io_entries[i].type ==
 					   MSIX_ALARM_TYPE) {
-					sprintf(sp->desc[i], "%s:MSI-X-%d-TX",
+					snprintf(sp->desc[i],
+						sizeof(sp->desc[i]),
+						"%s:MSI-X-%d-TX",
 						dev->name, i);
 					err = request_irq(sp->entries[i].vector,
 							  s2io_msix_fifo_handle,
@@ -8154,7 +8158,8 @@
 			  "%s: UDP Fragmentation Offload(UFO) enabled\n",
 			  dev->name);
 	/* Initialize device name */
-	sprintf(sp->name, "%s Neterion %s", dev->name, sp->product_name);
+	snprintf(sp->name, sizeof(sp->name), "%s Neterion %s", dev->name,
+		 sp->product_name);
 
 	if (vlan_tag_strip)
 		sp->vlan_strip_flag = 1;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 6130375..c531c8a 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -2388,7 +2388,10 @@
 
 	work_done = netxen_process_rcv_ring(sds_ring, budget);
 
-	if ((work_done < budget) && tx_complete) {
+	if (!tx_complete)
+		work_done = budget;
+
+	if (work_done < budget) {
 		napi_complete(&sds_ring->napi);
 		if (test_bit(__NX_DEV_UP, &adapter->state))
 			netxen_nic_enable_int(sds_ring);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index c29ba80..04283fe 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -396,6 +396,9 @@
 	[TSU_ADRL31]	= 0x01fc,
 };
 
+static void sh_eth_rcv_snd_disable(struct net_device *ndev);
+static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev);
+
 static bool sh_eth_is_gether(struct sh_eth_private *mdp)
 {
 	return mdp->reg_offset == sh_eth_offset_gigabit;
@@ -473,6 +476,7 @@
 	.eesr_err_check	= EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
 			  EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE |
 			  EESR_ECI,
+	.fdr_value	= 0x00000f0f,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -495,6 +499,9 @@
 	.eesr_err_check	= EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
 			  EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE |
 			  EESR_ECI,
+	.fdr_value	= 0x00000f0f,
+
+	.trscer_err_mask = DESC_I_RINT8,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -856,6 +863,9 @@
 
 	if (!cd->eesr_err_check)
 		cd->eesr_err_check = DEFAULT_EESR_ERR_CHECK;
+
+	if (!cd->trscer_err_mask)
+		cd->trscer_err_mask = DEFAULT_TRSCER_ERR_MASK;
 }
 
 static int sh_eth_check_reset(struct net_device *ndev)
@@ -1113,6 +1123,7 @@
 	int rx_ringsize = sizeof(*rxdesc) * mdp->num_rx_ring;
 	int tx_ringsize = sizeof(*txdesc) * mdp->num_tx_ring;
 	int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN - 1;
+	dma_addr_t dma_addr;
 
 	mdp->cur_rx = 0;
 	mdp->cur_tx = 0;
@@ -1126,7 +1137,6 @@
 		/* skb */
 		mdp->rx_skbuff[i] = NULL;
 		skb = netdev_alloc_skb(ndev, skbuff_size);
-		mdp->rx_skbuff[i] = skb;
 		if (skb == NULL)
 			break;
 		sh_eth_set_receive_align(skb);
@@ -1135,9 +1145,15 @@
 		rxdesc = &mdp->rx_ring[i];
 		/* The size of the buffer is a multiple of 16 bytes. */
 		rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 16);
-		dma_map_single(&ndev->dev, skb->data, rxdesc->buffer_length,
-			       DMA_FROM_DEVICE);
-		rxdesc->addr = virt_to_phys(skb->data);
+		dma_addr = dma_map_single(&ndev->dev, skb->data,
+					  rxdesc->buffer_length,
+					  DMA_FROM_DEVICE);
+		if (dma_mapping_error(&ndev->dev, dma_addr)) {
+			kfree_skb(skb);
+			break;
+		}
+		mdp->rx_skbuff[i] = skb;
+		rxdesc->addr = dma_addr;
 		rxdesc->status = cpu_to_edmac(mdp, RD_RACT | RD_RFP);
 
 		/* Rx descriptor address set */
@@ -1294,7 +1310,7 @@
 	/* Frame recv control (enable multiple-packets per rx irq) */
 	sh_eth_write(ndev, RMCR_RNC, RMCR);
 
-	sh_eth_write(ndev, DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2, TRSCER);
+	sh_eth_write(ndev, mdp->cd->trscer_err_mask, TRSCER);
 
 	if (mdp->cd->bculr)
 		sh_eth_write(ndev, 0x800, BCULR);	/* Burst sycle set */
@@ -1309,8 +1325,10 @@
 		     RFLR);
 
 	sh_eth_write(ndev, sh_eth_read(ndev, EESR), EESR);
-	if (start)
+	if (start) {
+		mdp->irq_enabled = true;
 		sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR);
+	}
 
 	/* PAUSE Prohibition */
 	val = (sh_eth_read(ndev, ECMR) & ECMR_DM) |
@@ -1349,6 +1367,33 @@
 	return ret;
 }
 
+static void sh_eth_dev_exit(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	int i;
+
+	/* Deactivate all TX descriptors, so DMA should stop at next
+	 * packet boundary if it's currently running
+	 */
+	for (i = 0; i < mdp->num_tx_ring; i++)
+		mdp->tx_ring[i].status &= ~cpu_to_edmac(mdp, TD_TACT);
+
+	/* Disable TX FIFO egress to MAC */
+	sh_eth_rcv_snd_disable(ndev);
+
+	/* Stop RX DMA at next packet boundary */
+	sh_eth_write(ndev, 0, EDRRR);
+
+	/* Aside from TX DMA, we can't tell when the hardware is
+	 * really stopped, so we need to reset to make sure.
+	 * Before doing that, wait for long enough to *probably*
+	 * finish transmitting the last packet and poll stats.
+	 */
+	msleep(2); /* max frame time at 10 Mbps < 1250 us */
+	sh_eth_get_stats(ndev);
+	sh_eth_reset(ndev);
+}
+
 /* free Tx skb function */
 static int sh_eth_txfree(struct net_device *ndev)
 {
@@ -1393,6 +1438,7 @@
 	u16 pkt_len = 0;
 	u32 desc_status;
 	int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN - 1;
+	dma_addr_t dma_addr;
 
 	boguscnt = min(boguscnt, *quota);
 	limit = boguscnt;
@@ -1440,9 +1486,9 @@
 			mdp->rx_skbuff[entry] = NULL;
 			if (mdp->cd->rpadir)
 				skb_reserve(skb, NET_IP_ALIGN);
-			dma_sync_single_for_cpu(&ndev->dev, rxdesc->addr,
-						ALIGN(mdp->rx_buf_sz, 16),
-						DMA_FROM_DEVICE);
+			dma_unmap_single(&ndev->dev, rxdesc->addr,
+					 ALIGN(mdp->rx_buf_sz, 16),
+					 DMA_FROM_DEVICE);
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, ndev);
 			netif_receive_skb(skb);
@@ -1462,15 +1508,20 @@
 
 		if (mdp->rx_skbuff[entry] == NULL) {
 			skb = netdev_alloc_skb(ndev, skbuff_size);
-			mdp->rx_skbuff[entry] = skb;
 			if (skb == NULL)
 				break;	/* Better luck next round. */
 			sh_eth_set_receive_align(skb);
-			dma_map_single(&ndev->dev, skb->data,
-				       rxdesc->buffer_length, DMA_FROM_DEVICE);
+			dma_addr = dma_map_single(&ndev->dev, skb->data,
+						  rxdesc->buffer_length,
+						  DMA_FROM_DEVICE);
+			if (dma_mapping_error(&ndev->dev, dma_addr)) {
+				kfree_skb(skb);
+				break;
+			}
+			mdp->rx_skbuff[entry] = skb;
 
 			skb_checksum_none_assert(skb);
-			rxdesc->addr = virt_to_phys(skb->data);
+			rxdesc->addr = dma_addr;
 		}
 		if (entry >= mdp->num_rx_ring - 1)
 			rxdesc->status |=
@@ -1566,7 +1617,6 @@
 		if (intr_status & EESR_RFRMER) {
 			/* Receive Frame Overflow int */
 			ndev->stats.rx_frame_errors++;
-			netif_err(mdp, rx_err, ndev, "Receive Abort\n");
 		}
 	}
 
@@ -1585,13 +1635,11 @@
 	if (intr_status & EESR_RDE) {
 		/* Receive Descriptor Empty int */
 		ndev->stats.rx_over_errors++;
-		netif_err(mdp, rx_err, ndev, "Receive Descriptor Empty\n");
 	}
 
 	if (intr_status & EESR_RFE) {
 		/* Receive FIFO Overflow int */
 		ndev->stats.rx_fifo_errors++;
-		netif_err(mdp, rx_err, ndev, "Receive FIFO Overflow\n");
 	}
 
 	if (!mdp->cd->no_ade && (intr_status & EESR_ADE)) {
@@ -1646,7 +1694,12 @@
 	if (intr_status & (EESR_RX_CHECK | cd->tx_check | cd->eesr_err_check))
 		ret = IRQ_HANDLED;
 	else
-		goto other_irq;
+		goto out;
+
+	if (!likely(mdp->irq_enabled)) {
+		sh_eth_write(ndev, 0, EESIPR);
+		goto out;
+	}
 
 	if (intr_status & EESR_RX_CHECK) {
 		if (napi_schedule_prep(&mdp->napi)) {
@@ -1677,7 +1730,7 @@
 		sh_eth_error(ndev, intr_status);
 	}
 
-other_irq:
+out:
 	spin_unlock(&mdp->lock);
 
 	return ret;
@@ -1705,7 +1758,8 @@
 	napi_complete(napi);
 
 	/* Reenable Rx interrupts */
-	sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR);
+	if (mdp->irq_enabled)
+		sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR);
 out:
 	return budget - quota;
 }
@@ -1820,6 +1874,9 @@
 	unsigned long flags;
 	int ret;
 
+	if (!mdp->phydev)
+		return -ENODEV;
+
 	spin_lock_irqsave(&mdp->lock, flags);
 	ret = phy_ethtool_gset(mdp->phydev, ecmd);
 	spin_unlock_irqrestore(&mdp->lock, flags);
@@ -1834,6 +1891,9 @@
 	unsigned long flags;
 	int ret;
 
+	if (!mdp->phydev)
+		return -ENODEV;
+
 	spin_lock_irqsave(&mdp->lock, flags);
 
 	/* disable tx and rx */
@@ -1868,6 +1928,9 @@
 	unsigned long flags;
 	int ret;
 
+	if (!mdp->phydev)
+		return -ENODEV;
+
 	spin_lock_irqsave(&mdp->lock, flags);
 	ret = phy_start_aneg(mdp->phydev);
 	spin_unlock_irqrestore(&mdp->lock, flags);
@@ -1952,40 +2015,50 @@
 		return -EINVAL;
 
 	if (netif_running(ndev)) {
+		netif_device_detach(ndev);
 		netif_tx_disable(ndev);
-		/* Disable interrupts by clearing the interrupt mask. */
-		sh_eth_write(ndev, 0x0000, EESIPR);
-		/* Stop the chip's Tx and Rx processes. */
-		sh_eth_write(ndev, 0, EDTRR);
-		sh_eth_write(ndev, 0, EDRRR);
-		synchronize_irq(ndev->irq);
-	}
 
-	/* Free all the skbuffs in the Rx queue. */
-	sh_eth_ring_free(ndev);
-	/* Free DMA buffer */
-	sh_eth_free_dma_buffer(mdp);
+		/* Serialise with the interrupt handler and NAPI, then
+		 * disable interrupts.  We have to clear the
+		 * irq_enabled flag first to ensure that interrupts
+		 * won't be re-enabled.
+		 */
+		mdp->irq_enabled = false;
+		synchronize_irq(ndev->irq);
+		napi_synchronize(&mdp->napi);
+		sh_eth_write(ndev, 0x0000, EESIPR);
+
+		sh_eth_dev_exit(ndev);
+
+		/* Free all the skbuffs in the Rx queue. */
+		sh_eth_ring_free(ndev);
+		/* Free DMA buffer */
+		sh_eth_free_dma_buffer(mdp);
+	}
 
 	/* Set new parameters */
 	mdp->num_rx_ring = ring->rx_pending;
 	mdp->num_tx_ring = ring->tx_pending;
 
-	ret = sh_eth_ring_init(ndev);
-	if (ret < 0) {
-		netdev_err(ndev, "%s: sh_eth_ring_init failed.\n", __func__);
-		return ret;
-	}
-	ret = sh_eth_dev_init(ndev, false);
-	if (ret < 0) {
-		netdev_err(ndev, "%s: sh_eth_dev_init failed.\n", __func__);
-		return ret;
-	}
-
 	if (netif_running(ndev)) {
+		ret = sh_eth_ring_init(ndev);
+		if (ret < 0) {
+			netdev_err(ndev, "%s: sh_eth_ring_init failed.\n",
+				   __func__);
+			return ret;
+		}
+		ret = sh_eth_dev_init(ndev, false);
+		if (ret < 0) {
+			netdev_err(ndev, "%s: sh_eth_dev_init failed.\n",
+				   __func__);
+			return ret;
+		}
+
+		mdp->irq_enabled = true;
 		sh_eth_write(ndev, mdp->cd->eesipr_value, EESIPR);
 		/* Setting the Rx mode will start the Rx process. */
 		sh_eth_write(ndev, EDRRR_R, EDRRR);
-		netif_wake_queue(ndev);
+		netif_device_attach(ndev);
 	}
 
 	return 0;
@@ -2101,6 +2174,9 @@
 	}
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
+	if (skb_padto(skb, ETH_ZLEN))
+		return NETDEV_TX_OK;
+
 	entry = mdp->cur_tx % mdp->num_tx_ring;
 	mdp->tx_skbuff[entry] = skb;
 	txdesc = &mdp->tx_ring[entry];
@@ -2110,10 +2186,11 @@
 				 skb->len + 2);
 	txdesc->addr = dma_map_single(&ndev->dev, skb->data, skb->len,
 				      DMA_TO_DEVICE);
-	if (skb->len < ETH_ZLEN)
-		txdesc->buffer_length = ETH_ZLEN;
-	else
-		txdesc->buffer_length = skb->len;
+	if (dma_mapping_error(&ndev->dev, txdesc->addr)) {
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+	txdesc->buffer_length = skb->len;
 
 	if (entry >= mdp->num_tx_ring - 1)
 		txdesc->status |= cpu_to_edmac(mdp, TD_TACT | TD_TDLE);
@@ -2165,24 +2242,26 @@
 
 	netif_stop_queue(ndev);
 
-	/* Disable interrupts by clearing the interrupt mask. */
+	/* Serialise with the interrupt handler and NAPI, then disable
+	 * interrupts.  We have to clear the irq_enabled flag first to
+	 * ensure that interrupts won't be re-enabled.
+	 */
+	mdp->irq_enabled = false;
+	synchronize_irq(ndev->irq);
+	napi_disable(&mdp->napi);
 	sh_eth_write(ndev, 0x0000, EESIPR);
 
-	/* Stop the chip's Tx and Rx processes. */
-	sh_eth_write(ndev, 0, EDTRR);
-	sh_eth_write(ndev, 0, EDRRR);
+	sh_eth_dev_exit(ndev);
 
-	sh_eth_get_stats(ndev);
 	/* PHY Disconnect */
 	if (mdp->phydev) {
 		phy_stop(mdp->phydev);
 		phy_disconnect(mdp->phydev);
+		mdp->phydev = NULL;
 	}
 
 	free_irq(ndev->irq, ndev);
 
-	napi_disable(&mdp->napi);
-
 	/* Free all the skbuffs in the Rx queue. */
 	sh_eth_ring_free(ndev);
 
@@ -2410,7 +2489,7 @@
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int i, ret;
 
-	if (unlikely(!mdp->cd->tsu))
+	if (!mdp->cd->tsu)
 		return 0;
 
 	for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES; i++) {
@@ -2433,7 +2512,7 @@
 	void *reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
 	int i;
 
-	if (unlikely(!mdp->cd->tsu))
+	if (!mdp->cd->tsu)
 		return;
 
 	for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES; i++, reg_offset += 8) {
@@ -2443,8 +2522,8 @@
 	}
 }
 
-/* Multicast reception directions set */
-static void sh_eth_set_multicast_list(struct net_device *ndev)
+/* Update promiscuous flag and multicast filter */
+static void sh_eth_set_rx_mode(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	u32 ecmr_bits;
@@ -2455,7 +2534,9 @@
 	/* Initial condition is MCT = 1, PRM = 0.
 	 * Depending on ndev->flags, set PRM or clear MCT
 	 */
-	ecmr_bits = (sh_eth_read(ndev, ECMR) & ~ECMR_PRM) | ECMR_MCT;
+	ecmr_bits = sh_eth_read(ndev, ECMR) & ~ECMR_PRM;
+	if (mdp->cd->tsu)
+		ecmr_bits |= ECMR_MCT;
 
 	if (!(ndev->flags & IFF_MULTICAST)) {
 		sh_eth_tsu_purge_mcast(ndev);
@@ -2484,9 +2565,6 @@
 				}
 			}
 		}
-	} else {
-		/* Normal, unicast/broadcast-only mode. */
-		ecmr_bits = (ecmr_bits & ~ECMR_PRM) | ECMR_MCT;
 	}
 
 	/* update the ethernet mode */
@@ -2694,6 +2772,7 @@
 	.ndo_stop		= sh_eth_close,
 	.ndo_start_xmit		= sh_eth_start_xmit,
 	.ndo_get_stats		= sh_eth_get_stats,
+	.ndo_set_rx_mode	= sh_eth_set_rx_mode,
 	.ndo_tx_timeout		= sh_eth_tx_timeout,
 	.ndo_do_ioctl		= sh_eth_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -2706,7 +2785,7 @@
 	.ndo_stop		= sh_eth_close,
 	.ndo_start_xmit		= sh_eth_start_xmit,
 	.ndo_get_stats		= sh_eth_get_stats,
-	.ndo_set_rx_mode	= sh_eth_set_multicast_list,
+	.ndo_set_rx_mode	= sh_eth_set_rx_mode,
 	.ndo_vlan_rx_add_vid	= sh_eth_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= sh_eth_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= sh_eth_tx_timeout,
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 22301bf..332d3c1 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -369,6 +369,8 @@
 	DESC_I_RINT1 = 0x0001,
 };
 
+#define DEFAULT_TRSCER_ERR_MASK (DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2)
+
 /* RPADIR */
 enum RPADIR_BIT {
 	RPADIR_PADS1 = 0x20000, RPADIR_PADS0 = 0x10000,
@@ -470,6 +472,9 @@
 	unsigned long tx_check;
 	unsigned long eesr_err_check;
 
+	/* Error mask */
+	unsigned long trscer_err_mask;
+
 	/* hardware features */
 	unsigned long irq_flags; /* IRQ configuration flags */
 	unsigned no_psr:1;	/* EtherC DO NOT have PSR */
@@ -508,6 +513,7 @@
 	u32 rx_buf_sz;			/* Based on MTU+slack. */
 	int edmac_endian;
 	struct napi_struct napi;
+	bool irq_enabled;
 	/* MII transceiver section. */
 	u32 phy_id;			/* PHY ID */
 	struct mii_bus *mii_bus;	/* MDIO bus control */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 6984944..b1a2718 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -474,13 +474,19 @@
 	/* allocate memory for RX skbuff array */
 	rx_ring->rx_skbuff_dma = kmalloc_array(rx_rsize,
 					       sizeof(dma_addr_t), GFP_KERNEL);
-	if (rx_ring->rx_skbuff_dma == NULL)
-		goto dmamem_err;
+	if (!rx_ring->rx_skbuff_dma) {
+		dma_free_coherent(priv->device,
+				  rx_rsize * sizeof(struct sxgbe_rx_norm_desc),
+				  rx_ring->dma_rx, rx_ring->dma_rx_phy);
+		goto error;
+	}
 
 	rx_ring->rx_skbuff = kmalloc_array(rx_rsize,
 					   sizeof(struct sk_buff *), GFP_KERNEL);
-	if (rx_ring->rx_skbuff == NULL)
-		goto rxbuff_err;
+	if (!rx_ring->rx_skbuff) {
+		kfree(rx_ring->rx_skbuff_dma);
+		goto error;
+	}
 
 	/* initialise the buffers */
 	for (desc_index = 0; desc_index < rx_rsize; desc_index++) {
@@ -502,13 +508,6 @@
 err_init_rx_buffers:
 	while (--desc_index >= 0)
 		free_rx_ring(priv->device, rx_ring, desc_index);
-	kfree(rx_ring->rx_skbuff);
-rxbuff_err:
-	kfree(rx_ring->rx_skbuff_dma);
-dmamem_err:
-	dma_free_coherent(priv->device,
-			  rx_rsize * sizeof(struct sxgbe_rx_norm_desc),
-			  rx_ring->dma_rx, rx_ring->dma_rx_phy);
 error:
 	return -ENOMEM;
 }
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index 866560e..b02eed1 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -108,10 +108,6 @@
 		}
 	}
 
-	/* Get MAC address if available (DT) */
-	if (mac)
-		ether_addr_copy(priv->dev->dev_addr, mac);
-
 	priv = sxgbe_drv_probe(&(pdev->dev), plat_dat, addr);
 	if (!priv) {
 		pr_err("%s: main driver probe failed\n", __func__);
@@ -125,6 +121,10 @@
 		goto err_drv_remove;
 	}
 
+	/* Get MAC address if available (DT) */
+	if (mac)
+		ether_addr_copy(priv->dev->dev_addr, mac);
+
 	/* Get the TX/RX IRQ numbers */
 	for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) {
 		priv->txq[i]->irq_no = irq_of_parse_and_map(node, chan++);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 8c6b7c1..cf62ff4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2778,6 +2778,9 @@
  * @addr: iobase memory address
  * Description: this is the main probe function used to
  * call the alloc_etherdev, allocate the priv structure.
+ * Return:
+ * on success the new private structure is returned, otherwise the error
+ * pointer.
  */
 struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 				     struct plat_stmmacenet_data *plat_dat,
@@ -2789,7 +2792,7 @@
 
 	ndev = alloc_etherdev(sizeof(struct stmmac_priv));
 	if (!ndev)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	SET_NETDEV_DEV(ndev, device);
 
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index e61ee83..a39131f 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -610,7 +610,7 @@
 
 			/* Clear all mcast from ALE */
 			cpsw_ale_flush_multicast(ale, ALE_ALL_PORTS <<
-						 priv->host_port);
+						 priv->host_port, -1);
 
 			/* Flood All Unicast Packets to Host port */
 			cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1);
@@ -634,6 +634,12 @@
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	int vid;
+
+	if (priv->data.dual_emac)
+		vid = priv->slaves[priv->emac_port].port_vlan;
+	else
+		vid = priv->data.default_vlan;
 
 	if (ndev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
@@ -649,7 +655,8 @@
 	cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI);
 
 	/* Clear all mcast from ALE */
-	cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port);
+	cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port,
+				 vid);
 
 	if (!netdev_mc_empty(ndev)) {
 		struct netdev_hw_addr *ha;
@@ -1627,16 +1634,24 @@
 				unsigned short vid)
 {
 	int ret;
-	int unreg_mcast_mask;
+	int unreg_mcast_mask = 0;
+	u32 port_mask;
 
-	if (priv->ndev->flags & IFF_ALLMULTI)
-		unreg_mcast_mask = ALE_ALL_PORTS;
-	else
-		unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
+	if (priv->data.dual_emac) {
+		port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST;
 
-	ret = cpsw_ale_add_vlan(priv->ale, vid,
-				ALE_ALL_PORTS << priv->host_port,
-				0, ALE_ALL_PORTS << priv->host_port,
+		if (priv->ndev->flags & IFF_ALLMULTI)
+			unreg_mcast_mask = port_mask;
+	} else {
+		port_mask = ALE_ALL_PORTS;
+
+		if (priv->ndev->flags & IFF_ALLMULTI)
+			unreg_mcast_mask = ALE_ALL_PORTS;
+		else
+			unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
+	}
+
+	ret = cpsw_ale_add_vlan(priv->ale, vid, port_mask, 0, port_mask,
 				unreg_mcast_mask << priv->host_port);
 	if (ret != 0)
 		return ret;
@@ -1647,8 +1662,7 @@
 		goto clean_vid;
 
 	ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,