Merge branch 'v3.13/misc-cherry-picks' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/keystone into v3.13/master
diff --git a/Documentation/devicetree/bindings/arm/davinci/nand.txt b/Documentation/devicetree/bindings/arm/davinci/nand.txt
deleted file mode 100644
index 3545ea7..0000000
--- a/Documentation/devicetree/bindings/arm/davinci/nand.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-* Texas Instruments Davinci NAND
-
-This file provides information, what the device node for the
-davinci nand interface contain.
-
-Required properties:
-- compatible: "ti,davinci-nand";
-- reg : contain 2 offset/length values:
-        - offset and length for the access window
-        - offset and length for accessing the aemif control registers
-- ti,davinci-chipselect: Indicates on the davinci_nand driver which
-                         chipselect is used for accessing the nand.
-
-Recommended properties :
-- ti,davinci-mask-ale: mask for ale
-- ti,davinci-mask-cle: mask for cle
-- ti,davinci-mask-chipsel: mask for chipselect
-- ti,davinci-ecc-mode: ECC mode valid values for davinci driver:
-		- "none"
-		- "soft"
-		- "hw"
-- ti,davinci-ecc-bits: used ECC bits, currently supported 1 or 4.
-- ti,davinci-nand-buswidth: buswidth 8 or 16
-- ti,davinci-nand-use-bbt: use flash based bad block table support.
-
-nand device bindings may contain additional sub-nodes describing
-partitions of the address space. See partition.txt for more detail.
-
-Example(da850 EVM ):
-nand_cs3@62000000 {
-	compatible = "ti,davinci-nand";
-	reg = <0x62000000 0x807ff
-		0x68000000 0x8000>;
-	ti,davinci-chipselect = <1>;
-	ti,davinci-mask-ale = <0>;
-	ti,davinci-mask-cle = <0>;
-	ti,davinci-mask-chipsel = <0>;
-	ti,davinci-ecc-mode = "hw";
-	ti,davinci-ecc-bits = <4>;
-	ti,davinci-nand-use-bbt;
-
-	partition@180000 {
-		label = "ubifs";
-		reg = <0x180000 0x7e80000>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt
index 12bd726..225990f 100644
--- a/Documentation/devicetree/bindings/clock/keystone-pll.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt
@@ -17,13 +17,14 @@
 - reg - pll control0 and pll multipler registers
 - reg-names : control and multiplier. The multiplier is applicable only for
 		main pll clock
-- fixed-postdiv : fixed post divider value
+- fixed-postdiv : fixed post divider value. If absent, use clkod register bits
+		for postdiv
 
 Example:
 	mainpllclk: mainpllclk@2310110 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,main-pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclksys>;
 		reg = <0x02620350 4>, <0x02310110 4>;
 		reg-names = "control", "multiplier";
 		fixed-postdiv = <2>;
@@ -32,11 +33,10 @@
 	papllclk: papllclk@2620358 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclkpass>;
 		clock-output-names = "pa-pll-clk";
 		reg = <0x02620358 4>;
 		reg-names = "control";
-		fixed-postdiv = <6>;
 	};
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
new file mode 100644
index 0000000..4ce9862
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
@@ -0,0 +1,41 @@
+Davinci/Keystone GPIO controller bindings
+
+Required Properties:
+- compatible: should be "ti,dm6441-gpio", "ti,keystone-gpio"
+
+- reg: Physical base address of the controller and the size of memory mapped
+       registers.
+
+- gpio-controller : Marks the device node as a gpio controller.
+
+- interrupt-parent: phandle of the parent interrupt controller.
+
+- interrupts: Array of GPIO interrupt number. Only banked or unbanked IRQs are
+	      supported at a time.
+
+- ti,ngpio: The number of GPIO pins supported.
+
+- ti,davinci-gpio-unbanked: The number of GPIOs that have an individual interrupt
+		             line to processor.
+
+The GPIO controller also acts as an interrupt controller. It uses the default
+two cells specifier as described in Documentation/devicetree/bindings/
+interrupt-controller/interrupts.txt.
+
+Example:
+
+gpio: gpio@1e26000 {
+	compatible = "ti,dm6441-gpio";
+	gpio-controller;
+	reg = <0x226000 0x1000>;
+	interrupt-parent = <&intc>;
+	interrupts = <42 IRQ_TYPE_EDGE_BOTH 43 IRQ_TYPE_EDGE_BOTH
+		44 IRQ_TYPE_EDGE_BOTH 45 IRQ_TYPE_EDGE_BOTH
+		46 IRQ_TYPE_EDGE_BOTH 47 IRQ_TYPE_EDGE_BOTH
+		48 IRQ_TYPE_EDGE_BOTH 49 IRQ_TYPE_EDGE_BOTH
+		50 IRQ_TYPE_EDGE_BOTH>;
+	ti,ngpio = <144>;
+	ti,davinci-gpio-unbanked = <0>;
+	interrupt-controller;
+	#interrupt-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt b/Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt
new file mode 100644
index 0000000..d1466dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt
@@ -0,0 +1,210 @@
+* Device tree bindings for Texas instruments AEMIF controller
+
+The Async External Memory Interface (EMIF16/AEMIF) controller is intended to
+provide a glue-less interface to a variety of asynchronous memory devices like
+ASRA M, NOR and NAND memory. A total of 256M bytes of any of these memories
+can be accessed at any given time via four chip selects with 64M byte access
+per chip select. Synchronous memories such as DDR1 SD RAM, SDR SDRAM
+and Mobile SDR are not supported.
+
+Documentation:
+Davinci DM646x - http://www.ti.com/lit/ug/sprueq7c/sprueq7c.pdf
+OMAP-L138 (DA850) - http://www.ti.com/lit/ug/spruh77a/spruh77a.pdf
+Kestone - http://www.ti.com/lit/ug/sprugz3a/sprugz3a.pdf
+
+Required properties:
+
+- compatible:		"ti,davinci-aemif"
+			"ti,keystone-aemif"
+			"ti,da850-aemif"
+
+- reg:			contains offset/length value for AEMIF control registers
+			space.
+
+- #address-cells:	Must be 2. The partition number has to be encoded in the
+			first address cell and it may accept values 0..N-1
+			(N - total number of partitions). It's recommended to
+			assign N-1 number for the control partition. The second
+			cell is the offset into the partition.
+
+- #size-cells:		Must be set to 1.
+
+- ranges:		Contains memory regions. There are two types of
+			ranges/partitions:
+			- CS-specific partition/range. If continuous, must be
+			set up to reflect the memory layout for 4 chipselects,
+			if not then additional range/partition can be added and
+			child device can select the proper one.
+			- control partition which is common for all CS
+			interfaces.
+
+- clocks:		the clock feeding the controller clock. Required only
+			if clock tree data present in device tree.
+			See clock-bindings.txt
+
+- clock-names:		clock name. It has to be "aemif". Required only if clock
+			tree data present in device tree, in another case don't
+			use it.
+			See clock-bindings.txt
+
+- clock-ranges:		Empty property indicating that child nodes can inherit
+			named clocks. Required only if clock tree data present
+			in device tree.
+			See clock-bindings.txt
+
+
+Child chip-select (cs) nodes contain the memory devices nodes connected to
+such as NOR (e.g. cfi-flash) and NAND (ti,davinci-nand, see davinci-nand.txt).
+There might be board specific devices like FPGAs.
+
+Required child cs node properties:
+
+- #address-cells:	Must be 2.
+
+- #size-cells:		Must be 1.
+
+- ranges:		Empty property indicating that child nodes can inherit
+			memory layout.
+
+- clock-ranges:		Empty property indicating that child nodes can inherit
+			named clocks. Required only if clock tree data present
+			in device tree.
+
+- ti,cs-chipselect:	number of chipselect. Indicates on the aemif driver
+			which chipselect is used for accessing the memory. For
+			compatibles "ti,davinci-aemif" and "ti,keystone-aemif"
+			it can be in range [0-3]. For compatible
+			"ti,da850-aemif" range is [2-5].
+
+Optional child cs node properties:
+
+- ti,bus-width:			width of the asynchronous device's data bus
+				8 or 16 if not preset 8
+
+- ti,cs-ss:		enable/disable select strobe mode
+				In select strobe mode chip select behaves as
+				the strobe and is active only during the strobe
+				period. If present then enable.
+
+- ti,cs-ew:		enable/disable extended wait mode
+				if set, the controller monitors the EMIFWAIT pin
+				mapped to that chip select to determine if the
+				device wants to extend the strobe period. If
+				present then enable.
+
+- ti,cs-ta:		minimum turn around time, ns
+				Time between the end of one asynchronous memory
+				access and the start of another asynchronous
+				memory access. This delay is not incurred
+				between a read followed by read or a write
+				followed by a write to same chip select.
+
+- ti,cs-rsetup:		read setup width, ns
+				Time between the beginning of a memory cycle
+				and the activation of read strobe.
+				Minimum value is 1 (0 treated as 1).
+
+- ti,cs-rstobe:		read strobe width, ns
+				Time between the activation and deactivation of
+				the read strobe.
+				Minimum value is 1 (0 treated as 1).
+
+- ti,cs-rhold:		read hold width, ns
+				Time between the deactivation of the read
+				strobe and the end of the cycle (which may be
+				either an address change or the deactivation of
+				the chip select signal.
+				Minimum value is 1 (0 treated as 1).
+
+- ti,cs-wsetup:		write setup width, ns
+				Time between the beginning of a memory cycle
+				and the activation of write strobe.
+				Minimum value is 1 (0 treated as 1).
+
+- ti,cs-wstrobe:	write strobe width, ns
+				Time between the activation and deactivation of
+				the write strobe.
+				Minimum value is 1 (0 treated as 1).
+
+- ti,cs-whold:		write hold width, ns
+				Time between the deactivation of the write
+				strobe and the end of the cycle (which may be
+				either an address change or the deactivation of
+				the chip select signal.
+				Minimum value is 1 (0 treated as 1).
+
+If any of the above parameters are absent, current parameter value will be taken
+from the corresponding HW reg.
+
+Example for aemif, davinci nand and nor flash chip select shown below.
+
+memory-controller@21000A00 {
+	compatible = "ti,davinci-aemif";
+	#address-cells = <2>;
+	#size-cells = <1>;
+	clocks = <&clkaemif 0>;
+	clock-names = "aemif";
+	clock-ranges;
+	reg = <0x2100A00 0x00000100>;
+	ranges = <0 0 0x70000000 0x10000000
+		  1 0 0x21000A00 0x0000100>;
+		  /*
+		   * Partition0: CS-specific memory range which is
+		   * implemented as continuous physical memory region
+		   * Partition1: control memory range
+		   */
+
+	nand:cs2 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		clock-ranges;
+		ranges;
+
+		ti,cs-chipselect = <2>;
+		/* all timings in nanoseconds */
+		ti,cs-ta = <0>;
+		ti,cs-rhold = <7>;
+		ti,cs-rstrobe = <42>;
+		ti,cs-rsetup = <14>;
+		ti,cs-whold = <7>;
+		ti,cs-wstrobe = <42>;
+		ti,cs-wsetup = <14>;
+
+		nand@0,0x8000000 {
+			compatible = "ti,davinci-nand";
+			reg = <0 0x8000000 0x4000000
+			       1 0x0000000 0x0000100>;
+			/*
+			 * Partition0, offset 0x8000000, size 0x4000000
+			 * Partition1, offset 0x0000000, size 0x0000100
+			 */
+
+			.. see davinci-nand.txt
+		};
+	};
+
+	nor:cs0 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		clock-ranges;
+		ranges;
+
+		ti,cs-chipselect = <0>;
+		/* all timings in nanoseconds */
+		ti,cs-ta = <0>;
+		ti,cs-rhold = <8>;
+		ti,cs-rstrobe = <40>;
+		ti,cs-rsetup = <14>;
+		ti,cs-whold = <7>;
+		ti,cs-wstrobe = <40>;
+		ti,cs-wsetup = <14>;
+		ti,cs-asize = <1>;
+
+		flash@0,0x0000000 {
+			compatible = "cfi-flash";
+			reg = <0 0x0000000 0x4000000>;
+
+			...
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/mtd/davinci-nand.txt b/Documentation/devicetree/bindings/mtd/davinci-nand.txt
new file mode 100644
index 0000000..cfb18ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/davinci-nand.txt
@@ -0,0 +1,94 @@
+Device tree bindings for Texas instruments Davinci/Keystone NAND controller
+
+This file provides information, what the device node for the davinci/keystone
+NAND interface contains.
+
+Documentation:
+Davinci DM646x - http://www.ti.com/lit/ug/sprueq7c/sprueq7c.pdf
+Kestone - http://www.ti.com/lit/ug/sprugz3a/sprugz3a.pdf
+
+Required properties:
+
+- compatible:			"ti,davinci-nand"
+				"ti,keystone-nand"
+
+- reg:				Contains 2 offset/length values:
+				- offset and length for the access window.
+				- offset and length for accessing the AEMIF
+				control registers.
+
+- ti,davinci-chipselect:	number of chipselect. Indicates on the
+				davinci_nand driver which chipselect is used
+				for accessing the nand.
+				Can be in the range [0-3].
+
+Recommended properties :
+
+- ti,davinci-mask-ale:		mask for ALE. Needed for executing address
+				phase. These offset will be added to the base
+				address for the chip select space the NAND Flash
+				device is connected to.
+				If not set equal to 0x08.
+
+- ti,davinci-mask-cle:		mask for CLE. Needed for executing command
+				phase. These offset will be added to the base
+				address for the chip select space the NAND Flash
+				device is connected to.
+				If not set equal to 0x10.
+
+- ti,davinci-mask-chipsel:	mask for chipselect address. Needed to mask
+				addresses for given chipselect.
+
+- nand-ecc-mode:		operation mode of the NAND ecc mode. ECC mode
+				valid values for davinci driver:
+				- "none"
+				- "soft"
+				- "hw"
+
+- ti,davinci-ecc-bits:		used ECC bits, currently supported 1 or 4.
+
+- nand-bus-width:		buswidth 8 or 16. If not present 8.
+
+- nand-on-flash-bbt:		use flash based bad block table support. OOB
+				identifier is saved in OOB area. If not present
+				false.
+
+Deprecated properties:
+
+- ti,davinci-ecc-mode:		operation mode of the NAND ecc mode. ECC mode
+				valid values for davinci driver:
+				- "none"
+				- "soft"
+				- "hw"
+
+- ti,davinci-nand-buswidth:	buswidth 8 or 16. If not present 8.
+
+- ti,davinci-nand-use-bbt:	use flash based bad block table support. OOB
+				identifier is saved in OOB area. If not present
+				false.
+
+Nand device bindings may contain additional sub-nodes describing partitions of
+the address space. See partition.txt for more detail. The NAND Flash timing
+values must be programmed in the chip select’s node of AEMIF
+memory-controller (see Documentation/devicetree/bindings/memory-controllers/
+davinci-aemif.txt).
+
+Example(da850 EVM ):
+
+nand_cs3@62000000 {
+	compatible = "ti,davinci-nand";
+	reg = <0x62000000 0x807ff
+	       0x68000000 0x8000>;
+	ti,davinci-chipselect = <1>;
+	ti,davinci-mask-ale = <0>;
+	ti,davinci-mask-cle = <0>;
+	ti,davinci-mask-chipsel = <0>;
+	nand-ecc-mode = "hw";
+	ti,davinci-ecc-bits = <4>;
+	nand-on-flash-bbt;
+
+	partition@180000 {
+		label = "ubifs";
+		reg = <0x180000 0x7e80000>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
new file mode 100644
index 0000000..5fbe361
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
@@ -0,0 +1,29 @@
+* Device tree bindings for Texas instruments Keystone timer
+
+This document provides bindings for the 64-bit timer in the KeyStone
+architecture devices. The timer can be configured as a general-purpose 64-bit
+timer, dual general-purpose 32-bit timers. When configured as dual 32-bit
+timers, each half can operate in conjunction (chain mode) or independently
+(unchained mode) of each other.
+
+It is global timer is a free running up-counter and can generate interrupt
+when the counter reaches preset counter values.
+
+Documentation:
+http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
+
+Required properties:
+
+- compatible : should be "ti,keystone-timer".
+- reg : specifies base physical address and count of the registers.
+- interrupts : interrupt generated by the timer.
+- clocks : the clock feeding the timer clock.
+
+Example:
+
+timer@22f0000 {
+	compatible = "ti,keystone-timer";
+	reg = <0x022f0000 0x80>;
+	interrupts = <GIC_SPI 110 IRQ_TYPE_EDGE_RISING>;
+	clocks = <&clktimer15>;
+};
diff --git a/Documentation/devicetree/bindings/usb/keystone-phy.txt b/Documentation/devicetree/bindings/usb/keystone-phy.txt
new file mode 100644
index 0000000..f37b3a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/keystone-phy.txt
@@ -0,0 +1,20 @@
+TI Keystone USB PHY
+
+Required properties:
+ - compatible: should be "ti,keystone-usbphy".
+ - #address-cells, #size-cells : should be '1' if the device has sub-nodes
+   with 'reg' property.
+ - reg : Address and length of the usb phy control register set.
+
+The main purpose of this PHY driver is to enable the USB PHY reference clock
+gate on the Keystone SOC for both the USB2 and USB3 PHY. Otherwise it is just
+an NOP PHY driver.  Hence this node is referenced as both the usb2 and usb3
+phy node in the USB Glue layer driver node.
+
+usb_phy: usb_phy@2620738 {
+	compatible = "ti,keystone-usbphy";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	reg = <0x2620738 32>;
+	status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/usb/keystone-usb.txt b/Documentation/devicetree/bindings/usb/keystone-usb.txt
new file mode 100644
index 0000000..60527d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/keystone-usb.txt
@@ -0,0 +1,42 @@
+TI Keystone Soc USB Controller
+
+DWC3 GLUE
+
+Required properties:
+ - compatible: should be "ti,keystone-dwc3".
+ - #address-cells, #size-cells : should be '1' if the device has sub-nodes
+   with 'reg' property.
+ - reg : Address and length of the register set for the USB subsystem on
+   the SOC.
+ - interrupts : The irq number of this device that is used to interrupt the
+   MPU.
+ - ranges: allows valid 1:1 translation between child's address space and
+   parent's address space.
+ - clocks: Clock IDs array as required by the controller.
+ - clock-names: names of clocks correseponding to IDs in the clock property.
+
+Sub-nodes:
+The dwc3 core should be added as subnode to Keystone DWC3 glue.
+- dwc3 :
+   The binding details of dwc3 can be found in:
+   Documentation/devicetree/bindings/usb/dwc3.txt
+
+Example:
+	usb: usb@2680000 {
+		compatible = "ti,keystone-dwc3";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x2680000 0x10000>;
+		clocks = <&clkusb>;
+		clock-names = "usb";
+		interrupts = <GIC_SPI 393 IRQ_TYPE_EDGE_RISING>;
+		ranges;
+		status = "disabled";
+
+		dwc3@2690000 {
+			compatible = "synopsys,dwc3";
+			reg = <0x2690000 0x70000>;
+			interrupts = <GIC_SPI 393 IRQ_TYPE_EDGE_RISING>;
+			usb-phy = <&usb_phy>, <&usb_phy>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
index 75558cc..e60b9a1 100644
--- a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
@@ -1,12 +1,24 @@
-DaVinci Watchdog Timer (WDT) Controller
+Texas Instruments DaVinci/Keystone Watchdog Timer (WDT) Controller
 
 Required properties:
-- compatible : Should be "ti,davinci-wdt"
+- compatible : Should be "ti,davinci-wdt", "ti,keystone-wdt"
 - reg : Should contain WDT registers location and length
 
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+- clocks : the clock feeding the watchdog timer.
+	   Needed if platform uses clocks.
+	   See clock-bindings.txt
+
+Documentation:
+Davinci DM646x - http://www.ti.com/lit/ug/spruer5b/spruer5b.pdf
+Keystone - http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
+
 Examples:
 
 wdt: wdt@2320000 {
 	compatible = "ti,davinci-wdt";
 	reg = <0x02320000 0x80>;
+	timeout-sec = <30>;
+	clocks = <&clkwdtimer0>;
 };
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c1f1a7e..51411db 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -62,6 +62,7 @@
 	select IRQ_FORCED_THREADING
 	select KTIME_SCALAR
 	select MODULES_USE_ELF_REL
+	select NO_BOOTMEM
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
 	select PERF_USE_VMALLOC
diff --git a/arch/arm/boot/dts/k2hk-evm.dts b/arch/arm/boot/dts/k2hk-evm.dts
new file mode 100644
index 0000000..2554537
--- /dev/null
+++ b/arch/arm/boot/dts/k2hk-evm.dts
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2013 Texas Instruments, Inc.
+ *
+ * Keystone 2 Kepler/Hawking EVM device tree
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "keystone.dtsi"
+
+/ {
+	compatible =  "ti,keystone-evm";
+
+	soc {
+		clocks {
+			refclksys: refclksys {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+				clock-frequency = <122880000>;
+				clock-output-names = "refclk-sys";
+			};
+
+			refclkpass: refclkpass {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+				clock-frequency = <122880000>;
+				clock-output-names = "refclk-pass";
+			};
+
+			refclkarm: refclkarm {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+				clock-frequency = <125000000>;
+				clock-output-names = "refclk-arm";
+			};
+
+			refclkddr3a: refclkddr3a {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+				clock-frequency = <100000000>;
+				clock-output-names = "refclk-ddr3a";
+			};
+
+			refclkddr3b: refclkddr3b {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+				clock-frequency = <100000000>;
+				clock-output-names = "refclk-ddr3b";
+			};
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+		debug1_1 {
+			label = "keystone:green:debug1";
+			gpios = <&gpio0 12 GPIO_ACTIVE_HIGH>; /* 12 */
+		};
+
+		debug1_2 {
+			label = "keystone:red:debug1";
+			gpios = <&gpio0 13 GPIO_ACTIVE_HIGH>; /* 13 */
+		};
+
+		debug2 {
+			label = "keystone:blue:debug2";
+			gpios = <&gpio0 14 GPIO_ACTIVE_HIGH>; /* 14 */
+		};
+
+		debug3 {
+			label = "keystone:blue:debug3";
+			gpios = <&gpio0 15 GPIO_ACTIVE_HIGH>; /* 15 */
+		};
+	};
+};
+
+&usb_phy {
+	status = "okay";
+};
+
+&usb {
+	status = "okay";
+};
+
+&aemif {
+	cs0 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		clock-ranges;
+		ranges;
+
+		ti,cs-chipselect = <0>;
+		/* all timings in nanoseconds */
+		ti,cs-ta = <12>;
+		ti,cs-rhold = <6>;
+		ti,cs-rstrobe = <23>;
+		ti,cs-rsetup = <9>;
+		ti,cs-whold = <8>;
+		ti,cs-wstrobe = <23>;
+		ti,cs-wsetup = <8>;
+
+		nand@0,0 {
+			compatible = "ti,keystone-nand","ti,davinci-nand";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0 0 0x4000000
+			       1 0 0x0000100>;
+
+			ti,davinci-chipselect = <0>;
+			ti,davinci-mask-ale = <0x2000>;
+			ti,davinci-mask-cle = <0x4000>;
+			ti,davinci-mask-chipsel = <0>;
+			nand-ecc-mode = "hw";
+			ti,davinci-ecc-bits = <4>;
+			nand-on-flash-bbt;
+
+			partition@0 {
+				label = "u-boot";
+				reg = <0x0 0x100000>;
+				read-only;
+			};
+
+			partition@100000 {
+				label = "params";
+				reg = <0x100000 0x80000>;
+				read-only;
+			};
+
+			partition@180000 {
+				label = "ubifs";
+				reg = <0x180000 0x7E80000>;
+			};
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/keystone-clocks.dtsi b/arch/arm/boot/dts/keystone-clocks.dtsi
index d6713b1..2943cde 100644
--- a/arch/arm/boot/dts/keystone-clocks.dtsi
+++ b/arch/arm/boot/dts/keystone-clocks.dtsi
@@ -13,17 +13,10 @@
 	#size-cells = <1>;
 	ranges;
 
-	refclkmain: refclkmain {
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <122880000>;
-		clock-output-names = "refclk-main";
-	};
-
 	mainpllclk: mainpllclk@2310110 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,main-pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclksys>;
 		reg = <0x02620350 4>, <0x02310110 4>;
 		reg-names = "control", "multiplier";
 		fixed-postdiv = <2>;
@@ -32,47 +25,43 @@
 	papllclk: papllclk@2620358 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclkpass>;
 		clock-output-names = "pa-pll-clk";
 		reg = <0x02620358 4>;
 		reg-names = "control";
-		fixed-postdiv = <6>;
 	};
 
-	ddr3allclk: ddr3apllclk@2620360 {
+	ddr3apllclk: ddr3apllclk@2620360 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclkddr3a>;
 		clock-output-names = "ddr-3a-pll-clk";
 		reg = <0x02620360 4>;
 		reg-names = "control";
-		fixed-postdiv = <6>;
 	};
 
-	ddr3bllclk: ddr3bpllclk@2620368 {
+	ddr3bpllclk: ddr3bpllclk@2620368 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclkddr3b>;
 		clock-output-names = "ddr-3b-pll-clk";
 		reg = <0x02620368 4>;
 		reg-names = "control";
-		fixed-postdiv = <6>;
 	};
 
 	armpllclk: armpllclk@2620370 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,pll-clock";
-		clocks = <&refclkmain>;
+		clocks = <&refclkarm>;
 		clock-output-names = "arm-pll-clk";
 		reg = <0x02620370 4>;
 		reg-names = "control";
-		fixed-postdiv = <6>;
 	};
 
 	mainmuxclk: mainmuxclk@2310108 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,pll-mux-clock";
-		clocks = <&mainpllclk>, <&refclkmain>;
+		clocks = <&mainpllclk>, <&refclksys>;
 		reg = <0x02310108 4>;
 		bit-shift = <23>;
 		bit-mask = <1>;
@@ -135,6 +124,15 @@
 		clock-output-names = "chipclk13";
 	};
 
+	paclk13: paclk13 {
+		#clock-cells = <0>;
+		compatible = "fixed-factor-clock";
+		clocks = <&papllclk>;
+		clock-div = <3>;
+		clock-mult = <1>;
+		clock-output-names = "paclk13";
+	};
+
 	chipclk14: chipclk14 {
 		#clock-cells = <0>;
 		compatible = "fixed-factor-clock";
@@ -329,16 +327,6 @@
 		domain-id = <6>;
 	};
 
-	clkmsmcsram: clkmsmcsram {
-		#clock-cells = <0>;
-		compatible = "ti,keystone,psc-clock";
-		clocks = <&chipclk1>;
-		clock-output-names = "msmcsram";
-		reg = <0x02350038 0xb00>, <0x0235001c 0x400>;
-		reg-names = "control", "domain";
-		domain-id = <7>;
-	};
-
 	clkgem0: clkgem0 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,psc-clock";
@@ -614,7 +602,7 @@
 		compatible = "ti,keystone,psc-clock";
 		clocks = <&chipclk13>;
 		clock-output-names = "vcp-3";
-		reg = <0x0235000a8 0xb00>, <0x02350060 0x400>;
+		reg = <0x023500a8 0xb00>, <0x02350060 0x400>;
 		reg-names = "control", "domain";
 		domain-id = <24>;
 	};
@@ -739,6 +727,16 @@
 		domain-id = <0>;
 	};
 
+	clktimer15: clktimer15 {
+		#clock-cells = <0>;
+		compatible = "ti,keystone,psc-clock";
+		clocks = <&clkmodrst0>;
+		clock-output-names = "timer15";
+		reg = <0x02350000 0xb00>, <0x02350000 0x400>;
+		reg-names = "control", "domain";
+		domain-id = <0>;
+	};
+
 	clkuart0: clkuart0 {
 		#clock-cells = <0>;
 		compatible = "ti,keystone,psc-clock";
diff --git a/arch/arm/boot/dts/keystone.dts b/arch/arm/boot/dts/keystone.dts
deleted file mode 100644
index 100bdf5..0000000
--- a/arch/arm/boot/dts/keystone.dts
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright 2013 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/dts-v1/;
-#include <dt-bindings/interrupt-controller/arm-gic.h>
-
-#include "skeleton.dtsi"
-
-/ {
-	model = "Texas Instruments Keystone 2 SoC";
-	compatible =  "ti,keystone-evm";
-	#address-cells = <2>;
-	#size-cells = <2>;
-	interrupt-parent = <&gic>;
-
-	aliases {
-		serial0	= &uart0;
-	};
-
-	memory {
-		reg = <0x00000000 0x80000000 0x00000000 0x40000000>;
-	};
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		interrupt-parent = <&gic>;
-
-		cpu@0 {
-			compatible = "arm,cortex-a15";
-			device_type = "cpu";
-			reg = <0>;
-		};
-
-		cpu@1 {
-			compatible = "arm,cortex-a15";
-			device_type = "cpu";
-			reg = <1>;
-		};
-
-		cpu@2 {
-			compatible = "arm,cortex-a15";
-			device_type = "cpu";
-			reg = <2>;
-		};
-
-		cpu@3 {
-			compatible = "arm,cortex-a15";
-			device_type = "cpu";
-			reg = <3>;
-		};
-	};
-
-	gic: interrupt-controller {
-		compatible = "arm,cortex-a15-gic";
-		#interrupt-cells = <3>;
-		#size-cells = <0>;
-		#address-cells = <1>;
-		interrupt-controller;
-		reg = <0x0 0x02561000 0x0 0x1000>,
-		      <0x0 0x02562000 0x0 0x2000>;
-	};
-
-	timer {
-		compatible = "arm,armv7-timer";
-		interrupts =
-			<GIC_PPI 13
-				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
-			<GIC_PPI 14
-				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
-			<GIC_PPI 11
-				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
-			<GIC_PPI 10
-				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
-	};
-
-	pmu {
-		compatible = "arm,cortex-a15-pmu";
-		interrupts = <GIC_SPI 20 IRQ_TYPE_EDGE_RISING>,
-			     <GIC_SPI 21 IRQ_TYPE_EDGE_RISING>,
-			     <GIC_SPI 22 IRQ_TYPE_EDGE_RISING>,
-			     <GIC_SPI 23 IRQ_TYPE_EDGE_RISING>;
-	};
-
-	soc {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		compatible = "ti,keystone","simple-bus";
-		interrupt-parent = <&gic>;
-		ranges = <0x0 0x0 0x0 0xc0000000>;
-
-		rstctrl: reset-controller {
-			compatible = "ti,keystone-reset";
-			reg = <0x023100e8 4>;	/* pll reset control reg */
-		};
-
-		/include/ "keystone-clocks.dtsi"
-
-		uart0: serial@02530c00 {
-			compatible = "ns16550a";
-			current-speed = <115200>;
-			reg-shift = <2>;
-			reg-io-width = <4>;
-			reg = <0x02530c00 0x100>;
-			clocks	= <&clkuart0>;
-			interrupts = <GIC_SPI 277 IRQ_TYPE_EDGE_RISING>;
-		};
-
-		uart1:	serial@02531000 {
-			compatible = "ns16550a";
-			current-speed = <115200>;
-			reg-shift = <2>;
-			reg-io-width = <4>;
-			reg = <0x02531000 0x100>;
-			clocks	= <&clkuart1>;
-			interrupts = <GIC_SPI 280 IRQ_TYPE_EDGE_RISING>;
-		};
-
-		i2c0: i2c@2530000 {
-			compatible = "ti,davinci-i2c";
-			reg = <0x02530000 0x400>;
-			clock-frequency = <100000>;
-			clocks = <&clki2c>;
-			interrupts = <GIC_SPI 283 IRQ_TYPE_EDGE_RISING>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			dtt@50 {
-				compatible = "at,24c1024";
-				reg = <0x50>;
-			};
-		};
-
-		i2c1: i2c@2530400 {
-			compatible = "ti,davinci-i2c";
-			reg = <0x02530400 0x400>;
-			clock-frequency = <100000>;
-			clocks = <&clki2c>;
-			interrupts = <GIC_SPI 286 IRQ_TYPE_EDGE_RISING>;
-		};
-
-		i2c2: i2c@2530800 {
-			compatible = "ti,davinci-i2c";
-			reg = <0x02530800 0x400>;
-			clock-frequency = <100000>;
-			clocks = <&clki2c>;
-			interrupts = <GIC_SPI 289 IRQ_TYPE_EDGE_RISING>;
-		};
-
-		spi0: spi@21000400 {
-			compatible = "ti,dm6441-spi";
-			reg = <0x21000400 0x200>;
-			num-cs = <4>;
-			ti,davinci-spi-intr-line = <0>;
-			interrupts = <GIC_SPI 292 IRQ_TYPE_EDGE_RISING>;
-			clocks = <&clkspi>;
-		};
-
-		spi1: spi@21000600 {
-			compatible = "ti,dm6441-spi";
-			reg = <0x21000600 0x200>;
-			num-cs = <4>;
-			ti,davinci-spi-intr-line = <0>;
-			interrupts = <GIC_SPI 296 IRQ_TYPE_EDGE_RISING>;
-			clocks = <&clkspi>;
-		};
-
-		spi2: spi@21000800 {
-			compatible = "ti,dm6441-spi";
-			reg = <0x21000800 0x200>;
-			num-cs = <4>;
-			ti,davinci-spi-intr-line = <0>;
-			interrupts = <GIC_SPI 300 IRQ_TYPE_EDGE_RISING>;
-			clocks = <&clkspi>;
-		};
-	};
-};
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
new file mode 100644
index 0000000..bc8c8e6
--- /dev/null
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2013 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/gpio/gpio.h>
+
+#include "skeleton.dtsi"
+
+/ {
+	model = "Texas Instruments Keystone 2 SoC";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&gic>;
+
+	aliases {
+		serial0	= &uart0;
+	};
+
+	memory {
+		reg = <0x00000000 0x80000000 0x00000000 0x40000000>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		interrupt-parent = <&gic>;
+
+		cpu@0 {
+			compatible = "arm,cortex-a15";
+			device_type = "cpu";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			compatible = "arm,cortex-a15";
+			device_type = "cpu";
+			reg = <1>;
+		};
+
+		cpu@2 {
+			compatible = "arm,cortex-a15";
+			device_type = "cpu";
+			reg = <2>;
+		};
+
+		cpu@3 {
+			compatible = "arm,cortex-a15";
+			device_type = "cpu";
+			reg = <3>;
+		};
+	};
+
+	gic: interrupt-controller {
+		compatible = "arm,cortex-a15-gic";
+		#interrupt-cells = <3>;
+		#size-cells = <0>;
+		#address-cells = <1>;
+		interrupt-controller;
+		reg = <0x0 0x02561000 0x0 0x1000>,
+		      <0x0 0x02562000 0x0 0x2000>,
+		      <0x0 0x02564000 0x0 0x1000>,
+		      <0x0 0x02566000 0x0 0x2000>;
+		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) |
+				IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts =
+			<GIC_PPI 13
+				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			<GIC_PPI 14
+				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			<GIC_PPI 11
+				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			<GIC_PPI 10
+				(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	pmu {
+		compatible = "arm,cortex-a15-pmu";
+		interrupts = <GIC_SPI 20 IRQ_TYPE_EDGE_RISING>,
+			     <GIC_SPI 21 IRQ_TYPE_EDGE_RISING>,
+			     <GIC_SPI 22 IRQ_TYPE_EDGE_RISING>,
+			     <GIC_SPI 23 IRQ_TYPE_EDGE_RISING>;
+	};
+
+	soc {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "ti,keystone","simple-bus";
+		interrupt-parent = <&gic>;
+		ranges = <0x0 0x0 0x0 0xc0000000>;
+
+		rstctrl: reset-controller {
+			compatible = "ti,keystone-reset";
+			reg = <0x023100e8 4>;	/* pll reset control reg */
+		};
+
+		/include/ "keystone-clocks.dtsi"
+
+		uart0: serial@02530c00 {
+			compatible = "ns16550a";
+			current-speed = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			reg = <0x02530c00 0x100>;
+			clocks	= <&clkuart0>;
+			interrupts = <GIC_SPI 277 IRQ_TYPE_EDGE_RISING>;
+		};
+
+		uart1:	serial@02531000 {
+			compatible = "ns16550a";
+			current-speed = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			reg = <0x02531000 0x100>;
+			clocks	= <&clkuart1>;
+			interrupts = <GIC_SPI 280 IRQ_TYPE_EDGE_RISING>;
+		};
+
+		i2c0: i2c@2530000 {
+			compatible = "ti,davinci-i2c";
+			reg = <0x02530000 0x400>;
+			clock-frequency = <100000>;
+			clocks = <&clki2c>;
+			interrupts = <GIC_SPI 283 IRQ_TYPE_EDGE_RISING>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			dtt@50 {
+				compatible = "at,24c1024";
+				reg = <0x50>;
+			};
+		};
+
+		i2c1: i2c@2530400 {
+			compatible = "ti,davinci-i2c";
+			reg = <0x02530400 0x400>;
+			clock-frequency = <100000>;
+			clocks = <&clki2c>;
+			interrupts = <GIC_SPI 286 IRQ_TYPE_EDGE_RISING>;
+		};
+
+		i2c2: i2c@2530800 {
+			compatible = "ti,davinci-i2c";
+			reg = <0x02530800 0x400>;
+			clock-frequency = <100000>;
+			clocks = <&clki2c>;
+			interrupts = <GIC_SPI 289 IRQ_TYPE_EDGE_RISING>;
+		};
+
+		spi0: spi@21000400 {
+			compatible = "ti,dm6441-spi";
+			reg = <0x21000400 0x200>;
+			num-cs = <4>;
+			ti,davinci-spi-intr-line = <0>;
+			interrupts = <GIC_SPI 292 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkspi>;
+		};
+
+		spi1: spi@21000600 {
+			compatible = "ti,dm6441-spi";
+			reg = <0x21000600 0x200>;
+			num-cs = <4>;
+			ti,davinci-spi-intr-line = <0>;
+			interrupts = <GIC_SPI 296 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkspi>;
+		};
+
+		spi2: spi@21000800 {
+			compatible = "ti,dm6441-spi";
+			reg = <0x21000800 0x200>;
+			num-cs = <4>;
+			ti,davinci-spi-intr-line = <0>;
+			interrupts = <GIC_SPI 300 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkspi>;
+		};
+
+		usb_phy: usb_phy@2620738 {
+			compatible = "ti,keystone-usbphy";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x2620738 32>;
+			status = "disabled";
+		};
+
+		usb: usb@2680000 {
+			compatible = "ti,keystone-dwc3";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x2680000 0x10000>;
+			clocks = <&clkusb>;
+			clock-names = "usb";
+			interrupts = <GIC_SPI 393 IRQ_TYPE_EDGE_RISING>;
+			ranges;
+			status = "disabled";
+
+			dwc3@2690000 {
+				compatible = "synopsys,dwc3";
+				reg = <0x2690000 0x70000>;
+				interrupts = <GIC_SPI 393 IRQ_TYPE_EDGE_RISING>;
+				usb-phy = <&usb_phy>, <&usb_phy>;
+			};
+		};
+
+		wdt: wdt@022f0080 {
+			compatible = "ti,keystone-wdt","ti,davinci-wdt";
+			reg = <0x022f0080 0x80>;
+			clocks = <&clkwdtimer0>;
+		};
+
+		clock_event: timer@22f0000 {
+			compatible = "ti,keystone-timer";
+			reg = <0x022f0000 0x80>;
+			interrupts = <GIC_SPI 110 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clktimer15>;
+		};
+
+		aemif: aemif@21000A00 {
+			compatible = "ti,keystone-aemif", "ti,davinci-aemif";
+			#address-cells = <2>;
+			#size-cells = <1>;
+			clocks = <&clkaemif>;
+			clock-names = "aemif";
+			clock-ranges;
+
+			reg = <0x21000A00 0x00000100>;
+			ranges = <0 0 0x30000000 0x10000000
+				  1 0 0x21000A00 0x00000100>;
+		};
+
+		gpio0: gpio@260bf00 {
+			compatible = "ti,keystone-gpio";
+			reg = <0x0260bf00 0x100>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			/* HW Interrupts mapped to GPIO pins */
+			interrupts = <GIC_SPI 120 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 121 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 122 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 123 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 124 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 125 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 126 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 127 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 128 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 129 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 130 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 131 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 132 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 133 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 134 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 135 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 136 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 137 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 138 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 139 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 140 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 141 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 142 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 143 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 144 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 145 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 146 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 147 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 148 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 149 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 150 IRQ_TYPE_EDGE_RISING>,
+					<GIC_SPI 151 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&clkgpio>;
+			clock-names = "gpio";
+			ti,ngpio = <32>;
+			ti,davinci-gpio-unbanked = <32>;
+		};
+	};
+};
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 9943e5d..ec9a41d 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -111,10 +111,13 @@
 CONFIG_MTD_PLATRAM=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_DAVINCI=y
 CONFIG_MTD_UBI=y
 CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_EEPROM_AT24=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
@@ -129,10 +132,28 @@
 CONFIG_SPI_SPIDEV=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
-# CONFIG_USB_SUPPORT is not set
+CONFIG_WATCHDOG_CORE=y
+CONFIG_DAVINCI_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_DEBUG=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_DWC3=y
+CONFIG_USB_DWC3_DEBUG=y
+CONFIG_USB_DWC3_VERBOSE=y
+CONFIG_KEYSTONE_USB_PHY=y
 CONFIG_DMADEVICES=y
+CONFIG_TI_EDMA=y
 CONFIG_COMMON_CLK_DEBUG=y
 CONFIG_MEMORY=y
+CONFIG_TI_AEMIF=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
 CONFIG_JFFS2_FS_WBUF_VERIFY=y
@@ -144,6 +165,8 @@
 CONFIG_NFSD=y
 CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_DEBUG_INFO=y
@@ -158,3 +181,14 @@
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_DAVINCI=y
+CONFIG_LEDS_CLASS=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_ONESHOT=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_BACKLIGHT=y
+CONFIG_LEDS_TRIGGER_GPIO=y
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index e691ec9..b2e298a 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -254,25 +254,59 @@
 }
 
 /*
- * On ARMv5 and above those functions can be implemented around
- * the clz instruction for much better code efficiency.
+ * On ARMv5 and above those functions can be implemented around the
+ * clz instruction for much better code efficiency.  __clz returns
+ * the number of leading zeros, zero input will return 32, and
+ * 0x80000000 will return 0.
  */
-
-static inline int fls(int x)
+static inline unsigned int __clz(unsigned int x)
 {
-	int ret;
-
-	if (__builtin_constant_p(x))
-	       return constant_fls(x);
+	unsigned int ret;
 
 	asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
-       	ret = 32 - ret;
+
 	return ret;
 }
 
-#define __fls(x) (fls(x) - 1)
-#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
-#define __ffs(x) (ffs(x) - 1)
+/*
+ * fls() returns zero if the input is zero, otherwise returns the bit
+ * position of the last set bit, where the LSB is 1 and MSB is 32.
+ */
+static inline int fls(int x)
+{
+	if (__builtin_constant_p(x))
+	       return constant_fls(x);
+
+	return 32 - __clz(x);
+}
+
+/*
+ * __fls() returns the bit position of the last bit set, where the
+ * LSB is 0 and MSB is 31.  Zero input is undefined.
+ */
+static inline unsigned long __fls(unsigned long x)
+{
+	return fls(x) - 1;
+}
+
+/*
+ * ffs() returns zero if the input was zero, otherwise returns the bit
+ * position of the first set bit, where the LSB is 1 and MSB is 32.
+ */
+static inline int ffs(int x)
+{
+	return fls(x & -x);
+}
+
+/*
+ * __ffs() returns the bit position of the first bit set, where the
+ * LSB is 0 and MSB is 31.  Zero input is undefined.
+ */
+static inline unsigned long __ffs(unsigned long x)
+{
+	return ffs(x) - 1;
+}
+
 #define ffz(x) __ffs( ~(x) )
 
 #endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index e701a4d..84acc46 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -55,28 +55,16 @@
  * functions used internally by the DMA-mapping API to provide DMA
  * addresses. They must not be used by drivers.
  */
-#ifndef __arch_pfn_to_dma
-static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
-{
-	return (dma_addr_t)__pfn_to_bus(pfn);
-}
 
-static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
-{
-	return __bus_to_pfn(addr);
-}
+extern dma_addr_t (*__arch_pfn_to_dma)(struct device *dev, unsigned long pfn);
+extern unsigned long (*__arch_dma_to_pfn)(struct device *dev, dma_addr_t addr);
+extern void* (*__arch_dma_to_virt)(struct device *dev, dma_addr_t addr);
+extern dma_addr_t (*__arch_virt_to_dma)(struct device *dev, void *addr);
 
-static inline void *dma_to_virt(struct device *dev, dma_addr_t addr)
-{
-	return (void *)__bus_to_virt((unsigned long)addr);
-}
 
-static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
-{
-	return (dma_addr_t)__virt_to_bus((unsigned long)(addr));
-}
+/* Keep __arch_pfn_to_dma defined as it's used by some drivers (V4L2)*/
+#define __arch_pfn_to_dma __arch_pfn_to_dma
 
-#else
 static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn)
 {
 	return __arch_pfn_to_dma(dev, pfn);
@@ -96,7 +84,6 @@
 {
 	return __arch_virt_to_dma(dev, addr);
 }
-#endif
 
 /* The ARM override for dma_max_pfn() */
 static inline unsigned long dma_max_pfn(struct device *dev)
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
index 58b8c6a..9908443 100644
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -8,8 +8,8 @@
 #define MAX_DMA_ADDRESS	0xffffffffUL
 #else
 #define MAX_DMA_ADDRESS	({ \
-	extern unsigned long arm_dma_zone_size; \
-	arm_dma_zone_size ? \
+	extern phys_addr_t arm_dma_zone_size; \
+	arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
 		(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
 #endif
 
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 34d5fd5..f751714 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -33,7 +33,7 @@
 
 void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return alloc_bootmem_align(size, align);
+	return memblock_virt_alloc(size, align);
 }
 
 void __init arm_dt_memblock_reserve(void)
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 987a7f5..60f5111 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -717,7 +717,7 @@
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for_each_memblock(memory, region) {
-		res = alloc_bootmem_low(sizeof(*res));
+		res = memblock_virt_alloc(sizeof(*res), 0);
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
@@ -817,7 +817,7 @@
 	if (ret)
 		return;
 
-	ret = reserve_bootmem(crash_base, crash_size, BOOTMEM_EXCLUSIVE);
+	ret = memblock_reserve(crash_base, crash_size);
 	if (ret < 0) {
 		printk(KERN_WARNING "crashkernel reservation failed - "
 		       "memory is in use (0x%lx)\n", (unsigned long)crash_base);
diff --git a/arch/arm/mach-davinci/aemif.c b/arch/arm/mach-davinci/aemif.c
index f091a90..ff8b7e7 100644
--- a/arch/arm/mach-davinci/aemif.c
+++ b/arch/arm/mach-davinci/aemif.c
@@ -16,6 +16,7 @@
 #include <linux/time.h>
 
 #include <linux/platform_data/mtd-davinci-aemif.h>
+#include <linux/platform_data/mtd-davinci.h>
 
 /* Timing value configuration */
 
@@ -43,6 +44,17 @@
 				WSTROBE(WSTROBE_MAX) | \
 				WSETUP(WSETUP_MAX))
 
+static inline unsigned int davinci_aemif_readl(void __iomem *base, int offset)
+{
+	return readl_relaxed(base + offset);
+}
+
+static inline void davinci_aemif_writel(void __iomem *base,
+					int offset, unsigned long value)
+{
+	writel_relaxed(value, base + offset);
+}
+
 /*
  * aemif_calc_rate - calculate timing data.
  * @wanted: The cycle time needed in nanoseconds.
@@ -76,6 +88,7 @@
  * @t: timing values to be progammed
  * @base: The virtual base address of the AEMIF interface
  * @cs: chip-select to program the timing values for
+ * @clkrate: the AEMIF clkrate
  *
  * This function programs the given timing values (in real clock) into the
  * AEMIF registers taking the AEMIF clock into account.
@@ -86,24 +99,17 @@
  *
  * Returns 0 on success, else negative errno.
  */
-int davinci_aemif_setup_timing(struct davinci_aemif_timing *t,
-					void __iomem *base, unsigned cs)
+static int davinci_aemif_setup_timing(struct davinci_aemif_timing *t,
+					void __iomem *base, unsigned cs,
+					unsigned long clkrate)
 {
 	unsigned set, val;
 	int ta, rhold, rstrobe, rsetup, whold, wstrobe, wsetup;
 	unsigned offset = A1CR_OFFSET + cs * 4;
-	struct clk *aemif_clk;
-	unsigned long clkrate;
 
 	if (!t)
 		return 0;	/* Nothing to do */
 
-	aemif_clk = clk_get(NULL, "aemif");
-	if (IS_ERR(aemif_clk))
-		return PTR_ERR(aemif_clk);
-
-	clkrate = clk_get_rate(aemif_clk);
-
 	clkrate /= 1000;	/* turn clock into kHz for ease of use */
 
 	ta	= aemif_calc_rate(t->ta, clkrate, TA_MAX);
@@ -130,4 +136,83 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(davinci_aemif_setup_timing);
+
+/**
+ * davinci_aemif_setup - setup AEMIF interface by davinci_nand_pdata
+ * @pdev - link to platform device to setup settings for
+ *
+ * This function does not use any locking while programming the AEMIF
+ * because it is expected that there is only one user of a given
+ * chip-select.
+ *
+ * Returns 0 on success, else negative errno.
+ */
+int davinci_aemif_setup(struct platform_device *pdev)
+{
+	struct davinci_nand_pdata *pdata = dev_get_platdata(&pdev->dev);
+	uint32_t val;
+	unsigned long clkrate;
+	struct resource	*res;
+	void __iomem *base;
+	struct clk *clk;
+	int ret = 0;
+
+	clk = clk_get(&pdev->dev, "aemif");
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret < 0) {
+		dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
+			ret);
+		goto err_put;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "cannot get IORESOURCE_MEM\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	base = ioremap(res->start, resource_size(res));
+	if (!base) {
+		dev_err(&pdev->dev, "ioremap failed for resource %pR\n", res);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * Setup Async configuration register in case we did not boot
+	 * from NAND and so bootloader did not bother to set it up.
+	 */
+	val = davinci_aemif_readl(base, A1CR_OFFSET + pdev->id * 4);
+	/*
+	 * Extended Wait is not valid and Select Strobe mode is not
+	 * used
+	 */
+	val &= ~(ACR_ASIZE_MASK | ACR_EW_MASK | ACR_SS_MASK);
+	if (pdata->options & NAND_BUSWIDTH_16)
+		val |= 0x1;
+
+	davinci_aemif_writel(base, A1CR_OFFSET + pdev->id * 4, val);
+
+	clkrate = clk_get_rate(clk);
+
+	if (pdata->timing)
+		ret = davinci_aemif_setup_timing(pdata->timing, base, pdev->id,
+						 clkrate);
+
+	if (ret < 0)
+		dev_dbg(&pdev->dev, "NAND timing values setup fail\n");
+
+	iounmap(base);
+err:
+	clk_disable_unprepare(clk);
+err_put:
+	clk_put(clk);
+	return ret;
+}
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index d1f45af..5623131 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -419,6 +419,9 @@
 	if (ret)
 		pr_warning("da830_evm_init: NAND device not registered.\n");
 
+	if (davinci_aemif_setup(&da830_evm_nand_device))
+		pr_warn("%s: Cannot configure AEMIF.\n", __func__);
+
 	gpio_direction_output(mux_mode, 1);
 }
 #else
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index e0af0ec..234c5bb 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -358,6 +358,9 @@
 
 		platform_add_devices(da850_evm_devices,
 					ARRAY_SIZE(da850_evm_devices));
+
+		if (davinci_aemif_setup(&da850_evm_nandflash_device))
+			pr_warn("%s: Cannot configure AEMIF.\n", __func__);
 	}
 }
 
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index 987605b7..5602957 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -778,6 +778,11 @@
 		/* only one device will be jumpered and detected */
 		if (HAS_NAND) {
 			platform_device_register(&davinci_evm_nandflash_device);
+
+			if (davinci_aemif_setup(&davinci_evm_nandflash_device))
+				pr_warn("%s: Cannot configure AEMIF.\n",
+					__func__);
+
 			evm_leds[7].default_trigger = "nand-disk";
 			if (HAS_NOR)
 				pr_warning("WARNING: both NAND and NOR flash "
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 13d0801..ae129bc 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -805,6 +805,9 @@
 
 	platform_device_register(&davinci_nand_device);
 
+	if (davinci_aemif_setup(&davinci_nand_device))
+		pr_warn("%s: Cannot configure AEMIF.\n", __func__);
+
 	dm646x_init_edma(dm646x_edma_rsv);
 
 	if (HAS_ATA)
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 7aa105b..96fc00a 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -27,6 +27,7 @@
 #include <mach/cp_intc.h>
 #include <mach/da8xx.h>
 #include <linux/platform_data/mtd-davinci.h>
+#include <linux/platform_data/mtd-davinci-aemif.h>
 #include <mach/mux.h>
 #include <linux/platform_data/spi-davinci.h>
 
@@ -432,6 +433,9 @@
 {
 	platform_add_devices(mityomapl138_devices,
 				 ARRAY_SIZE(mityomapl138_devices));
+
+	if (davinci_aemif_setup(&mityomapl138_nandflash_device))
+		pr_warn("%s: Cannot configure AEMIF.\n", __func__);
 }
 
 static const short mityomap_mii_pins[] = {
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index 0813b51..82c6013 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -385,7 +385,7 @@
 	CLK(NULL,		"pll0_sysclk7",	&pll0_sysclk7),
 	CLK("i2c_davinci.1",	NULL,		&i2c0_clk),
 	CLK(NULL,		"timer0",	&timerp64_0_clk),
-	CLK("watchdog",		NULL,		&timerp64_1_clk),
+	CLK("davinci-wdt",	NULL,		&timerp64_1_clk),
 	CLK(NULL,		"arm_rom",	&arm_rom_clk),
 	CLK(NULL,		"scr0_ss",	&scr0_ss_clk),
 	CLK(NULL,		"scr1_ss",	&scr1_ss_clk),
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 352984e..ccb2f58 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -443,7 +443,7 @@
 	CLK(NULL,		"pll1_sysclk3",	&pll1_sysclk3),
 	CLK("i2c_davinci.1",	NULL,		&i2c0_clk),
 	CLK(NULL,		"timer0",	&timerp64_0_clk),
-	CLK("watchdog",		NULL,		&timerp64_1_clk),
+	CLK("davinci-wdt",	NULL,		&timerp64_1_clk),
 	CLK(NULL,		"arm_rom",	&arm_rom_clk),
 	CLK(NULL,		"tpcc0",	&tpcc0_clk),
 	CLK(NULL,		"tptc0",	&tptc0_clk),
diff --git a/arch/arm/mach-davinci/da8xx-dt.c b/arch/arm/mach-davinci/da8xx-dt.c
index d2bc574..ed19287 100644
--- a/arch/arm/mach-davinci/da8xx-dt.c
+++ b/arch/arm/mach-davinci/da8xx-dt.c
@@ -32,7 +32,7 @@
 
 static struct of_dev_auxdata da850_auxdata_lookup[] __initdata = {
 	OF_DEV_AUXDATA("ti,davinci-i2c", 0x01c22000, "i2c_davinci.1", NULL),
-	OF_DEV_AUXDATA("ti,davinci-wdt", 0x01c21000, "watchdog", NULL),
+	OF_DEV_AUXDATA("ti,davinci-wdt", 0x01c21000, "davinci-wdt", NULL),
 	OF_DEV_AUXDATA("ti,da830-mmc", 0x01c40000, "da830-mmc.0", NULL),
 	OF_DEV_AUXDATA("ti,da850-ehrpwm", 0x01f00000, "ehrpwm", NULL),
 	OF_DEV_AUXDATA("ti,da850-ehrpwm", 0x01f02000, "ehrpwm", NULL),
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 78829c5..0486cdf 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -389,7 +389,7 @@
 };
 
 static struct platform_device da8xx_wdt_device = {
-	.name		= "watchdog",
+	.name		= "davinci-wdt",
 	.id		= -1,
 	.num_resources	= ARRAY_SIZE(da8xx_watchdog_resources),
 	.resource	= da8xx_watchdog_resources,
@@ -399,7 +399,7 @@
 {
 	struct device *dev;
 
-	dev = bus_find_device_by_name(&platform_bus_type, NULL, "watchdog");
+	dev = bus_find_device_by_name(&platform_bus_type, NULL, "davinci-wdt");
 	if (!dev) {
 		pr_err("%s: failed to find watchdog device\n", __func__);
 		return;
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 3996e98..5cf9a02 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -302,7 +302,7 @@
 };
 
 struct platform_device davinci_wdt_device = {
-	.name		= "watchdog",
+	.name		= "davinci-wdt",
 	.id		= -1,
 	.num_resources	= ARRAY_SIZE(wdt_resources),
 	.resource	= wdt_resources,
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 6117fc6..2b085b0 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -375,7 +375,7 @@
 	CLK(NULL, "pwm3", &pwm3_clk),
 	CLK(NULL, "timer0", &timer0_clk),
 	CLK(NULL, "timer1", &timer1_clk),
-	CLK("watchdog", NULL, &timer2_clk),
+	CLK("davinci-wdt", NULL, &timer2_clk),
 	CLK(NULL, "timer3", &timer3_clk),
 	CLK(NULL, "rto", &rto_clk),
 	CLK(NULL, "usb", &usb_clk),
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index d7c6f85..77f64fd 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -473,7 +473,7 @@
 	CLK(NULL, "pwm3", &pwm3_clk),
 	CLK(NULL, "timer0", &timer0_clk),
 	CLK(NULL, "timer1", &timer1_clk),
-	CLK("watchdog", NULL, &timer2_clk),
+	CLK("davinci-wdt", NULL, &timer2_clk),
 	CLK(NULL, "timer3", &timer3_clk),
 	CLK(NULL, "usb", &usb_clk),
 	CLK("davinci_emac.1", NULL, &emac_clk),
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 3ce4799..7d69845 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -322,7 +322,7 @@
 	CLK(NULL, "pwm2", &pwm2_clk),
 	CLK(NULL, "timer0", &timer0_clk),
 	CLK(NULL, "timer1", &timer1_clk),
-	CLK("watchdog", NULL, &timer2_clk),
+	CLK("davinci-wdt", NULL, &timer2_clk),
 	CLK(NULL, NULL, NULL),
 };
 
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 0e81fea..cad08e6 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -356,7 +356,7 @@
 	CLK(NULL, "pwm1", &pwm1_clk),
 	CLK(NULL, "timer0", &timer0_clk),
 	CLK(NULL, "timer1", &timer1_clk),
-	CLK("watchdog", NULL, &timer2_clk),
+	CLK("davinci-wdt", NULL, &timer2_clk),
 	CLK("palm_bk3710", NULL, &ide_clk),
 	CLK(NULL, "vpif0", &vpif0_clk),
 	CLK(NULL, "vpif1", &vpif1_clk),
diff --git a/arch/arm/mach-iop13xx/include/mach/memory.h b/arch/arm/mach-iop13xx/include/mach/memory.h
index 7c032d0..eab7930 100644
--- a/arch/arm/mach-iop13xx/include/mach/memory.h
+++ b/arch/arm/mach-iop13xx/include/mach/memory.h
@@ -8,65 +8,4 @@
  */
 #define PLAT_PHYS_OFFSET	UL(0x00000000)
 
-#ifndef __ASSEMBLY__
-
-#if defined(CONFIG_ARCH_IOP13XX)
-#define IOP13XX_PMMR_V_START (IOP13XX_PMMR_VIRT_MEM_BASE)
-#define IOP13XX_PMMR_V_END   (IOP13XX_PMMR_VIRT_MEM_BASE + IOP13XX_PMMR_SIZE)
-#define IOP13XX_PMMR_P_START (IOP13XX_PMMR_PHYS_MEM_BASE)
-#define IOP13XX_PMMR_P_END   (IOP13XX_PMMR_PHYS_MEM_BASE + IOP13XX_PMMR_SIZE)
-
-static inline dma_addr_t __virt_to_lbus(void __iomem *x)
-{
-	return x + IOP13XX_PMMR_PHYS_MEM_BASE - IOP13XX_PMMR_VIRT_MEM_BASE;
-}
-
-static inline void __iomem *__lbus_to_virt(dma_addr_t x)
-{
-	return x + IOP13XX_PMMR_VIRT_MEM_BASE - IOP13XX_PMMR_PHYS_MEM_BASE;
-}
-
-#define __is_lbus_dma(a)				\
-	((a) >= IOP13XX_PMMR_P_START && (a) < IOP13XX_PMMR_P_END)
-
-#define __is_lbus_virt(a)				\
-	((a) >= IOP13XX_PMMR_V_START && (a) < IOP13XX_PMMR_V_END)
-
-/* Device is an lbus device if it is on the platform bus of the IOP13XX */
-#define is_lbus_device(dev) 				\
-	(dev && strncmp(dev->bus->name, "platform", 8) == 0)
-
-#define __arch_dma_to_virt(dev, addr)					\
-	({								\
-		void * __virt;						\
-		dma_addr_t __dma = addr;				\
-		if (is_lbus_device(dev) && __is_lbus_dma(__dma))	\
-			__virt = __lbus_to_virt(__dma);			\
-		else							\
-			__virt = (void *)__phys_to_virt(__dma);		\
-		__virt;							\
-	})
-
-#define __arch_virt_to_dma(dev, addr)					\
-	({								\
-		void * __virt = addr;					\
-		dma_addr_t __dma;					\
-		if (is_lbus_device(dev) && __is_lbus_virt(__virt))	\
-			__dma = __virt_to_lbus(__virt);			\
-		else							\
-			__dma = __virt_to_phys((unsigned long)__virt);	\
-		__dma;							\
-	})
-
-#define __arch_pfn_to_dma(dev, pfn)					\
-	({								\
-		/* __is_lbus_virt() can never be true for RAM pages */	\
-		(dma_addr_t)__pfn_to_phys(pfn);				\
-	})
-
-#define __arch_dma_to_pfn(dev, addr)	__phys_to_pfn(addr)
-
-#endif /* CONFIG_ARCH_IOP13XX */
-#endif /* !ASSEMBLY */
-
 #endif
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index 96e6c7a..0f44101 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -351,8 +351,66 @@
 	},
 };
 
+#define IOP13XX_PMMR_V_START (IOP13XX_PMMR_VIRT_MEM_BASE)
+#define IOP13XX_PMMR_V_END   (IOP13XX_PMMR_VIRT_MEM_BASE + IOP13XX_PMMR_SIZE)
+#define IOP13XX_PMMR_P_START (IOP13XX_PMMR_PHYS_MEM_BASE)
+#define IOP13XX_PMMR_P_END   (IOP13XX_PMMR_PHYS_MEM_BASE + IOP13XX_PMMR_SIZE)
+
+static inline dma_addr_t __virt_to_lbus(void __iomem *x)
+{
+	return x + IOP13XX_PMMR_PHYS_MEM_BASE - IOP13XX_PMMR_VIRT_MEM_BASE;
+}
+
+static inline void __iomem *__lbus_to_virt(dma_addr_t x)
+{
+	return x + IOP13XX_PMMR_VIRT_MEM_BASE - IOP13XX_PMMR_PHYS_MEM_BASE;
+}
+
+#define __is_lbus_dma(a)				\
+	((a) >= IOP13XX_PMMR_P_START && (a) < IOP13XX_PMMR_P_END)
+
+#define __is_lbus_virt(a)				\
+	((a) >= IOP13XX_PMMR_V_START && (a) < IOP13XX_PMMR_V_END)
+
+/* Device is an lbus device if it is on the platform bus of the IOP13XX */
+#define is_lbus_device(dev)				\
+	(dev && strncmp(dev->bus->name, "platform", 8) == 0)
+
+static dma_addr_t iop13xx_pfn_to_dma(struct device *dev, unsigned long pfn)
+{
+	/* __is_lbus_virt() can never be true for RAM pages */
+	return __pfn_to_phys(pfn);
+}
+
+static unsigned long iop13xx_dma_to_pfn(struct device *dev, dma_addr_t addr)
+{
+	return __phys_to_pfn(addr);
+}
+
+static void *iop13xx_dma_to_virt(struct device *dev, dma_addr_t addr)
+{
+	if (is_lbus_device(dev) && __is_lbus_dma(addr))
+		return __lbus_to_virt(addr);
+	else
+		return (void *)__phys_to_virt(addr);
+}
+
+static dma_addr_t iop13xx_virt_to_dma(struct device *dev, void *addr)
+{
+	if (is_lbus_device(dev) && __is_lbus_virt(addr))
+		return __virt_to_lbus(addr);
+	else
+		return __virt_to_phys((unsigned long)addr);
+}
+
+
 void __init iop13xx_map_io(void)
 {
+	__arch_pfn_to_dma = iop13xx_pfn_to_dma;
+	__arch_dma_to_pfn = iop13xx_dma_to_pfn;
+	__arch_dma_to_virt = iop13xx_dma_to_virt;
+	__arch_virt_to_dma = iop13xx_virt_to_dma;
+
 	/* Initialize the Static Page Table maps */
 	iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc));
 }
diff --git a/arch/arm/mach-keystone/Kconfig b/arch/arm/mach-keystone/Kconfig
index f20c53e..90a708f 100644
--- a/arch/arm/mach-keystone/Kconfig
+++ b/arch/arm/mach-keystone/Kconfig
@@ -10,7 +10,8 @@
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_ERRATA_798181 if SMP
 	select COMMON_CLK_KEYSTONE
-	select TI_EDMA
+	select ARCH_SUPPORTS_BIG_ENDIAN
+	select ZONE_DMA if ARM_LPAE
 	help
 	  Support for boards based on the Texas Instruments Keystone family of
 	  SoCs.
diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index b661c5c..8db9e2c 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -14,12 +14,16 @@
 #include <linux/init.h>
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/setup.h>
 #include <asm/mach/map.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 #include <asm/smp_plat.h>
+#include <asm/memory.h>
+
+#include "memory.h"
 
 #include "keystone.h"
 
@@ -28,6 +32,35 @@
 #define PLL_RESET				BIT(16)
 
 static void __iomem *keystone_rstctrl;
+static struct notifier_block platform_nb;
+
+static bool is_coherent(struct device *dev)
+{
+	struct device_node *node = of_node_get(dev->of_node);
+
+	while (node) {
+		if (of_property_read_bool(node, "dma-coherent")) {
+			of_node_put(node);
+			return true;
+		}
+		node = of_get_next_parent(node);
+	}
+	return false;
+}
+
+static int keystone_platform_notifier(struct notifier_block *nb,
+				      unsigned long event, void *dev)
+{
+	if (event != BUS_NOTIFY_ADD_DEVICE)
+		return NOTIFY_DONE;
+
+	if (is_coherent(dev)) {
+		set_dma_ops(dev, &arm_coherent_dma_ops);
+		pr_info("\t\t%s: keystone device is coherent\n", dev_name(dev));
+	}
+
+	return NOTIFY_OK;
+}
 
 static void __init keystone_init(void)
 {
@@ -41,9 +74,88 @@
 	if (WARN_ON(!keystone_rstctrl))
 		pr_warn("ti,keystone-reset iomap error\n");
 
+	keystone_pm_runtime_init();
+	if (platform_nb.notifier_call)
+		bus_register_notifier(&platform_bus_type, &platform_nb);
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
+static phys_addr_t keystone_virt_to_idmap(unsigned long x)
+{
+	return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
+}
+
+static unsigned long keystone_dma_pfn_offset __read_mostly;
+
+static dma_addr_t keystone_pfn_to_dma(struct device *dev, unsigned long pfn)
+{
+	return PFN_PHYS(pfn - keystone_dma_pfn_offset);
+}
+
+static unsigned long keystone_dma_to_pfn(struct device *dev, dma_addr_t addr)
+{
+	return PFN_DOWN(addr) + keystone_dma_pfn_offset;
+}
+
+static void *keystone_dma_to_virt(struct device *dev, dma_addr_t addr)
+{
+	return phys_to_virt(addr + PFN_PHYS(keystone_dma_pfn_offset));
+}
+
+static dma_addr_t keystone_virt_to_dma(struct device *dev, void *addr)
+{
+	return virt_to_phys(addr) - PFN_PHYS(keystone_dma_pfn_offset);
+}
+
+static void __init keystone_init_meminfo(void)
+{
+	bool lpae = IS_ENABLED(CONFIG_ARM_LPAE);
+	bool pvpatch = IS_ENABLED(CONFIG_ARM_PATCH_PHYS_VIRT);
+	phys_addr_t offset = PHYS_OFFSET - KEYSTONE_LOW_PHYS_START;
+	phys_addr_t mem_start, mem_end;
+
+	BUG_ON(meminfo.nr_banks < 1);
+	mem_start = meminfo.bank[0].start;
+	mem_end = mem_start + meminfo.bank[0].size - 1;
+
+	/* nothing to do if we are running out of the <32-bit space */
+	if (mem_start >= KEYSTONE_LOW_PHYS_START &&
+	    mem_end   <= KEYSTONE_LOW_PHYS_END)
+		return;
+
+	if (!lpae || !pvpatch) {
+		pr_crit("Enable %s%s%s to run outside 32-bit space\n",
+		      !lpae ? __stringify(CONFIG_ARM_LPAE) : "",
+		      (!lpae && !pvpatch) ? " and " : "",
+		      !pvpatch ? __stringify(CONFIG_ARM_PATCH_PHYS_VIRT) : "");
+	}
+
+	if (mem_start < KEYSTONE_HIGH_PHYS_START ||
+	    mem_end   > KEYSTONE_HIGH_PHYS_END) {
+		pr_crit("Invalid address space for memory (%08llx-%08llx)\n",
+		      (u64)mem_start, (u64)mem_end);
+	}
+
+	offset += KEYSTONE_HIGH_PHYS_START;
+	__pv_phys_offset = offset;
+	__pv_offset = (offset - PAGE_OFFSET);
+
+	/* Populate the arch idmap hook */
+	arch_virt_to_idmap = keystone_virt_to_idmap;
+
+	/* Populate the arch DMA hooks */
+	keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START -
+					   KEYSTONE_LOW_PHYS_START);
+	__arch_pfn_to_dma = keystone_pfn_to_dma;
+	__arch_dma_to_pfn = keystone_dma_to_pfn;
+	__arch_dma_to_virt = keystone_dma_to_virt;
+	__arch_virt_to_dma = keystone_virt_to_dma;
+
+	platform_nb.notifier_call = keystone_platform_notifier;
+
+	pr_info("Switching to high address space at 0x%llx\n", (u64)offset);
+}
+
 static const char *keystone_match[] __initconst = {
 	"ti,keystone-evm",
 	NULL,
@@ -68,8 +180,12 @@
 }
 
 DT_MACHINE_START(KEYSTONE, "Keystone")
+#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
+	.dma_zone_size	= SZ_2G,
+#endif
 	.smp		= smp_ops(keystone_smp_ops),
 	.init_machine	= keystone_init,
 	.dt_compat	= keystone_match,
 	.restart	= keystone_restart,
+	.init_meminfo   = keystone_init_meminfo,
 MACHINE_END
diff --git a/arch/arm/mach-keystone/keystone.h b/arch/arm/mach-keystone/keystone.h
index 60bef9d..cd04a1c 100644
--- a/arch/arm/mach-keystone/keystone.h
+++ b/arch/arm/mach-keystone/keystone.h
@@ -18,6 +18,7 @@
 extern struct smp_operations keystone_smp_ops;
 extern void secondary_startup(void);
 extern u32 keystone_cpu_smc(u32 command, u32 cpu, u32 addr);
+extern int keystone_pm_runtime_init(void);
 
 #endif /* __ASSEMBLER__ */
 #endif /* __KEYSTONE_H__ */
diff --git a/arch/arm/mach-keystone/memory.h b/arch/arm/mach-keystone/memory.h
new file mode 100644
index 0000000..b854fb1
--- /dev/null
+++ b/arch/arm/mach-keystone/memory.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2014 Texas Instruments, Inc.
+ *	Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#ifndef __MEMORY_H
+#define __MEMORY_H
+
+#define MAX_PHYSMEM_BITS	36
+#define SECTION_SIZE_BITS	34
+
+#define KEYSTONE_LOW_PHYS_START		0x80000000ULL
+#define KEYSTONE_LOW_PHYS_SIZE		0x80000000ULL /* 2G */
+#define KEYSTONE_LOW_PHYS_END		(KEYSTONE_LOW_PHYS_START + \
+					 KEYSTONE_LOW_PHYS_SIZE - 1)
+
+#define KEYSTONE_HIGH_PHYS_START	0x800000000ULL
+#define KEYSTONE_HIGH_PHYS_SIZE		0x400000000ULL	/* 16G */
+#define KEYSTONE_HIGH_PHYS_END		(KEYSTONE_HIGH_PHYS_START + \
+					 KEYSTONE_HIGH_PHYS_SIZE - 1)
+#endif /* __MEMORY_H */
diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5cf0683..5f46a7c 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c
@@ -17,13 +17,16 @@
 #include <linux/io.h>
 
 #include <asm/smp_plat.h>
+#include <asm/prom.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
 
 #include "keystone.h"
 
 static int keystone_smp_boot_secondary(unsigned int cpu,
 						struct task_struct *idle)
 {
-	unsigned long start = virt_to_phys(&secondary_startup);
+	unsigned long start = virt_to_idmap(&secondary_startup);
 	int error;
 
 	pr_debug("keystone-smp: booting cpu %d, vector %08lx\n",
@@ -36,6 +39,19 @@
 	return error;
 }
 
+#ifdef CONFIG_ARM_LPAE
+static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
+{
+	pgd_t *pgd0 = pgd_offset_k(0);
+	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+	local_flush_tlb_all();
+}
+#else
+static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
+{}
+#endif
+
 struct smp_operations keystone_smp_ops __initdata = {
 	.smp_boot_secondary	= keystone_smp_boot_secondary,
+	.smp_secondary_init     = keystone_smp_secondary_initmem,
 };
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index 2962523..ca79dda 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -74,9 +74,7 @@
 	if (!np)
 		return 0;
 
-	of_clk_init(NULL);
 	pm_clk_add_notifier(&platform_bus_type, &platform_domain_notifier);
 
 	return 0;
 }
-subsys_initcall(keystone_pm_runtime_init);
diff --git a/arch/arm/mach-ks8695/cpu.c b/arch/arm/mach-ks8695/cpu.c
index ddb2422..018fe46 100644
--- a/arch/arm/mach-ks8695/cpu.c
+++ b/arch/arm/mach-ks8695/cpu.c
@@ -25,6 +25,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
 
 #include <mach/hardware.h>
 #include <asm/mach/arch.h>
@@ -64,8 +66,56 @@
 			sysclk[scdc] / 1000000, cpuclk[scdc] / 1000000);
 }
 
+#ifdef CONFIG_PCI
+
+/* PCI mappings */
+#define __virt_to_lbus(x)	((x) - PAGE_OFFSET + KS8695_PCIMEM_PA)
+#define __lbus_to_virt(x)	((x) - KS8695_PCIMEM_PA + PAGE_OFFSET)
+
+/* Platform-bus mapping */
+extern struct bus_type platform_bus_type;
+#define is_lbus_device(dev)		(dev && dev->bus == &platform_bus_type)
+
+static dma_addr_t ks8695_pfn_to_dma(struct device *dev, unsigned long pfn)
+{
+	dma_addr_t __dma = __pfn_to_phys(pfn);
+
+	if (!is_lbus_device(dev))
+		__dma = __dma - PHYS_OFFSET + KS8695_PCIMEM_PA;
+	return __dma;
+}
+static unsigned long ks8695_dma_to_pfn(struct device *dev, dma_addr_t addr)
+{
+	dma_addr_t __dma = addr;
+
+	if (!is_lbus_device(dev))
+		__dma += PHYS_OFFSET - KS8695_PCIMEM_PA;
+	return __phys_to_pfn(__dma);
+}
+
+static void *ks8695_dma_to_virt(struct device *dev, dma_addr_t addr)
+{
+	return (void *) (is_lbus_device(dev) ? __phys_to_virt(addr) : __lbus_to_virt(addr));
+}
+
+static dma_addr_t ks8695_virt_to_dma(struct device *dev, void *addr)
+{
+	unsigned long __addr = (unsigned long)(addr);
+	return (dma_addr_t) (is_lbus_device(dev) ? __virt_to_phys(__addr) : __virt_to_lbus(__addr));
+}
+
+#endif
+
+
 void __init ks8695_map_io(void)
 {
+#ifdef CONFIG_PCI
+	__arch_pfn_to_dma = ks8695_pfn_to_dma;
+	__arch_dma_to_pfn = ks8695_dma_to_pfn;
+	__arch_dma_to_virt = ks8695_dma_to_virt;
+	__arch_virt_to_dma = ks8695_virt_to_dma;
+#endif
+
 	iotable_init(ks8695_io_desc, ARRAY_SIZE(ks8695_io_desc));
 
 	ks8695_processor_info();
diff --git a/arch/arm/mach-ks8695/include/mach/memory.h b/arch/arm/mach-ks8695/include/mach/memory.h
index 95e731a..029486d 100644
--- a/arch/arm/mach-ks8695/include/mach/memory.h
+++ b/arch/arm/mach-ks8695/include/mach/memory.h
@@ -20,37 +20,4 @@
  */
 #define PLAT_PHYS_OFFSET		KS8695_SDRAM_PA
 
-#ifndef __ASSEMBLY__
-
-#ifdef CONFIG_PCI
-
-/* PCI mappings */
-#define __virt_to_bus(x)	((x) - PAGE_OFFSET + KS8695_PCIMEM_PA)
-#define __bus_to_virt(x)	((x) - KS8695_PCIMEM_PA + PAGE_OFFSET)
-
-/* Platform-bus mapping */
-extern struct bus_type platform_bus_type;
-#define is_lbus_device(dev)		(dev && dev->bus == &platform_bus_type)
-#define __arch_dma_to_virt(dev, x)	({ (void *) (is_lbus_device(dev) ? \
-					__phys_to_virt(x) : __bus_to_virt(x)); })
-#define __arch_virt_to_dma(dev, x)	({ is_lbus_device(dev) ? \
-					(dma_addr_t)__virt_to_phys((unsigned long)x) \
-					: (dma_addr_t)__virt_to_bus(x); })
-#define __arch_pfn_to_dma(dev, pfn)	\
-	({ dma_addr_t __dma = __pfn_to_phys(pfn); \
-	   if (!is_lbus_device(dev)) \
-		__dma = __dma - PHYS_OFFSET + KS8695_PCIMEM_PA; \
-	   __dma; })
-
-#define __arch_dma_to_pfn(dev, x)	\
-	({ dma_addr_t __dma = x;				\
-	   if (!is_lbus_device(dev))				\
-		__dma += PHYS_OFFSET - KS8695_PCIMEM_PA;	\
-	   __phys_to_pfn(__dma);				\
-	})
-
-#endif
-
-#endif
-
 #endif
diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h
index 3c25305..beae905 100644
--- a/arch/arm/mach-omap1/include/mach/memory.h
+++ b/arch/arm/mach-omap1/include/mach/memory.h
@@ -11,47 +11,8 @@
 #define PLAT_PHYS_OFFSET		UL(0x10000000)
 
 /*
- * Bus address is physical address, except for OMAP-1510 Local Bus.
- * OMAP-1510 bus address is translated into a Local Bus address if the
- * OMAP bus type is lbus. We do the address translation based on the
- * device overriding the defaults used in the dma-mapping API.
- * Note that the is_lbus_device() test is not very efficient on 1510
- * because of the strncmp().
- */
-#if defined(CONFIG_ARCH_OMAP15XX) && !defined(__ASSEMBLER__)
-#include <mach/soc.h>
-
-/*
  * OMAP-1510 Local Bus address offset
  */
 #define OMAP1510_LB_OFFSET	UL(0x30000000)
 
-#define virt_to_lbus(x)		((x) - PAGE_OFFSET + OMAP1510_LB_OFFSET)
-#define lbus_to_virt(x)		((x) - OMAP1510_LB_OFFSET + PAGE_OFFSET)
-#define is_lbus_device(dev)	(cpu_is_omap15xx() && dev && (strncmp(dev_name(dev), "ohci", 4) == 0))
-
-#define __arch_pfn_to_dma(dev, pfn)	\
-	({ dma_addr_t __dma = __pfn_to_phys(pfn); \
-	   if (is_lbus_device(dev)) \
-		__dma = __dma - PHYS_OFFSET + OMAP1510_LB_OFFSET; \
-	   __dma; })
-
-#define __arch_dma_to_pfn(dev, addr)	\
-	({ dma_addr_t __dma = addr;				\
-	   if (is_lbus_device(dev))				\
-		__dma += PHYS_OFFSET - OMAP1510_LB_OFFSET;	\
-	   __phys_to_pfn(__dma);				\
-	})
-
-#define __arch_dma_to_virt(dev, addr)	({ (void *) (is_lbus_device(dev) ? \
-						lbus_to_virt(addr) : \
-						__phys_to_virt(addr)); })
-
-#define __arch_virt_to_dma(dev, addr)	({ unsigned long __addr = (unsigned long)(addr); \
-					   (dma_addr_t) (is_lbus_device(dev) ? \
-						virt_to_lbus(__addr) : \
-						__virt_to_phys(__addr)); })
-
-#endif	/* CONFIG_ARCH_OMAP15XX */
-
 #endif
diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c
index 499b8ac..422527b 100644
--- a/arch/arm/mach-omap1/io.c
+++ b/arch/arm/mach-omap1/io.c
@@ -19,6 +19,7 @@
 #include <mach/mux.h>
 #include <mach/tc.h>
 #include <linux/omap-dma.h>
+#include <linux/dma-mapping.h>
 
 #include "iomap.h"
 #include "common.h"
@@ -86,10 +87,61 @@
 #endif
 
 /*
+ * Bus address is physical address, except for OMAP-1510 Local Bus.
+ * OMAP-1510 bus address is translated into a Local Bus address if the
+ * OMAP bus type is lbus. We do the address translation based on the
+ * device overriding the defaults used in the dma-mapping API.
+ * Note that the is_lbus_device() test is not very efficient on 1510
+ * because of the strncmp().
+ */
+#if defined(CONFIG_ARCH_OMAP15XX)
+
+#define virt_to_lbus(x)		((x) - PAGE_OFFSET + OMAP1510_LB_OFFSET)
+#define lbus_to_virt(x)		((x) - OMAP1510_LB_OFFSET + PAGE_OFFSET)
+#define is_lbus_device(dev)	(cpu_is_omap15xx() && dev && (strncmp(dev_name(dev), "ohci", 4) == 0))
+
+static dma_addr_t omap1_pfn_to_dma(struct device *dev, unsigned long pfn)
+{
+	dma_addr_t __dma = __pfn_to_phys(pfn);
+
+	if (is_lbus_device(dev))
+		__dma = __dma - PHYS_OFFSET + OMAP1510_LB_OFFSET;
+	return __dma;
+}
+static unsigned long omap1_dma_to_pfn(struct device *dev, dma_addr_t addr)
+{
+	dma_addr_t __dma = addr;
+
+	if (is_lbus_device(dev))
+		__dma += PHYS_OFFSET - OMAP1510_LB_OFFSET;
+	return __phys_to_pfn(__dma);
+}
+
+static void *omap1_dma_to_virt(struct device *dev, dma_addr_t addr)
+{
+	return (void *) (is_lbus_device(dev) ? lbus_to_virt(addr) : __phys_to_virt(addr));
+}
+
+static dma_addr_t omap1_virt_to_dma(struct device *dev, void *addr)
+{
+	unsigned long __addr = (unsigned long)(addr);
+	return (dma_addr_t) (is_lbus_device(dev) ? virt_to_lbus(__addr) : __virt_to_phys(__addr));
+}
+
+#endif	/* CONFIG_ARCH_OMAP15XX */
+
+/*
  * Maps common IO regions for omap1
  */
 static void __init omap1_map_common_io(void)
 {
+#if defined(CONFIG_ARCH_OMAP15XX)
+	__arch_pfn_to_dma = omap1_pfn_to_dma;
+	__arch_dma_to_pfn = omap1_dma_to_pfn;
+	__arch_dma_to_virt = omap1_dma_to_virt;
+	__arch_virt_to_dma = omap1_virt_to_dma;
+#endif
+
 	iotable_init(omap_io_desc, ARRAY_SIZE(omap_io_desc));
 }
 
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 8a1b5e0..f7a6fd3 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2791,9 +2791,7 @@
 	sz = sizeof(struct omap_hwmod_link) * LINKS_PER_OCP_IF;
 
 	*sl = NULL;
-	*ml = alloc_bootmem(sz);
-
-	memset(*ml, 0, sz);
+	*ml = memblock_virt_alloc(sz, 0);
 
 	*sl = (void *)(*ml) + sizeof(struct omap_hwmod_link);
 
@@ -2912,9 +2910,7 @@
 	pr_debug("omap_hwmod: %s: allocating %d byte linkspace (%d links)\n",
 		 __func__, sz, max_ls);
 
-	linkspace = alloc_bootmem(sz);
-
-	memset(linkspace, 0, sz);
+	linkspace = memblock_virt_alloc(sz, 0);
 
 	return 0;
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f61a570..74a807b 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -39,6 +39,35 @@
 
 #include "mm.h"
 
+static inline dma_addr_t __pfn_to_dma(struct device *dev, unsigned long pfn)
+{
+	return (dma_addr_t)__pfn_to_bus(pfn);
+}
+
+static inline unsigned long __dma_to_pfn(struct device *dev, dma_addr_t addr)
+{
+	return __bus_to_pfn(addr);
+}
+
+static inline void *__dma_to_virt(struct device *dev, dma_addr_t addr)
+{
+	return (void *)__bus_to_virt((unsigned long)addr);
+}
+
+static inline dma_addr_t __virt_to_dma(struct device *dev, void *addr)
+{
+	return (dma_addr_t)__virt_to_bus((unsigned long)(addr));
+}
+
+dma_addr_t (*__arch_pfn_to_dma)(struct device *dev, unsigned long pfn) = __pfn_to_dma;
+EXPORT_SYMBOL(__arch_pfn_to_dma);
+unsigned long (*__arch_dma_to_pfn)(struct device *dev, dma_addr_t addr) = __dma_to_pfn;
+EXPORT_SYMBOL(__arch_dma_to_pfn);
+void* (*__arch_dma_to_virt)(struct device *dev, dma_addr_t addr) = __dma_to_virt;
+EXPORT_SYMBOL(__arch_dma_to_virt);
+dma_addr_t (*__arch_virt_to_dma)(struct device *dev, void *addr) = __virt_to_dma;
+EXPORT_SYMBOL(__arch_virt_to_dma);
+
 /*
  * The DMA API is built upon the notion of "buffer ownership".  A buffer
  * is either exclusively owned by the CPU (and therefore may be accessed
@@ -888,7 +917,7 @@
 static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
 	size_t size, enum dma_data_direction dir)
 {
-	unsigned long paddr;
+	phys_addr_t paddr;
 
 	dma_cache_maint_page(page, off, size, dir, dmac_map_area);
 
@@ -904,7 +933,7 @@
 static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 	size_t size, enum dma_data_direction dir)
 {
-	unsigned long paddr = page_to_phys(page) + off;
+	phys_addr_t paddr = page_to_phys(page) + off;
 
 	/* FIXME: non-speculating: not required */
 	/* don't bother invalidating if DMA to device */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 3e8f106..b7371bf 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -145,58 +145,6 @@
 	*max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]);
 }
 
-static void __init arm_bootmem_init(unsigned long start_pfn,
-	unsigned long end_pfn)
-{
-	struct memblock_region *reg;
-	unsigned int boot_pages;
-	phys_addr_t bitmap;
-	pg_data_t *pgdat;
-
-	/*
-	 * Allocate the bootmem bitmap page.  This must be in a region
-	 * of memory which has already been mapped.
-	 */
-	boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
-				__pfn_to_phys(end_pfn));
-
-	/*
-	 * Initialise the bootmem allocator, handing the
-	 * memory banks over to bootmem.
-	 */
-	node_set_online(0);
-	pgdat = NODE_DATA(0);
-	init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);
-
-	/* Free the lowmem regions from memblock into bootmem. */
-	for_each_memblock(memory, reg) {
-		unsigned long start = memblock_region_memory_base_pfn(reg);
-		unsigned long end = memblock_region_memory_end_pfn(reg);
-
-		if (end >= end_pfn)
-			end = end_pfn;
-		if (start >= end)
-			break;
-
-		free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
-	}
-
-	/* Reserve the lowmem memblock reserved regions in bootmem. */
-	for_each_memblock(reserved, reg) {
-		unsigned long start = memblock_region_reserved_base_pfn(reg);
-		unsigned long end = memblock_region_reserved_end_pfn(reg);
-
-		if (end >= end_pfn)
-			end = end_pfn;
-		if (start >= end)
-			break;
-
-		reserve_bootmem(__pfn_to_phys(start),
-			        (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
-	}
-}
-
 #ifdef CONFIG_ZONE_DMA
 
 phys_addr_t arm_dma_zone_size __read_mostly;
@@ -236,7 +184,7 @@
 #endif
 }
 
-static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
+static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
 	unsigned long max_high)
 {
 	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
@@ -384,7 +332,6 @@
 	dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
 
 	arm_memblock_steal_permitted = false;
-	memblock_allow_resize();
 	memblock_dump_all();
 }
 
@@ -392,12 +339,11 @@
 {
 	unsigned long min, max_low, max_high;
 
+	memblock_allow_resize();
 	max_low = max_high = 0;
 
 	find_limits(&min, &max_low, &max_high);
 
-	arm_bootmem_init(min, max_low);
-
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(),
 	 * so must be done after the fixed reservations
@@ -414,7 +360,7 @@
 	 * the sparse mem_map arrays initialized by sparse_init()
 	 * for memmap_init_zone(), otherwise all PFNs are invalid.
 	 */
-	arm_bootmem_free(min, max_low, max_high);
+	zone_sizes_init(min, max_low, max_high);
 
 	/*
 	 * This doesn't seem to be used by the Linux memory manager any
@@ -461,7 +407,7 @@
 	 * free the section of the memmap array.
 	 */
 	if (pg < pgend)
-		free_bootmem(pg, pgend - pg);
+		memblock_free_early(pg, pgend - pg);
 }
 
 /*
@@ -587,7 +533,7 @@
 	extern u32 itcm_end;
 #endif
 
-	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
+	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
 	/* this will put all unused low memory onto the freelists */
 	free_unused_memmap(&meminfo);
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index e2dbcb7..83a7995 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -91,7 +91,7 @@
 
 	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
+	for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) {
 		start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
 				PAGE_SIZE, corruption_check_size);
 		end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 174da5f..988c00a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1120,7 +1120,7 @@
 		nr_pages += end_pfn - start_pfn;
 	}
 
-	for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
+	for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) {
 		start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
 		end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
 		if (start_pfn < end_pfn)
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 8dabbed..1e9da79 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -74,7 +74,7 @@
 	u64 i;
 	phys_addr_t this_start, this_end;
 
-	for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
+	for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
 		this_start = clamp_t(phys_addr_t, this_start, start, end);
 		this_end = clamp_t(phys_addr_t, this_end, start, end);
 		if (this_start < this_end) {
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index f895a8c..92c5937 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -22,7 +22,6 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
-#include <linux/bootmem.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>
diff --git a/drivers/clk/keystone/gate.c b/drivers/clk/keystone/gate.c
index 1f333bc..86f1e36 100644
--- a/drivers/clk/keystone/gate.c
+++ b/drivers/clk/keystone/gate.c
@@ -179,6 +179,7 @@
 
 	init.name = name;
 	init.ops = &clk_psc_ops;
+	init.flags = 0;
 	init.parent_names = (parent_name ? &parent_name : NULL);
 	init.num_parents = (parent_name ? 1 : 0);
 
@@ -223,8 +224,7 @@
 	data->domain_base = of_iomap(node, i);
 	if (!data->domain_base) {
 		pr_err("%s: domain ioremap failed\n", __func__);
-		iounmap(data->control_base);
-		goto out;
+		goto unmap_ctrl;
 	}
 
 	of_property_read_u32(node, "domain-id", &data->domain_id);
@@ -237,16 +237,21 @@
 	parent_name = of_clk_get_parent_name(node, 0);
 	if (!parent_name) {
 		pr_err("%s: Parent clock not found\n", __func__);
-		goto out;
+		goto unmap_domain;
 	}
 
 	clk = clk_register_psc(NULL, clk_name, parent_name, data, lock);
-	if (clk) {
+	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		return;
 	}
 
 	pr_err("%s: error registering clk %s\n", __func__, node->name);
+
+unmap_domain:
+	iounmap(data->domain_base);
+unmap_ctrl:
+	iounmap(data->control_base);
 out:
 	kfree(data);
 	return;
diff --git a/drivers/clk/keystone/pll.c b/drivers/clk/keystone/pll.c
index 47a1bd9..0dd8a4b 100644
--- a/drivers/clk/keystone/pll.c
+++ b/drivers/clk/keystone/pll.c
@@ -24,6 +24,8 @@
 #define MAIN_PLLM_HIGH_MASK	0x7f000
 #define PLLM_HIGH_SHIFT		6
 #define PLLD_MASK		0x3f
+#define CLKOD_MASK		0x780000
+#define CLKOD_SHIFT		19
 
 /**
  * struct clk_pll_data - pll data structure
@@ -41,7 +43,10 @@
  * @pllm_upper_mask: multiplier upper mask
  * @pllm_upper_shift: multiplier upper shift
  * @plld_mask: divider mask
- * @postdiv: Post divider
+ * @clkod_mask: output divider mask
+ * @clkod_shift: output divider shift
+ * @plld_mask: divider mask
+ * @postdiv: Fixed post divider
  */
 struct clk_pll_data {
 	bool has_pllctrl;
@@ -53,6 +58,8 @@
 	u32 pllm_upper_mask;
 	u32 pllm_upper_shift;
 	u32 plld_mask;
+	u32 clkod_mask;
+	u32 clkod_shift;
 	u32 postdiv;
 };
 
@@ -90,7 +97,13 @@
 	mult |= ((val & pll_data->pllm_upper_mask)
 			>> pll_data->pllm_upper_shift);
 	prediv = (val & pll_data->plld_mask);
-	postdiv = pll_data->postdiv;
+
+	if (!pll_data->has_pllctrl)
+		/* read post divider from od bits*/
+		postdiv = ((val & pll_data->clkod_mask) >>
+				 pll_data->clkod_shift) + 1;
+	else
+		postdiv = pll_data->postdiv;
 
 	rate /= (prediv + 1);
 	rate = (rate * (mult + 1));
@@ -155,8 +168,11 @@
 	}
 
 	parent_name = of_clk_get_parent_name(node, 0);
-	if (of_property_read_u32(node, "fixed-postdiv",	&pll_data->postdiv))
-		goto out;
+	if (of_property_read_u32(node, "fixed-postdiv",	&pll_data->postdiv)) {
+		/* assume the PLL has output divider register bits */
+		pll_data->clkod_mask = CLKOD_MASK;
+		pll_data->clkod_shift = CLKOD_SHIFT;
+	}
 
 	i = of_property_match_string(node, "reg-names", "control");
 	pll_data->pll_ctl0 = of_iomap(node, i);
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 33621ef..2acf3fc 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -36,3 +36,4 @@
 obj-$(CONFIG_ARM_GLOBAL_TIMER)		+= arm_global_timer.o
 obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
 obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST)	+= dummy_timer.o
+obj-$(CONFIG_ARCH_KEYSTONE)		+= timer-keystone.o
diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c
new file mode 100644
index 0000000..2299666
--- /dev/null
+++ b/drivers/clocksource/timer-keystone.c
@@ -0,0 +1,233 @@
+/*
+ * Keystone broadcast clock-event
+ *
+ * Copyright 2013 Texas Instruments, Inc.
+ *
+ * Author: Ivan Khoronzhuk <ivan.khoronzhuk@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define TIMER_NAME			"timer-keystone"
+
+/* Timer register offsets */
+#define TIM12				0x10
+#define TIM34				0x14
+#define PRD12				0x18
+#define PRD34				0x1c
+#define TCR				0x20
+#define TGCR				0x24
+#define INTCTLSTAT			0x44
+
+/* Timer register bitfields */
+#define TCR_ENAMODE_MASK		0xC0
+#define TCR_ENAMODE_ONESHOT_MASK	0x40
+#define TCR_ENAMODE_PERIODIC_MASK	0x80
+
+#define TGCR_TIM_UNRESET_MASK		0x03
+#define INTCTLSTAT_ENINT_MASK		0x01
+
+/**
+ * struct keystone_timer: holds timer's data
+ * @base: timer memory base address
+ * @hz_period: cycles per HZ period
+ * @event_dev: event device based on timer
+ */
+static struct keystone_timer {
+	void __iomem *base;
+	unsigned long hz_period;
+	struct clock_event_device event_dev;
+} timer;
+
+static inline u32 keystone_timer_readl(unsigned long rg)
+{
+	return readl_relaxed(timer.base + rg);
+}
+
+static inline void keystone_timer_writel(u32 val, unsigned long rg)
+{
+	writel_relaxed(val, timer.base + rg);
+}
+
+/**
+ * keystone_timer_config: configures timer to work in oneshot/periodic modes.
+ * @ mode: mode to configure
+ * @ period: cycles number to configure for
+ */
+static int keystone_timer_config(u64 period, enum clock_event_mode mode)
+{
+	u32 tcr;
+	u32 off;
+
+	tcr = keystone_timer_readl(TCR);
+	off = tcr & ~(TCR_ENAMODE_MASK);
+
+	/* set enable mode */
+	switch (mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+		tcr |= TCR_ENAMODE_ONESHOT_MASK;
+		break;
+	case CLOCK_EVT_MODE_PERIODIC:
+		tcr |= TCR_ENAMODE_PERIODIC_MASK;
+		break;
+	default:
+		return -1;
+	}
+
+	/* disable timer */
+	keystone_timer_writel(off, TCR);
+	/* here we have to be sure the timer has been disabled */
+	__iowmb();
+
+	/* reset counter to zero, set new period */
+	keystone_timer_writel(0, TIM12);
+	keystone_timer_writel(0, TIM34);
+	keystone_timer_writel(period & 0xffffffff, PRD12);
+	keystone_timer_writel(period >> 32, PRD34);
+
+	/*
+	 * enable timer
+	 * here we have to be sure that CNTLO, CNTHI, PRDLO, PRDHI registers
+	 * have been written.
+	 */
+	__iowmb();
+	keystone_timer_writel(tcr, TCR);
+	return 0;
+}
+
+static void keystone_timer_disable(void)
+{
+	u32 tcr;
+
+	tcr = keystone_timer_readl(TCR);
+
+	/* disable timer */
+	tcr &= ~(TCR_ENAMODE_MASK);
+	keystone_timer_writel(tcr, TCR);
+}
+
+static irqreturn_t keystone_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+
+	evt->event_handler(evt);
+	return IRQ_HANDLED;
+}
+
+static int keystone_set_next_event(unsigned long cycles,
+				  struct clock_event_device *evt)
+{
+	return keystone_timer_config(cycles, evt->mode);
+}
+
+static void keystone_set_mode(enum clock_event_mode mode,
+			     struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		keystone_timer_config(timer.hz_period, CLOCK_EVT_MODE_PERIODIC);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_ONESHOT:
+		keystone_timer_disable();
+		break;
+	default:
+		break;
+	}
+}
+
+static void __init keystone_timer_init(struct device_node *np)
+{
+	struct clock_event_device *event_dev = &timer.event_dev;
+	unsigned long rate;
+	struct clk *clk;
+	int irq, error;
+	u32 tgcr;
+
+	irq  = irq_of_parse_and_map(np, 0);
+	if (irq == NO_IRQ) {
+		pr_err("%s: failed to map interrupts\n", __func__);
+		return;
+	}
+
+	timer.base = of_iomap(np, 0);
+	if (!timer.base) {
+		pr_err("%s: failed to map registers\n", __func__);
+		return;
+	}
+
+	clk = of_clk_get(np, 0);
+	if (!clk) {
+		pr_err("%s: failed to get clock\n", __func__);
+		iounmap(timer.base);
+		return;
+	}
+
+	error = clk_prepare_enable(clk);
+	if (error) {
+		pr_err("%s: failed to enable clock\n", __func__);
+		goto err;
+	}
+
+	rate = clk_get_rate(clk);
+
+	/* disable, use internal clock source */
+	keystone_timer_writel(0, TCR);
+	/* here we have to be sure the timer has been disabled */
+	__iowmb();
+
+	/* reset timer as 64-bit, no pre-scaler, plus features are disabled */
+	tgcr = 0;
+	keystone_timer_writel(0, TGCR);
+
+	/* unreset timer */
+	tgcr |= TGCR_TIM_UNRESET_MASK;
+	keystone_timer_writel(tgcr, TGCR);
+
+	/* init counter to zero */
+	keystone_timer_writel(0, TIM12);
+	keystone_timer_writel(0, TIM34);
+
+	timer.hz_period = DIV_ROUND_UP(rate, HZ);
+
+	/* enable timer interrupts */
+	keystone_timer_writel(INTCTLSTAT_ENINT_MASK, INTCTLSTAT);
+
+	error = request_irq(irq, keystone_timer_interrupt, IRQF_TIMER,
+			    TIMER_NAME, event_dev);
+	if (error) {
+		pr_err("%s: failed to setup irq\n", __func__);
+		goto err;
+	}
+
+	/* setup clockevent */
+	event_dev->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+	event_dev->set_next_event = keystone_set_next_event;
+	event_dev->set_mode = keystone_set_mode;
+	event_dev->cpumask = cpu_all_mask;
+	event_dev->owner = THIS_MODULE;
+	event_dev->name = TIMER_NAME;
+	event_dev->irq = irq;
+
+	clockevents_config_and_register(event_dev, rate, 1, ULONG_MAX);
+
+	pr_info("keystone timer clock @%lu Hz\n", rate);
+	return;
+err:
+	clk_put(clk);
+	iounmap(timer.base);
+}
+
+CLOCKSOURCE_OF_DECLARE(keystone_timer, "ti,keystone-timer",
+					keystone_timer_init);
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index e2e04b0..17cf96c 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -324,7 +324,7 @@
 {
 	struct firmware_map_entry *entry;
 
-	entry = alloc_bootmem(sizeof(struct firmware_map_entry));
+	entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0);
 	if (WARN_ON(!entry))
 		return -ENOMEM;
 
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 0f04444..7c75f5a 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -115,6 +115,13 @@
 	help
 	  Say yes here to support GPIO on CLPS711X SoCs.
 
+config GPIO_DAVINCI
+	bool "TI Davinci/Keystone GPIO support"
+	default y if ARCH_DAVINCI
+	depends on ARM && (ARCH_DAVINCI || ARCH_KEYSTONE)
+	help
+	  Say yes here to enable GPIO support for TI Davinci/Keystone SoCs.
+
 config GPIO_GENERIC_PLATFORM
 	tristate "Generic memory-mapped GPIO controller support (MMIO platform device)"
 	select GPIO_GENERIC
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 7971e36..28a0ffc 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -22,7 +22,7 @@
 obj-$(CONFIG_GPIO_CS5535)	+= gpio-cs5535.o
 obj-$(CONFIG_GPIO_DA9052)	+= gpio-da9052.o
 obj-$(CONFIG_GPIO_DA9055)	+= gpio-da9055.o
-obj-$(CONFIG_ARCH_DAVINCI)	+= gpio-davinci.o
+obj-$(CONFIG_GPIO_DAVINCI)	+= gpio-davinci.o
 obj-$(CONFIG_GPIO_EM)		+= gpio-em.o
 obj-$(CONFIG_GPIO_EP93XX)	+= gpio-ep93xx.o
 obj-$(CONFIG_GPIO_F7188X)	+= gpio-f7188x.o
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 84be701..099b982 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -16,8 +16,13 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/gpio-davinci.h>
+#include <linux/irqchip/chained_irq.h>
 
 struct davinci_gpio_regs {
 	u32	dir;
@@ -32,6 +37,8 @@
 	u32	intstat;
 };
 
+typedef struct irq_chip *(*gpio_get_irq_chip_cb_t)(unsigned int irq);
+
 #define BINTEN	0x8 /* GPIO Interrupt Per-Bank Enable Register */
 
 #define chip2controller(chip)	\
@@ -82,14 +89,14 @@
 	u32 mask = 1 << offset;
 
 	spin_lock_irqsave(&d->lock, flags);
-	temp = __raw_readl(&g->dir);
+	temp = readl_relaxed(&g->dir);
 	if (out) {
 		temp &= ~mask;
-		__raw_writel(mask, value ? &g->set_data : &g->clr_data);
+		writel_relaxed(mask, value ? &g->set_data : &g->clr_data);
 	} else {
 		temp |= mask;
 	}
-	__raw_writel(temp, &g->dir);
+	writel_relaxed(temp, &g->dir);
 	spin_unlock_irqrestore(&d->lock, flags);
 
 	return 0;
@@ -118,7 +125,7 @@
 	struct davinci_gpio_controller *d = chip2controller(chip);
 	struct davinci_gpio_regs __iomem *g = d->regs;
 
-	return (1 << offset) & __raw_readl(&g->in_data);
+	return (1 << offset) & readl_relaxed(&g->in_data);
 }
 
 /*
@@ -130,7 +137,41 @@
 	struct davinci_gpio_controller *d = chip2controller(chip);
 	struct davinci_gpio_regs __iomem *g = d->regs;
 
-	__raw_writel((1 << offset), value ? &g->set_data : &g->clr_data);
+	writel_relaxed((1 << offset), value ? &g->set_data : &g->clr_data);
+}
+
+static struct davinci_gpio_platform_data *
+davinci_gpio_get_pdata(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct davinci_gpio_platform_data *pdata;
+	int ret;
+	u32 val;
+
+	if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node)
+		return pdev->dev.platform_data;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	ret = of_property_read_u32(dn, "ti,ngpio", &val);
+	if (ret)
+		goto of_err;
+
+	pdata->ngpio = val;
+
+	ret = of_property_read_u32(dn, "ti,davinci-gpio-unbanked", &val);
+	if (ret)
+		goto of_err;
+
+	pdata->gpio_unbanked = val;
+
+	return pdata;
+
+of_err:
+	dev_err(&pdev->dev, "Populating pdata from DT failed: err %d\n", ret);
+	return NULL;
 }
 
 static int davinci_gpio_probe(struct platform_device *pdev)
@@ -143,12 +184,14 @@
 	struct device *dev = &pdev->dev;
 	struct resource *res;
 
-	pdata = dev->platform_data;
+	pdata = davinci_gpio_get_pdata(pdev);
 	if (!pdata) {
 		dev_err(dev, "No platform data found\n");
 		return -EINVAL;
 	}
 
+	dev->platform_data = pdata;
+
 	/*
 	 * The gpio banks conceptually expose a segmented bitmap,
 	 * and "ngpio" is one more than the largest zero-based
@@ -160,8 +203,8 @@
 		return -EINVAL;
 	}
 
-	if (WARN_ON(DAVINCI_N_GPIO < ngpio))
-		ngpio = DAVINCI_N_GPIO;
+	if (WARN_ON(ARCH_NR_GPIOS < ngpio))
+		ngpio = ARCH_NR_GPIOS;
 
 	chips = devm_kzalloc(dev,
 			     ngpio * sizeof(struct davinci_gpio_controller),
@@ -172,11 +215,6 @@
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "Invalid memory resource\n");
-		return -EBUSY;
-	}
-
 	gpio_base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(gpio_base))
 		return PTR_ERR(gpio_base);
@@ -194,6 +232,9 @@
 		if (chips[i].chip.ngpio > 32)
 			chips[i].chip.ngpio = 32;
 
+#ifdef CONFIG_OF_GPIO
+		chips[i].chip.of_node = dev->of_node;
+#endif
 		spin_lock_init(&chips[i].lock);
 
 		regs = gpio2regs(base);
@@ -227,8 +268,8 @@
 	struct davinci_gpio_regs __iomem *g = irq2regs(d->irq);
 	u32 mask = (u32) irq_data_get_irq_handler_data(d);
 
-	__raw_writel(mask, &g->clr_falling);
-	__raw_writel(mask, &g->clr_rising);
+	writel_relaxed(mask, &g->clr_falling);
+	writel_relaxed(mask, &g->clr_rising);
 }
 
 static void gpio_irq_enable(struct irq_data *d)
@@ -242,9 +283,9 @@
 		status = IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING;
 
 	if (status & IRQ_TYPE_EDGE_FALLING)
-		__raw_writel(mask, &g->set_falling);
+		writel_relaxed(mask, &g->set_falling);
 	if (status & IRQ_TYPE_EDGE_RISING)
-		__raw_writel(mask, &g->set_rising);
+		writel_relaxed(mask, &g->set_rising);
 }
 
 static int gpio_irq_type(struct irq_data *d, unsigned trigger)
@@ -278,34 +319,28 @@
 		mask <<= 16;
 
 	/* temporarily mask (level sensitive) parent IRQ */
-	desc->irq_data.chip->irq_mask(&desc->irq_data);
-	desc->irq_data.chip->irq_ack(&desc->irq_data);
+	chained_irq_enter(irq_desc_get_chip(desc), desc);
 	while (1) {
 		u32		status;
-		int		n;
-		int		res;
+		int		bit;
 
 		/* ack any irqs */
-		status = __raw_readl(&g->intstat) & mask;
+		status = readl_relaxed(&g->intstat) & mask;
 		if (!status)
 			break;
-		__raw_writel(status, &g->intstat);
+		writel_relaxed(status, &g->intstat);
 
 		/* now demux them to the right lowlevel handler */
-		n = d->irq_base;
-		if (irq & 1) {
-			n += 16;
-			status >>= 16;
-		}
 
 		while (status) {
-			res = ffs(status);
-			n += res;
-			generic_handle_irq(n - 1);
-			status >>= res;
+			bit = __ffs(status);
+			status &= ~BIT(bit);
+			generic_handle_irq(
+				irq_find_mapping(d->irq_domain,
+						 d->chip.base + bit));
 		}
 	}
-	desc->irq_data.chip->irq_unmask(&desc->irq_data);
+	chained_irq_exit(irq_desc_get_chip(desc), desc);
 	/* now it may re-trigger */
 }
 
@@ -313,10 +348,10 @@
 {
 	struct davinci_gpio_controller *d = chip2controller(chip);
 
-	if (d->irq_base >= 0)
-		return d->irq_base + offset;
+	if (d->irq_domain)
+		return irq_create_mapping(d->irq_domain, d->chip.base + offset);
 	else
-		return -ENODEV;
+		return -ENXIO;
 }
 
 static int gpio_to_irq_unbanked(struct gpio_chip *chip, unsigned offset)
@@ -346,14 +381,55 @@
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
 
-	__raw_writel(mask, (trigger & IRQ_TYPE_EDGE_FALLING)
+	writel_relaxed(mask, (trigger & IRQ_TYPE_EDGE_FALLING)
 		     ? &g->set_falling : &g->clr_falling);
-	__raw_writel(mask, (trigger & IRQ_TYPE_EDGE_RISING)
+	writel_relaxed(mask, (trigger & IRQ_TYPE_EDGE_RISING)
 		     ? &g->set_rising : &g->clr_rising);
 
 	return 0;
 }
 
+static int
+davinci_gpio_irq_map(struct irq_domain *d, unsigned int irq,
+		     irq_hw_number_t hw)
+{
+	struct davinci_gpio_regs __iomem *g = gpio2regs(hw);
+
+	irq_set_chip_and_handler_name(irq, &gpio_irqchip, handle_simple_irq,
+				"davinci_gpio");
+	irq_set_irq_type(irq, IRQ_TYPE_NONE);
+	irq_set_chip_data(irq, (__force void *)g);
+	irq_set_handler_data(irq, (void *)__gpio_mask(hw));
+	set_irq_flags(irq, IRQF_VALID);
+
+	return 0;
+}
+
+static const struct irq_domain_ops davinci_gpio_irq_ops = {
+	.map = davinci_gpio_irq_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_chip *davinci_gpio_get_irq_chip(unsigned int irq)
+{
+	static struct irq_chip_type gpio_unbanked;
+
+	gpio_unbanked = *container_of(irq_get_chip(irq),
+				      struct irq_chip_type, chip);
+
+	return &gpio_unbanked.chip;
+};
+
+static struct irq_chip *keystone_gpio_get_irq_chip(unsigned int irq)
+{
+	static struct irq_chip gpio_unbanked;
+
+	gpio_unbanked = *irq_get_chip(irq);
+	return &gpio_unbanked;
+};
+
+static const struct of_device_id davinci_gpio_ids[];
+
 /*
  * NOTE:  for suspend/resume, probably best to make a platform_device with
  * suspend_late/resume_resume calls hooking into results of the set_wake()
@@ -364,7 +440,7 @@
 
 static int davinci_gpio_irq_setup(struct platform_device *pdev)
 {
-	unsigned	gpio, irq, bank;
+	unsigned	gpio, bank;
 	struct clk	*clk;
 	u32		binten = 0;
 	unsigned	ngpio, bank_irq;
@@ -373,6 +449,20 @@
 	struct davinci_gpio_controller *chips = platform_get_drvdata(pdev);
 	struct davinci_gpio_platform_data *pdata = dev->platform_data;
 	struct davinci_gpio_regs __iomem *g;
+	struct irq_domain	*irq_domain = NULL;
+	int		irq;
+	const struct of_device_id *match;
+	struct irq_chip *irq_chip;
+	gpio_get_irq_chip_cb_t gpio_get_irq_chip;
+
+	/*
+	 * Use davinci_gpio_get_irq_chip by default to handle non DT cases
+	 */
+	gpio_get_irq_chip = davinci_gpio_get_irq_chip;
+	match = of_match_device(of_match_ptr(davinci_gpio_ids),
+				dev);
+	if (match)
+		gpio_get_irq_chip = (gpio_get_irq_chip_cb_t)match->data;
 
 	ngpio = pdata->ngpio;
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
@@ -396,6 +486,22 @@
 	}
 	clk_prepare_enable(clk);
 
+	if (!pdata->gpio_unbanked) {
+		irq = irq_alloc_descs(-1, 0, ngpio, 0);
+		if (irq < 0) {
+			dev_err(dev, "Couldn't allocate IRQ numbers\n");
+			return irq;
+		}
+
+		irq_domain = irq_domain_add_legacy(NULL, ngpio, irq, 0,
+							&davinci_gpio_irq_ops,
+							chips);
+		if (!irq_domain) {
+			dev_err(dev, "Couldn't register an IRQ domain\n");
+			return -ENODEV;
+		}
+	}
+
 	/*
 	 * Arrange gpio_to_irq() support, handling either direct IRQs or
 	 * banked IRQs.  Having GPIOs in the first GPIO bank use direct
@@ -404,9 +510,7 @@
 	 */
 	for (gpio = 0, bank = 0; gpio < ngpio; bank++, gpio += 32) {
 		chips[bank].chip.to_irq = gpio_to_irq_banked;
-		chips[bank].irq_base = pdata->gpio_unbanked
-			? -EINVAL
-			: (pdata->intc_irq_num + gpio);
+		chips[bank].irq_domain = irq_domain;
 	}
 
 	/*
@@ -415,8 +519,6 @@
 	 * IRQ mux conflicts; gpio_irq_type_unbanked() is only for GPIOs.
 	 */
 	if (pdata->gpio_unbanked) {
-		static struct irq_chip_type gpio_unbanked;
-
 		/* pass "bank 0" GPIO IRQs to AINTC */
 		chips[0].chip.to_irq = gpio_to_irq_unbanked;
 		chips[0].gpio_irq = bank_irq;
@@ -425,19 +527,18 @@
 
 		/* AINTC handles mask/unmask; GPIO handles triggering */
 		irq = bank_irq;
-		gpio_unbanked = *container_of(irq_get_chip(irq),
-					      struct irq_chip_type, chip);
-		gpio_unbanked.chip.name = "GPIO-AINTC";
-		gpio_unbanked.chip.irq_set_type = gpio_irq_type_unbanked;
+		irq_chip = gpio_get_irq_chip(irq);
+		irq_chip->name = "GPIO-AINTC";
+		irq_chip->irq_set_type = gpio_irq_type_unbanked;
 
 		/* default trigger: both edges */
 		g = gpio2regs(0);
-		__raw_writel(~0, &g->set_falling);
-		__raw_writel(~0, &g->set_rising);
+		writel_relaxed(~0, &g->set_falling);
+		writel_relaxed(~0, &g->set_rising);
 
 		/* set the direct IRQs up to use that irqchip */
 		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++, irq++) {
-			irq_set_chip(irq, &gpio_unbanked.chip);
+			irq_set_chip(irq, irq_chip);
 			irq_set_handler_data(irq, &chips[gpio / 32]);
 			irq_set_status_flags(irq, IRQ_TYPE_EDGE_BOTH);
 		}
@@ -449,15 +550,11 @@
 	 * Or, AINTC can handle IRQs for banks of 16 GPIO IRQs, which we
 	 * then chain through our own handler.
 	 */
-	for (gpio = 0, irq = gpio_to_irq(0), bank = 0;
-			gpio < ngpio;
-			bank++, bank_irq++) {
-		unsigned		i;
-
+	for (gpio = 0, bank = 0; gpio < ngpio; bank++, bank_irq++, gpio += 16) {
 		/* disabled by default, enabled only as needed */
 		g = gpio2regs(gpio);
-		__raw_writel(~0, &g->clr_falling);
-		__raw_writel(~0, &g->clr_rising);
+		writel_relaxed(~0, &g->clr_falling);
+		writel_relaxed(~0, &g->clr_rising);
 
 		/* set up all irqs in this bank */
 		irq_set_chained_handler(bank_irq, gpio_irq_handler);
@@ -469,14 +566,6 @@
 		 */
 		irq_set_handler_data(bank_irq, &chips[gpio / 32]);
 
-		for (i = 0; i < 16 && gpio < ngpio; i++, irq++, gpio++) {
-			irq_set_chip(irq, &gpio_irqchip);
-			irq_set_chip_data(irq, (__force void *)g);
-			irq_set_handler_data(irq, (void *)__gpio_mask(gpio));
-			irq_set_handler(irq, handle_simple_irq);
-			set_irq_flags(irq, IRQF_VALID);
-		}
-
 		binten |= BIT(bank);
 	}
 
@@ -485,18 +574,26 @@
 	 * BINTEN -- per-bank interrupt enable. genirq would also let these
 	 * bits be set/cleared dynamically.
 	 */
-	__raw_writel(binten, gpio_base + BINTEN);
-
-	printk(KERN_INFO "DaVinci: %d gpio irqs\n", irq - gpio_to_irq(0));
+	writel_relaxed(binten, gpio_base + BINTEN);
 
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_OF)
+static const struct of_device_id davinci_gpio_ids[] = {
+	{ .compatible = "ti,keystone-gpio", keystone_gpio_get_irq_chip},
+	{ .compatible = "ti,dm6441-gpio", davinci_gpio_get_irq_chip},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, davinci_gpio_ids);
+#endif
+
 static struct platform_driver davinci_gpio_driver = {
 	.probe		= davinci_gpio_probe,
 	.driver		= {
-		.name	= "davinci_gpio",
-		.owner	= THIS_MODULE,
+		.name		= "davinci_gpio",
+		.owner		= THIS_MODULE,
+		.of_match_table	= of_match_ptr(davinci_gpio_ids),
 	},
 };
 
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 29a11db..7bc3982 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -7,6 +7,17 @@
 
 if MEMORY
 
+config TI_AEMIF
+	tristate "Texas Instruments AEMIF driver"
+	depends on (ARCH_DAVINCI || ARCH_KEYSTONE) && OF
+	help
+	  This driver is for the AEMIF module available in Texas Instruments
+	  SoCs. AEMIF stands for Asynchronous External Memory Interface and
+	  is intended to provide a glue-less interface to a variety of
+	  asynchronuous memory devices like ASRAM, NOR and NAND memory. A total
+	  of 256M bytes of any of these memories can be accessed at a given
+	  time via four chip selects with 64M byte access per chip select.
+
 config TI_EMIF
 	tristate "Texas Instruments EMIF driver"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index 969d923..d4e150c 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -5,6 +5,7 @@
 ifeq ($(CONFIG_DDR),y)
 obj-$(CONFIG_OF)		+= of_memory.o
 endif
+obj-$(CONFIG_TI_AEMIF)		+= ti-aemif.o
 obj-$(CONFIG_TI_EMIF)		+= emif.o
 obj-$(CONFIG_MVEBU_DEVBUS)	+= mvebu-devbus.o
 obj-$(CONFIG_TEGRA20_MC)	+= tegra20-mc.o
diff --git a/drivers/memory/ti-aemif.c b/drivers/memory/ti-aemif.c
new file mode 100644
index 0000000..8d15d87
--- /dev/null
+++ b/drivers/memory/ti-aemif.c
@@ -0,0 +1,429 @@
+/*
+ * TI AEMIF driver
+ *
+ * Copyright (C) 2010 - 2013 Texas Instruments Incorporated. http://www.ti.com/
+ *
+ * Authors:
+ * Murali Karicheri <m-karicheri2@ti.com>
+ * Ivan Khoronzhuk <ivan.khoronzhuk@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#define TA_SHIFT	2
+#define RHOLD_SHIFT	4
+#define RSTROBE_SHIFT	7
+#define RSETUP_SHIFT	13
+#define WHOLD_SHIFT	17
+#define WSTROBE_SHIFT	20
+#define WSETUP_SHIFT	26
+#define EW_SHIFT	30
+#define SS_SHIFT	31
+
+#define TA(x)		((x) << TA_SHIFT)
+#define RHOLD(x)	((x) << RHOLD_SHIFT)
+#define RSTROBE(x)	((x) << RSTROBE_SHIFT)
+#define RSETUP(x)	((x) << RSETUP_SHIFT)
+#define WHOLD(x)	((x) << WHOLD_SHIFT)
+#define WSTROBE(x)	((x) << WSTROBE_SHIFT)
+#define WSETUP(x)	((x) << WSETUP_SHIFT)
+#define EW(x)		((x) << EW_SHIFT)
+#define SS(x)		((x) << SS_SHIFT)
+
+#define ASIZE_MAX	0x1
+#define TA_MAX		0x3
+#define RHOLD_MAX	0x7
+#define RSTROBE_MAX	0x3f
+#define RSETUP_MAX	0xf
+#define WHOLD_MAX	0x7
+#define WSTROBE_MAX	0x3f
+#define WSETUP_MAX	0xf
+#define EW_MAX		0x1
+#define SS_MAX		0x1
+#define NUM_CS		4
+
+#define TA_VAL(x)	(((x) & TA(TA_MAX)) >> TA_SHIFT)
+#define RHOLD_VAL(x)	(((x) & RHOLD(RHOLD_MAX)) >> RHOLD_SHIFT)
+#define RSTROBE_VAL(x)	(((x) & RSTROBE(RSTROBE_MAX)) >> RSTROBE_SHIFT)
+#define RSETUP_VAL(x)	(((x) & RSETUP(RSETUP_MAX)) >> RSETUP_SHIFT)
+#define WHOLD_VAL(x)	(((x) & WHOLD(WHOLD_MAX)) >> WHOLD_SHIFT)
+#define WSTROBE_VAL(x)	(((x) & WSTROBE(WSTROBE_MAX)) >> WSTROBE_SHIFT)
+#define WSETUP_VAL(x)	(((x) & WSETUP(WSETUP_MAX)) >> WSETUP_SHIFT)
+#define EW_VAL(x)	(((x) & EW(EW_MAX)) >> EW_SHIFT)
+#define SS_VAL(x)	(((x) & SS(SS_MAX)) >> SS_SHIFT)
+
+#define NRCSR_OFFSET	0x00
+#define AWCCR_OFFSET	0x04
+#define A1CR_OFFSET	0x10
+
+#define ACR_ASIZE_MASK	0x3
+#define ACR_EW_MASK	BIT(30)
+#define ACR_SS_MASK	BIT(31)
+#define ASIZE_16BIT	1
+
+#define CONFIG_MASK	(TA(TA_MAX) | \
+				RHOLD(RHOLD_MAX) | \
+				RSTROBE(RSTROBE_MAX) |	\
+				RSETUP(RSETUP_MAX) | \
+				WHOLD(WHOLD_MAX) | \
+				WSTROBE(WSTROBE_MAX) | \
+				WSETUP(WSETUP_MAX) | \
+				EW(EW_MAX) | SS(SS_MAX) | \
+				ASIZE_MAX)
+
+#define DRV_NAME	"ti-aemif"
+
+/**
+ * struct aemif_cs_data: structure to hold cs parameters
+ * @cs: chip-select number
+ * @wstrobe: write strobe width, ns
+ * @rstrobe: read strobe width, ns
+ * @wsetup: write setup width, ns
+ * @whold: write hold width, ns
+ * @rsetup: read setup width, ns
+ * @rhold: read hold width, ns
+ * @ta: minimum turn around time, ns
+ * @enable_ss: enable/disable select strobe mode
+ * @enable_ew: enable/disable extended wait mode
+ * @asize: width of the asynchronous device's data bus
+ */
+struct aemif_cs_data {
+	u8	cs;
+	u16	wstrobe;
+	u16	rstrobe;
+	u8	wsetup;
+	u8	whold;
+	u8	rsetup;
+	u8	rhold;
+	u8	ta;
+	u8	enable_ss;
+	u8	enable_ew;
+	u8	asize;
+};
+
+/**
+ * struct aemif_device: structure to hold device data
+ * @base: base address of AEMIF registers
+ * @clk: source clock
+ * @clk_rate: clock's rate in kHz
+ * @num_cs: number of assigned chip-selects
+ * @cs_offset: start number of cs nodes
+ * @cs_data: array of chip-select settings
+ */
+struct aemif_device {
+	void __iomem *base;
+	struct clk *clk;
+	unsigned long clk_rate;
+	u8 num_cs;
+	int cs_offset;
+	struct aemif_cs_data cs_data[NUM_CS];
+};
+
+/**
+ * aemif_calc_rate - calculate timing data.
+ * @pdev: platform device to calculate for
+ * @wanted: The cycle time needed in nanoseconds.
+ * @clk: The input clock rate in kHz.
+ * @max: The maximum divider value that can be programmed.
+ *
+ * On success, returns the calculated timing value minus 1 for easy
+ * programming into AEMIF timing registers, else negative errno.
+ */
+static int aemif_calc_rate(struct platform_device *pdev, int wanted,
+			   unsigned long clk, int max)
+{
+	int result;
+
+	result = DIV_ROUND_UP((wanted * clk), NSEC_PER_MSEC) - 1;
+
+	dev_dbg(&pdev->dev, "%s: result %d from %ld, %d\n", __func__, result,
+		clk, wanted);
+
+	/* It is generally OK to have a more relaxed timing than requested... */
+	if (result < 0)
+		result = 0;
+
+	/* ... But configuring tighter timings is not an option. */
+	else if (result > max)
+		result = -EINVAL;
+
+	return result;
+}
+
+/**
+ * aemif_config_abus - configure async bus parameters
+ * @pdev: platform device to configure for
+ * @csnum: aemif chip select number
+ *
+ * This function programs the given timing values (in real clock) into the
+ * AEMIF registers taking the AEMIF clock into account.
+ *
+ * This function does not use any locking while programming the AEMIF
+ * because it is expected that there is only one user of a given
+ * chip-select.
+ *
+ * Returns 0 on success, else negative errno.
+ */
+static int aemif_config_abus(struct platform_device *pdev, int csnum)
+{
+	struct aemif_device *aemif = platform_get_drvdata(pdev);
+	struct aemif_cs_data *data = &aemif->cs_data[csnum];
+	int ta, rhold, rstrobe, rsetup, whold, wstrobe, wsetup;
+	unsigned long clk_rate = aemif->clk_rate;
+	unsigned offset;
+	u32 set, val;
+
+	offset = A1CR_OFFSET + (data->cs - aemif->cs_offset) * 4;
+
+	ta	= aemif_calc_rate(pdev, data->ta, clk_rate, TA_MAX);
+	rhold	= aemif_calc_rate(pdev, data->rhold, clk_rate, RHOLD_MAX);
+	rstrobe	= aemif_calc_rate(pdev, data->rstrobe, clk_rate, RSTROBE_MAX);
+	rsetup	= aemif_calc_rate(pdev, data->rsetup, clk_rate, RSETUP_MAX);
+	whold	= aemif_calc_rate(pdev, data->whold, clk_rate, WHOLD_MAX);
+	wstrobe	= aemif_calc_rate(pdev, data->wstrobe, clk_rate, WSTROBE_MAX);
+	wsetup	= aemif_calc_rate(pdev, data->wsetup, clk_rate, WSETUP_MAX);
+
+	if (ta < 0 || rhold < 0 || rstrobe < 0 || rsetup < 0 ||
+	    whold < 0 || wstrobe < 0 || wsetup < 0) {
+		dev_err(&pdev->dev, "%s: cannot get suitable timings\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	set = TA(ta) | RHOLD(rhold) | RSTROBE(rstrobe) | RSETUP(rsetup) |
+		WHOLD(whold) | WSTROBE(wstrobe) | WSETUP(wsetup);
+
+	set |= (data->asize & ACR_ASIZE_MASK);
+	if (data->enable_ew)
+		set |= ACR_EW_MASK;
+	if (data->enable_ss)
+		set |= ACR_SS_MASK;
+
+	val = readl(aemif->base + offset);
+	val &= ~CONFIG_MASK;
+	val |= set;
+	writel(val, aemif->base + offset);
+
+	return 0;
+}
+
+static inline int aemif_cycles_to_nsec(int val, unsigned long clk_rate)
+{
+	return ((val + 1) * NSEC_PER_MSEC) / clk_rate;
+}
+
+/**
+ * aemif_get_hw_params - function to read hw register values
+ * @pdev: platform device to read for
+ * @csnum: aemif chip select number
+ *
+ * This function reads the defaults from the registers and update
+ * the timing values. Required for get/set commands and also for
+ * the case when driver needs to use defaults in hardware.
+ */
+static void aemif_get_hw_params(struct platform_device *pdev, int csnum)
+{
+	struct aemif_device *aemif = platform_get_drvdata(pdev);
+	struct aemif_cs_data *data = &aemif->cs_data[csnum];
+	unsigned long clk_rate = aemif->clk_rate;
+	u32 val, offset;
+
+	offset = A1CR_OFFSET + (data->cs - aemif->cs_offset) * 4;
+	val = readl(aemif->base + offset);
+
+	data->ta = aemif_cycles_to_nsec(TA_VAL(val), clk_rate);
+	data->rhold = aemif_cycles_to_nsec(RHOLD_VAL(val), clk_rate);
+	data->rstrobe = aemif_cycles_to_nsec(RSTROBE_VAL(val), clk_rate);
+	data->rsetup = aemif_cycles_to_nsec(RSETUP_VAL(val), clk_rate);
+	data->whold = aemif_cycles_to_nsec(WHOLD_VAL(val), clk_rate);
+	data->wstrobe = aemif_cycles_to_nsec(WSTROBE_VAL(val), clk_rate);
+	data->wsetup = aemif_cycles_to_nsec(WSETUP_VAL(val), clk_rate);
+	data->enable_ew = EW_VAL(val);
+	data->enable_ss = SS_VAL(val);
+	data->asize = val & ASIZE_MAX;
+}
+
+/**
+ * of_aemif_parse_abus_config - parse CS configuration from DT
+ * @pdev: platform device to parse for
+ * @np: device node ptr
+ *
+ * This function update the emif async bus configuration based on the values
+ * configured in a cs device binding node.
+ */
+static int of_aemif_parse_abus_config(struct platform_device *pdev,
+				      struct device_node *np)
+{
+	struct aemif_device *aemif = platform_get_drvdata(pdev);
+	struct aemif_cs_data *data;
+	u32 cs;
+	u32 val;
+
+	if (of_property_read_u32(np, "ti,cs-chipselect", &cs)) {
+		dev_dbg(&pdev->dev, "cs property is required");
+		return -EINVAL;
+	}
+
+	if (cs - aemif->cs_offset >= NUM_CS || cs < aemif->cs_offset) {
+		dev_dbg(&pdev->dev, "cs number is incorrect %d", cs);
+		return -EINVAL;
+	}
+
+	if (aemif->num_cs >= NUM_CS) {
+		dev_dbg(&pdev->dev, "cs count is more than %d", NUM_CS);
+		return -EINVAL;
+	}
+
+	data = &aemif->cs_data[aemif->num_cs];
+	data->cs = cs;
+
+	/* read the current value in the hw register */
+	aemif_get_hw_params(pdev, aemif->num_cs++);
+
+	/* override the values from device node */
+	if (!of_property_read_u32(np, "ti,cs-ta", &val))
+		data->ta = val;
+
+	if (!of_property_read_u32(np, "ti,cs-rhold", &val))
+		data->rhold = val;
+
+	if (!of_property_read_u32(np, "ti,cs-rstrobe", &val))
+		data->rstrobe = val;
+
+	if (!of_property_read_u32(np, "ti,cs-rsetup", &val))
+		data->rsetup = val;
+
+	if (!of_property_read_u32(np, "ti,cs-whold", &val))
+		data->whold = val;
+
+	if (!of_property_read_u32(np, "ti,cs-wstrobe", &val))
+		data->wstrobe = val;
+
+	if (!of_property_read_u32(np, "ti,cs-wsetup", &val))
+		data->wsetup = val;
+
+	if (!of_property_read_u32(np, "ti,bus-width", &val))
+		if (val == 16)
+			data->asize = 1;
+	data->enable_ew = of_property_read_bool(np, "ti,cs-ew");
+	data->enable_ss = of_property_read_bool(np, "ti,cs-ss");
+	return 0;
+}
+
+static const struct of_device_id aemif_of_match[] = {
+	{ .compatible = "ti,davinci-aemif", },
+	{ .compatible = "ti,da850-aemif", },
+	{},
+};
+
+static int aemif_probe(struct platform_device *pdev)
+{
+	int ret  = -ENODEV, i;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *child_np;
+	struct aemif_device *aemif;
+
+	if (np == NULL)
+		return 0;
+
+	aemif = devm_kzalloc(dev, sizeof(*aemif), GFP_KERNEL);
+	if (!aemif) {
+		dev_err(dev, "cannot allocate memory for aemif\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, aemif);
+
+	aemif->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(aemif->clk)) {
+		dev_err(dev, "cannot get clock 'aemif'\n");
+		return PTR_ERR(aemif->clk);
+	}
+
+	clk_prepare_enable(aemif->clk);
+	aemif->clk_rate = clk_get_rate(aemif->clk) / MSEC_PER_SEC;
+
+	if (of_device_is_compatible(np, "ti,da850-aemif"))
+		aemif->cs_offset = 2;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	aemif->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(aemif->base)) {
+		ret = PTR_ERR(aemif->base);
+		goto error;
+	}
+
+	/*
+	 * For every controller device node, there is a cs device node that
+	 * describe the bus configuration parameters. This functions iterate
+	 * over these nodes and update the cs data array.
+	 */
+	for_each_available_child_of_node(np, child_np) {
+		ret = of_aemif_parse_abus_config(pdev, child_np);
+		if (ret < 0)
+			goto error;
+	}
+
+	for (i = 0; i < aemif->num_cs; i++) {
+		ret = aemif_config_abus(pdev, i);
+		if (ret < 0) {
+			dev_err(dev, "Error configuring chip select %d\n",
+				aemif->cs_data[i].cs);
+			goto error;
+		}
+	}
+
+	/*
+	 * Create a child devices explicitly from here to
+	 * guarantee that the child will be probed after the AEMIF timing
+	 * parameters are set.
+	 */
+	for_each_available_child_of_node(np, child_np) {
+		ret = of_platform_populate(child_np, NULL, NULL, dev);
+		if (ret < 0)
+			goto error;
+	}
+
+	return 0;
+error:
+	clk_disable_unprepare(aemif->clk);
+	return ret;
+}
+
+static int aemif_remove(struct platform_device *pdev)
+{
+	struct aemif_device *aemif = platform_get_drvdata(pdev);
+	clk_disable_unprepare(aemif->clk);
+	return 0;
+}
+
+static struct platform_driver aemif_driver = {
+	.probe = aemif_probe,
+	.remove = aemif_remove,
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(aemif_of_match),
+	},
+};
+
+module_platform_driver(aemif_driver);
+
+MODULE_AUTHOR("Murali Karicheri <m-karicheri2@ti.com>");
+MODULE_AUTHOR("Ivan Khoronzhuk <ivan.khoronzhuk@ti.com>");
+MODULE_DESCRIPTION("Texas Instruments AEMIF driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 357bbc5..3e049c1 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -197,7 +197,7 @@
 	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
 
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
-		limit = dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
+		limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
 
 	mq->card = card;
 	mq->queue = blk_init_queue(mmc_request_fn, lock);
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 93ae6a6..35f3ea3 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -464,11 +464,11 @@
 	  for NAND Flash using FLCTL.
 
 config MTD_NAND_DAVINCI
-        tristate "Support NAND on DaVinci SoC"
-        depends on ARCH_DAVINCI
+        tristate "Support NAND on DaVinci/Keystone SoC"
+        depends on ARCH_DAVINCI || (ARCH_KEYSTONE && TI_AEMIF)
         help
 	  Enable the driver for NAND flash chips on Texas Instruments
-	  DaVinci processors.
+	  DaVinci/Keystone processors.
 
 config MTD_NAND_TXX9NDFMC
 	tristate "NAND Flash support for TXx9 SoC"
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index b77a01e..936f1df 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/of_device.h>
 #include <linux/of.h>
+#include <linux/of_mtd.h>
 
 #include <linux/platform_data/mtd-davinci.h>
 #include <linux/platform_data/mtd-davinci-aemif.h>
@@ -487,7 +488,7 @@
  * ten ECC bytes plus the manufacturer's bad block marker byte, and
  * and not overlapping the default BBT markers.
  */
-static struct nand_ecclayout hwecc4_small __initconst = {
+static struct nand_ecclayout hwecc4_small = {
 	.eccbytes = 10,
 	.eccpos = { 0, 1, 2, 3, 4,
 		/* offset 5 holds the badblock marker */
@@ -503,7 +504,7 @@
  * storing ten ECC bytes plus the manufacturer's bad block marker byte,
  * and not overlapping the default BBT markers.
  */
-static struct nand_ecclayout hwecc4_2048 __initconst = {
+static struct nand_ecclayout hwecc4_2048 = {
 	.eccbytes = 40,
 	.eccpos = {
 		/* at the end of spare sector */
@@ -534,17 +535,19 @@
 		struct davinci_nand_pdata *pdata;
 		const char *mode;
 		u32 prop;
-		int len;
 
 		pdata =  devm_kzalloc(&pdev->dev,
 				sizeof(struct davinci_nand_pdata),
 				GFP_KERNEL);
 		pdev->dev.platform_data = pdata;
 		if (!pdata)
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 		if (!of_property_read_u32(pdev->dev.of_node,
 			"ti,davinci-chipselect", &prop))
 			pdev->id = prop;
+		else
+			return ERR_PTR(-EINVAL);
+
 		if (!of_property_read_u32(pdev->dev.of_node,
 			"ti,davinci-mask-ale", &prop))
 			pdata->mask_ale = prop;
@@ -555,6 +558,8 @@
 			"ti,davinci-mask-chipsel", &prop))
 			pdata->mask_chipsel = prop;
 		if (!of_property_read_string(pdev->dev.of_node,
+			"nand-ecc-mode", &mode) ||
+		    !of_property_read_string(pdev->dev.of_node,
 			"ti,davinci-ecc-mode", &mode)) {
 			if (!strncmp("none", mode, 4))
 				pdata->ecc_mode = NAND_ECC_NONE;
@@ -566,12 +571,16 @@
 		if (!of_property_read_u32(pdev->dev.of_node,
 			"ti,davinci-ecc-bits", &prop))
 			pdata->ecc_bits = prop;
-		if (!of_property_read_u32(pdev->dev.of_node,
+
+		prop = of_get_nand_bus_width(pdev->dev.of_node);
+		if (0 < prop || !of_property_read_u32(pdev->dev.of_node,
 			"ti,davinci-nand-buswidth", &prop))
 			if (prop == 16)
 				pdata->options |= NAND_BUSWIDTH_16;
-		if (of_find_property(pdev->dev.of_node,
-			"ti,davinci-nand-use-bbt", &len))
+		if (of_property_read_bool(pdev->dev.of_node,
+			"nand-on-flash-bbt") ||
+		    of_property_read_bool(pdev->dev.of_node,
+			"ti,davinci-nand-use-bbt"))
 			pdata->bbt_options = NAND_BBT_USE_FLASH;
 	}
 
@@ -585,7 +594,7 @@
 }
 #endif
 
-static int __init nand_davinci_probe(struct platform_device *pdev)
+static int nand_davinci_probe(struct platform_device *pdev)
 {
 	struct davinci_nand_pdata	*pdata;
 	struct davinci_nand_info	*info;
@@ -598,6 +607,9 @@
 	nand_ecc_modes_t		ecc_mode;
 
 	pdata = nand_davinci_get_pdata(pdev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+
 	/* insist on board-specific configuration */
 	if (!pdata)
 		return -ENODEV;
@@ -609,8 +621,7 @@
 	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		dev_err(&pdev->dev, "unable to allocate memory\n");
-		ret = -ENOMEM;
-		goto err_nomem;
+		return -ENOMEM;
 	}
 
 	platform_set_drvdata(pdev, info);
@@ -619,19 +630,23 @@
 	res2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (!res1 || !res2) {
 		dev_err(&pdev->dev, "resource missing\n");
-		ret = -EINVAL;
-		goto err_nomem;
+		return -EINVAL;
 	}
 
 	vaddr = devm_ioremap_resource(&pdev->dev, res1);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		goto err_ioremap;
-	}
-	base = devm_ioremap_resource(&pdev->dev, res2);
-	if (IS_ERR(base)) {
-		ret = PTR_ERR(base);
-		goto err_ioremap;
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	/*
+	 * This registers range is used to setup NAND settings. In case with
+	 * TI AEMIF driver, the same memory address range is requested already
+	 * by AEMIF, so we cannot request it twice, just ioremap.
+	 * The AEMIF and NAND drivers not use the same registers in this range.
+	 */
+	base = devm_ioremap(&pdev->dev, res2->start, resource_size(res2));
+	if (!base) {
+		dev_err(&pdev->dev, "ioremap failed for resource %pR\n", res2);
+		return -EADDRNOTAVAIL;
 	}
 
 	info->dev		= &pdev->dev;
@@ -699,7 +714,7 @@
 			spin_unlock_irq(&davinci_nand_lock);
 
 			if (ret == -EBUSY)
-				goto err_ecc;
+				return ret;
 
 			info->chip.ecc.calculate = nand_davinci_calculate_4bit;
 			info->chip.ecc.correct = nand_davinci_correct_4bit;
@@ -715,8 +730,7 @@
 		info->chip.ecc.strength = pdata->ecc_bits;
 		break;
 	default:
-		ret = -EINVAL;
-		goto err_ecc;
+		return -EINVAL;
 	}
 	info->chip.ecc.mode = ecc_mode;
 
@@ -724,7 +738,7 @@
 	if (IS_ERR(info->clk)) {
 		ret = PTR_ERR(info->clk);
 		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
-		goto err_clk;
+		return ret;
 	}
 
 	ret = clk_prepare_enable(info->clk);
@@ -734,28 +748,6 @@
 		goto err_clk_enable;
 	}
 
-	/*
-	 * Setup Async configuration register in case we did not boot from
-	 * NAND and so bootloader did not bother to set it up.
-	 */
-	val = davinci_nand_readl(info, A1CR_OFFSET + info->core_chipsel * 4);
-
-	/* Extended Wait is not valid and Select Strobe mode is not used */
-	val &= ~(ACR_ASIZE_MASK | ACR_EW_MASK | ACR_SS_MASK);
-	if (info->chip.options & NAND_BUSWIDTH_16)
-		val |= 0x1;
-
-	davinci_nand_writel(info, A1CR_OFFSET + info->core_chipsel * 4, val);
-
-	ret = 0;
-	if (info->timing)
-		ret = davinci_aemif_setup_timing(info->timing, info->base,
-							info->core_chipsel);
-	if (ret < 0) {
-		dev_dbg(&pdev->dev, "NAND timing values setup fail\n");
-		goto err_timing;
-	}
-
 	spin_lock_irq(&davinci_nand_lock);
 
 	/* put CSxNAND into NAND mode */
@@ -769,7 +761,7 @@
 	ret = nand_scan_ident(&info->mtd, pdata->mask_chipsel ? 2 : 1, NULL);
 	if (ret < 0) {
 		dev_dbg(&pdev->dev, "no NAND chip(s) found\n");
-		goto err_scan;
+		goto err;
 	}
 
 	/* Update ECC layout if needed ... for 1-bit HW ECC, the default
@@ -783,7 +775,7 @@
 		if (!chunks || info->mtd.oobsize < 16) {
 			dev_dbg(&pdev->dev, "too small\n");
 			ret = -EINVAL;
-			goto err_scan;
+			goto err;
 		}
 
 		/* For small page chips, preserve the manufacturer's
@@ -814,7 +806,7 @@
 		dev_warn(&pdev->dev, "no 4-bit ECC support yet "
 				"for 4KiB-page NAND\n");
 		ret = -EIO;
-		goto err_scan;
+		goto err;
 
 syndrome_done:
 		info->chip.ecc.layout = &info->ecclayout;
@@ -822,7 +814,7 @@
 
 	ret = nand_scan_tail(&info->mtd);
 	if (ret < 0)
-		goto err_scan;
+		goto err;
 
 	if (pdata->parts)
 		ret = mtd_device_parse_register(&info->mtd, NULL, NULL,
@@ -835,7 +827,7 @@
 						NULL, 0);
 	}
 	if (ret < 0)
-		goto err_scan;
+		goto err;
 
 	val = davinci_nand_readl(info, NRCSR_OFFSET);
 	dev_info(&pdev->dev, "controller rev. %d.%d\n",
@@ -843,8 +835,7 @@
 
 	return 0;
 
-err_scan:
-err_timing:
+err:
 	clk_disable_unprepare(info->clk);
 
 err_clk_enable:
@@ -852,15 +843,10 @@
 	if (ecc_mode == NAND_ECC_HW_SYNDROME)
 		ecc4_busy = false;
 	spin_unlock_irq(&davinci_nand_lock);
-
-err_ecc:
-err_clk:
-err_ioremap:
-err_nomem:
 	return ret;
 }
 
-static int __exit nand_davinci_remove(struct platform_device *pdev)
+static int nand_davinci_remove(struct platform_device *pdev)
 {
 	struct davinci_nand_info *info = platform_get_drvdata(pdev);
 
@@ -877,7 +863,8 @@
 }
 
 static struct platform_driver nand_davinci_driver = {
-	.remove		= __exit_p(nand_davinci_remove),
+	.probe		= nand_davinci_probe,
+	.remove		= nand_davinci_remove,
 	.driver		= {
 		.name	= "davinci_nand",
 		.owner	= THIS_MODULE,
@@ -886,7 +873,7 @@
 };
 MODULE_ALIAS("platform:davinci_nand");
 
-module_platform_driver_probe(nand_davinci_driver, nand_davinci_probe);
+module_platform_driver(nand_davinci_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Texas Instruments");
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7bd7f0d..62ec84b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1684,7 +1684,7 @@
 
 	host_dev = scsi_get_device(shost);
 	if (host_dev && host_dev->dma_mask)
-		bounce_limit = dma_max_pfn(host_dev) << PAGE_SHIFT;
+		bounce_limit = (u64)dma_max_pfn(host_dev) << PAGE_SHIFT;
 
 	return bounce_limit;
 }
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index eb1f1ef..58bcff0 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -159,7 +159,6 @@
 	tristate "Texas Instruments DaVinci/DA8x/OMAP-L/AM1x SoC SPI controller"
 	depends on ARCH_DAVINCI || ARCH_KEYSTONE
 	select SPI_BITBANG
-	select TI_EDMA
 	help
 	  SPI master controller for DaVinci/DA8x/OMAP-L/AM1x SPI modules.
 
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 50b2d88..5e7389f 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -396,10 +396,6 @@
 	dspi = spi_master_get_devdata(spi->master);
 	pdata = &dspi->pdata;
 
-	/* if bits per word length is zero then set it default 8 */
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	if (!(spi->mode & SPI_NO_CS)) {
 		if ((pdata->chip_sel == NULL) ||
 		    (pdata->chip_sel[spi->chip_select] == SPI_INTERN_CS))
@@ -853,7 +849,7 @@
 	struct spi_master *master;
 	struct davinci_spi *dspi;
 	struct davinci_spi_platform_data *pdata;
-	struct resource *r, *mem;
+	struct resource *r;
 	resource_size_t dma_rx_chan = SPI_NO_RESOURCE;
 	resource_size_t	dma_tx_chan = SPI_NO_RESOURCE;
 	int i = 0, ret = 0;
@@ -894,39 +890,33 @@
 
 	dspi->pbase = r->start;
 
-	mem = request_mem_region(r->start, resource_size(r), pdev->name);
-	if (mem == NULL) {
-		ret = -EBUSY;
+	dspi->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(dspi->base)) {
+		ret = PTR_ERR(dspi->base);
 		goto free_master;
 	}
 
-	dspi->base = ioremap(r->start, resource_size(r));
-	if (dspi->base == NULL) {
-		ret = -ENOMEM;
-		goto release_region;
-	}
-
 	dspi->irq = platform_get_irq(pdev, 0);
 	if (dspi->irq <= 0) {
 		ret = -EINVAL;
-		goto unmap_io;
+		goto free_master;
 	}
 
-	ret = request_threaded_irq(dspi->irq, davinci_spi_irq, dummy_thread_fn,
-				 0, dev_name(&pdev->dev), dspi);
+	ret = devm_request_threaded_irq(&pdev->dev, dspi->irq, davinci_spi_irq,
+				dummy_thread_fn, 0, dev_name(&pdev->dev), dspi);
 	if (ret)
-		goto unmap_io;
+		goto free_master;
 
 	dspi->bitbang.master = master;
 	if (dspi->bitbang.master == NULL) {
 		ret = -ENODEV;
-		goto irq_free;
+		goto free_master;
 	}
 
-	dspi->clk = clk_get(&pdev->dev, NULL);
+	dspi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dspi->clk)) {
 		ret = -ENODEV;
-		goto irq_free;
+		goto free_master;
 	}
 	clk_prepare_enable(dspi->clk);
 
@@ -963,8 +953,8 @@
 			goto free_clk;
 
 		dev_info(&pdev->dev, "DMA: supported\n");
-		dev_info(&pdev->dev, "DMA: RX channel: %d, TX channel: %d, "
-				"event queue: %d\n", dma_rx_chan, dma_tx_chan,
+		dev_info(&pdev->dev, "DMA: RX channel: %pa, TX channel: %pa, "
+				"event queue: %d\n", &dma_rx_chan, &dma_tx_chan,
 				pdata->dma_event_q);
 	}
 
@@ -1015,13 +1005,6 @@
 	dma_release_channel(dspi->dma_tx);
 free_clk:
 	clk_disable_unprepare(dspi->clk);
-	clk_put(dspi->clk);
-irq_free:
-	free_irq(dspi->irq, dspi);
-unmap_io:
-	iounmap(dspi->base);
-release_region:
-	release_mem_region(dspi->pbase, resource_size(r));
 free_master:
 	spi_master_put(master);
 err:
@@ -1041,7 +1024,6 @@
 {
 	struct davinci_spi *dspi;
 	struct spi_master *master;
-	struct resource *r;
 
 	master = platform_get_drvdata(pdev);
 	dspi = spi_master_get_devdata(master);
@@ -1049,11 +1031,6 @@
 	spi_bitbang_stop(&dspi->bitbang);
 
 	clk_disable_unprepare(dspi->clk);
-	clk_put(dspi->clk);
-	free_irq(dspi->irq, dspi);
-	iounmap(dspi->base);
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(dspi->pbase, resource_size(r));
 	spi_master_put(master);
 
 	return 0;
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 8641b03..649f9d1 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -373,9 +373,6 @@
 	if (!spi->max_speed_hz)
 		return -EINVAL;
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	return dspi_setup_transfer(spi, NULL);
 }
 
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 3adebfa..79e5aa2 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -111,14 +111,6 @@
 	return 0;
 }
 
-static int mxs_spi_setup(struct spi_device *dev)
-{
-	if (!dev->bits_per_word)
-		dev->bits_per_word = 8;
-
-	return 0;
-}
-
 static u32 mxs_spi_cs_to_reg(unsigned cs)
 {
 	u32 select = 0;
@@ -502,7 +494,6 @@
 		return -ENOMEM;
 
 	master->transfer_one_message = mxs_spi_transfer_one;
-	master->setup = mxs_spi_setup;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->mode_bits = SPI_CPOL | SPI_CPHA;
 	master->num_chipselect = 3;
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 9e829ce..9769bff 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -771,8 +771,6 @@
 {
 	struct rspi_data *rspi = spi_master_get_devdata(spi->master);
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
 	rspi->max_speed_hz = spi->max_speed_hz;
 
 	set_config_register(rspi, 8);
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 9eda21d..c4e561f 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -254,9 +254,6 @@
 
 static int sc18is602_setup(struct spi_device *spi)
 {
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	if (spi->mode & ~(SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST))
 		return -EINVAL;
 
diff --git a/drivers/spi/spi-sh.c b/drivers/spi/spi-sh.c
index c120a70..86a17d6 100644
--- a/drivers/spi/spi-sh.c
+++ b/drivers/spi/spi-sh.c
@@ -358,9 +358,6 @@
 {
 	struct spi_sh_data *ss = spi_master_get_devdata(spi->master);
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	pr_debug("%s: enter\n", __func__);
 
 	spi_sh_write(ss, 0xfe, SPI_SH_CR1);	/* SPI sycle stop */
diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c
index ed5e501..e430689 100644
--- a/drivers/spi/spi-sirf.c
+++ b/drivers/spi/spi-sirf.c
@@ -536,16 +536,9 @@
 
 static int spi_sirfsoc_setup(struct spi_device *spi)
 {
-	struct sirfsoc_spi *sspi;
-
 	if (!spi->max_speed_hz)
 		return -EINVAL;
 
-	sspi = spi_master_get_devdata(spi->master);
-
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	return spi_sirfsoc_setup_transfer(spi, NULL);
 }
 
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 4461313..9322de9 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -466,12 +466,6 @@
 
 static int pch_spi_setup(struct spi_device *pspi)
 {
-	/* check bits per word */
-	if (pspi->bits_per_word == 0) {
-		pspi->bits_per_word = 8;
-		dev_dbg(&pspi->dev, "%s 8 bits per word\n", __func__);
-	}
-
 	/* Check baud rate setting */
 	/* if baud rate of chip is greater than
 	   max we can support,return error */
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 70fc430..e2c730f 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -70,6 +70,13 @@
 	  One such PCIe-based platform is Synopsys' PCIe HAPS model of
 	  this IP.
 
+config USB_DWC3_KEYSTONE
+	tristate "Texas Instruments Keystone2 Platforms"
+	default USB_DWC3
+	help
+	  Support of USB2/3 functionality in TI Keystone2 platforms.
+	  Say 'Y' or 'M' here if you have one such device
+
 comment "Debugging features"
 
 config USB_DWC3_DEBUG
diff --git a/drivers/usb/dwc3/Makefile b/drivers/usb/dwc3/Makefile
index dd17601..10ac3e7 100644
--- a/drivers/usb/dwc3/Makefile
+++ b/drivers/usb/dwc3/Makefile
@@ -32,3 +32,4 @@
 obj-$(CONFIG_USB_DWC3_OMAP)		+= dwc3-omap.o
 obj-$(CONFIG_USB_DWC3_EXYNOS)		+= dwc3-exynos.o
 obj-$(CONFIG_USB_DWC3_PCI)		+= dwc3-pci.o
+obj-$(CONFIG_USB_DWC3_KEYSTONE)		+= dwc3-keystone.o
diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c
new file mode 100644
index 0000000..1fad161
--- /dev/null
+++ b/drivers/usb/dwc3/dwc3-keystone.c
@@ -0,0 +1,202 @@
+/**
+ * dwc3-keystone.c - Keystone Specific Glue layer
+ *
+ * Copyright (C) 2010-2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Author: WingMan Kwok <w-kwok2@ti.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+
+/* USBSS register offsets */
+#define USBSS_REVISION		0x0000
+#define USBSS_SYSCONFIG		0x0010
+#define USBSS_IRQ_EOI		0x0018
+#define USBSS_IRQSTATUS_RAW_0	0x0020
+#define USBSS_IRQSTATUS_0	0x0024
+#define USBSS_IRQENABLE_SET_0	0x0028
+#define USBSS_IRQENABLE_CLR_0	0x002c
+
+/* IRQ register bits */
+#define USBSS_IRQ_EOI_LINE(n)	BIT(n)
+#define USBSS_IRQ_EVENT_ST	BIT(0)
+#define USBSS_IRQ_COREIRQ_EN	BIT(0)
+#define USBSS_IRQ_COREIRQ_CLR	BIT(0)
+
+static u64 kdwc3_dma_mask;
+
+struct dwc3_keystone {
+	struct device			*dev;
+	struct clk			*clk;
+	void __iomem			*usbss;
+};
+
+static inline u32 kdwc3_readl(void __iomem *base, u32 offset)
+{
+	return readl(base + offset);
+}
+
+static inline void kdwc3_writel(void __iomem *base, u32 offset, u32 value)
+{
+	writel(value, base + offset);
+}
+
+static void kdwc3_enable_irqs(struct dwc3_keystone *kdwc)
+{
+	u32 val;
+
+	val = kdwc3_readl(kdwc->usbss, USBSS_IRQENABLE_SET_0);
+	val |= USBSS_IRQ_COREIRQ_EN;
+	kdwc3_writel(kdwc->usbss, USBSS_IRQENABLE_SET_0, val);
+}
+
+static void kdwc3_disable_irqs(struct dwc3_keystone *kdwc)
+{
+	u32 val;
+
+	val = kdwc3_readl(kdwc->usbss, USBSS_IRQENABLE_SET_0);
+	val &= ~USBSS_IRQ_COREIRQ_EN;
+	kdwc3_writel(kdwc->usbss, USBSS_IRQENABLE_SET_0, val);
+}
+
+static irqreturn_t dwc3_keystone_interrupt(int irq, void *_kdwc)
+{
+	struct dwc3_keystone	*kdwc = _kdwc;
+
+	kdwc3_writel(kdwc->usbss, USBSS_IRQENABLE_CLR_0, USBSS_IRQ_COREIRQ_CLR);
+	kdwc3_writel(kdwc->usbss, USBSS_IRQSTATUS_0, USBSS_IRQ_EVENT_ST);
+	kdwc3_writel(kdwc->usbss, USBSS_IRQENABLE_SET_0, USBSS_IRQ_COREIRQ_EN);
+	kdwc3_writel(kdwc->usbss, USBSS_IRQ_EOI, USBSS_IRQ_EOI_LINE(0));
+
+	return IRQ_HANDLED;
+}
+
+static int kdwc3_probe(struct platform_device *pdev)
+{
+	struct device		*dev = &pdev->dev;
+	struct device_node	*node = pdev->dev.of_node;
+	struct dwc3_keystone	*kdwc;
+	struct resource		*res;
+	int			error, irq;
+
+	kdwc = devm_kzalloc(dev, sizeof(*kdwc), GFP_KERNEL);
+	if (!kdwc)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, kdwc);
+
+	kdwc->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "missing usbss resource\n");
+		return -EINVAL;
+	}
+
+	kdwc->usbss = devm_ioremap_resource(dev, res);
+	if (IS_ERR(kdwc->usbss))
+		return PTR_ERR(kdwc->usbss);
+
+	kdwc3_dma_mask = dma_get_mask(dev);
+	dev->dma_mask = &kdwc3_dma_mask;
+
+	kdwc->clk = devm_clk_get(kdwc->dev, "usb");
+
+	error = clk_prepare_enable(kdwc->clk);
+	if (error < 0) {
+		dev_dbg(kdwc->dev, "unable to enable usb clock, err %d\n",
+			error);
+		return error;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "missing irq\n");
+		goto err_irq;
+	}
+
+	error = devm_request_irq(dev, irq, dwc3_keystone_interrupt, IRQF_SHARED,
+			dev_name(dev), kdwc);
+	if (error) {
+		dev_err(dev, "failed to request IRQ #%d --> %d\n",
+				irq, error);
+		goto err_irq;
+	}
+
+	kdwc3_enable_irqs(kdwc);
+
+	error = of_platform_populate(node, NULL, NULL, dev);
+	if (error) {
+		dev_err(&pdev->dev, "failed to create dwc3 core\n");
+		goto err_core;
+	}
+
+	return 0;
+
+err_core:
+	kdwc3_disable_irqs(kdwc);
+err_irq:
+	clk_disable_unprepare(kdwc->clk);
+
+	return error;
+}
+
+static int kdwc3_remove_core(struct device *dev, void *c)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	platform_device_unregister(pdev);
+
+	return 0;
+}
+
+static int kdwc3_remove(struct platform_device *pdev)
+{
+	struct dwc3_keystone *kdwc = platform_get_drvdata(pdev);
+
+	kdwc3_disable_irqs(kdwc);
+	device_for_each_child(&pdev->dev, NULL, kdwc3_remove_core);
+	clk_disable_unprepare(kdwc->clk);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id kdwc3_of_match[] = {
+	{ .compatible = "ti,keystone-dwc3", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, kdwc3_of_match);
+
+static struct platform_driver kdwc3_driver = {
+	.probe		= kdwc3_probe,
+	.remove		= kdwc3_remove,
+	.driver		= {
+		.name	= "keystone-dwc3",
+		.owner	        = THIS_MODULE,
+		.of_match_table	= kdwc3_of_match,
+	},
+};
+
+module_platform_driver(kdwc3_driver);
+
+MODULE_ALIAS("platform:keystone-dwc3");
+MODULE_AUTHOR("WingMan Kwok <w-kwok2@ti.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DesignWare USB3 KEYSTONE Glue Layer");
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 2b41c63..38ad4fc 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -42,6 +42,15 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called isp1301_omap.
 
+config KEYSTONE_USB_PHY
+	tristate "Keystone USB PHY Driver"
+	depends on ARCH_KEYSTONE || COMPILE_TEST
+	select NOP_USB_XCEIV
+	help
+	  Enable this to support Keystone USB phy. This driver provides
+	  interface to interact with USB 2.0 and USB 3.0 PHY that is part
+	  of the Keystone SOC.
+
 config MV_U3D_PHY
 	bool "Marvell USB 3.0 PHY controller Driver"
 	depends on CPU_MMP3
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 022c1da..311b47b 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -30,3 +30,4 @@
 obj-$(CONFIG_USB_RCAR_GEN2_PHY)		+= phy-rcar-gen2-usb.o
 obj-$(CONFIG_USB_ULPI)			+= phy-ulpi.o
 obj-$(CONFIG_USB_ULPI_VIEWPORT)		+= phy-ulpi-viewport.o
+obj-$(CONFIG_KEYSTONE_USB_PHY)		+= phy-keystone.o
diff --git a/drivers/usb/phy/phy-keystone.c b/drivers/usb/phy/phy-keystone.c
new file mode 100644
index 0000000..533db12
--- /dev/null
+++ b/drivers/usb/phy/phy-keystone.c
@@ -0,0 +1,141 @@
+/*
+ * phy-keystone - USB PHY, talking to dwc3 controller in Keystone.
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Author: WingMan Kwok <w-kwok2@ti.com>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/io.h>
+#include <linux/of.h>
+
+#include "phy-generic.h"
+
+/* USB PHY control register offsets */
+#define USB_PHY_CTL_UTMI		0x0000
+#define USB_PHY_CTL_PIPE		0x0004
+#define USB_PHY_CTL_PARAM_1		0x0008
+#define USB_PHY_CTL_PARAM_2		0x000c
+#define USB_PHY_CTL_CLOCK		0x0010
+#define USB_PHY_CTL_PLL			0x0014
+
+#define PHY_REF_SSP_EN			BIT(29)
+
+struct keystone_usbphy {
+	struct usb_phy_gen_xceiv	usb_phy_gen;
+	void __iomem			*phy_ctrl;
+};
+
+static inline u32 keystone_usbphy_readl(void __iomem *base, u32 offset)
+{
+	return readl(base + offset);
+}
+
+static inline void keystone_usbphy_writel(void __iomem *base,
+					  u32 offset, u32 value)
+{
+	writel(value, base + offset);
+}
+
+static int keystone_usbphy_init(struct usb_phy *phy)
+{
+	struct keystone_usbphy *k_phy = dev_get_drvdata(phy->dev);
+	u32 val;
+
+	val  = keystone_usbphy_readl(k_phy->phy_ctrl, USB_PHY_CTL_CLOCK);
+	keystone_usbphy_writel(k_phy->phy_ctrl, USB_PHY_CTL_CLOCK,
+				val | PHY_REF_SSP_EN);
+	return 0;
+}
+
+static void keystone_usbphy_shutdown(struct usb_phy *phy)
+{
+	struct keystone_usbphy *k_phy = dev_get_drvdata(phy->dev);
+	u32 val;
+
+	val  = keystone_usbphy_readl(k_phy->phy_ctrl, USB_PHY_CTL_CLOCK);
+	keystone_usbphy_writel(k_phy->phy_ctrl, USB_PHY_CTL_CLOCK,
+				val &= ~PHY_REF_SSP_EN);
+}
+
+static int keystone_usbphy_probe(struct platform_device *pdev)
+{
+	struct device		*dev = &pdev->dev;
+	struct keystone_usbphy	*k_phy;
+	struct resource		*res;
+	int ret;
+
+	k_phy = devm_kzalloc(dev, sizeof(*k_phy), GFP_KERNEL);
+	if (!k_phy)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "missing usb phy resource\n");
+		return -EINVAL;
+	}
+
+	k_phy->phy_ctrl = devm_ioremap_resource(dev, res);
+	if (IS_ERR(k_phy->phy_ctrl))
+		return PTR_ERR(k_phy->phy_ctrl);
+
+	ret = usb_phy_gen_create_phy(dev, &k_phy->usb_phy_gen, NULL);
+	if (ret)
+		return ret;
+
+	k_phy->usb_phy_gen.phy.init = keystone_usbphy_init;
+	k_phy->usb_phy_gen.phy.shutdown = keystone_usbphy_shutdown;
+
+	platform_set_drvdata(pdev, k_phy);
+
+	ret = usb_add_phy_dev(&k_phy->usb_phy_gen.phy);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int keystone_usbphy_remove(struct platform_device *pdev)
+{
+	struct keystone_usbphy *k_phy = platform_get_drvdata(pdev);
+
+	usb_remove_phy(&k_phy->usb_phy_gen.phy);
+
+	return 0;
+}
+
+static const struct of_device_id keystone_usbphy_ids[] = {
+	{ .compatible = "ti,keystone-usbphy" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, keystone_usbphy_ids);
+
+static struct platform_driver keystone_usbphy_driver = {
+	.probe          = keystone_usbphy_probe,
+	.remove         = keystone_usbphy_remove,
+	.driver         = {
+		.name   = "keystone-usbphy",
+		.owner  = THIS_MODULE,
+		.of_match_table = of_match_ptr(keystone_usbphy_ids),
+	},
+};
+
+module_platform_driver(keystone_usbphy_driver);
+
+MODULE_ALIAS("platform:keystone-usbphy");
+MODULE_AUTHOR("Texas Instruments Inc.");
+MODULE_DESCRIPTION("Keystone USB phy driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 5be6e91..2958e09 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -270,10 +270,11 @@
 
 config DAVINCI_WATCHDOG
 	tristate "DaVinci watchdog"
-	depends on ARCH_DAVINCI
+	depends on ARCH_DAVINCI || ARCH_KEYSTONE
+	select WATCHDOG_CORE
 	help
 	  Say Y here if to include support for the watchdog timer
-	  in the DaVinci DM644x/DM646x processors.
+	  in the DaVinci DM644x/DM646x or Keystone processors.
 	  To compile this driver as a module, choose M here: the
 	  module will be called davinci_wdt.
 
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index dd625cc..b1bae03 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -3,7 +3,7 @@
  *
  * Watchdog driver for DaVinci DM644x/DM646x processors
  *
- * Copyright (C) 2006 Texas Instruments.
+ * Copyright (C) 2006-2013 Texas Instruments.
  *
  * 2007 (c) MontaVista Software, Inc. This file is licensed under
  * the terms of the GNU General Public License version 2. This program
@@ -15,18 +15,12 @@
 #include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/init.h>
-#include <linux/bitops.h>
 #include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/device.h>
 #include <linux/clk.h>
-#include <linux/slab.h>
 #include <linux/err.h>
 
 #define MODULE_NAME "DAVINCI-WDT: "
@@ -61,142 +55,103 @@
 #define WDKEY_SEQ0		(0xa5c6 << 16)
 #define WDKEY_SEQ1		(0xda7e << 16)
 
-static int heartbeat = DEFAULT_HEARTBEAT;
+static int heartbeat;
 
-static DEFINE_SPINLOCK(io_lock);
-static unsigned long wdt_status;
-#define WDT_IN_USE        0
-#define WDT_OK_TO_CLOSE   1
-#define WDT_REGION_INITED 2
-#define WDT_DEVICE_INITED 3
+/*
+ * struct to hold data for each WDT device
+ * @base - base io address of WD device
+ * @clk - source clock of WDT
+ * @wdd - hold watchdog device as is in WDT core
+ */
+struct davinci_wdt_device {
+	void __iomem		*base;
+	struct clk		*clk;
+	struct watchdog_device	wdd;
+};
 
-static void __iomem	*wdt_base;
-struct clk		*wdt_clk;
-
-static void wdt_service(void)
-{
-	spin_lock(&io_lock);
-
-	/* put watchdog in service state */
-	iowrite32(WDKEY_SEQ0, wdt_base + WDTCR);
-	/* put watchdog in active state */
-	iowrite32(WDKEY_SEQ1, wdt_base + WDTCR);
-
-	spin_unlock(&io_lock);
-}
-
-static void wdt_enable(void)
+static int davinci_wdt_start(struct watchdog_device *wdd)
 {
 	u32 tgcr;
 	u32 timer_margin;
 	unsigned long wdt_freq;
+	struct davinci_wdt_device *davinci_wdt = watchdog_get_drvdata(wdd);
 
-	wdt_freq = clk_get_rate(wdt_clk);
-
-	spin_lock(&io_lock);
+	wdt_freq = clk_get_rate(davinci_wdt->clk);
 
 	/* disable, internal clock source */
-	iowrite32(0, wdt_base + TCR);
+	iowrite32(0, davinci_wdt->base + TCR);
 	/* reset timer, set mode to 64-bit watchdog, and unreset */
-	iowrite32(0, wdt_base + TGCR);
+	iowrite32(0, davinci_wdt->base + TGCR);
 	tgcr = TIMMODE_64BIT_WDOG | TIM12RS_UNRESET | TIM34RS_UNRESET;
-	iowrite32(tgcr, wdt_base + TGCR);
+	iowrite32(tgcr, davinci_wdt->base + TGCR);
 	/* clear counter regs */
-	iowrite32(0, wdt_base + TIM12);
-	iowrite32(0, wdt_base + TIM34);
+	iowrite32(0, davinci_wdt->base + TIM12);
+	iowrite32(0, davinci_wdt->base + TIM34);
 	/* set timeout period */
-	timer_margin = (((u64)heartbeat * wdt_freq) & 0xffffffff);
-	iowrite32(timer_margin, wdt_base + PRD12);
-	timer_margin = (((u64)heartbeat * wdt_freq) >> 32);
-	iowrite32(timer_margin, wdt_base + PRD34);
+	timer_margin = (((u64)wdd->timeout * wdt_freq) & 0xffffffff);
+	iowrite32(timer_margin, davinci_wdt->base + PRD12);
+	timer_margin = (((u64)wdd->timeout * wdt_freq) >> 32);
+	iowrite32(timer_margin, davinci_wdt->base + PRD34);
 	/* enable run continuously */
-	iowrite32(ENAMODE12_PERIODIC, wdt_base + TCR);
+	iowrite32(ENAMODE12_PERIODIC, davinci_wdt->base + TCR);
 	/* Once the WDT is in pre-active state write to
 	 * TIM12, TIM34, PRD12, PRD34, TCR, TGCR, WDTCR are
 	 * write protected (except for the WDKEY field)
 	 */
 	/* put watchdog in pre-active state */
-	iowrite32(WDKEY_SEQ0 | WDEN, wdt_base + WDTCR);
+	iowrite32(WDKEY_SEQ0 | WDEN, davinci_wdt->base + WDTCR);
 	/* put watchdog in active state */
-	iowrite32(WDKEY_SEQ1 | WDEN, wdt_base + WDTCR);
-
-	spin_unlock(&io_lock);
-}
-
-static int davinci_wdt_open(struct inode *inode, struct file *file)
-{
-	if (test_and_set_bit(WDT_IN_USE, &wdt_status))
-		return -EBUSY;
-
-	wdt_enable();
-
-	return nonseekable_open(inode, file);
-}
-
-static ssize_t
-davinci_wdt_write(struct file *file, const char *data, size_t len,
-		  loff_t *ppos)
-{
-	if (len)
-		wdt_service();
-
-	return len;
-}
-
-static const struct watchdog_info ident = {
-	.options = WDIOF_KEEPALIVEPING,
-	.identity = "DaVinci Watchdog",
-};
-
-static long davinci_wdt_ioctl(struct file *file,
-					unsigned int cmd, unsigned long arg)
-{
-	int ret = -ENOTTY;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		ret = copy_to_user((struct watchdog_info *)arg, &ident,
-				   sizeof(ident)) ? -EFAULT : 0;
-		break;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		ret = put_user(0, (int *)arg);
-		break;
-
-	case WDIOC_KEEPALIVE:
-		wdt_service();
-		ret = 0;
-		break;
-
-	case WDIOC_GETTIMEOUT:
-		ret = put_user(heartbeat, (int *)arg);
-		break;
-	}
-	return ret;
-}
-
-static int davinci_wdt_release(struct inode *inode, struct file *file)
-{
-	wdt_service();
-	clear_bit(WDT_IN_USE, &wdt_status);
-
+	iowrite32(WDKEY_SEQ1 | WDEN, davinci_wdt->base + WDTCR);
 	return 0;
 }
 
-static const struct file_operations davinci_wdt_fops = {
-	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.write = davinci_wdt_write,
-	.unlocked_ioctl = davinci_wdt_ioctl,
-	.open = davinci_wdt_open,
-	.release = davinci_wdt_release,
+static int davinci_wdt_ping(struct watchdog_device *wdd)
+{
+	struct davinci_wdt_device *davinci_wdt = watchdog_get_drvdata(wdd);
+
+	/* put watchdog in service state */
+	iowrite32(WDKEY_SEQ0, davinci_wdt->base + WDTCR);
+	/* put watchdog in active state */
+	iowrite32(WDKEY_SEQ1, davinci_wdt->base + WDTCR);
+	return 0;
+}
+
+static unsigned int davinci_wdt_get_timeleft(struct watchdog_device *wdd)
+{
+	u64 timer_counter;
+	unsigned long freq;
+	u32 val;
+	struct davinci_wdt_device *davinci_wdt = watchdog_get_drvdata(wdd);
+
+	/* if timeout has occured then return 0 */
+	val = ioread32(davinci_wdt->base + WDTCR);
+	if (val & WDFLAG)
+		return 0;
+
+	freq = clk_get_rate(davinci_wdt->clk);
+
+	if (!freq)
+		return 0;
+
+	timer_counter = ioread32(davinci_wdt->base + TIM12);
+	timer_counter |= ((u64)ioread32(davinci_wdt->base + TIM34) << 32);
+
+	do_div(timer_counter, freq);
+
+	return wdd->timeout - timer_counter;
+}
+
+static const struct watchdog_info davinci_wdt_info = {
+	.options = WDIOF_KEEPALIVEPING,
+	.identity = "DaVinci/Keystone Watchdog",
 };
 
-static struct miscdevice davinci_wdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &davinci_wdt_fops,
+static const struct watchdog_ops davinci_wdt_ops = {
+	.owner		= THIS_MODULE,
+	.start		= davinci_wdt_start,
+	.stop		= davinci_wdt_ping,
+	.ping		= davinci_wdt_ping,
+	.get_timeleft	= davinci_wdt_get_timeleft,
 };
 
 static int davinci_wdt_probe(struct platform_device *pdev)
@@ -204,37 +159,53 @@
 	int ret = 0;
 	struct device *dev = &pdev->dev;
 	struct resource  *wdt_mem;
+	struct watchdog_device *wdd;
+	struct davinci_wdt_device *davinci_wdt;
 
-	wdt_clk = devm_clk_get(dev, NULL);
-	if (WARN_ON(IS_ERR(wdt_clk)))
-		return PTR_ERR(wdt_clk);
+	davinci_wdt = devm_kzalloc(dev, sizeof(*davinci_wdt), GFP_KERNEL);
+	if (!davinci_wdt)
+		return -ENOMEM;
 
-	clk_prepare_enable(wdt_clk);
+	davinci_wdt->clk = devm_clk_get(dev, NULL);
+	if (WARN_ON(IS_ERR(davinci_wdt->clk)))
+		return PTR_ERR(davinci_wdt->clk);
 
-	if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT)
-		heartbeat = DEFAULT_HEARTBEAT;
+	clk_prepare_enable(davinci_wdt->clk);
 
-	dev_info(dev, "heartbeat %d sec\n", heartbeat);
+	platform_set_drvdata(pdev, davinci_wdt);
+
+	wdd			= &davinci_wdt->wdd;
+	wdd->info		= &davinci_wdt_info;
+	wdd->ops		= &davinci_wdt_ops;
+	wdd->min_timeout	= 1;
+	wdd->max_timeout	= MAX_HEARTBEAT;
+	wdd->timeout		= DEFAULT_HEARTBEAT;
+
+	watchdog_init_timeout(wdd, heartbeat, dev);
+
+	dev_info(dev, "heartbeat %d sec\n", wdd->timeout);
+
+	watchdog_set_drvdata(wdd, davinci_wdt);
+	watchdog_set_nowayout(wdd, 1);
 
 	wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	wdt_base = devm_ioremap_resource(dev, wdt_mem);
-	if (IS_ERR(wdt_base))
-		return PTR_ERR(wdt_base);
+	davinci_wdt->base = devm_ioremap_resource(dev, wdt_mem);
+	if (IS_ERR(davinci_wdt->base))
+		return PTR_ERR(davinci_wdt->base);
 
-	ret = misc_register(&davinci_wdt_miscdev);
-	if (ret < 0) {
-		dev_err(dev, "cannot register misc device\n");
-	} else {
-		set_bit(WDT_DEVICE_INITED, &wdt_status);
-	}
+	ret = watchdog_register_device(wdd);
+	if (ret < 0)
+		dev_err(dev, "cannot register watchdog device\n");
 
 	return ret;
 }
 
 static int davinci_wdt_remove(struct platform_device *pdev)
 {
-	misc_deregister(&davinci_wdt_miscdev);
-	clk_disable_unprepare(wdt_clk);
+	struct davinci_wdt_device *davinci_wdt = platform_get_drvdata(pdev);
+
+	watchdog_unregister_device(&davinci_wdt->wdd);
+	clk_disable_unprepare(davinci_wdt->clk);
 
 	return 0;
 }
@@ -247,7 +218,7 @@
 
 static struct platform_driver platform_wdt_driver = {
 	.driver = {
-		.name = "watchdog",
+		.name = "davinci-wdt",
 		.owner	= THIS_MODULE,
 		.of_match_table = davinci_wdt_of_match,
 	},
@@ -267,4 +238,4 @@
 		 __MODULE_STRING(DEFAULT_HEARTBEAT));
 
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:watchdog");
+MODULE_ALIAS("platform:davinci-wdt");
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f1f07d3..db51fe4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -5,6 +5,7 @@
 #define _LINUX_BOOTMEM_H
 
 #include <linux/mmzone.h>
+#include <linux/mm_types.h>
 #include <asm/dma.h>
 
 /*
@@ -52,7 +53,6 @@
 			      unsigned long size);
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
@@ -142,6 +142,194 @@
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
+
+#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
+
+/* FIXME: use MEMBLOCK_ALLOC_* variants here */
+#define BOOTMEM_ALLOC_ACCESSIBLE	0
+#define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
+
+/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
+		phys_addr_t align, phys_addr_t min_addr,
+		phys_addr_t max_addr, int nid);
+void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+		phys_addr_t min_addr, phys_addr_t max_addr, int nid);
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
+static inline void * __init memblock_virt_alloc(
+					phys_addr_t size,  phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
+					    BOOTMEM_ALLOC_ACCESSIBLE,
+					    NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+					phys_addr_t size, phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align,
+						    BOOTMEM_LOW_LIMIT,
+						    BOOTMEM_ALLOC_ACCESSIBLE,
+						    NUMA_NO_NODE);
+}
+
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
+#endif
+
+static inline void * __init memblock_virt_alloc_low(
+					phys_addr_t size, phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid(size, align,
+						   BOOTMEM_LOW_LIMIT,
+						   ARCH_LOW_ADDRESS_LIMIT,
+						   NUMA_NO_NODE);
+}
+static inline void * __init memblock_virt_alloc_low_nopanic(
+					phys_addr_t size, phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align,
+						   BOOTMEM_LOW_LIMIT,
+						   ARCH_LOW_ADDRESS_LIMIT,
+						   NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
+						    BOOTMEM_ALLOC_ACCESSIBLE,
+						    NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+						phys_addr_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+					    BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+						phys_addr_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
+						    BOOTMEM_ALLOC_ACCESSIBLE,
+						    nid);
+}
+
+static inline void __init memblock_free_early(
+					phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+				phys_addr_t base, phys_addr_t size, int nid)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_late(
+					phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_late(base, size);
+}
+
+#else
+
+#define BOOTMEM_ALLOC_ACCESSIBLE	0
+
+
+/* Fall back to all the existing bootmem APIs */
+static inline void * __init memblock_virt_alloc(
+					phys_addr_t size,  phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+					phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_low(
+					phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_low(size, align, 0);
+}
+
+static inline void * __init memblock_virt_alloc_low_nopanic(
+					phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_low_nopanic(size, align, 0);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return __alloc_bootmem_nopanic(size, align, min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+						phys_addr_t size, int nid)
+{
+	return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
+				     BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+						phys_addr_t size, int nid)
+{
+	return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					     SMP_CACHE_BYTES,
+					     BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
+	phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+	return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
+					  min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid_nopanic(
+			phys_addr_t size, phys_addr_t align,
+			phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+	return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
+				min_addr, max_addr);
+}
+
+static inline void __init memblock_free_early(
+					phys_addr_t base, phys_addr_t size)
+{
+	free_bootmem(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+				phys_addr_t base, phys_addr_t size, int nid)
+{
+	free_bootmem_node(NODE_DATA(nid), base, size);
+}
+
+static inline void __init memblock_free_late(
+					phys_addr_t base, phys_addr_t size)
+{
+	free_bootmem_late(base, size);
+}
+#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
+
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 77c60e5..ae0d71c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -47,11 +47,13 @@
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
-				phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
+					    phys_addr_t start, phys_addr_t end,
+					    int nid);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
+phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
@@ -87,7 +89,7 @@
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -107,7 +109,7 @@
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 6efd202..0c3551b 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -34,7 +34,7 @@
 
 struct davinci_gpio_controller {
 	struct gpio_chip	chip;
-	int			irq_base;
+	struct irq_domain	*irq_domain;
 	/* Serialize access to GPIO registers */
 	spinlock_t		lock;
 	void __iomem		*regs;
diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h
index 05b2934..97948ac 100644
--- a/include/linux/platform_data/mtd-davinci-aemif.h
+++ b/include/linux/platform_data/mtd-davinci-aemif.h
@@ -10,6 +10,8 @@
 #ifndef _MACH_DAVINCI_AEMIF_H
 #define _MACH_DAVINCI_AEMIF_H
 
+#include <linux/platform_device.h>
+
 #define NRCSR_OFFSET		0x00
 #define AWCCR_OFFSET		0x04
 #define A1CR_OFFSET		0x10
@@ -31,6 +33,5 @@
 	u8	ta;
 };
 
-int davinci_aemif_setup_timing(struct davinci_aemif_timing *t,
-					void __iomem *base, unsigned cs);
+int davinci_aemif_setup(struct platform_device *pdev);
 #endif
diff --git a/init/main.c b/init/main.c
index febc511..d3e3cff 100644
--- a/init/main.c
+++ b/init/main.c
@@ -355,9 +355,11 @@
  */
 static void __init setup_command_line(char *command_line)
 {
-	saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
-	initcall_command_line = alloc_bootmem(strlen (boot_command_line)+1);
-	static_command_line = alloc_bootmem(strlen (command_line)+1);
+	saved_command_line =
+		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+	initcall_command_line =
+		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+	static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
 	strcpy (saved_command_line, boot_command_line);
 	strcpy (static_command_line, command_line);
 }
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index b38109e..d9f61a1 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -637,7 +637,7 @@
 		BUG_ON(!region);
 	} else
 		/* This allocation cannot fail */
-		region = alloc_bootmem(sizeof(struct nosave_region));
+		region = memblock_virt_alloc(sizeof(struct nosave_region), 0);
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index be7c86b..f8b41bd 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -757,14 +757,10 @@
 		return;
 
 	if (early) {
-		unsigned long mem;
-
-		mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
-		if (!mem)
-			return;
-		new_log_buf = __va(mem);
+		new_log_buf =
+			memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
 	} else {
-		new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
+		new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
 	}
 
 	if (unlikely(!new_log_buf)) {
diff --git a/lib/cpumask.c b/lib/cpumask.c
index d327b87..b810b75 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -140,7 +140,7 @@
  */
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
-	*mask = alloc_bootmem(cpumask_size());
+	*mask = memblock_virt_alloc(cpumask_size(), 0);
 }
 
 /**
@@ -161,6 +161,6 @@
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-	free_bootmem(__pa(mask), cpumask_size());
+	memblock_free_early(__pa(mask), cpumask_size());
 }
 #endif
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index e4399fa..42a1cf4 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -172,8 +172,9 @@
 	/*
 	 * Get the overflow emergency buffer
 	 */
-	v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
-						PAGE_ALIGN(io_tlb_overflow));
+	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
+						PAGE_ALIGN(io_tlb_overflow),
+						PAGE_SIZE);
 	if (!v_overflow_buffer)
 		return -ENOMEM;
 
@@ -184,11 +185,15 @@
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+	io_tlb_list = memblock_virt_alloc(
+				PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
+				PAGE_SIZE);
 	for (i = 0; i < io_tlb_nslabs; i++)
  		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+	io_tlb_orig_addr = memblock_virt_alloc(
+				PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
+				PAGE_SIZE);
 
 	if (verbose)
 		swiotlb_print_info();
@@ -215,13 +220,13 @@
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
-	vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
+	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
 		return;
 
 	if (io_tlb_start)
-		free_bootmem(io_tlb_start,
-				 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_early(io_tlb_start,
+				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	pr_warn("Cannot allocate SWIOTLB buffer");
 	no_iotlb_memory = true;
 }
@@ -357,14 +362,14 @@
 		free_pages((unsigned long)phys_to_virt(io_tlb_start),
 			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
 	} else {
-		free_bootmem_late(io_tlb_overflow_buffer,
-				  PAGE_ALIGN(io_tlb_overflow));
-		free_bootmem_late(__pa(io_tlb_orig_addr),
-				  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_bootmem_late(__pa(io_tlb_list),
-				  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		free_bootmem_late(io_tlb_start,
-				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_late(io_tlb_overflow_buffer,
+				   PAGE_ALIGN(io_tlb_overflow));
+		memblock_free_late(__pa(io_tlb_orig_addr),
+				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(io_tlb_list),
+				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+		memblock_free_late(io_tlb_start,
+				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
 	io_tlb_nslabs = 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dee6cf4..e16c56e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1280,9 +1280,9 @@
 	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
 		void *addr;
 
-		addr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
-				huge_page_size(h), huge_page_size(h), 0);
-
+		addr = memblock_virt_alloc_try_nid_nopanic(
+				huge_page_size(h), huge_page_size(h),
+				0, BOOTMEM_ALLOC_ACCESSIBLE, node);
 		if (addr) {
 			/*
 			 * Use the beginning of the huge page to store the
@@ -1322,8 +1322,8 @@
 
 #ifdef CONFIG_HIGHMEM
 		page = pfn_to_page(m->phys >> PAGE_SHIFT);
-		free_bootmem_late((unsigned long)m,
-				  sizeof(struct huge_bootmem_page));
+		memblock_free_late(__pa(m),
+				   sizeof(struct huge_bootmem_page));
 #else
 		page = virt_to_page(m);
 #endif
diff --git a/mm/memblock.c b/mm/memblock.c
index 53e477b..96e86c0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -21,6 +21,9 @@
 #include <linux/memblock.h>
 
 #include <asm-generic/sections.h>
+#include <linux/io.h>
+
+#include "internal.h"
 
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
@@ -91,7 +94,7 @@
  * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
  * @size: size of free area to find
  * @align: alignment of free area to find
- * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
  * Utility called from memblock_find_in_range_node(), find free area bottom-up.
  *
@@ -123,7 +126,7 @@
  * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
  * @size: size of free area to find
  * @align: alignment of free area to find
- * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
  * Utility called from memblock_find_in_range_node(), find free area top-down.
  *
@@ -154,11 +157,11 @@
 
 /**
  * memblock_find_in_range_node - find free area in given range and node
- * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
  * @size: size of free area to find
  * @align: alignment of free area to find
- * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
@@ -173,9 +176,9 @@
  * RETURNS:
  * Found address on success, 0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
-					phys_addr_t end, phys_addr_t size,
-					phys_addr_t align, int nid)
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
+					phys_addr_t align, phys_addr_t start,
+					phys_addr_t end, int nid)
 {
 	int ret;
 	phys_addr_t kernel_end;
@@ -238,8 +241,8 @@
 					phys_addr_t end, phys_addr_t size,
 					phys_addr_t align)
 {
-	return memblock_find_in_range_node(start, end, size, align,
-					   MAX_NUMNODES);
+	return memblock_find_in_range_node(size, align, start, end,
+					    NUMA_NO_NODE);
 }
 
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
@@ -259,6 +262,8 @@
 	}
 }
 
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+
 phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
 					phys_addr_t *addr)
 {
@@ -271,6 +276,20 @@
 			  memblock.reserved.max);
 }
 
+phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
+					phys_addr_t *addr)
+{
+	if (memblock.memory.regions == memblock_memory_init_regions)
+		return 0;
+
+	*addr = __pa(memblock.memory.regions);
+
+	return PAGE_ALIGN(sizeof(struct memblock_region) *
+			  memblock.memory.max);
+}
+
+#endif
+
 /**
  * memblock_double_array - double the size of the memblock regions array
  * @type: memblock type of the regions array being doubled
@@ -643,7 +662,7 @@
 {
 	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
 		     (unsigned long long)base,
-		     (unsigned long long)base + size,
+		     (unsigned long long)base + size - 1,
 		     (void *)_RET_IP_);
 
 	return __memblock_remove(&memblock.reserved, base, size);
@@ -655,7 +674,7 @@
 
 	memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n",
 		     (unsigned long long)base,
-		     (unsigned long long)base + size,
+		     (unsigned long long)base + size - 1,
 		     (void *)_RET_IP_);
 
 	return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
@@ -664,7 +683,7 @@
 /**
  * __next_free_mem_range - next function for for_each_free_mem_range()
  * @idx: pointer to u64 loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
@@ -693,13 +712,16 @@
 	int mi = *idx & 0xffffffff;
 	int ri = *idx >> 32;
 
+	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+		nid = NUMA_NO_NODE;
+
 	for ( ; mi < mem->cnt; mi++) {
 		struct memblock_region *m = &mem->regions[mi];
 		phys_addr_t m_start = m->base;
 		phys_addr_t m_end = m->base + m->size;
 
 		/* only memory regions are associated with nodes, check it */
-		if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+		if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
 			continue;
 
 		/* scan areas before each reservation for intersection */
@@ -740,7 +762,7 @@
 /**
  * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
  * @idx: pointer to u64 loop variable
- * @nid: nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: nid: node selector, %NUMA_NO_NODE for all nodes
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
@@ -756,6 +778,9 @@
 	int mi = *idx & 0xffffffff;
 	int ri = *idx >> 32;
 
+	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+		nid = NUMA_NO_NODE;
+
 	if (*idx == (u64)ULLONG_MAX) {
 		mi = mem->cnt - 1;
 		ri = rsv->cnt;
@@ -767,7 +792,7 @@
 		phys_addr_t m_end = m->base + m->size;
 
 		/* only memory regions are associated with nodes, check it */
-		if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+		if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
 			continue;
 
 		/* scan areas before each reservation for intersection */
@@ -870,13 +895,10 @@
 {
 	phys_addr_t found;
 
-	if (WARN_ON(!align))
-		align = __alignof__(long long);
+	if (!align)
+		align = SMP_CACHE_BYTES;
 
-	/* align @size to avoid excessive fragmentation on reserved array */
-	size = round_up(size, align);
-
-	found = memblock_find_in_range_node(0, max_addr, size, align, nid);
+	found = memblock_find_in_range_node(size, align, 0, max_addr, nid);
 	if (found && !memblock_reserve(found, size))
 		return found;
 
@@ -890,7 +912,7 @@
 
 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
-	return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
+	return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE);
 }
 
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
@@ -920,6 +942,207 @@
 	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
+/**
+ * memblock_virt_alloc_internal - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region to allocate (phys address)
+ * @max_addr: the upper bound of the memory region to allocate (phys address)
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * The @min_addr limit is dropped if it can not be satisfied and the allocation
+ * will fall back to memory below @min_addr. Also, allocation may fall back
+ * to any node in the system if the specified node can not
+ * hold the requested memory.
+ *
+ * The allocation is performed from memory region limited by
+ * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE.
+ *
+ * The memory block is aligned on SMP_CACHE_BYTES if @align == 0.
+ *
+ * The phys address of allocated boot memory block is converted to virtual and
+ * allocated memory is reset to 0.
+ *
+ * In addition, function sets the min_count to 0 using kmemleak_alloc for
+ * allocated boot memory block, so that it is never reported as leaks.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+static void * __init memblock_virt_alloc_internal(
+				phys_addr_t size, phys_addr_t align,
+				phys_addr_t min_addr, phys_addr_t max_addr,
+				int nid)
+{
+	phys_addr_t alloc;
+	void *ptr;
+
+	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+		nid = NUMA_NO_NODE;
+
+	/*
+	 * Detect any accidental use of these APIs after slab is ready, as at
+	 * this moment memblock may be deinitialized already and its
+	 * internal data may be destroyed (after execution of free_all_bootmem)
+	 */
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, nid);
+
+	if (!align)
+		align = SMP_CACHE_BYTES;
+
+	if (max_addr > memblock.current_limit)
+		max_addr = memblock.current_limit;
+
+again:
+	alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
+					    nid);
+	if (alloc)
+		goto done;
+
+	if (nid != NUMA_NO_NODE) {
+		alloc = memblock_find_in_range_node(size, align, min_addr,
+						    max_addr,  NUMA_NO_NODE);
+		if (alloc)
+			goto done;
+	}
+
+	if (min_addr) {
+		min_addr = 0;
+		goto again;
+	} else {
+		goto error;
+	}
+
+done:
+	memblock_reserve(alloc, size);
+	ptr = phys_to_virt(alloc);
+	memset(ptr, 0, size);
+
+	/*
+	 * The min_count is set to 0 so that bootmem allocated blocks
+	 * are never reported as leaks. This is because many of these blocks
+	 * are only referred via the physical address which is not
+	 * looked up by kmemleak.
+	 */
+	kmemleak_alloc(ptr, size, 0, 0);
+
+	return ptr;
+
+error:
+	return NULL;
+}
+
+/**
+ * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ *	  is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ *	      is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
+ *	      allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public version of _memblock_virt_alloc_try_nid_nopanic() which provides
+ * additional debug information (including caller info), if enabled.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_virt_alloc_try_nid_nopanic(
+				phys_addr_t size, phys_addr_t align,
+				phys_addr_t min_addr, phys_addr_t max_addr,
+				int nid)
+{
+	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
+		     __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+		     (u64)max_addr, (void *)_RET_IP_);
+	return memblock_virt_alloc_internal(size, align, min_addr,
+					     max_addr, nid);
+}
+
+/**
+ * memblock_virt_alloc_try_nid - allocate boot memory block with panicking
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ *	  is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ *	      is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
+ *	      allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public panicking version of _memblock_virt_alloc_try_nid_nopanic()
+ * which provides debug information (including caller info), if enabled,
+ * and panics if the request can not be satisfied.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_virt_alloc_try_nid(
+			phys_addr_t size, phys_addr_t align,
+			phys_addr_t min_addr, phys_addr_t max_addr,
+			int nid)
+{
+	void *ptr;
+
+	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
+		     __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+		     (u64)max_addr, (void *)_RET_IP_);
+	ptr = memblock_virt_alloc_internal(size, align,
+					   min_addr, max_addr, nid);
+	if (ptr)
+		return ptr;
+
+	panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n",
+	      __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+	      (u64)max_addr);
+	return NULL;
+}
+
+/**
+ * __memblock_free_early - free boot memory block
+ * @base: phys starting address of the  boot memory block
+ * @size: size of the boot memory block in bytes
+ *
+ * Free boot memory block previously allocated by memblock_virt_alloc_xx() API.
+ * The freeing memory will not be released to the buddy allocator.
+ */
+void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
+{
+	memblock_dbg("%s: [%#016llx-%#016llx] %pF\n",
+		     __func__, (u64)base, (u64)base + size - 1,
+		     (void *)_RET_IP_);
+	kmemleak_free_part(__va(base), size);
+	__memblock_remove(&memblock.reserved, base, size);
+}
+
+/*
+ * __memblock_free_late - free bootmem block pages directly to buddy allocator
+ * @addr: phys starting address of the  boot memory block
+ * @size: size of the boot memory block in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system.  Pages are released directly
+ * to the buddy allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
+{
+	u64 cursor, end;
+
+	memblock_dbg("%s: [%#016llx-%#016llx] %pF\n",
+		     __func__, (u64)base, (u64)base + size - 1,
+		     (void *)_RET_IP_);
+	kmemleak_free_part(__va(base), size);
+	cursor = PFN_UP(base);
+	end = PFN_DOWN(base + size);
+
+	for (; cursor < end; cursor++) {
+		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		totalram_pages++;
+	}
+}
 
 /*
  * Remaining API functions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 489f235..4f158ec 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -9,7 +9,6 @@
 #include <linux/swap.h>
 #include <linux/interrupt.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
 #include <linux/pagevec.h>
@@ -269,7 +268,7 @@
 }
 
 /* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
- * alloc_bootmem_node_nopanic() */
+ * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
 static int __ref ensure_zone_is_initialized(struct zone *zone,
 			unsigned long start_pfn, unsigned long num_pages)
 {
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 2c254d3..f73f298 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -41,11 +41,13 @@
 	if (limit > memblock.current_limit)
 		limit = memblock.current_limit;
 
-	addr = memblock_find_in_range_node(goal, limit, size, align, nid);
+	addr = memblock_find_in_range_node(size, align, goal, limit, nid);
 	if (!addr)
 		return NULL;
 
-	memblock_reserve(addr, size);
+	if (memblock_reserve(addr, size))
+		return NULL;
+
 	ptr = phys_to_virt(addr);
 	memset(ptr, 0, size);
 	/*
@@ -114,16 +116,27 @@
 static unsigned long __init free_low_memory_core_early(void)
 {
 	unsigned long count = 0;
-	phys_addr_t start, end, size;
+	phys_addr_t start, end;
 	u64 i;
 
-	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
+	for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
 		count += __free_memory_core(start, end);
 
-	/* free range that is used for reserved array if we allocate it */
-	size = get_allocated_memblock_reserved_regions_info(&start);
-	if (size)
-		count += __free_memory_core(start, start + size);
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+	{
+		phys_addr_t size;
+
+		/* Free memblock.reserved array if it was allocated */
+		size = get_allocated_memblock_reserved_regions_info(&start);
+		if (size)
+			count += __free_memory_core(start, start + size);
+
+		/* Free memblock.memory array if it was allocated */
+		size = get_allocated_memblock_memory_regions_info(&start);
+		if (size)
+			count += __free_memory_core(start, start + size);
+	}
+#endif
 
 	return count;
 }
@@ -161,7 +174,7 @@
 	reset_all_zones_managed_pages();
 
 	/*
-	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+	 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
 	 *  because in some case like Node0 doesn't have RAM installed
 	 *  low ram will be on Node1
 	 */
@@ -215,7 +228,7 @@
 
 restart:
 
-	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
+	ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align, goal, limit);
 
 	if (ptr)
 		return ptr;
@@ -299,7 +312,7 @@
 	if (ptr)
 		return ptr;
 
-	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
+	ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align,
 					goal, limit);
 	if (ptr)
 		return ptr;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5248fe0..5974ae2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4209,7 +4209,6 @@
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int i;
-	struct pglist_data *pgdat = zone->zone_pgdat;
 	size_t alloc_size;
 
 	/*
@@ -4225,7 +4224,8 @@
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node_nopanic(pgdat, alloc_size);
+			memblock_virt_alloc_node_nopanic(
+				alloc_size, zone->zone_pgdat->node_id);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -4345,13 +4345,14 @@
 #endif
 
 /**
- * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
+ * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
- * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
+ * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
  *
  * If an architecture guarantees that all ranges registered with
  * add_active_ranges() contain no holes and may be freed, this
- * this function may be used instead of calling free_bootmem() manually.
+ * this function may be used instead of calling memblock_free_early_nid()
+ * manually.
  */
 void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 {
@@ -4363,9 +4364,9 @@
 		end_pfn = min(end_pfn, max_low_pfn);
 
 		if (start_pfn < end_pfn)
-			free_bootmem_node(NODE_DATA(this_nid),
-					  PFN_PHYS(start_pfn),
-					  (end_pfn - start_pfn) << PAGE_SHIFT);
+			memblock_free_early_nid(PFN_PHYS(start_pfn),
+					(end_pfn - start_pfn) << PAGE_SHIFT,
+					this_nid);
 	}
 }
 
@@ -4636,8 +4637,9 @@
 	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
-								   usemapsize);
+		zone->pageblock_flags =
+			memblock_virt_alloc_node_nopanic(usemapsize,
+							 pgdat->node_id);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
@@ -4831,7 +4833,8 @@
 		size =  (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = alloc_bootmem_node_nopanic(pgdat, size);
+			map = memblock_virt_alloc_node_nopanic(size,
+							       pgdat->node_id);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -5857,7 +5860,7 @@
 	do {
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY)
-			table = alloc_bootmem_nopanic(size);
+			table = memblock_virt_alloc_nopanic(size, 0);
 		else if (hashdist)
 			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
 		else {
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6d757e3a..d8bd2c5 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -54,8 +54,9 @@
 
 	table_size = sizeof(struct page_cgroup) * nr_pages;
 
-	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
-			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	base = memblock_virt_alloc_try_nid_nopanic(
+			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+			BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	if (!base)
 		return -ENOMEM;
 	NODE_DATA(nid)->node_page_cgroup = base;
diff --git a/mm/percpu.c b/mm/percpu.c
index 0d10def..65fd8a7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1063,7 +1063,7 @@
 			  __alignof__(ai->groups[0].cpu_map[0]));
 	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
 
-	ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
+	ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), 0);
 	if (!ptr)
 		return NULL;
 	ai = ptr;
@@ -1088,7 +1088,7 @@
  */
 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 {
-	free_bootmem(__pa(ai), ai->__ai_size);
+	memblock_free_early(__pa(ai), ai->__ai_size);
 }
 
 /**
@@ -1246,10 +1246,12 @@
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
-	group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
-	group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
-	unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
-	unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));
+	group_offsets = memblock_virt_alloc(ai->nr_groups *
+					     sizeof(group_offsets[0]), 0);
+	group_sizes = memblock_virt_alloc(ai->nr_groups *
+					   sizeof(group_sizes[0]), 0);
+	unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
+	unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
@@ -1311,7 +1313,8 @@
 	 * empty chunks.
 	 */
 	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
-	pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
+	pcpu_slot = memblock_virt_alloc(
+			pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
@@ -1322,7 +1325,7 @@
 	 * covers static area + reserved area (mostly used for module
 	 * static percpu allocation).
 	 */
-	schunk = alloc_bootmem(pcpu_chunk_struct_size);
+	schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 	INIT_LIST_HEAD(&schunk->list);
 	schunk->base_addr = base_addr;
 	schunk->map = smap;
@@ -1346,7 +1349,7 @@
 
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
-		dchunk = alloc_bootmem(pcpu_chunk_struct_size);
+		dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 		INIT_LIST_HEAD(&dchunk->list);
 		dchunk->base_addr = base_addr;
 		dchunk->map = dmap;
@@ -1626,7 +1629,7 @@
 	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
 
-	areas = alloc_bootmem_nopanic(areas_size);
+	areas = memblock_virt_alloc_nopanic(areas_size, 0);
 	if (!areas) {
 		rc = -ENOMEM;
 		goto out_free;
@@ -1712,7 +1715,7 @@
 out_free:
 	pcpu_free_alloc_info(ai);
 	if (areas)
-		free_bootmem(__pa(areas), areas_size);
+		memblock_free_early(__pa(areas), areas_size);
 	return rc;
 }
 #endif /* BUILD_EMBED_FIRST_CHUNK */
@@ -1760,7 +1763,7 @@
 	/* unaligned allocations can't be freed, round up to page size */
 	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
 			       sizeof(pages[0]));
-	pages = alloc_bootmem(pages_size);
+	pages = memblock_virt_alloc(pages_size, 0);
 
 	/* allocate pages */
 	j = 0;
@@ -1823,7 +1826,7 @@
 		free_fn(page_address(pages[j]), PAGE_SIZE);
 	rc = -ENOMEM;
 out_free_ar:
-	free_bootmem(__pa(pages), pages_size);
+	memblock_free_early(__pa(pages), pages_size);
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
@@ -1848,12 +1851,13 @@
 static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 				       size_t align)
 {
-	return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
+	return  memblock_virt_alloc_from_nopanic(
+			size, align, __pa(MAX_DMA_ADDRESS));
 }
 
 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free_early(__pa(ptr), size);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -1896,7 +1900,9 @@
 	void *fc;
 
 	ai = pcpu_alloc_alloc_info(1, 1);
-	fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	fc = memblock_virt_alloc_from_nopanic(unit_size,
+					      PAGE_SIZE,
+					      __pa(MAX_DMA_ADDRESS));
 	if (!ai || !fc)
 		panic("Failed to allocate memory for percpu areas.");
 	/* kmemleak tracks the percpu allocations separately */
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 27eeab3..4cba9c2 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -40,7 +40,8 @@
 				unsigned long align,
 				unsigned long goal)
 {
-	return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
+	return memblock_virt_alloc_try_nid(size, align, goal,
+					    BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
 static void *vmemmap_buf;
@@ -226,7 +227,8 @@
 
 	if (vmemmap_buf_start) {
 		/* need to free left buf */
-		free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
+		memblock_free_early(__pa(vmemmap_buf),
+				    vmemmap_buf_end - vmemmap_buf);
 		vmemmap_buf = NULL;
 		vmemmap_buf_end = NULL;
 	}
diff --git a/mm/sparse.c b/mm/sparse.c
index 8cc7be0..63c3ea5 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -69,7 +69,7 @@
 		else
 			section = kzalloc(array_size, GFP_KERNEL);
 	} else {
-		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
+		section = memblock_virt_alloc_node(array_size, nid);
 	}
 
 	return section;
@@ -279,8 +279,9 @@
 	limit = goal + (1UL << PA_SECTION_SHIFT);
 	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
-	p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
-					  SMP_CACHE_BYTES, goal, limit);
+	p = memblock_virt_alloc_try_nid_nopanic(size,
+						SMP_CACHE_BYTES, goal, limit,
+						nid);
 	if (!p && limit) {
 		limit = 0;
 		goto again;
@@ -331,7 +332,7 @@
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	return alloc_bootmem_node_nopanic(pgdat, size);
+	return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -376,8 +377,9 @@
 		return map;
 
 	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
-	map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
-					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	map = memblock_virt_alloc_try_nid(size,
+					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	return map;
 }
 void __init sparse_mem_maps_populate_node(struct page **map_map,
@@ -401,8 +403,9 @@
 	}
 
 	size = PAGE_ALIGN(size);
-	map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
-					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	map = memblock_virt_alloc_try_nid(size * map_count,
+					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+					  BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
 	if (map) {
 		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 			if (!present_section_nr(pnum))
@@ -545,7 +548,7 @@
 	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
 	 */
 	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
-	usemap_map = alloc_bootmem(size);
+	usemap_map = memblock_virt_alloc(size, 0);
 	if (!usemap_map)
 		panic("can not allocate usemap_map\n");
 	alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
@@ -553,7 +556,7 @@
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
 	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
-	map_map = alloc_bootmem(size2);
+	map_map = memblock_virt_alloc(size2, 0);
 	if (!map_map)
 		panic("can not allocate map_map\n");
 	alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
@@ -583,9 +586,9 @@
 	vmemmap_populate_print_last();
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	free_bootmem(__pa(map_map), size2);
+	memblock_free_early(__pa(map_map), size2);
 #endif
-	free_bootmem(__pa(usemap_map), size);
+	memblock_free_early(__pa(usemap_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG