Merge tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "A mix of fixes and a few stragglers. In detail:

   - Revert the bogus __read_mostly that we discussed for the initial
     pull request.

   - Fix a merge window regression with fixed file registration error
     path handling.

   - Fix io-wq numa node affinities.

   - Series abstracting out an io_identity struct, making it both easier
     to see what the personality items are, and also easier to to adopt
     more. Use this to cover audit logging.

   - Fix for read-ahead disabled block condition in async buffered
     reads, and using single page read-ahead to unify what
     generic_file_buffer_read() path is used.

   - Series for REQ_F_COMP_LOCKED fix and removal of it (Pavel)

   - Poll fix (Pavel)"

* tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: use blk_queue_nowait() to check if NOWAIT supported
  mm: use limited read-ahead to satisfy read
  mm: mark async iocb read as NOWAIT once some data has been copied
  io_uring: fix double poll mask init
  io-wq: inherit audit loginuid and sessionid
  io_uring: use percpu counters to track inflight requests
  io_uring: assign new io_identity for task if members have changed
  io_uring: store io_identity in io_uring_task
  io_uring: COW io_identity on mismatch
  io_uring: move io identity items into separate struct
  io_uring: rely solely on work flags to determine personality.
  io_uring: pass required context in as flags
  io-wq: assign NUMA node locality if appropriate
  io_uring: fix error path cleanup in io_sqe_files_register()
  Revert "io_uring: mark io_uring_fops/io_op_defs as __read_mostly"
  io_uring: fix REQ_F_COMP_LOCKED by killing it
  io_uring: dig out COMP_LOCK from deep call chain
  io_uring: don't put a poll req under spinlock
  io_uring: don't unnecessarily clear F_LINK_TIMEOUT
  io_uring: don't set COMP_LOCKED if won't put
  ...
diff --git a/.clang-format b/.clang-format
index 6cbd6ee..10dc5a9 100644
--- a/.clang-format
+++ b/.clang-format
@@ -429,6 +429,7 @@
   - 'rbtree_postorder_for_each_entry_safe'
   - 'rdma_for_each_block'
   - 'rdma_for_each_port'
+  - 'rdma_umem_for_each_dma_block'
   - 'resource_list_for_each_entry'
   - 'resource_list_for_each_entry_safe'
   - 'rhl_for_each_entry_rcu'
diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband
index 96dfe19..87b11f9 100644
--- a/Documentation/ABI/stable/sysfs-class-infiniband
+++ b/Documentation/ABI/stable/sysfs-class-infiniband
@@ -258,23 +258,6 @@
 		userspace ABI compatibility of umad & issm devices.
 
 
-What:		/sys/class/infiniband_cm/ucmN/ibdev
-Date:		Oct, 2005
-KernelVersion:	v2.6.14
-Contact:	linux-rdma@vger.kernel.org
-Description:
-		(RO) Display Infiniband (IB) device name
-
-
-What:		/sys/class/infiniband_cm/abi_version
-Date:		Oct, 2005
-KernelVersion:	v2.6.14
-Contact:	linux-rdma@vger.kernel.org
-Description:
-		(RO) Value is incremented if any changes are made that break
-		userspace ABI compatibility of ucm devices.
-
-
 What:		/sys/class/infiniband_verbs/uverbsN/ibdev
 What:		/sys/class/infiniband_verbs/uverbsN/abi_version
 Date:		Sept, 2005
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 40213c7..dbccb2f 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -34,7 +34,7 @@
 		Describes the main type of the supply.
 
 		Access: Read
-		Valid values: "Battery", "UPS", "Mains", "USB"
+		Valid values: "Battery", "UPS", "Mains", "USB", "Wireless"
 
 ===== Battery Properties =====
 
@@ -108,7 +108,8 @@
 		which they average readings to smooth out the reported value.
 
 		Access: Read
-		Valid values: Represented in microamps
+		Valid values: Represented in microamps. Negative values are used
+		for discharging batteries, positive values for charging batteries.
 
 What:		/sys/class/power_supply/<supply_name>/current_max
 Date:		October 2010
@@ -127,7 +128,8 @@
 		This value is not averaged/smoothed.
 
 		Access: Read
-		Valid values: Represented in microamps
+		Valid values: Represented in microamps. Negative values are used
+		for discharging batteries, positive values for charging batteries.
 
 What:		/sys/class/power_supply/<supply_name>/charge_control_limit
 Date:		Oct 2012
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
index 4a48e20..f4efd68 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -963,7 +963,7 @@
 ``->dynticks_nesting`` field is incremented up from zero, the
 ``->dynticks_nmi_nesting`` field is set to a large positive number, and
 whenever the ``->dynticks_nesting`` field is decremented down to zero,
-the the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
+the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
 the number of misnested interrupts is not sufficient to overflow the
 counter, this approach corrects the ``->dynticks_nmi_nesting`` field
 every time the corresponding CPU enters the idle loop from process
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 8f41ad0..1ae79a1 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -2162,7 +2162,7 @@
    this sort of thing.
 #. If a CPU is in a portion of the kernel that is absolutely positively
    no-joking guaranteed to never execute any RCU read-side critical
-   sections, and RCU believes this CPU to to be idle, no problem. This
+   sections, and RCU believes this CPU to be idle, no problem. This
    sort of thing is used by some architectures for light-weight
    exception handlers, which can then avoid the overhead of
    ``rcu_irq_enter()`` and ``rcu_irq_exit()`` at exception entry and
@@ -2431,7 +2431,7 @@
 not have this property, given that any point in the code outside of an
 RCU read-side critical section can be a quiescent state. Therefore,
 *RCU-sched* was created, which follows “classic” RCU in that an
-RCU-sched grace period waits for for pre-existing interrupt and NMI
+RCU-sched grace period waits for pre-existing interrupt and NMI
 handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and
 RCU-sched APIs have identical implementations, while kernels built with
 ``CONFIG_PREEMPT=y`` provide a separate implementation for each.
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index c7f147b..fb3ff76 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -360,7 +360,7 @@
 
 There are at least three flavors of RCU usage in the Linux kernel. The diagram
 above shows the most common one. On the updater side, the rcu_assign_pointer(),
-sychronize_rcu() and call_rcu() primitives used are the same for all three
+synchronize_rcu() and call_rcu() primitives used are the same for all three
 flavors. However for protection (on the reader side), the primitives used vary
 depending on the flavor:
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d246ad4..02d4adb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3095,6 +3095,10 @@
 			and gids from such clients.  This is intended to ease
 			migration from NFSv2/v3.
 
+	nmi_backtrace.backtrace_idle [KNL]
+			Dump stacks even of idle CPUs in response to an
+			NMI stack-backtrace request.
+
 	nmi_debug=	[KNL,SH] Specify one or more actions to take
 			when a NMI is triggered.
 			Format: [state][,regs][,debounce][,die]
@@ -4174,46 +4178,55 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcutree.rcu_unlock_delay= [KNL]
+			In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+			this specifies an rcu_read_unlock()-time delay
+			in microseconds.  This defaults to zero.
+			Larger delays increase the probability of
+			catching RCU pointer leaks, that is, buggy use
+			of RCU-protected pointers after the relevant
+			rcu_read_unlock() has completed.
+
 	rcutree.sysrq_rcu= [KNL]
 			Commandeer a sysrq key to dump out Tree RCU's
 			rcu_node tree with an eye towards determining
 			why a new grace period has not yet started.
 
-	rcuperf.gp_async= [KNL]
+	rcuscale.gp_async= [KNL]
 			Measure performance of asynchronous
 			grace-period primitives such as call_rcu().
 
-	rcuperf.gp_async_max= [KNL]
+	rcuscale.gp_async_max= [KNL]
 			Specify the maximum number of outstanding
 			callbacks per writer thread.  When a writer
 			thread exceeds this limit, it invokes the
 			corresponding flavor of rcu_barrier() to allow
 			previously posted callbacks to drain.
 
-	rcuperf.gp_exp= [KNL]
+	rcuscale.gp_exp= [KNL]
 			Measure performance of expedited synchronous
 			grace-period primitives.
 
-	rcuperf.holdoff= [KNL]
+	rcuscale.holdoff= [KNL]
 			Set test-start holdoff period.  The purpose of
 			this parameter is to delay the start of the
 			test until boot completes in order to avoid
 			interference.
 
-	rcuperf.kfree_rcu_test= [KNL]
+	rcuscale.kfree_rcu_test= [KNL]
 			Set to measure performance of kfree_rcu() flooding.
 
-	rcuperf.kfree_nthreads= [KNL]
+	rcuscale.kfree_nthreads= [KNL]
 			The number of threads running loops of kfree_rcu().
 
-	rcuperf.kfree_alloc_num= [KNL]
+	rcuscale.kfree_alloc_num= [KNL]
 			Number of allocations and frees done in an iteration.
 
-	rcuperf.kfree_loops= [KNL]
-			Number of loops doing rcuperf.kfree_alloc_num number
+	rcuscale.kfree_loops= [KNL]
+			Number of loops doing rcuscale.kfree_alloc_num number
 			of allocations and frees.
 
-	rcuperf.nreaders= [KNL]
+	rcuscale.nreaders= [KNL]
 			Set number of RCU readers.  The value -1 selects
 			N, where N is the number of CPUs.  A value
 			"n" less than -1 selects N-n+1, where N is again
@@ -4222,23 +4235,23 @@
 			A value of "n" less than or equal to -N selects
 			a single reader.
 
-	rcuperf.nwriters= [KNL]
+	rcuscale.nwriters= [KNL]
 			Set number of RCU writers.  The values operate
-			the same as for rcuperf.nreaders.
+			the same as for rcuscale.nreaders.
 			N, where N is the number of CPUs
 
-	rcuperf.perf_type= [KNL]
+	rcuscale.perf_type= [KNL]
 			Specify the RCU implementation to test.
 
-	rcuperf.shutdown= [KNL]
+	rcuscale.shutdown= [KNL]
 			Shut the system down after performance tests
 			complete.  This is useful for hands-off automated
 			testing.
 
-	rcuperf.verbose= [KNL]
+	rcuscale.verbose= [KNL]
 			Enable additional printk() statements.
 
-	rcuperf.writer_holdoff= [KNL]
+	rcuscale.writer_holdoff= [KNL]
 			Write-side holdoff between grace periods,
 			in microseconds.  The default of zero says
 			no holdoff.
@@ -4291,6 +4304,18 @@
 			are zero, rcutorture acts as if is interpreted
 			they are all non-zero.
 
+	rcutorture.irqreader= [KNL]
+			Run RCU readers from irq handlers, or, more
+			accurately, from a timer handler.  Not all RCU
+			flavors take kindly to this sort of thing.
+
+	rcutorture.leakpointer= [KNL]
+			Leak an RCU-protected pointer out of the reader.
+			This can of course result in splats, and is
+			intended to test the ability of things like
+			CONFIG_RCU_STRICT_GRACE_PERIOD=y to detect
+			such leaks.
+
 	rcutorture.n_barrier_cbs= [KNL]
 			Set callbacks/threads for rcu_barrier() testing.
 
@@ -4512,8 +4537,8 @@
 	refscale.shutdown= [KNL]
 			Shut down the system at the end of the performance
 			test.  This defaults to 1 (shut it down) when
-			rcuperf is built into the kernel and to 0 (leave
-			it running) when rcuperf is built as a module.
+			refscale is built into the kernel and to 0 (leave
+			it running) when refscale is built as a module.
 
 	refscale.verbose= [KNL]
 			Enable additional printk() statements.
@@ -4659,6 +4684,98 @@
 			Format: integer between 0 and 10
 			Default is 0.
 
+	scftorture.holdoff= [KNL]
+			Number of seconds to hold off before starting
+			test.  Defaults to zero for module insertion and
+			to 10 seconds for built-in smp_call_function()
+			tests.
+
+	scftorture.longwait= [KNL]
+			Request ridiculously long waits randomly selected
+			up to the chosen limit in seconds.  Zero (the
+			default) disables this feature.  Please note
+			that requesting even small non-zero numbers of
+			seconds can result in RCU CPU stall warnings,
+			softlockup complaints, and so on.
+
+	scftorture.nthreads= [KNL]
+			Number of kthreads to spawn to invoke the
+			smp_call_function() family of functions.
+			The default of -1 specifies a number of kthreads
+			equal to the number of CPUs.
+
+	scftorture.onoff_holdoff= [KNL]
+			Number seconds to wait after the start of the
+			test before initiating CPU-hotplug operations.
+
+	scftorture.onoff_interval= [KNL]
+			Number seconds to wait between successive
+			CPU-hotplug operations.  Specifying zero (which
+			is the default) disables CPU-hotplug operations.
+
+	scftorture.shutdown_secs= [KNL]
+			The number of seconds following the start of the
+			test after which to shut down the system.  The
+			default of zero avoids shutting down the system.
+			Non-zero values are useful for automated tests.
+
+	scftorture.stat_interval= [KNL]
+			The number of seconds between outputting the
+			current test statistics to the console.  A value
+			of zero disables statistics output.
+
+	scftorture.stutter_cpus= [KNL]
+			The number of jiffies to wait between each change
+			to the set of CPUs under test.
+
+	scftorture.use_cpus_read_lock= [KNL]
+			Use use_cpus_read_lock() instead of the default
+			preempt_disable() to disable CPU hotplug
+			while invoking one of the smp_call_function*()
+			functions.
+
+	scftorture.verbose= [KNL]
+			Enable additional printk() statements.
+
+	scftorture.weight_single= [KNL]
+			The probability weighting to use for the
+			smp_call_function_single() function with a zero
+			"wait" parameter.  A value of -1 selects the
+			default if all other weights are -1.  However,
+			if at least one weight has some other value, a
+			value of -1 will instead select a weight of zero.
+
+	scftorture.weight_single_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_single() function with a
+			non-zero "wait" parameter.  See weight_single.
+
+	scftorture.weight_many= [KNL]
+			The probability weighting to use for the
+			smp_call_function_many() function with a zero
+			"wait" parameter.  See weight_single.
+			Note well that setting a high probability for
+			this weighting can place serious IPI load
+			on the system.
+
+	scftorture.weight_many_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_many() function with a
+			non-zero "wait" parameter.  See weight_single
+			and weight_many.
+
+	scftorture.weight_all= [KNL]
+			The probability weighting to use for the
+			smp_call_function_all() function with a zero
+			"wait" parameter.  See weight_single and
+			weight_many.
+
+	scftorture.weight_all_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_all() function with a
+			non-zero "wait" parameter.  See weight_single
+			and weight_many.
+
 	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.
@@ -5853,6 +5970,14 @@
 			improve timer resolution at the expense of processing
 			more timer interrupts.
 
+	xen.event_eoi_delay=	[XEN]
+			How long to delay EOI handling in case of event
+			storms (jiffies). Default is 10.
+
+	xen.event_loop_timeout=	[XEN]
+			After which time (jiffies) the event handling loop
+			should start to delay EOI handling. Default is 2.
+
 	nopv=		[X86,XEN,KVM,HYPER_V,VMWARE]
 			Disables the PV optimizations forcing the guest to run
 			as generic guest with no PV drivers. Currently support
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index f461d6c..86de8a1 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -210,6 +210,28 @@
 	inconsistent namespace presentation during or after a
 	failover event.
 
+Deprecation of V4 Format
+========================
+
+The V4 filesystem format lacks certain features that are supported by
+the V5 format, such as metadata checksumming, strengthened metadata
+verification, and the ability to store timestamps past the year 2038.
+Because of this, the V4 format is deprecated.  All users should upgrade
+by backing up their files, reformatting, and restoring from the backup.
+
+Administrators and users can detect a V4 filesystem by running xfs_info
+against a filesystem mountpoint and checking for a string containing
+"crc=".  If no such string is found, please upgrade xfsprogs to the
+latest version and try again.
+
+The deprecation will take place in two parts.  Support for mounting V4
+filesystems can now be disabled at kernel build time via Kconfig option.
+The option will default to yes until September 2025, at which time it
+will be changed to default to no.  In September 2030, support will be
+removed from the codebase entirely.
+
+Note: Distributors may choose to withdraw V4 format support earlier than
+the dates listed above.
 
 Deprecated Mount Options
 ========================
@@ -217,6 +239,9 @@
 ===========================     ================
   Name				Removal Schedule
 ===========================     ================
+Mounting with V4 filesystem     September 2030
+ikeep/noikeep			September 2025
+attr2/noattr2			September 2025
 ===========================     ================
 
 
@@ -331,7 +356,12 @@
 Deprecated Sysctls
 ==================
 
-None at present.
+===========================     ================
+  Name				Removal Schedule
+===========================     ================
+fs.xfs.irix_sgid_inherit        September 2025
+fs.xfs.irix_symlink_mode        September 2025
+===========================     ================
 
 
 Removed Sysctls
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
index e93606e..c234a3a 100644
--- a/Documentation/dev-tools/kunit/index.rst
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -11,6 +11,7 @@
 	usage
 	kunit-tool
 	api/index
+	style
 	faq
 
 What is KUnit?
diff --git a/Documentation/dev-tools/kunit/style.rst b/Documentation/dev-tools/kunit/style.rst
new file mode 100644
index 0000000..da1d6f0
--- /dev/null
+++ b/Documentation/dev-tools/kunit/style.rst
@@ -0,0 +1,205 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Test Style and Nomenclature
+===========================
+
+To make finding, writing, and using KUnit tests as simple as possible, it's
+strongly encouraged that they are named and written according to the guidelines
+below. While it's possible to write KUnit tests which do not follow these rules,
+they may break some tooling, may conflict with other tests, and may not be run
+automatically by testing systems.
+
+It's recommended that you only deviate from these guidelines when:
+
+1. Porting tests to KUnit which are already known with an existing name, or
+2. Writing tests which would cause serious problems if automatically run (e.g.,
+   non-deterministically producing false positives or negatives, or taking an
+   extremely long time to run).
+
+Subsystems, Suites, and Tests
+=============================
+
+In order to make tests as easy to find as possible, they're grouped into suites
+and subsystems. A test suite is a group of tests which test a related area of
+the kernel, and a subsystem is a set of test suites which test different parts
+of the same kernel subsystem or driver.
+
+Subsystems
+----------
+
+Every test suite must belong to a subsystem. A subsystem is a collection of one
+or more KUnit test suites which test the same driver or part of the kernel. A
+rule of thumb is that a test subsystem should match a single kernel module. If
+the code being tested can't be compiled as a module, in many cases the subsystem
+should correspond to a directory in the source tree or an entry in the
+MAINTAINERS file. If unsure, follow the conventions set by tests in similar
+areas.
+
+Test subsystems should be named after the code being tested, either after the
+module (wherever possible), or after the directory or files being tested. Test
+subsystems should be named to avoid ambiguity where necessary.
+
+If a test subsystem name has multiple components, they should be separated by
+underscores. *Do not* include "test" or "kunit" directly in the subsystem name
+unless you are actually testing other tests or the kunit framework itself.
+
+Example subsystems could be:
+
+``ext4``
+  Matches the module and filesystem name.
+``apparmor``
+  Matches the module name and LSM name.
+``kasan``
+  Common name for the tool, prominent part of the path ``mm/kasan``
+``snd_hda_codec_hdmi``
+  Has several components (``snd``, ``hda``, ``codec``, ``hdmi``) separated by
+  underscores. Matches the module name.
+
+Avoid names like these:
+
+``linear-ranges``
+  Names should use underscores, not dashes, to separate words. Prefer
+  ``linear_ranges``.
+``qos-kunit-test``
+  As well as using underscores, this name should not have "kunit-test" as a
+  suffix, and ``qos`` is ambiguous as a subsystem name. ``power_qos`` would be a
+  better name.
+``pc_parallel_port``
+  The corresponding module name is ``parport_pc``, so this subsystem should also
+  be named ``parport_pc``.
+
+.. note::
+        The KUnit API and tools do not explicitly know about subsystems. They're
+        simply a way of categorising test suites and naming modules which
+        provides a simple, consistent way for humans to find and run tests. This
+        may change in the future, though.
+
+Suites
+------
+
+KUnit tests are grouped into test suites, which cover a specific area of
+functionality being tested. Test suites can have shared initialisation and
+shutdown code which is run for all tests in the suite.
+Not all subsystems will need to be split into multiple test suites (e.g. simple drivers).
+
+Test suites are named after the subsystem they are part of. If a subsystem
+contains several suites, the specific area under test should be appended to the
+subsystem name, separated by an underscore.
+
+In the event that there are multiple types of test using KUnit within a
+subsystem (e.g., both unit tests and integration tests), they should be put into
+separate suites, with the type of test as the last element in the suite name.
+Unless these tests are actually present, avoid using ``_test``, ``_unittest`` or
+similar in the suite name.
+
+The full test suite name (including the subsystem name) should be specified as
+the ``.name`` member of the ``kunit_suite`` struct, and forms the base for the
+module name (see below).
+
+Example test suites could include:
+
+``ext4_inode``
+  Part of the ``ext4`` subsystem, testing the ``inode`` area.
+``kunit_try_catch``
+  Part of the ``kunit`` implementation itself, testing the ``try_catch`` area.
+``apparmor_property_entry``
+  Part of the ``apparmor`` subsystem, testing the ``property_entry`` area.
+``kasan``
+  The ``kasan`` subsystem has only one suite, so the suite name is the same as
+  the subsystem name.
+
+Avoid names like:
+
+``ext4_ext4_inode``
+  There's no reason to state the subsystem twice.
+``property_entry``
+  The suite name is ambiguous without the subsystem name.
+``kasan_integration_test``
+  Because there is only one suite in the ``kasan`` subsystem, the suite should
+  just be called ``kasan``. There's no need to redundantly add
+  ``integration_test``. Should a separate test suite with, for example, unit
+  tests be added, then that suite could be named ``kasan_unittest`` or similar.
+
+Test Cases
+----------
+
+Individual tests consist of a single function which tests a constrained
+codepath, property, or function. In the test output, individual tests' results
+will show up as subtests of the suite's results.
+
+Tests should be named after what they're testing. This is often the name of the
+function being tested, with a description of the input or codepath being tested.
+As tests are C functions, they should be named and written in accordance with
+the kernel coding style.
+
+.. note::
+        As tests are themselves functions, their names cannot conflict with
+        other C identifiers in the kernel. This may require some creative
+        naming. It's a good idea to make your test functions `static` to avoid
+        polluting the global namespace.
+
+Example test names include:
+
+``unpack_u32_with_null_name``
+  Tests the ``unpack_u32`` function when a NULL name is passed in.
+``test_list_splice``
+  Tests the ``list_splice`` macro. It has the prefix ``test_`` to avoid a
+  name conflict with the macro itself.
+
+
+Should it be necessary to refer to a test outside the context of its test suite,
+the *fully-qualified* name of a test should be the suite name followed by the
+test name, separated by a colon (i.e. ``suite:test``).
+
+Test Kconfig Entries
+====================
+
+Every test suite should be tied to a Kconfig entry.
+
+This Kconfig entry must:
+
+* be named ``CONFIG_<name>_KUNIT_TEST``: where <name> is the name of the test
+  suite.
+* be listed either alongside the config entries for the driver/subsystem being
+  tested, or be under [Kernel Hacking]→[Kernel Testing and Coverage]
+* depend on ``CONFIG_KUNIT``
+* be visible only if ``CONFIG_KUNIT_ALL_TESTS`` is not enabled.
+* have a default value of ``CONFIG_KUNIT_ALL_TESTS``.
+* have a brief description of KUnit in the help text
+
+Unless there's a specific reason not to (e.g. the test is unable to be built as
+a module), Kconfig entries for tests should be tristate.
+
+An example Kconfig entry:
+
+.. code-block:: none
+
+        config FOO_KUNIT_TEST
+                tristate "KUnit test for foo" if !KUNIT_ALL_TESTS
+                depends on KUNIT
+                default KUNIT_ALL_TESTS
+                help
+                    This builds unit tests for foo.
+
+                    For more information on KUnit and unit tests in general, please refer
+                    to the KUnit documentation in Documentation/dev-tools/kunit
+
+                    If unsure, say N
+
+
+Test File and Module Names
+==========================
+
+KUnit tests can often be compiled as a module. These modules should be named
+after the test suite, followed by ``_test``. If this is likely to conflict with
+non-KUnit tests, the suffix ``_kunit`` can also be used.
+
+The easiest way of achieving this is to name the file containing the test suite
+``<suite>_test.c`` (or, as above, ``<suite>_kunit.c``). This file should be
+placed next to the code under test.
+
+If the suite name contains some or all of the name of the test's parent
+directory, it may make sense to modify the source filename to reduce redundancy.
+For example, a ``foo_firmware`` suite could be in the ``foo/firmware_test.c``
+file.
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 3c3fe8b..961d3ea 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -211,6 +211,11 @@
 .. note::
    A test case will only be run if it is associated with a test suite.
 
+``kunit_test_suite(...)`` is a macro which tells the linker to put the specified
+test suite in a special linker section so that it can be run by KUnit either
+after late_init, or when the test module is loaded (depending on whether the
+test was built in or not).
+
 For more information on these types of things see the :doc:`api/test`.
 
 Isolating Behavior
diff --git a/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
new file mode 100644
index 0000000..d43791a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/arm,mhu.yaml
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mailbox/arm,mhu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM MHU Mailbox Controller
+
+maintainers:
+  - Jassi Brar <jaswinder.singh@linaro.org>
+
+description: |
+  The ARM's Message-Handling-Unit (MHU) is a mailbox controller that has 3
+  independent channels/links to communicate with remote processor(s).  MHU links
+  are hardwired on a platform. A link raises interrupt for any received data.
+  However, there is no specified way of knowing if the sent data has been read
+  by the remote. This driver assumes the sender polls STAT register and the
+  remote clears it after having read the data.  The last channel is specified to
+  be a 'Secure' resource, hence can't be used by Linux running NS.
+
+  The MHU hardware also allows operations in doorbell mode. The MHU drives the
+  interrupt signal using a 32-bit register, with all 32-bits logically ORed
+  together. It provides a set of registers to enable software to set, clear and
+  check the status of each of the bits of this register independently. The use
+  of 32 bits per interrupt line enables software to provide more information
+  about the source of the interrupt. For example, each bit of the register can
+  be associated with a type of event that can contribute to raising the
+  interrupt. Each of the 32-bits can be used as "doorbell" to alert the remote
+  processor.
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - arm,mhu
+          - arm,mhu-doorbell
+  required:
+    - compatible
+
+properties:
+  compatible:
+    oneOf:
+      - description: Data transfer mode
+        items:
+          - const: arm,mhu
+          - const: arm,primecell
+
+      - description: Doorbell mode
+        items:
+          - const: arm,mhu-doorbell
+          - const: arm,primecell
+
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: low-priority non-secure
+      - description: high-priority non-secure
+      - description: Secure
+    maxItems: 3
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: apb_pclk
+
+  '#mbox-cells':
+    description: |
+      Set to 1 in data transfer mode and represents index of the channel.
+      Set to 2 in doorbell mode and represents index of the channel and doorbell
+      number.
+    enum: [ 1, 2 ]
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#mbox-cells'
+
+additionalProperties: false
+
+examples:
+  # Data transfer mode.
+  - |
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        mhuA: mailbox@2b1f0000 {
+            #mbox-cells = <1>;
+            compatible = "arm,mhu", "arm,primecell";
+            reg = <0 0x2b1f0000 0 0x1000>;
+            interrupts = <0 36 4>, /* LP-NonSecure */
+                         <0 35 4>, /* HP-NonSecure */
+                         <0 37 4>; /* Secure */
+            clocks = <&clock 0 2 1>;
+            clock-names = "apb_pclk";
+        };
+
+        mhu_client_scb: scb@2e000000 {
+            compatible = "fujitsu,mb86s70-scb-1.0";
+            reg = <0 0x2e000000 0 0x4000>;
+            mboxes = <&mhuA 1>; /* HP-NonSecure */
+        };
+    };
+
+  # Doorbell mode.
+  - |
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        mhuB: mailbox@2b2f0000 {
+            #mbox-cells = <2>;
+            compatible = "arm,mhu-doorbell", "arm,primecell";
+            reg = <0 0x2b2f0000 0 0x1000>;
+            interrupts = <0 36 4>, /* LP-NonSecure */
+                         <0 35 4>, /* HP-NonSecure */
+                         <0 37 4>; /* Secure */
+            clocks = <&clock 0 2 1>;
+            clock-names = "apb_pclk";
+        };
+
+        mhu_client_scpi: scpi@2f000000 {
+            compatible = "arm,scpi";
+            reg = <0 0x2f000000 0 0x200>;
+            mboxes = <&mhuB 1 4>; /* HP-NonSecure, 5th doorbell */
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mailbox/arm-mhu.txt b/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
deleted file mode 100644
index 4971f03..0000000
--- a/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-ARM MHU Mailbox Driver
-======================
-
-The ARM's Message-Handling-Unit (MHU) is a mailbox controller that has
-3 independent channels/links to communicate with remote processor(s).
- MHU links are hardwired on a platform. A link raises interrupt for any
-received data. However, there is no specified way of knowing if the sent
-data has been read by the remote. This driver assumes the sender polls
-STAT register and the remote clears it after having read the data.
-The last channel is specified to be a 'Secure' resource, hence can't be
-used by Linux running NS.
-
-Mailbox Device Node:
-====================
-
-Required properties:
---------------------
-- compatible:		Shall be "arm,mhu" & "arm,primecell"
-- reg:			Contains the mailbox register address range (base
-			address and length)
-- #mbox-cells		Shall be 1 - the index of the channel needed.
-- interrupts:		Contains the interrupt information corresponding to
-			each of the 3 links of MHU.
-
-Example:
---------
-
-	mhu: mailbox@2b1f0000 {
-		#mbox-cells = <1>;
-		compatible = "arm,mhu", "arm,primecell";
-		reg = <0 0x2b1f0000 0x1000>;
-		interrupts = <0 36 4>, /* LP-NonSecure */
-			     <0 35 4>, /* HP-NonSecure */
-			     <0 37 4>; /* Secure */
-		clocks = <&clock 0 2 1>;
-		clock-names = "apb_pclk";
-	};
-
-	mhu_client: scb@2e000000 {
-		compatible = "fujitsu,mb86s70-scb-1.0";
-		reg = <0 0x2e000000 0x4000>;
-		mboxes = <&mhu 1>; /* HP-NonSecure */
-	};
diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
index 274bbe6..b29050f 100644
--- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
@@ -55,6 +55,37 @@
         $ref: /schemas/types.yaml#/definitions/string
         enum: [none, soft, hw, hw_syndrome, hw_oob_first, on-die]
 
+      nand-ecc-engine:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/phandle
+        description: |
+          A phandle on the hardware ECC engine if any. There are
+          basically three possibilities:
+          1/ The ECC engine is part of the NAND controller, in this
+          case the phandle should reference the parent node.
+          2/ The ECC engine is part of the NAND part (on-die), in this
+          case the phandle should reference the node itself.
+          3/ The ECC engine is external, in this case the phandle should
+          reference the specific ECC engine node.
+
+      nand-use-soft-ecc-engine:
+        type: boolean
+        description: Use a software ECC engine.
+
+      nand-no-ecc-engine:
+        type: boolean
+        description: Do not use any ECC correction.
+
+      nand-ecc-placement:
+        allOf:
+          - $ref: /schemas/types.yaml#/definitions/string
+          - enum: [ oob, interleaved ]
+        description:
+          Location of the ECC bytes. This location is unknown by default
+          but can be explicitly set to "oob", if all ECC bytes are
+          known to be stored in the OOB area, or "interleaved" if ECC
+          bytes will be interleaved with regular data in the main area.
+
       nand-ecc-algo:
         description:
           Desired ECC algorithm.
diff --git a/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt b/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
index 1b4213e..4d530d8 100644
--- a/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
+++ b/Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
@@ -1,10 +1,13 @@
 Microsemi Ocelot reset controller
 
 The DEVCPU_GCB:CHIP_REGS have a SOFT_RST register that can be used to reset the
-SoC MIPS core.
+SoC core.
+
+The reset registers are both present in the MSCC vcoreiii MIPS and
+microchip Sparx5 armv8 SoC's.
 
 Required Properties:
- - compatible: "mscc,ocelot-chip-reset"
+ - compatible: "mscc,ocelot-chip-reset" or "microchip,sparx5-chip-reset"
 
 Example:
 	reset@1070008 {
diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.txt b/Documentation/devicetree/bindings/power/reset/reboot-mode.txt
deleted file mode 100644
index de34f27..0000000
--- a/Documentation/devicetree/bindings/power/reset/reboot-mode.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Generic reboot mode core map driver
-
-This driver get reboot mode arguments and call the write
-interface to store the magic value in special register
-or ram. Then the bootloader can read it and take different
-action according to the argument stored.
-
-All mode properties are vendor specific, it is a indication to tell
-the bootloader what to do when the system reboots, and should be named
-as mode-xxx = <magic> (xxx is mode name, magic should be a none-zero value).
-
-For example modes common on Android platform:
-- mode-normal: Normal reboot mode, system reboot with command "reboot".
-- mode-recovery: Android Recovery mode, it is a mode to format the device or update a new image.
-- mode-bootloader: Android fastboot mode, it's a mode to re-flash partitions on the Android based device.
-- mode-loader: A bootloader mode, it's a mode used to download image on Rockchip platform,
-	       usually used in development.
-
-Example:
-	reboot-mode {
-		mode-normal = <BOOT_NORMAL>;
-		mode-recovery = <BOOT_RECOVERY>;
-		mode-bootloader = <BOOT_FASTBOOT>;
-		mode-loader = <BOOT_BL_DOWNLOAD>;
-	}
diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
new file mode 100644
index 0000000..a6c9102
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/reboot-mode.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic reboot mode core map
+
+maintainers:
+  - Andy Yan <andy.yan@rock-chips.com>
+
+description: |
+  This driver get reboot mode arguments and call the write
+  interface to store the magic value in special register
+  or ram. Then the bootloader can read it and take different
+  action according to the argument stored.
+
+  All mode properties are vendor specific, it is a indication to tell
+  the bootloader what to do when the system reboots, and should be named
+  as mode-xxx = <magic> (xxx is mode name, magic should be a non-zero value).
+
+  For example, modes common Android platform are:
+    - normal: Normal reboot mode, system reboot with command "reboot".
+    - recovery: Android Recovery mode, it is a mode to format the device or update a new image.
+    - bootloader: Android fastboot mode, it's a mode to re-flash partitions on the Android based device.
+    - loader: A bootloader mode, it's a mode used to download image on Rockchip platform,
+              usually used in development.
+
+properties:
+  mode-normal:
+      $ref: /schemas/types.yaml#/definitions/uint32
+      description: |
+        Default value to set on a reboot if no command was provided.
+
+patternProperties:
+  "^mode-.*$":
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+examples:
+  - |
+    reboot-mode {
+      mode-normal = <0>;
+      mode-recovery = <1>;
+      mode-bootloader = <2>;
+      mode-loader = <3>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/power/supply/battery.yaml b/Documentation/devicetree/bindings/power/supply/battery.yaml
index 932b736..0c7e2e4 100644
--- a/Documentation/devicetree/bindings/power/supply/battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/battery.yaml
@@ -82,6 +82,27 @@
       An array containing the temperature in degree Celsius,
       for each of the battery capacity lookup table.
 
+  operating-range-celsius:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: operating temperature range of a battery
+    items:
+      - description: minimum temperature at which battery can operate
+      - description: maximum temperature at which battery can operate
+
+  ambient-celsius:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: safe range of ambient temperature
+    items:
+      - description: alert when ambient temperature is lower than this value
+      - description: alert when ambient temperature is higher than this value
+
+  alert-celsius:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: safe range of battery temperature
+    items:
+      - description: alert when battery temperature is lower than this value
+      - description: alert when battery temperature is higher than this value
+
 required:
   - compatible
 
@@ -130,6 +151,9 @@
         /* table for 10 degree Celsius */
         ocv-capacity-table-2 = <4250000 100>, <4200000 95>, <4185000 90>;
         resistance-temp-table = <20 100>, <10 90>, <0 80>, <(-10) 60>;
+        operating-range-celsius = <(-30) 50>;
+        ambient-celsius = <(-5) 50>;
+        alert-celsius = <0 40>;
       };
 
       charger@11 {
diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.txt b/Documentation/devicetree/bindings/power/supply/bq25890.txt
index 3b4c69a..805040c 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25890.txt
+++ b/Documentation/devicetree/bindings/power/supply/bq25890.txt
@@ -33,6 +33,10 @@
 - ti,thermal-regulation-threshold: integer, temperature above which the charge
     current is lowered, to avoid overheating (in degrees Celsius). If omitted,
     the default setting will be used (120 degrees);
+- ti,ibatcomp-micro-ohms: integer, value of a resistor in series with
+    the battery;
+- ti,ibatcomp-clamp-microvolt: integer, maximum charging voltage adjustment due
+    to expected voltage drop on in-series resistor;
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/power/supply/bq25980.yaml b/Documentation/devicetree/bindings/power/supply/bq25980.yaml
new file mode 100644
index 0000000..f6b3dd4
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/bq25980.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 Texas Instruments Incorporated
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/supply/bq25980.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: TI BQ25980 Flash Charger
+
+maintainers:
+  - Dan Murphy <dmurphy@ti.com>
+  - Ricardo Rivera-Matos <r-rivera-matos@ti.com>
+
+description: |
+  The BQ25980, BQ25975, and BQ25960 are a series of flash chargers intended
+  for use in high-power density portable electronics. These inductorless
+  switching chargers can provide over 97% efficiency by making use of the
+  switched capacitor architecture.
+
+allOf:
+  - $ref: power-supply.yaml#
+
+properties:
+  compatible:
+    enum:
+      - ti,bq25980
+      - ti,bq25975
+      - ti,bq25960
+
+  reg:
+    maxItems: 1
+
+  ti,watchdog-timeout-ms:
+    description: |
+      Watchdog timer in milli seconds. 0 disables the watchdog.
+    default: 0
+    minimum: 0
+    maximum: 300000
+    enum: [ 0, 5000, 10000, 50000, 300000]
+
+  ti,sc-ovp-limit-microvolt:
+    description: |
+      Minimum input voltage limit in micro volts with a when the charger is in
+      switch cap mode. 100000 micro volt step.
+    default: 17800000
+    minimum: 14000000
+    maximum: 22000000
+
+  ti,sc-ocp-limit-microamp:
+    description: |
+      Maximum input current limit in micro amps with a 100000 micro amp step.
+    minimum: 100000
+    maximum: 3300000
+
+  ti,bypass-ovp-limit-microvolt:
+    description: |
+      Minimum input voltage limit in micro volts with a when the charger is in
+      switch cap mode. 50000 micro volt step.
+    minimum: 7000000
+    maximum: 12750000
+
+  ti,bypass-ocp-limit-microamp:
+    description: |
+      Maximum input current limit in micro amps with a 100000 micro amp step.
+    minimum: 100000
+    maximum: 3300000
+
+  ti,bypass-enable:
+    type: boolean
+    description: Enables bypass mode at boot time
+
+  interrupts:
+    description: |
+      Indicates that the device state has changed.
+
+  monitored-battery:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: phandle to the battery node being monitored
+
+required:
+  - compatible
+  - reg
+  - monitored-battery
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    bat: battery {
+      compatible = "simple-battery";
+      constant-charge-current-max-microamp = <4000000>;
+      constant-charge-voltage-max-microvolt = <8400000>;
+      precharge-current-microamp = <160000>;
+      charge-term-current-microamp = <160000>;
+    };
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      bq25980: charger@65 {
+          compatible = "ti,bq25980";
+          reg = <0x65>;
+          interrupt-parent = <&gpio1>;
+          interrupts = <16 IRQ_TYPE_EDGE_FALLING>;
+          ti,watchdog-timer = <0>;
+          ti,sc-ocp-limit-microamp = <2000000>;
+          ti,sc-ovp-limit-microvolt = <17800000>;
+          monitored-battery = <&bat>;
+      };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
index 82f6827..45beefc 100644
--- a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
@@ -51,6 +51,7 @@
       - ti,bq27621
       - ti,bq27z561
       - ti,bq28z610
+      - ti,bq34z100
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/power/supply/charger-manager.txt b/Documentation/devicetree/bindings/power/supply/charger-manager.txt
index ec4fe9d..b5ae906 100644
--- a/Documentation/devicetree/bindings/power/supply/charger-manager.txt
+++ b/Documentation/devicetree/bindings/power/supply/charger-manager.txt
@@ -3,24 +3,32 @@
 
 Required properties :
  - compatible : "charger-manager"
- - <>-supply : for regulator consumer
- - cm-num-chargers : number of chargers
+ - <>-supply : for regulator consumer, named according to cm-regulator-name
  - cm-chargers : name of chargers
  - cm-fuel-gauge : name of battery fuel gauge
  - subnode <regulator> :
 	- cm-regulator-name : name of charger regulator
 	- subnode <cable> :
-		- cm-cable-name : name of charger cable
+		- cm-cable-name : name of charger cable - one of USB, USB-HOST,
+			SDP, DCP, CDP, ACA, FAST-CHARGER, SLOW-CHARGER, WPT,
+			PD, DOCK, JIG, or MECHANICAL
 		- cm-cable-extcon : name of extcon dev
 (optional)	- cm-cable-min : minimum current of cable
 (optional)	- cm-cable-max : maximum current of cable
 
 Optional properties :
  - cm-name : charger manager's name (default : "battery")
- - cm-poll-mode : polling mode (enum polling_modes)
- - cm-poll-interval : polling interval
- - cm-battery-stat : battery status (enum data_source)
- - cm-fullbatt-* : data for full battery checking
+ - cm-poll-mode : polling mode - 0 for disabled, 1 for always, 2 for when
+	external power is connected, or 3 for when charging.  If not present,
+	then polling is disabled
+ - cm-poll-interval : polling interval (in ms)
+ - cm-battery-stat : battery status - 0 for battery always present, 1 for no
+	battery, 2 to check presence via fuel gauge, or 3 to check presence
+	via charger
+ - cm-fullbatt-vchkdrop-volt : voltage drop (in uV) before restarting charging
+ - cm-fullbatt-voltage : voltage (in uV) of full battery
+ - cm-fullbatt-soc : state of charge to consider as full battery
+ - cm-fullbatt-capacity : capcity (in uAh) to consider as full battery
  - cm-thermal-zone : name of external thermometer's thermal zone
  - cm-battery-* : threshold battery temperature for charging
 	-cold : critical cold temperature of battery for charging
@@ -29,6 +37,10 @@
 	-temp-diff : temperature difference to allow recharging
  - cm-dis/charging-max = limits of charging duration
 
+Deprecated properties:
+ - cm-num-chargers
+ - cm-fullbatt-vchkdrop-ms
+
 Example :
 	charger-manager@0 {
 		compatible = "charger-manager";
@@ -39,13 +51,11 @@
 		cm-poll-mode = <1>;
 		cm-poll-interval = <30000>;
 
-		cm-fullbatt-vchkdrop-ms = <30000>;
 		cm-fullbatt-vchkdrop-volt = <150000>;
 		cm-fullbatt-soc = <100>;
 
 		cm-battery-stat = <3>;
 
-		cm-num-chargers = <3>;
 		cm-chargers = "charger0", "charger1", "charger2";
 
 		cm-fuel-gauge = "fuelgauge0";
@@ -71,7 +81,7 @@
 				cm-cable-max = <500000>;
 			};
 			cable@1 {
-				cm-cable-name = "TA";
+				cm-cable-name = "SDP";
 				cm-cable-extcon = "extcon-dev.0";
 				cm-cable-min = <650000>;
 				cm-cable-max = <675000>;
diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
index 6244b8e..89f8e2b 100644
--- a/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.yaml
@@ -39,6 +39,25 @@
     maxItems: 1
     description: GPIO indicating the charging status
 
+  charge-current-limit-gpios:
+    minItems: 1
+    maxItems: 32
+    description: GPIOs used for current limiting
+
+  charge-current-limit-mapping:
+    description: List of tuples with current in uA and a GPIO bitmap (in
+      this order). The tuples must be provided in descending order of the
+      current limit.
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      items:
+        - description:
+            Current limit in uA
+        - description:
+            Encoded GPIO setting. Bit 0 represents last GPIO from the
+            charge-current-limit-gpios property. Bit 1 second to last
+            GPIO and so on.
+
 required:
   - compatible
 
@@ -47,6 +66,12 @@
       - gpios
   - required:
       - charge-status-gpios
+  - required:
+      - charge-current-limit-gpios
+
+dependencies:
+  charge-current-limit-gpios: [ charge-current-limit-mapping ]
+  charge-current-limit-mapping: [ charge-current-limit-gpios ]
 
 additionalProperties: false
 
@@ -60,4 +85,10 @@
 
       gpios = <&gpd 28 GPIO_ACTIVE_LOW>;
       charge-status-gpios = <&gpc 27 GPIO_ACTIVE_LOW>;
+
+      charge-current-limit-gpios = <&gpioA 11 GPIO_ACTIVE_HIGH>,
+                                   <&gpioA 12 GPIO_ACTIVE_HIGH>;
+      charge-current-limit-mapping = <2500000 0x00>, // 2.5 A => both GPIOs low
+                                     <700000 0x01>, // 700 mA => GPIO A.12 high
+                                     <0 0x02>; // 0 mA => GPIO A.11 high
     };
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt b/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt
deleted file mode 100644
index 66430bf..0000000
--- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-* Ingenic JZ47xx battery bindings
-
-Required properties:
-
-- compatible: Must be "ingenic,jz4740-battery".
-- io-channels: phandle and IIO specifier pair to the IIO device.
-  Format described in iio-bindings.txt.
-- monitored-battery: phandle to a "simple-battery" compatible node.
-
-The "monitored-battery" property must be a phandle to a node using the format
-described in battery.txt, with the following properties being required:
-
-- voltage-min-design-microvolt: Drained battery voltage.
-- voltage-max-design-microvolt: Fully charged battery voltage.
-
-Example:
-
-#include <dt-bindings/iio/adc/ingenic,adc.h>
-
-simple_battery: battery {
-	compatible = "simple-battery";
-	voltage-min-design-microvolt = <3600000>;
-	voltage-max-design-microvolt = <4200000>;
-};
-
-ingenic_battery {
-	compatible = "ingenic,jz4740-battery";
-	io-channels = <&adc INGENIC_ADC_BATTERY>;
-	io-channel-names = "battery";
-	monitored-battery = <&simple_battery>;
-};
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
new file mode 100644
index 0000000..867e3e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019-2020 Artur Rojek
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/supply/ingenic,battery.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Ingenic JZ47xx battery bindings
+
+maintainers:
+  - Artur Rojek <contact@artur-rojek.eu>
+
+properties:
+  compatible:
+    oneOf:
+      - const: ingenic,jz4740-battery
+      - items:
+        - enum:
+          - ingenic,jz4725b-battery
+          - ingenic,jz4770-battery
+        - const: ingenic,jz4740-battery
+
+  io-channels:
+    maxItems: 1
+
+  io-channel-names:
+    const: battery
+
+  monitored-battery:
+    description: >
+      phandle to a "simple-battery" compatible node.
+
+      This property must be a phandle to a node using the format described
+      in battery.yaml, with the following properties being required:
+      - voltage-min-design-microvolt: drained battery voltage,
+      - voltage-max-design-microvolt: fully charged battery voltage.
+
+required:
+  - compatible
+  - io-channels
+  - io-channel-names
+  - monitored-battery
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/iio/adc/ingenic,adc.h>
+
+    simple_battery: battery {
+            compatible = "simple-battery";
+            voltage-min-design-microvolt = <3600000>;
+            voltage-max-design-microvolt = <4200000>;
+    };
+
+    ingenic-battery {
+            compatible = "ingenic,jz4740-battery";
+            io-channels = <&adc INGENIC_ADC_BATTERY>;
+            io-channel-names = "battery";
+            monitored-battery = <&simple_battery>;
+    };
diff --git a/Documentation/devicetree/bindings/power/supply/max17040_battery.txt b/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
index 4e0186b..c802f66 100644
--- a/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
+++ b/Documentation/devicetree/bindings/power/supply/max17040_battery.txt
@@ -2,7 +2,9 @@
 ~~~~~~~~~~~~~~~~
 
 Required properties :
- - compatible : "maxim,max17040" or "maxim,max77836-battery"
+ - compatible : "maxim,max17040", "maxim,max17041", "maxim,max17043",
+		"maxim,max17044", "maxim,max17048", "maxim,max17049",
+		"maxim,max17058", "maxim,max17059" or "maxim,max77836-battery"
  - reg: i2c slave address
 
 Optional properties :
@@ -11,6 +13,15 @@
 				generated. Can be configured from 1 up to 32
 				(%). If skipped the power up default value of
 				4 (%) will be used.
+- maxim,double-soc : 		Certain devices return double the capacity.
+				Specify this boolean property to divide the
+				reported value in 2 and thus normalize it.
+				SOC == State of Charge == Capacity.
+- maxim,rcomp :			A value to compensate readings for various
+				battery chemistries and operating temperatures.
+				max17040,41 have 2 byte rcomp, default to
+				0x97 0x00. All other devices have one byte
+				rcomp, default to 0x97.
 - interrupts : 			Interrupt line see Documentation/devicetree/
 				bindings/interrupt-controller/interrupts.txt
 - wakeup-source :		This device has wakeup capabilities. Use this
@@ -31,3 +42,11 @@
 		interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
 		wakeup-source;
 	};
+
+	battery-fuel-gauge@36 {
+		compatible = "maxim,max17048";
+		reg = <0x36>;
+		maxim,rcomp = /bits/ 8 <0x56>;
+		maxim,alert-low-soc-level = <10>;
+		maxim,double-soc;
+	};
diff --git a/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
new file mode 100644
index 0000000..193a23a
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/supply/summit,smb347-charger.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Battery charger driver for SMB345, SMB347 and SMB358
+
+maintainers:
+  - David Heidelberg <david@ixit.cz>
+  - Dmitry Osipenko <digetx@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - summit,smb345
+      - summit,smb347
+      - summit,smb358
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  monitored-battery:
+    description: phandle to the battery node
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  summit,enable-usb-charging:
+    type: boolean
+    description: Enable charging through USB.
+
+  summit,enable-otg-charging:
+    type: boolean
+    description: Provide power for USB OTG
+
+  summit,enable-mains-charging:
+    type: boolean
+    description: Enable charging through mains
+
+  summit,enable-charge-control:
+    description: Enable charging control
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum:
+      - 0 # SMB3XX_CHG_ENABLE_SW SW (I2C interface)
+      - 1 # SMB3XX_CHG_ENABLE_PIN_ACTIVE_LOW Pin control (Active Low)
+      - 2 # SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH Pin control (Active High)
+
+  summit,fast-voltage-threshold-microvolt:
+    description: Voltage threshold to transit to fast charge mode (in uV)
+    minimum: 2400000
+    maximum: 3000000
+
+  summit,mains-current-limit-microamp:
+    description: Maximum input current from AC/DC input (in uA)
+
+  summit,usb-current-limit-microamp:
+    description: Maximum input current from USB input (in uA)
+
+  summit,charge-current-compensation-microamp:
+    description: Charge current compensation (in uA)
+
+  summit,chip-temperature-threshold-celsius:
+    description: Chip temperature for thermal regulation in °C.
+    enum: [100, 110, 120, 130]
+
+  summit,soft-compensation-method:
+    description: Soft temperature limit compensation method
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum:
+      - 0 # SMB3XX_SOFT_TEMP_COMPENSATE_NONE Compensation none
+      - 1 # SMB3XX_SOFT_TEMP_COMPENSATE_CURRENT Current compensation
+      - 2 # SMB3XX_SOFT_TEMP_COMPENSATE_VOLTAGE Voltage compensation
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          enum:
+            - summit,smb345
+            - summit,smb358
+
+    then:
+      properties:
+        summit,mains-current-limit-microamp:
+          enum: [ 300000,  500000,  700000, 1000000,
+                 1500000, 1800000, 2000000]
+
+        summit,usb-current-limit-microamp:
+          enum: [ 300000,  500000,  700000, 1000000,
+                 1500000, 1800000, 2000000]
+
+        summit,charge-current-compensation-microamp:
+          enum: [200000, 450000, 600000, 900000]
+
+    else:
+      properties:
+        summit,mains-current-limit-microamp:
+          enum: [ 300000,  500000,  700000,  900000, 1200000,
+                 1500000, 1800000, 2000000, 2200000, 2500000]
+
+        summit,usb-current-limit-microamp:
+          enum: [ 300000,  500000,  700000,  900000, 1200000,
+                 1500000, 1800000, 2000000, 2200000, 2500000]
+
+        summit,charge-current-compensation-microamp:
+          enum: [250000, 700000, 900000, 1200000]
+
+required:
+  - compatible
+  - reg
+
+anyOf:
+  - required:
+      - summit,enable-usb-charging
+  - required:
+      - summit,enable-otg-charging
+  - required:
+      - summit,enable-mains-charging
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/power/summit,smb347-charger.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        charger@7f {
+            compatible = "summit,smb347";
+            reg = <0x7f>;
+
+            summit,enable-charge-control = <SMB3XX_CHG_ENABLE_PIN_ACTIVE_HIGH>;
+            summit,chip-temperature-threshold-celsius = <110>;
+            summit,mains-current-limit-microamp = <2000000>;
+            summit,usb-current-limit-microamp = <500000>;
+            summit,enable-usb-charging;
+            summit,enable-mains-charging;
+
+            monitored-battery = <&battery>;
+        };
+    };
+
+    battery: battery-cell {
+        compatible = "simple-battery";
+        constant-charge-current-max-microamp = <1800000>;
+        operating-range-celsius = <0 45>;
+        alert-celsius = <3 42>;
+    };
diff --git a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
index 44ba676..31edd051 100644
--- a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
+++ b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml
@@ -17,6 +17,7 @@
       - allwinner,sun8i-h3-ths
       - allwinner,sun8i-r40-ths
       - allwinner,sun50i-a64-ths
+      - allwinner,sun50i-a100-ths
       - allwinner,sun50i-h5-ths
       - allwinner,sun50i-h6-ths
 
@@ -61,7 +62,9 @@
       properties:
         compatible:
           contains:
-            const: allwinner,sun50i-h6-ths
+            enum:
+              - allwinner,sun50i-a100-ths
+              - allwinner,sun50i-h6-ths
 
     then:
       properties:
@@ -103,6 +106,7 @@
               - const: allwinner,sun8i-h3-ths
               - const: allwinner,sun8i-r40-ths
               - const: allwinner,sun50i-a64-ths
+              - const: allwinner,sun50i-a100-ths
               - const: allwinner,sun50i-h5-ths
               - const: allwinner,sun50i-h6-ths
 
diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
index b1a55ae..f386f2a 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
@@ -20,6 +20,7 @@
     enum:
       - renesas,r8a774a1-thermal # RZ/G2M
       - renesas,r8a774b1-thermal # RZ/G2N
+      - renesas,r8a774e1-thermal # RZ/G2H
       - renesas,r8a7795-thermal  # R-Car H3
       - renesas,r8a7796-thermal  # R-Car M3-W
       - renesas,r8a77961-thermal # R-Car M3-W+
diff --git a/Documentation/filesystems/fuse.rst b/Documentation/filesystems/fuse.rst
index cd717f9..8120c3c 100644
--- a/Documentation/filesystems/fuse.rst
+++ b/Documentation/filesystems/fuse.rst
@@ -47,7 +47,7 @@
 using the sftp protocol.
 
 The userspace library and utilities are available from the
-`FUSE homepage: <http://fuse.sourceforge.net/>`_
+`FUSE homepage: <https://github.com/libfuse/>`_
 
 Filesystem type
 ===============
diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst
index 6c18bc8..6b213fe 100644
--- a/Documentation/filesystems/zonefs.rst
+++ b/Documentation/filesystems/zonefs.rst
@@ -326,6 +326,21 @@
 read-only zone discovered at run-time, as indicated in the previous section.
 The size of the zone file is left unchanged from its last updated value.
 
+A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
+the number of zones that can be active, that is, zones that are in the
+implicit open, explicit open or closed conditions.  This potential limitation
+translates into a risk for applications to see write IO errors due to this
+limit being exceeded if the zone of a file is not already active when a write
+request is issued by the user.
+
+To avoid these potential errors, the "explicit-open" mount option forces zones
+to be made active using an open zone command when a file is opened for writing
+for the first time. If the zone open command succeeds, the application is then
+guaranteed that write requests can be processed. Conversely, the
+"explicit-open" mount option will result in a zone close command being issued
+to the device on the last close() of a zone file if the zone is not full nor
+empty.
+
 Zonefs User Space Tools
 =======================
 
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 57047dc..1f9ea82 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -206,8 +206,8 @@
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
    :doc: pp_power_profile_mode
 
-*_busy_percent
-~~~~~~~~~~~~~~
+\*_busy_percent
+~~~~~~~~~~~~~~~
 
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
    :doc: gpu_busy_percent
diff --git a/MAINTAINERS b/MAINTAINERS
index b158ec4..17ca7c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4256,7 +4256,6 @@
 CISCO VIC LOW LATENCY NIC DRIVER
 M:	Christian Benvenuti <benve@cisco.com>
 M:	Nelson Escobar <neescoba@cisco.com>
-M:	Parvi Kaustubhi <pkaustub@cisco.com>
 S:	Supported
 F:	drivers/infiniband/hw/usnic/
 
@@ -5232,7 +5231,6 @@
 
 DMA-BUF HEAPS FRAMEWORK
 M:	Sumit Semwal <sumit.semwal@linaro.org>
-R:	Andrew F. Davis <afd@ti.com>
 R:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
 R:	Liam Mark <lmark@codeaurora.org>
 R:	Laura Abbott <labbott@redhat.com>
@@ -6653,13 +6651,6 @@
 S:	Maintained
 F:	drivers/iommu/exynos-iommu.c
 
-EZchip NPS platform support
-M:	Vineet Gupta <vgupta@synopsys.com>
-M:	Ofer Levi <oferle@nvidia.com>
-S:	Supported
-F:	arch/arc/boot/dts/eznps.dts
-F:	arch/arc/plat-eznps
-
 F2FS FILE SYSTEM
 M:	Jaegeuk Kim <jaegeuk@kernel.org>
 M:	Chao Yu <yuchao0@huawei.com>
@@ -7239,7 +7230,7 @@
 M:	Miklos Szeredi <miklos@szeredi.hu>
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
-W:	http://fuse.sourceforge.net/
+W:	https://github.com/libfuse/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
 F:	Documentation/filesystems/fuse.rst
 F:	fs/fuse/
@@ -7793,8 +7784,8 @@
 F:	include/uapi/linux/cciss*.h
 
 HFI1 DRIVER
-M:	Mike Marciniszyn <mike.marciniszyn@intel.com>
-M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
+M:	Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+M:	Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/hfi1
@@ -11613,6 +11604,7 @@
 L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/mips/mscc.txt
+F:	Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
 F:	arch/mips/boot/dts/mscc/
 F:	arch/mips/configs/generic/board-ocelot.config
 F:	arch/mips/generic/board-ocelot.c
@@ -12999,8 +12991,8 @@
 F:	drivers/char/hw_random/optee-rng.c
 
 OPA-VNIC DRIVER
-M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:	Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+M:	Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M:	Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/ulp/opa_vnic
@@ -14301,8 +14293,8 @@
 F:	include/uapi/linux/qemu_fw_cfg.h
 
 QIB DRIVER
-M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:	Mike Marciniszyn <mike.marciniszyn@intel.com>
+M:	Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M:	Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/qib/
@@ -14727,8 +14719,8 @@
 F:	drivers/net/ethernet/rdc/r6040.c
 
 RDMAVT - RDMA verbs software
-M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
-M:	Mike Marciniszyn <mike.marciniszyn@intel.com>
+M:	Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+M:	Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/sw/rdmavt
@@ -17399,7 +17391,7 @@
 F:	drivers/thermal/ti-soc-thermal/
 
 TI BQ27XXX POWER SUPPLY DRIVER
-R:	Andrew F. Davis <afd@ti.com>
+R:	Dan Murphy <dmurphy@ti.com>
 F:	drivers/power/supply/bq27xxx_battery.c
 F:	drivers/power/supply/bq27xxx_battery_i2c.c
 F:	include/linux/power/bq27xxx_battery.h
@@ -17673,8 +17665,9 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
 F:	Documentation/RCU/torture.rst
 F:	kernel/locking/locktorture.c
-F:	kernel/rcu/rcuperf.c
+F:	kernel/rcu/rcuscale.c
 F:	kernel/rcu/rcutorture.c
+F:	kernel/rcu/refscale.c
 F:	kernel/torture.c
 
 TOSHIBA ACPI EXTRAS DRIVER
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index ec8bed9..ee7b01b 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -479,3 +479,4 @@
 547	common	openat2				sys_openat2
 548	common	pidfd_getfd			sys_pidfd_getfd
 549	common	faccessat2			sys_faccessat2
+550	common	process_madvise			sys_process_madvise
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ba00c4e..19f8f23 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -96,8 +96,6 @@
 
 source "arch/arc/plat-tb10x/Kconfig"
 source "arch/arc/plat-axs10x/Kconfig"
-#New platform adds here
-source "arch/arc/plat-eznps/Kconfig"
 source "arch/arc/plat-hsdk/Kconfig"
 
 endmenu
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index d00f8b8..0c6bf0d 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -94,13 +94,8 @@
 core-y				+= arch/arc/plat-sim/
 core-$(CONFIG_ARC_PLAT_TB10X)	+= arch/arc/plat-tb10x/
 core-$(CONFIG_ARC_PLAT_AXS10X)	+= arch/arc/plat-axs10x/
-core-$(CONFIG_ARC_PLAT_EZNPS)	+= arch/arc/plat-eznps/
 core-$(CONFIG_ARC_SOC_HSDK)	+= arch/arc/plat-hsdk/
 
-ifdef CONFIG_ARC_PLAT_EZNPS
-KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include
-endif
-
 drivers-$(CONFIG_OPROFILE)	+= arch/arc/oprofile/
 
 libs-y		+= arch/arc/lib/ $(LIBGCC)
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 79ec27c..2a15160 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -91,7 +91,7 @@ arcpct0: pct {
 	 * avoid duplicating the MB dtsi file given that IRQ from
 	 * this intc to cpu intc are different for axs101 and axs103
 	 */
-	mb_intc: dw-apb-ictl@e0012000 {
+	mb_intc: interrupt-controller@e0012000 {
 		#interrupt-cells = <1>;
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index ac8e1b4..cd1edcf 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -129,7 +129,7 @@ mmc@15000 {
 	 * avoid duplicating the MB dtsi file given that IRQ from
 	 * this intc to cpu intc are different for axs101 and axs103
 	 */
-	mb_intc: dw-apb-ictl@e0012000 {
+	mb_intc: interrupt-controller@e0012000 {
 		#interrupt-cells = <1>;
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 9da21e7..7077938 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -135,7 +135,7 @@ mmc@15000 {
 	 * avoid duplicating the MB dtsi file given that IRQ from
 	 * this intc to cpu intc are different for axs101 and axs103
 	 */
-	mb_intc: dw-apb-ictl@e0012000 {
+	mb_intc: interrupt-controller@e0012000 {
 		#interrupt-cells = <1>;
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0x0 0xe0012000 0x0 0x200 >;
diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts
deleted file mode 100644
index a7e2e8d..0000000
--- a/arch/arc/boot/dts/eznps.dts
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-/dts-v1/;
-
-/ {
-	compatible = "ezchip,arc-nps";
-	#address-cells = <1>;
-	#size-cells = <1>;
-	interrupt-parent = <&intc>;
-	present-cpus = "0-1,16-17";
-	possible-cpus = "0-4095";
-
-	aliases {
-		ethernet0 = &gmac0;
-	};
-
-	chosen {
-		bootargs = "earlycon=uart8250,mmio32be,0xf7209000,115200n8 console=ttyS0,115200n8";
-	};
-
-	memory {
-		device_type = "memory";
-		reg = <0x80000000 0x20000000>;	/* 512M */
-	};
-
-	clocks {
-		sysclk: sysclk {
-			compatible = "fixed-clock";
-			#clock-cells = <0>;
-			clock-frequency = <83333333>;
-		};
-	};
-
-	soc {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-
-		/* child and parent address space 1:1 mapped */
-		ranges;
-
-		intc: interrupt-controller {
-			compatible = "ezchip,nps400-ic";
-			interrupt-controller;
-			#interrupt-cells = <1>;
-		};
-
-		timer0: timer_clkevt {
-			compatible = "snps,arc-timer";
-			interrupts = <3>;
-			clocks = <&sysclk>;
-		};
-
-		timer1: timer_clksrc {
-			compatible = "ezchip,nps400-timer";
-			clocks = <&sysclk>;
-			clock-names="sysclk";
-		};
-
-		uart@f7209000 {
-			compatible = "snps,dw-apb-uart";
-			device_type = "serial";
-			reg = <0xf7209000 0x100>;
-			interrupts = <6>;
-			clocks = <&sysclk>;
-			clock-names="baudclk";
-			baud = <115200>;
-			reg-shift = <2>;
-			reg-io-width = <4>;
-			native-endian;
-		};
-
-		gmac0: ethernet@f7470000 {
-			compatible = "ezchip,nps-mgt-enet";
-			reg = <0xf7470000 0x1940>;
-			interrupts = <7>;
-			/* Filled in by U-Boot */
-			mac-address = [ 00 C0 00 F0 04 03 ];
-		};
-	};
-};
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
index f8be7ba..c21d0eb 100644
--- a/arch/arc/boot/dts/vdk_axc003.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003.dtsi
@@ -46,7 +46,7 @@ debug_uart: dw-apb-uart@5000 {
 
 	};
 
-	mb_intc: dw-apb-ictl@e0012000 {
+	mb_intc: interrupt-controller@e0012000 {
 		#interrupt-cells = <1>;
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0xe0012000 0x200 >;
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
index 0afa3e5..4d34885 100644
--- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
@@ -54,7 +54,7 @@ debug_uart: dw-apb-uart@5000 {
 
 	};
 
-	mb_intc: dw-apb-ictl@e0012000 {
+	mb_intc: interrupt-controller@e0012000 {
 		#interrupt-cells = <1>;
 		compatible = "snps,dw-apb-ictl";
 		reg = < 0xe0012000 0x200 >;
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
deleted file mode 100644
index f7a978d..0000000
--- a/arch/arc/configs/nps_defconfig
+++ /dev/null
@@ -1,80 +0,0 @@
-# CONFIG_LOCALVERSION_AUTO is not set
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_NO_HZ_IDLE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
-# CONFIG_EPOLL is not set
-# CONFIG_SIGNALFD is not set
-# CONFIG_TIMERFD is not set
-# CONFIG_EVENTFD is not set
-# CONFIG_AIO is not set
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_ISA_ARCOMPACT=y
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_ARC_PLAT_EZNPS=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4096
-CONFIG_ARC_CACHE_LINE_SHIFT=5
-# CONFIG_ARC_CACHE_PAGES is not set
-# CONFIG_ARC_HAS_LLSC is not set
-CONFIG_ARC_KVADDR_SIZE=402
-CONFIG_ARC_EMUL_UNALIGNED=y
-CONFIG_PREEMPT=y
-CONFIG_NET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=1
-CONFIG_BLK_DEV_RAM_SIZE=2048
-CONFIG_NETDEVICES=y
-CONFIG_NETCONSOLE=y
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=1
-CONFIG_SERIAL_8250_RUNTIME_UARTS=1
-CONFIG_SERIAL_8250_DW=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_DNOTIFY is not set
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_ROOT_NFS=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_ENABLE_DEFAULT_TRACERS=y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index c614857..5afc79c 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -14,8 +14,6 @@
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
 #define atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
@@ -45,7 +43,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 									\
 	/*								\
 	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND thmeselves don't provide any such semantics	\
+	 * LLOCK/SCOND themselves don't provide any such semantics	\
 	 */								\
 	smp_mb();							\
 									\
@@ -71,7 +69,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 									\
 	/*								\
 	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND thmeselves don't provide any such semantics	\
+	 * LLOCK/SCOND themselves don't provide any such semantics	\
 	 */								\
 	smp_mb();							\
 									\
@@ -195,108 +193,6 @@ ATOMIC_OPS(andnot, &= ~, bic)
 ATOMIC_OPS(or, |=, or)
 ATOMIC_OPS(xor, ^=, xor)
 
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline int atomic_read(const atomic_t *v)
-{
-	int temp;
-
-	__asm__ __volatile__(
-	"	ld.di %0, [%1]"
-	: "=r"(temp)
-	: "r"(&v->counter)
-	: "memory");
-	return temp;
-}
-
-static inline void atomic_set(atomic_t *v, int i)
-{
-	__asm__ __volatile__(
-	"	st.di %0,[%1]"
-	:
-	: "r"(i), "r"(&v->counter)
-	: "memory");
-}
-
-#define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
-{									\
-	__asm__ __volatile__(						\
-	"	mov r2, %0\n"						\
-	"	mov r3, %1\n"						\
-	"       .word %2\n"						\
-	:								\
-	: "r"(i), "r"(&v->counter), "i"(asm_op)				\
-	: "r2", "r3", "memory");					\
-}									\
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
-{									\
-	unsigned int temp = i;						\
-									\
-	/* Explicit full memory barrier needed before/after */		\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"	mov r2, %0\n"						\
-	"	mov r3, %1\n"						\
-	"       .word %2\n"						\
-	"	mov %0, r2"						\
-	: "+r"(temp)							\
-	: "r"(&v->counter), "i"(asm_op)					\
-	: "r2", "r3", "memory");					\
-									\
-	smp_mb();							\
-									\
-	temp c_op i;							\
-									\
-	return temp;							\
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
-{									\
-	unsigned int temp = i;						\
-									\
-	/* Explicit full memory barrier needed before/after */		\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"	mov r2, %0\n"						\
-	"	mov r3, %1\n"						\
-	"       .word %2\n"						\
-	"	mov %0, r2"						\
-	: "+r"(temp)							\
-	: "r"(&v->counter), "i"(asm_op)					\
-	: "r2", "r3", "memory");					\
-									\
-	smp_mb();							\
-									\
-	return temp;							\
-}
-
-#define ATOMIC_OPS(op, c_op, asm_op)					\
-	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-	ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
-#define atomic_sub(i, v) atomic_add(-(i), (v))
-#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
-#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v))
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op)					\
-	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
-ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index 7823811..4637de9 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -27,7 +27,7 @@
 #define rmb()	asm volatile("dmb 1\n" : : : "memory")
 #define wmb()	asm volatile("dmb 2\n" : : : "memory")
 
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)  /* CONFIG_ISA_ARCOMPACT */
+#else
 
 /*
  * ARCompact based cores (ARC700) only have SYNC instruction which is super
@@ -37,13 +37,6 @@
 
 #define mb()	asm volatile("sync\n" : : : "memory")
 
-#else	/* CONFIG_ARC_PLAT_EZNPS */
-
-#include <plat/ctop.h>
-
-#define mb()	asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-#define rmb()	asm volatile (".word %0" : : "i"(CTOP_INST_SCHD_RD) : "memory")
-
 #endif
 
 #include <asm-generic/barrier.h>
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 50eb3f6..c6606f4 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -85,7 +85,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
 	return (old & (1 << nr)) != 0;					\
 }
 
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
+#else /* !CONFIG_ARC_HAS_LLSC */
 
 /*
  * Non hardware assisted Atomic-R-M-W
@@ -136,55 +136,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *
 	return (old & (1UL << (nr & 0x1f))) != 0;			\
 }
 
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-#define BIT_OP(op, c_op, asm_op)					\
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	m += nr >> 5;							\
-									\
-	nr = (1UL << (nr & 0x1f));					\
-	if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3)			\
-		nr = ~nr;						\
-									\
-	__asm__ __volatile__(						\
-	"	mov r2, %0\n"						\
-	"	mov r3, %1\n"						\
-	"	.word %2\n"						\
-	:								\
-	: "r"(nr), "r"(m), "i"(asm_op)					\
-	: "r2", "r3", "memory");					\
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op)					\
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{									\
-	unsigned long old;						\
-									\
-	m += nr >> 5;							\
-									\
-	nr = old = (1UL << (nr & 0x1f));				\
-	if (asm_op == CTOP_INST_AAND_DI_R2_R2_R3)			\
-		old = ~old;						\
-									\
-	/* Explicit full memory barrier needed before/after */		\
-	smp_mb();							\
-									\
-	__asm__ __volatile__(						\
-	"	mov r2, %0\n"						\
-	"	mov r3, %1\n"						\
-	"       .word %2\n"						\
-	"	mov %0, r2"						\
-	: "+r"(old)							\
-	: "r"(m), "i"(asm_op)						\
-	: "r2", "r3", "memory");					\
-									\
-	smp_mb();							\
-									\
-	return (old & nr) != 0;					\
-}
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
+#endif
 
 /***************************************
  * Non atomic variants
@@ -226,15 +178,9 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long
 	/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
 	__TEST_N_BIT_OP(op, c_op, asm_op)
 
-#ifndef CONFIG_ARC_PLAT_EZNPS
 BIT_OPS(set, |, bset)
 BIT_OPS(clear, & ~, bclr)
 BIT_OPS(change, ^, bxor)
-#else
-BIT_OPS(set, |, CTOP_INST_AOR_DI_R2_R2_R3)
-BIT_OPS(clear, & ~, CTOP_INST_AAND_DI_R2_R2_R3)
-BIT_OPS(change, ^, CTOP_INST_AXOR_DI_R2_R2_R3)
-#endif
 
 /*
  * This routine doesn't need to be atomic.
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index c113981..9b87e16 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -20,7 +20,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 
 	/*
 	 * Explicit full memory barrier needed before/after as
-	 * LLOCK/SCOND thmeselves don't provide any such semantics
+	 * LLOCK/SCOND themselves don't provide any such semantics
 	 */
 	smp_mb();
 
@@ -41,7 +41,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 	return prev;
 }
 
-#elif !defined(CONFIG_ARC_PLAT_EZNPS)
+#else /* !CONFIG_ARC_HAS_LLSC */
 
 static inline unsigned long
 __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
@@ -61,33 +61,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 	return prev;
 }
 
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	/*
-	 * Explicit full memory barrier needed before/after
-	 */
-	smp_mb();
-
-	write_aux_reg(CTOP_AUX_GPA1, expected);
-
-	__asm__ __volatile__(
-	"	mov r2, %0\n"
-	"	mov r3, %1\n"
-	"	.word %2\n"
-	"	mov %0, r2"
-	: "+r"(new)
-	: "r"(ptr), "i"(CTOP_INST_EXC_DI_R2_R2_R3)
-	: "r2", "r3", "memory");
-
-	smp_mb();
-
-	return new;
-}
-
-#endif /* CONFIG_ARC_HAS_LLSC */
+#endif
 
 #define cmpxchg(ptr, o, n) ({				\
 	(typeof(*(ptr)))__cmpxchg((ptr),		\
@@ -104,8 +78,6 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
 
-#ifndef CONFIG_ARC_PLAT_EZNPS
-
 /*
  * xchg (reg with memory) based on "Native atomic" EX insn
  */
@@ -168,44 +140,6 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
 #endif
 
-#else /* CONFIG_ARC_PLAT_EZNPS */
-
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-				   int size)
-{
-	extern unsigned long __xchg_bad_pointer(void);
-
-	switch (size) {
-	case 4:
-		/*
-		 * Explicit full memory barrier needed before/after
-		 */
-		smp_mb();
-
-		__asm__ __volatile__(
-		"	mov r2, %0\n"
-		"	mov r3, %1\n"
-		"	.word %2\n"
-		"	mov %0, r2\n"
-		: "+r"(val)
-		: "r"(ptr), "i"(CTOP_INST_XEX_DI_R2_R2_R3)
-		: "r2", "r3", "memory");
-
-		smp_mb();
-
-		return val;
-	}
-	return __xchg_bad_pointer();
-}
-
-#define xchg(ptr, with) ({				\
-	(typeof(*(ptr)))__xchg((unsigned long)(with),	\
-			       (ptr),			\
-			       sizeof(*(ptr)));		\
-})
-
-#endif /* CONFIG_ARC_PLAT_EZNPS */
-
 /*
  * "atomic" variant of xchg()
  * REQ: It needs to follow the same serialization rules as other atomic_xxx()
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index c3aa775..6dbf5cec 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -33,10 +33,6 @@
 #include <asm/irqflags-compact.h>
 #include <asm/thread_info.h>	/* For THREAD_SIZE */
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
-
 /*--------------------------------------------------------------
  * Switch to Kernel Mode stack if SP points to User Mode stack
  *
@@ -189,12 +185,6 @@
 	PUSHAX	lp_start
 	PUSHAX	erbta
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-	.word CTOP_INST_SCHD_RW
-	PUSHAX  CTOP_AUX_GPA1
-	PUSHAX  CTOP_AUX_EFLAGS
-#endif
-
 	lr	r10, [ecr]
 	st      r10, [sp, PT_event]    /* EV_Trap expects r10 to have ECR */
 .endm
@@ -211,11 +201,6 @@
  * by hardware and that is not good.
  *-------------------------------------------------------------*/
 .macro EXCEPTION_EPILOGUE
-#ifdef CONFIG_ARC_PLAT_EZNPS
-	.word CTOP_INST_SCHD_RW
-	POPAX   CTOP_AUX_EFLAGS
-	POPAX   CTOP_AUX_GPA1
-#endif
 
 	POPAX	erbta
 	POPAX	lp_start
@@ -278,11 +263,6 @@
 	PUSHAX	lp_start
 	PUSHAX	bta_l\LVL\()
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-	.word CTOP_INST_SCHD_RW
-	PUSHAX  CTOP_AUX_GPA1
-	PUSHAX  CTOP_AUX_EFLAGS
-#endif
 .endm
 
 /*--------------------------------------------------------------
@@ -295,11 +275,6 @@
  * by hardware and that is not good.
  *-------------------------------------------------------------*/
 .macro INTERRUPT_EPILOGUE  LVL
-#ifdef CONFIG_ARC_PLAT_EZNPS
-	.word CTOP_INST_SCHD_RW
-	POPAX   CTOP_AUX_EFLAGS
-	POPAX   CTOP_AUX_GPA1
-#endif
 
 	POPAX	bta_l\LVL\()
 	POPAX	lp_start
@@ -327,13 +302,11 @@
 	bic \reg, sp, (THREAD_SIZE - 1)
 .endm
 
-#ifndef CONFIG_ARC_PLAT_EZNPS
 /* Get CPU-ID of this core */
 .macro  GET_CPU_ID  reg
 	lr  \reg, [identity]
 	lsr \reg, \reg, 8
 	bmsk \reg, \reg, 7
 .endm
-#endif
 
 #endif  /* __ASM_ARC_ENTRY_COMPACT_H */
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 0fcea5b..e4031ec 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -17,13 +17,6 @@
 #include <asm/dsp.h>
 #include <asm/fpu.h>
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-struct eznps_dp {
-	unsigned int eflags;
-	unsigned int gpa1;
-};
-#endif
-
 /* Arch specific stuff which needs to be saved per task.
  * However these items are not so important so as to earn a place in
  * struct thread_info
@@ -38,9 +31,6 @@ struct thread_struct {
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 	struct arc_fpu fpu;
 #endif
-#ifdef CONFIG_ARC_PLAT_EZNPS
-	struct eznps_dp dp;
-#endif
 };
 
 #define INIT_THREAD  {                          \
@@ -60,17 +50,8 @@ struct task_struct;
  * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
  * get optimised away by gcc
  */
-#ifndef CONFIG_EZNPS_MTM_EXT
-
 #define cpu_relax()		barrier()
 
-#else
-
-#define cpu_relax()     \
-	__asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory")
-
-#endif
-
 #define KSTK_EIP(tsk)   (task_pt_regs(tsk)->ret)
 #define KSTK_ESP(tsk)   (task_pt_regs(tsk)->sp)
 
@@ -118,25 +99,7 @@ extern unsigned int get_wchan(struct task_struct *p);
 
 #define USER_KERNEL_GUTTER    (VMALLOC_START - TASK_SIZE)
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-/* NPS architecture defines special window of 129M in user address space for
- * special memory areas, when accessing this window the MMU do not use TLB.
- * Instead MMU direct the access to:
- * 0x57f00000:0x57ffffff -- 1M of closely coupled memory (aka CMEM)
- * 0x58000000:0x5fffffff -- 16 huge pages, 8M each, with fixed map (aka FMTs)
- *
- * CMEM - is the fastest memory we got and its size is 16K.
- * FMT  - is used to map either to internal/external memory.
- * Internal memory is the second fast memory and its size is 16M
- * External memory is the biggest memory (16G) and also the slowest.
- *
- * STACK_TOP need to be PMD align (21bit) that is why we supply 0x57e00000.
- */
-#define STACK_TOP       0x57e00000
-#else
 #define STACK_TOP       TASK_SIZE
-#endif
-
 #define STACK_TOP_MAX   STACK_TOP
 
 /* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 2fdb87a..4c3c9be 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -16,11 +16,6 @@
 #ifdef CONFIG_ISA_ARCOMPACT
 struct pt_regs {
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-	unsigned long eflags;	/* Extended FLAGS */
-	unsigned long gpa1;	/* General Purpose Aux */
-#endif
-
 	/* Real registers */
 	unsigned long bta;	/* bta_l1, bta_l2, erbta */
 
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 61a97fe..01f8547 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -9,11 +9,7 @@
 #include <linux/types.h>
 #include <uapi/asm/setup.h>
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#define COMMAND_LINE_SIZE 2048
-#else
 #define COMMAND_LINE_SIZE 256
-#endif
 
 /*
  * Data structure to map a ID to string
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 94bbed8..1928716 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -232,15 +232,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
 	__asm__ __volatile__(
 	"1:	ex  %0, [%1]		\n"
-#ifdef CONFIG_EZNPS_MTM_EXT
-	"	.word %3		\n"
-#endif
 	"	breq  %0, %2, 1b	\n"
 	: "+&r" (val)
 	: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
-#ifdef CONFIG_EZNPS_MTM_EXT
-	, "i"(CTOP_INST_SCHD_RW)
-#endif
 	: "memory");
 
 	smp_mb();
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 4a3d679..1f85de8 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -12,19 +12,10 @@
 #include <asm/dsp-impl.h>
 #include <asm/fpu.h>
 
-#ifdef CONFIG_ARC_PLAT_EZNPS
-extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
-#define ARC_EZNPS_DP_PREV(p, n)      dp_save_restore(p, n)
-#else
-#define ARC_EZNPS_DP_PREV(p, n)
-
-#endif /* !CONFIG_ARC_PLAT_EZNPS */
-
 struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
 
 #define switch_to(prev, next, last)	\
 do {					\
-	ARC_EZNPS_DP_PREV(prev, next);	\
 	dsp_save_restore(prev, next);	\
 	fpu_save_restore(prev, next);	\
 	last = __switch_to(prev, next);\
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index e172c33..1a76f2d 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -14,9 +14,6 @@
 #include <asm/asm-offsets.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
-#ifdef CONFIG_ARC_PLAT_EZNPS
-#include <plat/ctop.h>
-#endif
 
 #define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
 
@@ -68,16 +65,9 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 #ifndef CONFIG_SMP
 		"st  %2, [@_current_task]	\n\t"
 #else
-#ifdef CONFIG_ARC_PLAT_EZNPS
-		"lr   r24, [%4]		\n\t"
-#ifndef CONFIG_EZNPS_MTM_EXT
-		"lsr  r24, r24, 4		\n\t"
-#endif
-#else
 		"lr   r24, [identity]		\n\t"
 		"lsr  r24, r24, 8		\n\t"
 		"bmsk r24, r24, 7		\n\t"
-#endif
 		"add2 r24, @_current_task, r24	\n\t"
 		"st   %2,  [r24]		\n\t"
 #endif
@@ -115,9 +105,6 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 
 		: "=r"(tmp)
 		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
-#ifdef CONFIG_ARC_PLAT_EZNPS
-		, "i"(CTOP_AUX_LOGIC_GLOBAL_ID)
-#endif
 		: "blink"
 	);
 
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index fa86d13..721d465 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -29,8 +29,6 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
 	else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") ||
 		 of_flat_dt_is_compatible(dt_root, "snps,hsdk"))
 		arc_base_baud = 33333333;	/* Fixed 33MHz clk (AXS10x & HSDK) */
-	else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
-		arc_base_baud = 800000000;      /* Fixed 800MHz clk (NPS) */
 	else
 		arc_base_baud = 50000000;	/* Fixed default 50MHz */
 }
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index efeba1f..37f724a 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -116,17 +116,6 @@ void arch_cpu_idle(void)
 		:"I"(arg)); /* can't be "r" has to be embedded const */
 }
 
-#elif defined(CONFIG_EZNPS_MTM_EXT)	/* ARC700 variant in NPS */
-
-void arch_cpu_idle(void)
-{
-	/* only the calling HW thread needs to sleep */
-	__asm__ __volatile__(
-		".word %0	\n"
-		:
-		:"i"(CTOP_INST_HWSCHD_WFT_IE12));
-}
-
 #else	/* ARC700 */
 
 void arch_cpu_idle(void)
@@ -278,10 +267,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
 	 */
 	regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
 
-#ifdef CONFIG_EZNPS_MTM_EXT
-	regs->eflags = 0;
-#endif
-
 	fpu_init_task(regs);
 
 	/* bogus seed values for debugging */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index eca35e0..52906d3 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -226,7 +226,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	}
 
 	if (!cpu_online(cpu)) {
-		pr_info("Timeout: CPU%u FAILED to comeup !!!\n", cpu);
+		pr_info("Timeout: CPU%u FAILED to come up !!!\n", cpu);
 		return -1;
 	}
 
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 31f54bd..062fae4 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -281,13 +281,6 @@
 .macro COMMIT_ENTRY_TO_MMU
 #if (CONFIG_ARC_MMU_VER < 4)
 
-#ifdef CONFIG_EZNPS_MTM_EXT
-	/* verify if entry for this vaddr+ASID already exists */
-	sr    TLBProbe, [ARC_REG_TLBCOMMAND]
-	lr    r0, [ARC_REG_TLBINDEX]
-	bbit0 r0, 31, 88f
-#endif
-
 	/* Get free TLB slot: Set = computed from vaddr, way = random */
 	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
 
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
deleted file mode 100644
index a645bca..0000000
--- a/arch/arc/plat-eznps/Kconfig
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.rst.
-#
-
-menuconfig ARC_PLAT_EZNPS
-	bool "\"EZchip\" ARC dev platform"
-	depends on ISA_ARCOMPACT
-	select CPU_BIG_ENDIAN
-	select CLKSRC_NPS if !PHYS_ADDR_T_64BIT
-	select EZNPS_GIC
-	select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
-	help
-	  Support for EZchip development platforms,
-	  based on ARC700 cores.
-	  We handle few flavors:
-	    - Hardware Emulator AKA HE which is FPGA based chassis
-	    - Simulator based on MetaWare nSIM
-	    - NPS400 chip based on ASIC
-
-config EZNPS_MTM_EXT
-	bool "ARC-EZchip MTM Extensions"
-	select CPUMASK_OFFSTACK
-	depends on ARC_PLAT_EZNPS && SMP
-	default y
-	help
-	  Here we add new hierarchy for CPUs topology.
-	  We got:
-	    Core
-	    Thread
-	  At the new thread level each CPU represent one HW thread.
-	  At highest hierarchy each core contain 16 threads,
-	  any of them seem like CPU from Linux point of view.
-	  All threads within same core share the execution unit of the
-	  core and HW scheduler round robin between them.
-
-config EZNPS_MEM_ERROR_ALIGN
-	bool "ARC-EZchip Memory error as an exception"
-	depends on EZNPS_MTM_EXT
-	default n
-	help
-	  On the real chip of the NPS, user memory errors are handled
-	  as a machine check exception, which is fatal, whereas on
-	  simulator platform for NPS, is handled as a Level 2 interrupt
-	  (just a stock ARC700) which is recoverable. This option makes
-	  simulator behave like hardware.
-
-config EZNPS_SHARED_AUX_REGS
-	bool "ARC-EZchip Shared Auxiliary Registers Per Core"
-	depends on ARC_PLAT_EZNPS
-	default y
-	help
-	  On the real chip of the NPS, auxiliary registers are shared between
-	  all the cpus of the core, whereas on simulator platform for NPS,
-	  each cpu has a different set of auxiliary registers. Configuration
-	  should be unset if auxiliary registers are not shared between the cpus
-	  of the core, so there will be a need to initialize them per cpu.
diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile
deleted file mode 100644
index ebb97230..0000000
--- a/arch/arc/plat-eznps/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the linux kernel.
-#
-
-obj-y := entry.o platform.o ctop.o
-obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o
diff --git a/arch/arc/plat-eznps/ctop.c b/arch/arc/plat-eznps/ctop.c
deleted file mode 100644
index b398e6e..0000000
--- a/arch/arc/plat-eznps/ctop.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <plat/ctop.h>
-
-void dp_save_restore(struct task_struct *prev, struct task_struct *next)
-{
-	struct eznps_dp *prev_task_dp = &prev->thread.dp;
-	struct eznps_dp *next_task_dp = &next->thread.dp;
-
-	/* Here we save all Data Plane related auxiliary registers */
-	prev_task_dp->eflags = read_aux_reg(CTOP_AUX_EFLAGS);
-	write_aux_reg(CTOP_AUX_EFLAGS, next_task_dp->eflags);
-
-	prev_task_dp->gpa1 = read_aux_reg(CTOP_AUX_GPA1);
-	write_aux_reg(CTOP_AUX_GPA1, next_task_dp->gpa1);
-}
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
deleted file mode 100644
index 3f18c01..0000000
--- a/arch/arc/plat-eznps/entry.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*******************************************************************************
-
-  EZNPS CPU startup Code
-  Copyright(c) 2012 EZchip Technologies.
-
-
-*******************************************************************************/
-#include <linux/linkage.h>
-#include <asm/entry.h>
-#include <asm/cache.h>
-#include <plat/ctop.h>
-
-	.cpu A7
-
-	.section .init.text, "ax",@progbits
-	.align 1024	; HW requierment for restart first PC
-
-ENTRY(res_service)
-#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
-	; There is no work for HW thread id != 0
-	lr	r3, [CTOP_AUX_THREAD_ID]
-	cmp	r3, 0
-	jne	stext
-#endif
-
-#ifdef CONFIG_ARC_HAS_DCACHE
-	; With no cache coherency mechanism D$ need to be used very carefully.
-	; Address space:
-	; 0G-2G: We disable CONFIG_ARC_CACHE_PAGES.
-	; 2G-3G: We disable D$ by setting this bit.
-	; 3G-4G: D$ is disabled by architecture.
-	; FMT are huge pages for user application reside at 0-2G.
-	; Only FMT left as one who can use D$ where each such page got
-	; disable/enable bit for cachability.
-	; Programmer will use FMT pages for private data so cache coherency
-	; would not be a problem.
-	; First thing we invalidate D$
-	sr	1, [ARC_REG_DC_IVDC]
-	sr	HW_COMPLY_KRN_NOT_D_CACHED, [CTOP_AUX_HW_COMPLY]
-#endif
-
-#ifdef CONFIG_SMP
-	; We set logical cpuid to be used by GET_CPUID
-	; We do not use physical cpuid since we want ids to be continious when
-	; it comes to cpus on the same quad cluster.
-	; This is useful for applications that used shared resources of a quad
-	; cluster such SRAMS.
-	lr 	r3, [CTOP_AUX_CORE_ID]
-	sr	r3, [CTOP_AUX_LOGIC_CORE_ID]
-	lr	r3, [CTOP_AUX_CLUSTER_ID]
-	; Set logical is acheived by swap of 2 middle bits of cluster id (4 bit)
-	; r3 is used since we use short instruction and we need q-class reg
-	.short	CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST
-	.word 	CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM
-	 sr	r3, [CTOP_AUX_LOGIC_CLUSTER_ID]
-#endif
-
-	j	stext
-END(res_service)
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
deleted file mode 100644
index 77712c5..0000000
--- a/arch/arc/plat-eznps/include/plat/ctop.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef _PLAT_EZNPS_CTOP_H
-#define _PLAT_EZNPS_CTOP_H
-
-#ifndef CONFIG_ARC_PLAT_EZNPS
-#error "Incorrect ctop.h include"
-#endif
-
-#include <linux/bits.h>
-#include <linux/types.h>
-#include <soc/nps/common.h>
-
-/* core auxiliary registers */
-#ifdef __ASSEMBLY__
-#define CTOP_AUX_BASE				(-0x800)
-#else
-#define CTOP_AUX_BASE				0xFFFFF800
-#endif
-
-#define CTOP_AUX_GLOBAL_ID			(CTOP_AUX_BASE + 0x000)
-#define CTOP_AUX_CLUSTER_ID			(CTOP_AUX_BASE + 0x004)
-#define CTOP_AUX_CORE_ID			(CTOP_AUX_BASE + 0x008)
-#define CTOP_AUX_THREAD_ID			(CTOP_AUX_BASE + 0x00C)
-#define CTOP_AUX_LOGIC_GLOBAL_ID		(CTOP_AUX_BASE + 0x010)
-#define CTOP_AUX_LOGIC_CLUSTER_ID		(CTOP_AUX_BASE + 0x014)
-#define CTOP_AUX_LOGIC_CORE_ID			(CTOP_AUX_BASE + 0x018)
-#define CTOP_AUX_MT_CTRL			(CTOP_AUX_BASE + 0x020)
-#define CTOP_AUX_HW_COMPLY			(CTOP_AUX_BASE + 0x024)
-#define CTOP_AUX_DPC				(CTOP_AUX_BASE + 0x02C)
-#define CTOP_AUX_LPC				(CTOP_AUX_BASE + 0x030)
-#define CTOP_AUX_EFLAGS				(CTOP_AUX_BASE + 0x080)
-#define CTOP_AUX_GPA1				(CTOP_AUX_BASE + 0x08C)
-#define CTOP_AUX_UDMC				(CTOP_AUX_BASE + 0x300)
-
-/* EZchip core instructions */
-#define CTOP_INST_HWSCHD_WFT_IE12		0x3E6F7344
-#define CTOP_INST_HWSCHD_OFF_R4			0x3C6F00BF
-#define CTOP_INST_HWSCHD_RESTORE_R4		0x3E6F7103
-#define CTOP_INST_SCHD_RW			0x3E6F7004
-#define CTOP_INST_SCHD_RD			0x3E6F7084
-#define CTOP_INST_ASRI_0_R3			0x3B56003E
-#define CTOP_INST_XEX_DI_R2_R2_R3		0x4A664C00
-#define CTOP_INST_EXC_DI_R2_R2_R3		0x4A664C01
-#define CTOP_INST_AADD_DI_R2_R2_R3		0x4A664C02
-#define CTOP_INST_AAND_DI_R2_R2_R3		0x4A664C04
-#define CTOP_INST_AOR_DI_R2_R2_R3		0x4A664C05
-#define CTOP_INST_AXOR_DI_R2_R2_R3		0x4A664C06
-
-/* Do not use D$ for address in 2G-3G */
-#define HW_COMPLY_KRN_NOT_D_CACHED		BIT(28)
-
-#define NPS_MSU_EN_CFG				0x80
-#define NPS_CRG_BLKID				0x480
-#define NPS_CRG_SYNC_BIT			BIT(0)
-#define NPS_GIM_BLKID				0x5C0
-
-/* GIM registers and fields*/
-#define NPS_GIM_UART_LINE			BIT(7)
-#define NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE	BIT(10)
-#define NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE	BIT(11)
-#define NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE	BIT(25)
-#define NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE	BIT(26)
-
-#ifndef __ASSEMBLY__
-/* Functional registers definition */
-struct nps_host_reg_mtm_cfg {
-	union {
-		struct {
-			u32 gen:1, gdis:1, clk_gate_dis:1, asb:1,
-			__reserved:9, nat:3, ten:16;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_mtm_cpu_cfg {
-	union {
-		struct {
-			u32 csa:22, dmsid:6, __reserved:3, cs:1;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_thr_init {
-	union {
-		struct {
-			u32 str:1, __reserved:27, thr_id:4;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_thr_init_sts {
-	union {
-		struct {
-			u32 bsy:1, err:1, __reserved:26, thr_id:4;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_msu_en_cfg {
-	union {
-		struct {
-			u32     __reserved1:11,
-			rtc_en:1, ipc_en:1, gim_1_en:1,
-			gim_0_en:1, ipi_en:1, buff_e_rls_bmuw:1,
-			buff_e_alc_bmuw:1, buff_i_rls_bmuw:1, buff_i_alc_bmuw:1,
-			buff_e_rls_bmue:1, buff_e_alc_bmue:1, buff_i_rls_bmue:1,
-			buff_i_alc_bmue:1, __reserved2:1, buff_e_pre_en:1,
-			buff_i_pre_en:1, pmuw_ja_en:1, pmue_ja_en:1,
-			pmuw_nj_en:1, pmue_nj_en:1, msu_en:1;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_gim_p_int_dst {
-	union {
-		struct {
-			u32 int_out_en:1, __reserved1:4,
-			is:1, intm:2, __reserved2:4,
-			nid:4, __reserved3:4, cid:4,
-			 __reserved4:4, tid:4;
-		};
-		u32 value;
-	};
-};
-
-/* AUX registers definition */
-struct nps_host_reg_aux_dpc {
-	union {
-		struct {
-			u32 ien:1, men:1, hen:1, reserved:29;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_aux_udmc {
-	union {
-		struct {
-			u32 dcp:1, cme:1, __reserved:19, nat:3,
-			__reserved2:5, dcas:3;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_aux_mt_ctrl {
-	union {
-		struct {
-			u32 mten:1, hsen:1, scd:1, sten:1,
-			st_cnt:8, __reserved:8,
-			hs_cnt:8, __reserved1:4;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_aux_hw_comply {
-	union {
-		struct {
-			u32 me:1, le:1, te:1, knc:1, __reserved:28;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_aux_lpc {
-	union {
-		struct {
-			u32 mep:1, __reserved:31;
-		};
-		u32 value;
-	};
-};
-
-/* CRG registers */
-#define REG_GEN_PURP_0          nps_host_reg_non_cl(NPS_CRG_BLKID, 0x1BF)
-
-/* GIM registers */
-#define REG_GIM_P_INT_EN_0      nps_host_reg_non_cl(NPS_GIM_BLKID, 0x100)
-#define REG_GIM_P_INT_POL_0     nps_host_reg_non_cl(NPS_GIM_BLKID, 0x110)
-#define REG_GIM_P_INT_SENS_0    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x114)
-#define REG_GIM_P_INT_BLK_0     nps_host_reg_non_cl(NPS_GIM_BLKID, 0x118)
-#define REG_GIM_P_INT_DST_10    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13A)
-#define REG_GIM_P_INT_DST_11    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x13B)
-#define REG_GIM_P_INT_DST_25    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x149)
-#define REG_GIM_P_INT_DST_26    nps_host_reg_non_cl(NPS_GIM_BLKID, 0x14A)
-
-#else
-
-.macro  GET_CPU_ID  reg
-	lr  \reg, [CTOP_AUX_LOGIC_GLOBAL_ID]
-#ifndef CONFIG_EZNPS_MTM_EXT
-	lsr \reg, \reg, 4
-#endif
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _PLAT_EZNPS_CTOP_H */
diff --git a/arch/arc/plat-eznps/include/plat/mtm.h b/arch/arc/plat-eznps/include/plat/mtm.h
deleted file mode 100644
index 7c55bec..0000000
--- a/arch/arc/plat-eznps/include/plat/mtm.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef _PLAT_EZNPS_MTM_H
-#define _PLAT_EZNPS_MTM_H
-
-#include <plat/ctop.h>
-
-static inline void *nps_mtm_reg_addr(u32 cpu, u32 reg)
-{
-	struct global_id gid;
-	u32 core, blkid;
-
-	gid.value = cpu;
-	core = gid.core;
-	blkid = (((core & 0x0C) << 2) | (core & 0x03));
-
-	return nps_host_reg(cpu, blkid, reg);
-}
-
-#ifdef CONFIG_EZNPS_MTM_EXT
-#define NPS_CPU_TO_THREAD_NUM(cpu) \
-	({ struct global_id gid; gid.value = cpu; gid.thread; })
-
-/* MTM registers */
-#define MTM_CFG(cpu)			nps_mtm_reg_addr(cpu, 0x81)
-#define MTM_THR_INIT(cpu)		nps_mtm_reg_addr(cpu, 0x92)
-#define MTM_THR_INIT_STS(cpu)		nps_mtm_reg_addr(cpu, 0x93)
-
-#define get_thread(map) map.thread
-#define eznps_max_cpus 4096
-#define eznps_cpus_per_cluster	256
-
-void mtm_enable_core(unsigned int cpu);
-int mtm_enable_thread(int cpu);
-#else /* !CONFIG_EZNPS_MTM_EXT */
-
-#define get_thread(map) 0
-#define eznps_max_cpus 256
-#define eznps_cpus_per_cluster	16
-#define mtm_enable_core(cpu)
-#define mtm_enable_thread(cpu) 1
-#define NPS_CPU_TO_THREAD_NUM(cpu) 0
-
-#endif /* CONFIG_EZNPS_MTM_EXT */
-
-#endif /* _PLAT_EZNPS_MTM_H */
diff --git a/arch/arc/plat-eznps/include/plat/smp.h b/arch/arc/plat-eznps/include/plat/smp.h
deleted file mode 100644
index e433f11..0000000
--- a/arch/arc/plat-eznps/include/plat/smp.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#ifndef __PLAT_EZNPS_SMP_H
-#define __PLAT_EZNPS_SMP_H
-
-#ifdef CONFIG_SMP
-
-extern void res_service(void);
-
-#endif /* CONFIG_SMP */
-
-#endif
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
deleted file mode 100644
index 3dcf5a9..0000000
--- a/arch/arc/plat-eznps/mtm.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/log2.h>
-#include <asm/arcregs.h>
-#include <plat/mtm.h>
-#include <plat/smp.h>
-
-#define MT_HS_CNT_MIN		0x01
-#define MT_HS_CNT_MAX		0xFF
-#define MT_CTRL_ST_CNT		0xF
-#define NPS_NUM_HW_THREADS	0x10
-
-static int mtm_hs_ctr = MT_HS_CNT_MAX;
-
-#ifdef CONFIG_EZNPS_MEM_ERROR_ALIGN
-int do_memory_error(unsigned long address, struct pt_regs *regs)
-{
-	die("Invalid Mem Access", regs, address);
-
-	return 1;
-}
-#endif
-
-static void mtm_init_nat(int cpu)
-{
-	struct nps_host_reg_mtm_cfg mtm_cfg;
-	struct nps_host_reg_aux_udmc udmc;
-	int log_nat, nat = 0, i, t;
-
-	/* Iterate core threads and update nat */
-	for (i = 0, t = cpu; i < NPS_NUM_HW_THREADS; i++, t++)
-		nat += test_bit(t, cpumask_bits(cpu_possible_mask));
-
-	log_nat = ilog2(nat);
-
-	udmc.value = read_aux_reg(CTOP_AUX_UDMC);
-	udmc.nat = log_nat;
-	write_aux_reg(CTOP_AUX_UDMC, udmc.value);
-
-	mtm_cfg.value = ioread32be(MTM_CFG(cpu));
-	mtm_cfg.nat = log_nat;
-	iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-}
-
-static void mtm_init_thread(int cpu)
-{
-	int i, tries = 5;
-	struct nps_host_reg_thr_init thr_init;
-	struct nps_host_reg_thr_init_sts thr_init_sts;
-
-	/* Set thread init register */
-	thr_init.value = 0;
-	iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
-	thr_init.thr_id = NPS_CPU_TO_THREAD_NUM(cpu);
-	thr_init.str = 1;
-	iowrite32be(thr_init.value, MTM_THR_INIT(cpu));
-
-	/* Poll till thread init is done */
-	for (i = 0; i < tries; i++) {
-		thr_init_sts.value = ioread32be(MTM_THR_INIT_STS(cpu));
-		if (thr_init_sts.thr_id == thr_init.thr_id) {
-			if (thr_init_sts.bsy)
-				continue;
-			else if (thr_init_sts.err)
-				pr_warn("Failed to thread init cpu %u\n", cpu);
-			break;
-		}
-
-		pr_warn("Wrong thread id in thread init for cpu %u\n", cpu);
-		break;
-	}
-
-	if (i == tries)
-		pr_warn("Got thread init timeout for cpu %u\n", cpu);
-}
-
-int mtm_enable_thread(int cpu)
-{
-	struct nps_host_reg_mtm_cfg mtm_cfg;
-
-	if (NPS_CPU_TO_THREAD_NUM(cpu) == 0)
-		return 1;
-
-	/* Enable thread in mtm */
-	mtm_cfg.value = ioread32be(MTM_CFG(cpu));
-	mtm_cfg.ten |= (1 << (NPS_CPU_TO_THREAD_NUM(cpu)));
-	iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-
-	return 0;
-}
-
-void mtm_enable_core(unsigned int cpu)
-{
-	int i;
-	struct nps_host_reg_aux_mt_ctrl mt_ctrl;
-	struct nps_host_reg_mtm_cfg mtm_cfg;
-	struct nps_host_reg_aux_dpc dpc;
-
-	/*
-	 * Initializing dpc register in each CPU.
-	 * Overwriting the init value of the DPC
-	 * register so that CMEM and FMT virtual address
-	 * spaces are accessible, and Data Plane HW
-	 * facilities are enabled.
-	 */
-	dpc.ien = 1;
-	dpc.men = 1;
-	write_aux_reg(CTOP_AUX_DPC, dpc.value);
-
-	if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
-		return;
-
-	/* Initialize Number of Active Threads */
-	mtm_init_nat(cpu);
-
-	/* Initialize mtm_cfg */
-	mtm_cfg.value = ioread32be(MTM_CFG(cpu));
-	mtm_cfg.ten = 1;
-	iowrite32be(mtm_cfg.value, MTM_CFG(cpu));
-
-	/* Initialize all other threads in core */
-	for (i = 1; i < NPS_NUM_HW_THREADS; i++)
-		mtm_init_thread(cpu + i);
-
-
-	/* Enable HW schedule, stall counter, mtm */
-	mt_ctrl.value = 0;
-	mt_ctrl.hsen = 1;
-	mt_ctrl.hs_cnt = mtm_hs_ctr;
-	mt_ctrl.mten = 1;
-	write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);
-
-	/*
-	 * HW scheduling mechanism will start working
-	 * Only after call to instruction "schd.rw".
-	 * cpu_relax() calls "schd.rw" instruction.
-	 */
-	cpu_relax();
-}
-
-/* Verify and set the value of the mtm hs counter */
-static int __init set_mtm_hs_ctr(char *ctr_str)
-{
-	int hs_ctr;
-	int ret;
-
-	ret = kstrtoint(ctr_str, 0, &hs_ctr);
-
-	if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) {
-		pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n",
-		       hs_ctr, MT_HS_CNT_MIN, MT_HS_CNT_MAX);
-		return -EINVAL;
-	}
-
-	mtm_hs_ctr = hs_ctr;
-
-	return 0;
-}
-early_param("nps_mtm_hs_ctr", set_mtm_hs_ctr);
diff --git a/arch/arc/plat-eznps/platform.c b/arch/arc/plat-eznps/platform.c
deleted file mode 100644
index 6de2fe8..0000000
--- a/arch/arc/plat-eznps/platform.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/init.h>
-#include <linux/io.h>
-#include <asm/mach_desc.h>
-#include <plat/mtm.h>
-
-static void __init eznps_configure_msu(void)
-{
-	int cpu;
-	struct nps_host_reg_msu_en_cfg msu_en_cfg = {.value = 0};
-
-	msu_en_cfg.msu_en = 1;
-	msu_en_cfg.ipi_en = 1;
-	msu_en_cfg.gim_0_en = 1;
-	msu_en_cfg.gim_1_en = 1;
-
-	/* enable IPI and GIM messages on all clusters */
-	for (cpu = 0 ; cpu < eznps_max_cpus; cpu += eznps_cpus_per_cluster)
-		iowrite32be(msu_en_cfg.value,
-			    nps_host_reg(cpu, NPS_MSU_BLKID, NPS_MSU_EN_CFG));
-}
-
-static void __init eznps_configure_gim(void)
-{
-	u32 reg_value;
-	u32 gim_int_lines;
-	struct nps_host_reg_gim_p_int_dst gim_p_int_dst = {.value = 0};
-
-	gim_int_lines = NPS_GIM_UART_LINE;
-	gim_int_lines |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
-	gim_int_lines |= NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE;
-	gim_int_lines |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
-	gim_int_lines |= NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE;
-
-	/*
-	 * IRQ polarity
-	 * low or high level
-	 * negative or positive edge
-	 */
-	reg_value = ioread32be(REG_GIM_P_INT_POL_0);
-	reg_value &= ~gim_int_lines;
-	iowrite32be(reg_value, REG_GIM_P_INT_POL_0);
-
-	/* IRQ type level or edge */
-	reg_value = ioread32be(REG_GIM_P_INT_SENS_0);
-	reg_value |= NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE;
-	reg_value |= NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE;
-	iowrite32be(reg_value, REG_GIM_P_INT_SENS_0);
-
-	/*
-	 * GIM interrupt select type for
-	 * dbg_lan TX and RX interrupts
-	 * should be type 1
-	 * type 0 = IRQ line 6
-	 * type 1 = IRQ line 7
-	 */
-	gim_p_int_dst.is = 1;
-	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_10);
-	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_11);
-	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_25);
-	iowrite32be(gim_p_int_dst.value, REG_GIM_P_INT_DST_26);
-
-	/*
-	 * CTOP IRQ lines should be defined
-	 * as blocking in GIM
-	*/
-	iowrite32be(gim_int_lines, REG_GIM_P_INT_BLK_0);
-
-	/* enable CTOP IRQ lines in GIM */
-	iowrite32be(gim_int_lines, REG_GIM_P_INT_EN_0);
-}
-
-static void __init eznps_early_init(void)
-{
-	eznps_configure_msu();
-	eznps_configure_gim();
-}
-
-static const char *eznps_compat[] __initconst = {
-	"ezchip,arc-nps",
-	NULL,
-};
-
-MACHINE_START(NPS, "nps")
-	.dt_compat	= eznps_compat,
-	.init_early	= eznps_early_init,
-MACHINE_END
diff --git a/arch/arc/plat-eznps/smp.c b/arch/arc/plat-eznps/smp.c
deleted file mode 100644
index f119cb7..0000000
--- a/arch/arc/plat-eznps/smp.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright(c) 2015 EZchip Technologies.
- */
-
-#include <linux/smp.h>
-#include <linux/of_fdt.h>
-#include <linux/io.h>
-#include <linux/irqdomain.h>
-#include <asm/irq.h>
-#include <plat/ctop.h>
-#include <plat/smp.h>
-#include <plat/mtm.h>
-
-#define NPS_DEFAULT_MSID	0x34
-#define NPS_MTM_CPU_CFG		0x90
-
-static char smp_cpuinfo_buf[128] = {"Extn [EZNPS-SMP]\t: On\n"};
-
-/* Get cpu map from device tree */
-static int __init eznps_get_map(const char *name, struct cpumask *cpumask)
-{
-	unsigned long dt_root = of_get_flat_dt_root();
-	const char *buf;
-
-	buf = of_get_flat_dt_prop(dt_root, name, NULL);
-	if (!buf)
-		return 1;
-
-	cpulist_parse(buf, cpumask);
-
-	return 0;
-}
-
-/* Update board cpu maps */
-static void __init eznps_init_cpumasks(void)
-{
-	struct cpumask cpumask;
-
-	if (eznps_get_map("present-cpus", &cpumask)) {
-		pr_err("Failed to get present-cpus from dtb");
-		return;
-	}
-	init_cpu_present(&cpumask);
-
-	if (eznps_get_map("possible-cpus", &cpumask)) {
-		pr_err("Failed to get possible-cpus from dtb");
-		return;
-	}
-	init_cpu_possible(&cpumask);
-}
-
-static void eznps_init_core(unsigned int cpu)
-{
-	u32 sync_value;
-	struct nps_host_reg_aux_hw_comply hw_comply;
-	struct nps_host_reg_aux_lpc lpc;
-
-	if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
-		return;
-
-	hw_comply.value = read_aux_reg(CTOP_AUX_HW_COMPLY);
-	hw_comply.me  = 1;
-	hw_comply.le  = 1;
-	hw_comply.te  = 1;
-	write_aux_reg(CTOP_AUX_HW_COMPLY, hw_comply.value);
-
-	/* Enable MMU clock */
-	lpc.mep = 1;
-	write_aux_reg(CTOP_AUX_LPC, lpc.value);
-
-	/* Boot CPU only */
-	if (!cpu) {
-		/* Write to general purpose register in CRG */
-		sync_value = ioread32be(REG_GEN_PURP_0);
-		sync_value |= NPS_CRG_SYNC_BIT;
-		iowrite32be(sync_value, REG_GEN_PURP_0);
-	}
-}
-
-/*
- * Master kick starting another CPU
- */
-static void __init eznps_smp_wakeup_cpu(int cpu, unsigned long pc)
-{
-	struct nps_host_reg_mtm_cpu_cfg cpu_cfg;
-
-	if (mtm_enable_thread(cpu) == 0)
-		return;
-
-	/* set PC, dmsid, and start CPU */
-	cpu_cfg.value = (u32)res_service;
-	cpu_cfg.dmsid = NPS_DEFAULT_MSID;
-	cpu_cfg.cs = 1;
-	iowrite32be(cpu_cfg.value, nps_mtm_reg_addr(cpu, NPS_MTM_CPU_CFG));
-}
-
-static void eznps_ipi_send(int cpu)
-{
-	struct global_id gid;
-	struct {
-		union {
-			struct {
-				u32 num:8, cluster:8, core:8, thread:8;
-			};
-			u32 value;
-		};
-	} ipi;
-
-	gid.value = cpu;
-	ipi.thread = get_thread(gid);
-	ipi.core = gid.core;
-	ipi.cluster = nps_cluster_logic_to_phys(gid.cluster);
-	ipi.num = NPS_IPI_IRQ;
-
-	__asm__ __volatile__(
-	"	mov r3, %0\n"
-	"	.word %1\n"
-	:
-	: "r"(ipi.value), "i"(CTOP_INST_ASRI_0_R3)
-	: "r3");
-}
-
-static void eznps_init_per_cpu(int cpu)
-{
-	smp_ipi_irq_setup(cpu, NPS_IPI_IRQ);
-
-	eznps_init_core(cpu);
-	mtm_enable_core(cpu);
-}
-
-struct plat_smp_ops plat_smp_ops = {
-	.info		= smp_cpuinfo_buf,
-	.init_early_smp	= eznps_init_cpumasks,
-	.cpu_kick	= eznps_smp_wakeup_cpu,
-	.ipi_send	= eznps_ipi_send,
-	.init_per_cpu	= eznps_init_per_cpu,
-};
diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig
index ce81018..6b5c545 100644
--- a/arch/arc/plat-hsdk/Kconfig
+++ b/arch/arc/plat-hsdk/Kconfig
@@ -8,5 +8,6 @@
 	select ARC_HAS_ACCL_REGS
 	select ARC_IRQ_NO_AUTOSAVE
 	select CLK_HSDK
+	select RESET_CONTROLLER
 	select RESET_HSDK
 	select HAVE_PCI
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 80000a6..87912e5 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1546,6 +1546,17 @@
 	bool
 	depends on ARCH_SIRF
 
+config DEBUG_UART_FLOW_CONTROL
+	bool "Enable flow control (CTS) for the debug UART"
+	depends on DEBUG_LL
+	default y if ARCH_EBSA110 || DEBUG_FOOTBRIDGE_COM1 || DEBUG_GEMINI || ARCH_RPC
+	help
+	  Some UART ports are connected to terminals that will use modem
+	  control signals to indicate whether they are ready to receive text.
+	  In practice this means that the terminal is asserting the special
+	  control signal CTS (Clear To Send). If your debug UART supports
+	  this and your debug terminal will require it, enable this option.
+
 config DEBUG_LL_INCLUDE
 	string
 	default "debug/sa1100.S" if DEBUG_SA1100
@@ -1893,11 +1904,6 @@
 	  except for having a different register layout.  Say Y here if
 	  the debug UART is of this type.
 
-config DEBUG_UART_8250_FLOW_CONTROL
-	bool "Enable flow control for 8250 UART"
-	depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250
-	default y if ARCH_EBSA110 || DEBUG_FOOTBRIDGE_COM1 || DEBUG_GEMINI || ARCH_RPC
-
 config DEBUG_UNCOMPRESS
 	bool "Enable decompressor debugging via DEBUG_LL output"
 	depends on ARCH_MULTIPLATFORM || PLAT_SAMSUNG || ARM_SINGLE_ARMV7M
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e589da3..c430143 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -143,6 +143,9 @@
 
 # Text offset. This list is sorted numerically by address in order to
 # provide a means to avoid/resolve conflicts in multi-arch kernels.
+# Note: the 32kB below this value is reserved for use by the kernel
+# during boot, and this offset is critical to the functioning of
+# kexec-tools.
 textofs-y	:= 0x00008000
 # We don't want the htc bootloader to corrupt kernel during resume
 textofs-$(CONFIG_PM_H1940)      := 0x00108000
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 58028ab..47f001c 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -7,11 +7,11 @@
 
 OBJS		=
 
-AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
 HEAD	= head.o
 OBJS	+= misc.o decompress.o
 ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y)
 OBJS	+= debug.o
+AFLAGS_head.o += -DDEBUG
 endif
 FONTC	= $(srctree)/lib/fonts/font_acorn_8x8.c
 
@@ -68,7 +68,12 @@
 ZBSSADDR	:= ALIGN(8)
 endif
 
+MALLOC_SIZE	:= 65536
+
+AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET) -DMALLOC_SIZE=$(MALLOC_SIZE)
 CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
+CPPFLAGS_vmlinux.lds += -DTEXT_OFFSET="$(TEXT_OFFSET)"
+CPPFLAGS_vmlinux.lds += -DMALLOC_SIZE="$(MALLOC_SIZE)"
 
 compress-$(CONFIG_KERNEL_GZIP) = gzip
 compress-$(CONFIG_KERNEL_LZO)  = lzo
diff --git a/arch/arm/boot/compressed/debug.S b/arch/arm/boot/compressed/debug.S
index 6bf2917..fac40a7 100644
--- a/arch/arm/boot/compressed/debug.S
+++ b/arch/arm/boot/compressed/debug.S
@@ -8,7 +8,10 @@
 
 ENTRY(putc)
 	addruart r1, r2, r3
-	waituart r3, r1
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+	waituartcts r3, r1
+#endif
+	waituarttxrdy r3, r1
 	senduart r0, r1
 	busyuart r3, r1
 	mov	 pc, lr
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 434a169..2e04ec5 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -28,19 +28,19 @@
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
 		.macro	loadsp, rb, tmp1, tmp2
 		.endm
-		.macro	writeb, ch, rb
+		.macro	writeb, ch, rb, tmp
 		mcr	p14, 0, \ch, c0, c5, 0
 		.endm
 #elif defined(CONFIG_CPU_XSCALE)
 		.macro	loadsp, rb, tmp1, tmp2
 		.endm
-		.macro	writeb, ch, rb
+		.macro	writeb, ch, rb, tmp
 		mcr	p14, 0, \ch, c8, c0, 0
 		.endm
 #else
 		.macro	loadsp, rb, tmp1, tmp2
 		.endm
-		.macro	writeb, ch, rb
+		.macro	writeb, ch, rb, tmp
 		mcr	p14, 0, \ch, c1, c0, 0
 		.endm
 #endif
@@ -49,8 +49,13 @@
 
 #include CONFIG_DEBUG_LL_INCLUDE
 
-		.macro	writeb,	ch, rb
+		.macro	writeb,	ch, rb, tmp
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+		waituartcts \tmp, \rb
+#endif
+		waituarttxrdy \tmp, \rb
 		senduart \ch, \rb
+		busyuart \tmp, \rb
 		.endm
 
 #if defined(CONFIG_ARCH_SA1100)
@@ -81,42 +86,11 @@
 		bl	phex
 		.endm
 
-		.macro	debug_reloc_start
-#ifdef DEBUG
-		kputc	#'\n'
-		kphex	r6, 8		/* processor id */
-		kputc	#':'
-		kphex	r7, 8		/* architecture id */
-#ifdef CONFIG_CPU_CP15
-		kputc	#':'
-		mrc	p15, 0, r0, c1, c0
-		kphex	r0, 8		/* control reg */
-#endif
-		kputc	#'\n'
-		kphex	r5, 8		/* decompressed kernel start */
-		kputc	#'-'
-		kphex	r9, 8		/* decompressed kernel end  */
-		kputc	#'>'
-		kphex	r4, 8		/* kernel execution address */
-		kputc	#'\n'
-#endif
-		.endm
-
-		.macro	debug_reloc_end
-#ifdef DEBUG
-		kphex	r5, 8		/* end of kernel */
-		kputc	#'\n'
-		mov	r0, r4
-		bl	memdump		/* dump 256 bytes at start of kernel */
-#endif
-		.endm
-
 		/*
 		 * Debug kernel copy by printing the memory addresses involved
 		 */
 		.macro dbgkc, begin, end, cbegin, cend
 #ifdef DEBUG
-		kputc   #'\n'
 		kputc   #'C'
 		kputc   #':'
 		kputc   #'0'
@@ -136,7 +110,28 @@
 		kputc	#'x'
 		kphex	\cend, 8	/* End of kernel copy */
 		kputc	#'\n'
-		kputc	#'\r'
+#endif
+		.endm
+
+		/*
+		 * Debug print of the final appended DTB location
+		 */
+		.macro dbgadtb, begin, end
+#ifdef DEBUG
+		kputc   #'D'
+		kputc   #'T'
+		kputc   #'B'
+		kputc   #':'
+		kputc   #'0'
+		kputc   #'x'
+		kphex   \begin, 8	/* Start of appended DTB */
+		kputc	#' '
+		kputc	#'('
+		kputc	#'0'
+		kputc	#'x'
+		kphex	\end, 8		/* End of appended DTB */
+		kputc	#')'
+		kputc	#'\n'
 #endif
 		.endm
 
@@ -303,7 +298,7 @@
 
 #ifndef CONFIG_ZBOOT_ROM
 		/* malloc space is above the relocated stack (64k max) */
-		add	r10, sp, #0x10000
+		add	r10, sp, #MALLOC_SIZE
 #else
 		/*
 		 * With ZBOOT_ROM the bss/stack is non relocatable,
@@ -357,6 +352,7 @@
 		mov	r5, r5, ror #8
 		eor	r5, r5, r1, lsr #8
 #endif
+		dbgadtb	r6, r5
 		/* 50% DTB growth should be good enough */
 		add	r5, r5, r5, lsr #1
 		/* preserve 64-bit alignment */
@@ -614,7 +610,7 @@
  */
 		mov	r0, r4
 		mov	r1, sp			@ malloc space above stack
-		add	r2, sp, #0x10000	@ 64k max
+		add	r2, sp, #MALLOC_SIZE	@ 64k max
 		mov	r3, r7
 		bl	decompress_kernel
 
@@ -1356,7 +1352,7 @@
 1:		ldrb	r2, [r0], #1
 		teq	r2, #0
 		moveq	pc, lr
-2:		writeb	r2, r3
+2:		writeb	r2, r3, r1
 		mov	r1, #0x00020000
 3:		subs	r1, r1, #1
 		bne	3b
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index b914be3..1bcb68a 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -44,10 +44,12 @@
   }
   .table : ALIGN(4) {
     _table_start = .;
-    LONG(ZIMAGE_MAGIC(4))
+    LONG(ZIMAGE_MAGIC(6))
     LONG(ZIMAGE_MAGIC(0x5a534c4b))
     LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start))
     LONG(ZIMAGE_MAGIC(_kernel_bss_size))
+    LONG(ZIMAGE_MAGIC(TEXT_OFFSET))
+    LONG(ZIMAGE_MAGIC(MALLOC_SIZE))
     LONG(0)
     _table_end = .;
   }
diff --git a/arch/arm/include/debug/8250.S b/arch/arm/include/debug/8250.S
index e4a036f..e3692a37c 100644
--- a/arch/arm/include/debug/8250.S
+++ b/arch/arm/include/debug/8250.S
@@ -45,10 +45,11 @@
 		bne	1002b
 		.endm
 
-		.macro	waituart,rd,rx
-#ifdef CONFIG_DEBUG_UART_8250_FLOW_CONTROL
+		.macro	waituarttxrdy,rd,rx
+		.endm
+
+		.macro	waituartcts,rd,rx
 1001:		load	\rd, [\rx, #UART_MSR << UART_SHIFT]
 		tst	\rd, #UART_MSR_CTS
 		beq	1001b
-#endif
 		.endm
diff --git a/arch/arm/include/debug/asm9260.S b/arch/arm/include/debug/asm9260.S
index 0da1eb6..5a0ce14 100644
--- a/arch/arm/include/debug/asm9260.S
+++ b/arch/arm/include/debug/asm9260.S
@@ -11,7 +11,10 @@
 		ldr	\rv, = CONFIG_DEBUG_UART_VIRT
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituarttxrdy,rd,rx
+		.endm
+
+		.macro	waituartcts,rd,rx
 		.endm
 
 		.macro	senduart,rd,rx
diff --git a/arch/arm/include/debug/at91.S b/arch/arm/include/debug/at91.S
index 6c91cba..1772282 100644
--- a/arch/arm/include/debug/at91.S
+++ b/arch/arm/include/debug/at91.S
@@ -19,12 +19,15 @@
 	strb	\rd, [\rx, #(AT91_DBGU_THR)]		@ Write to Transmitter Holding Register
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituarttxrdy,rd,rx
 1001:	ldr	\rd, [\rx, #(AT91_DBGU_SR)]		@ Read Status Register
 	tst	\rd, #AT91_DBGU_TXRDY			@ DBGU_TXRDY = 1 when ready to transmit
 	beq	1001b
 	.endm
 
+	.macro	waituartcts,rd,rx
+	.endm
+
 	.macro	busyuart,rd,rx
 1001:	ldr	\rd, [\rx, #(AT91_DBGU_SR)]		@ Read Status Register
 	tst	\rd, #AT91_DBGU_TXEMPTY			@ DBGU_TXEMPTY = 1 when transmission complete
diff --git a/arch/arm/include/debug/bcm63xx.S b/arch/arm/include/debug/bcm63xx.S
index 06a8962..da65abb 100644
--- a/arch/arm/include/debug/bcm63xx.S
+++ b/arch/arm/include/debug/bcm63xx.S
@@ -17,12 +17,15 @@
 	strb	\rd, [\rx, #UART_FIFO_REG]
 	.endm
 
-	.macro	waituart, rd, rx
+	.macro	waituarttxrdy, rd, rx
 1001:	ldr	\rd, [\rx, #UART_IR_REG]
 	tst	\rd, #(1 << UART_IR_TXEMPTY)
 	beq	1001b
 	.endm
 
+	.macro	waituartcts, rd, rx
+	.endm
+
 	.macro	busyuart, rd, rx
 1002:	ldr	\rd, [\rx, #UART_IR_REG]
 	tst	\rd, #(1 << UART_IR_TXTRESH)
diff --git a/arch/arm/include/debug/brcmstb.S b/arch/arm/include/debug/brcmstb.S
index 132a20c..7ffe669 100644
--- a/arch/arm/include/debug/brcmstb.S
+++ b/arch/arm/include/debug/brcmstb.S
@@ -142,7 +142,10 @@
 		bne	1002b
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituarttxrdy,rd,rx
+		.endm
+
+		.macro	waituartcts,rd,rx
 		.endm
 
 /*
diff --git a/arch/arm/include/debug/clps711x.S b/arch/arm/include/debug/clps711x.S
index 774a67a..a983d12 100644
--- a/arch/arm/include/debug/clps711x.S
+++ b/arch/arm/include/debug/clps711x.S
@@ -20,7 +20,10 @@
 	ldr	\rp, =CLPS711X_UART_PADDR
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy,rd,rx
 	.endm
 
 	.macro	senduart,rd,rx
diff --git a/arch/arm/include/debug/dc21285.S b/arch/arm/include/debug/dc21285.S
index d7e8c71..4ec0e5e 100644
--- a/arch/arm/include/debug/dc21285.S
+++ b/arch/arm/include/debug/dc21285.S
@@ -34,5 +34,8 @@
 		bne	1001b
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 		.endm
diff --git a/arch/arm/include/debug/digicolor.S b/arch/arm/include/debug/digicolor.S
index 256f5f4..443674c 100644
--- a/arch/arm/include/debug/digicolor.S
+++ b/arch/arm/include/debug/digicolor.S
@@ -21,7 +21,10 @@
 		strb	\rd, [\rx, #UA0_EMI_REC]
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 		.endm
 
 	.macro	busyuart,rd,rx
diff --git a/arch/arm/include/debug/efm32.S b/arch/arm/include/debug/efm32.S
index 5ed5028..b0083d6 100644
--- a/arch/arm/include/debug/efm32.S
+++ b/arch/arm/include/debug/efm32.S
@@ -29,7 +29,10 @@
 		strb	\rd, [\rx, #UARTn_TXDATA]
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 1001:		ldr	\rd, [\rx, #UARTn_STATUS]
 		tst	\rd, #UARTn_STATUS_TXBL
 		beq	1001b
diff --git a/arch/arm/include/debug/icedcc.S b/arch/arm/include/debug/icedcc.S
index 74a0dd0..d5e65da 100644
--- a/arch/arm/include/debug/icedcc.S
+++ b/arch/arm/include/debug/icedcc.S
@@ -23,7 +23,10 @@
 		beq	1001b
 		.endm
 
-		.macro	waituart, rd, rx
+		.macro	waituartcts, rd, rx
+		.endm
+
+		.macro	waituarttxrdy, rd, rx
 		mov	\rd, #0x2000000
 1001:
 		subs	\rd, \rd, #1
@@ -47,7 +50,10 @@
 		beq	1001b
 		.endm
 
-		.macro	waituart, rd, rx
+		.macro	waituartcts, rd, rx
+		.endm
+
+		.macro	waituarttxrdy, rd, rx
 		mov	\rd, #0x10000000
 1001:
 		subs	\rd, \rd, #1
@@ -72,7 +78,10 @@
 
 		.endm
 
-		.macro	waituart, rd, rx
+		.macro	waituartcts, rd, rx
+		.endm
+
+		.macro	waituarttxrdy, rd, rx
 		mov	\rd, #0x2000000
 1001:
 		subs	\rd, \rd, #1
diff --git a/arch/arm/include/debug/imx.S b/arch/arm/include/debug/imx.S
index 1c1b9d1..bb7b9550 100644
--- a/arch/arm/include/debug/imx.S
+++ b/arch/arm/include/debug/imx.S
@@ -35,7 +35,10 @@
 		str	\rd, [\rx, #0x40]	@ TXDATA
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 		.endm
 
 		.macro	busyuart,rd,rx
diff --git a/arch/arm/include/debug/meson.S b/arch/arm/include/debug/meson.S
index 1e501a00..7b60e44 100644
--- a/arch/arm/include/debug/meson.S
+++ b/arch/arm/include/debug/meson.S
@@ -25,7 +25,10 @@
 	beq	1002b
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy,rd,rx
 1001:	ldr	\rd, [\rx, #MESON_AO_UART_STATUS]
 	tst	\rd, #MESON_AO_UART_TX_FIFO_FULL
 	bne	1001b
diff --git a/arch/arm/include/debug/msm.S b/arch/arm/include/debug/msm.S
index 9405b71..530edc7 100644
--- a/arch/arm/include/debug/msm.S
+++ b/arch/arm/include/debug/msm.S
@@ -17,7 +17,10 @@
 	str	\rd, [\rx, #0x70]
 	.endm
 
-	.macro	waituart, rd, rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy, rd, rx
 	@ check for TX_EMT in UARTDM_SR
 	ldr	\rd, [\rx, #0x08]
 ARM_BE8(rev     \rd, \rd )
diff --git a/arch/arm/include/debug/omap2plus.S b/arch/arm/include/debug/omap2plus.S
index b5696a3..0680be6 100644
--- a/arch/arm/include/debug/omap2plus.S
+++ b/arch/arm/include/debug/omap2plus.S
@@ -75,5 +75,8 @@
 		bne	1001b
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 		.endm
diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S
index a2a553a..0c7bfa4 100644
--- a/arch/arm/include/debug/pl01x.S
+++ b/arch/arm/include/debug/pl01x.S
@@ -26,7 +26,10 @@
 		strb	\rd, [\rx, #UART01x_DR]
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 1001:		ldr	\rd, [\rx, #UART01x_FR]
  ARM_BE8(	rev	\rd, \rd )
 		tst	\rd, #UART01x_FR_TXFF
diff --git a/arch/arm/include/debug/renesas-scif.S b/arch/arm/include/debug/renesas-scif.S
index 25f0666..8e433e9 100644
--- a/arch/arm/include/debug/renesas-scif.S
+++ b/arch/arm/include/debug/renesas-scif.S
@@ -33,7 +33,10 @@
 	ldr	\rv, =SCIF_VIRT
 	.endm
 
-	.macro	waituart, rd, rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy, rd, rx
 1001:	ldrh	\rd, [\rx, #FSR]
 	tst	\rd, #TDFE
 	beq	1001b
diff --git a/arch/arm/include/debug/sa1100.S b/arch/arm/include/debug/sa1100.S
index 6109e60..7968ea5 100644
--- a/arch/arm/include/debug/sa1100.S
+++ b/arch/arm/include/debug/sa1100.S
@@ -51,7 +51,10 @@
 		str	\rd, [\rx, #UTDR]
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 1001:		ldr	\rd, [\rx, #UTSR1]
 		tst	\rd, #UTSR1_TNF
 		beq	1001b
diff --git a/arch/arm/include/debug/samsung.S b/arch/arm/include/debug/samsung.S
index 69201d7..ab474d5 100644
--- a/arch/arm/include/debug/samsung.S
+++ b/arch/arm/include/debug/samsung.S
@@ -69,7 +69,10 @@
 1002:		@ exit busyuart
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy,rd,rx
 		ldr	\rd, [\rx, # S3C2410_UFCON]
 ARM_BE8(rev \rd, \rd)
 		tst	\rd, #S3C2410_UFCON_FIFOMODE	@ fifo enabled?
diff --git a/arch/arm/include/debug/sirf.S b/arch/arm/include/debug/sirf.S
index e73e4de..3612c7b 100644
--- a/arch/arm/include/debug/sirf.S
+++ b/arch/arm/include/debug/sirf.S
@@ -29,7 +29,10 @@
 	.macro	busyuart,rd,rx
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy,rd,rx
 1001:	ldr	\rd, [\rx, #SIRF_LLUART_TXFIFO_STATUS]
 	tst	\rd, #SIRF_LLUART_TXFIFO_EMPTY
 	beq	1001b
diff --git a/arch/arm/include/debug/sti.S b/arch/arm/include/debug/sti.S
index 6b42c91..72d0525 100644
--- a/arch/arm/include/debug/sti.S
+++ b/arch/arm/include/debug/sti.S
@@ -45,7 +45,10 @@
                 strb    \rd, [\rx, #ASC_TX_BUF_OFF]
                 .endm
 
-                .macro  waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+                .macro  waituarttxrdy,rd,rx
 1001:           ldr     \rd, [\rx, #ASC_STA_OFF]
                 tst     \rd, #ASC_STA_TX_FULL
                 bne     1001b
diff --git a/arch/arm/include/debug/stm32.S b/arch/arm/include/debug/stm32.S
index f3c4a37..b6d9df3 100644
--- a/arch/arm/include/debug/stm32.S
+++ b/arch/arm/include/debug/stm32.S
@@ -27,7 +27,10 @@
 	strb    \rd, [\rx, #STM32_USART_TDR_OFF]
 .endm
 
-.macro  waituart,rd,rx
+.macro	waituartcts,rd,rx
+.endm
+
+.macro  waituarttxrdy,rd,rx
 1001:	ldr	\rd, [\rx, #(STM32_USART_SR_OFF)]	@ Read Status Register
 	tst	\rd, #STM32_USART_TXE			@ TXE = 1 = tx empty
 	beq	1001b
diff --git a/arch/arm/include/debug/tegra.S b/arch/arm/include/debug/tegra.S
index 2148d0f..98daa7f 100644
--- a/arch/arm/include/debug/tegra.S
+++ b/arch/arm/include/debug/tegra.S
@@ -178,15 +178,16 @@
 1002:
 		.endm
 
-		.macro	waituart, rd, rx
-#ifdef FLOW_CONTROL
+		.macro	waituartcts, rd, rx
 		cmp	\rx, #0
 		beq	1002f
 1001:		ldrb	\rd, [\rx, #UART_MSR << UART_SHIFT]
 		tst	\rd, #UART_MSR_CTS
 		beq	1001b
 1002:
-#endif
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 		.endm
 
 /*
diff --git a/arch/arm/include/debug/vf.S b/arch/arm/include/debug/vf.S
index 854d9bd..035bcbf 100644
--- a/arch/arm/include/debug/vf.S
+++ b/arch/arm/include/debug/vf.S
@@ -29,5 +29,8 @@
 	beq	1001b			@ wait until transmit done
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy,rd,rx
 	.endm
diff --git a/arch/arm/include/debug/vt8500.S b/arch/arm/include/debug/vt8500.S
index 8dc1df2..d01094f 100644
--- a/arch/arm/include/debug/vt8500.S
+++ b/arch/arm/include/debug/vt8500.S
@@ -28,7 +28,10 @@
 	bne	1001b
 	.endm
 
-	.macro	waituart,rd,rx
+	.macro	waituartcts,rd,rx
+	.endm
+
+	.macro	waituarttxrdy,rd,rx
 	.endm
 
 #endif
diff --git a/arch/arm/include/debug/zynq.S b/arch/arm/include/debug/zynq.S
index 58d77c9..5d42cc3 100644
--- a/arch/arm/include/debug/zynq.S
+++ b/arch/arm/include/debug/zynq.S
@@ -33,7 +33,10 @@
 		strb	\rd, [\rx, #UART_FIFO_OFFSET]	@ TXDATA
 		.endm
 
-		.macro	waituart,rd,rx
+		.macro	waituartcts,rd,rx
+		.endm
+
+		.macro	waituarttxrdy,rd,rx
 1001:		ldr	\rd, [\rx, #UART_SR_OFFSET]
 ARM_BE8(	rev	\rd, \rd )
 		tst	\rd, #UART_SR_TXEMPTY
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index e112072..d92f44b 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -89,11 +89,18 @@
 2:		teq     r1, #'\n'
 		bne	3f
 		mov	r1, #'\r'
-		waituart r2, r3
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+		waituartcts r2, r3
+#endif
+		waituarttxrdy r2, r3
 		senduart r1, r3
 		busyuart r2, r3
 		mov	r1, #'\n'
-3:		waituart r2, r3
+3:
+#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
+		waituartcts r2, r3
+#endif
+		waituarttxrdy r2, r3
 		senduart r1, r3
 		busyuart r2, r3
 		b	1b
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7a4853b..08660ae 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -683,6 +683,40 @@ static void disable_single_step(struct perf_event *bp)
 	arch_install_hw_breakpoint(bp);
 }
 
+/*
+ * Arm32 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See this same function in the arm64 platform code, which has the same
+ * problem.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u32 get_distance_from_watchpoint(unsigned long addr, u32 val,
+					struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	u32 wp_low, wp_high;
+	u32 lens, lene;
+
+	lens = __ffs(ctrl->len);
+	lene = __fls(ctrl->len);
+
+	wp_low = val + lens;
+	wp_high = val + lene;
+	if (addr < wp_low)
+		return wp_low - addr;
+	else if (addr > wp_high)
+		return addr - wp_high;
+	else
+		return 0;
+}
+
 static int watchpoint_fault_on_uaccess(struct pt_regs *regs,
 				       struct arch_hw_breakpoint *info)
 {
@@ -692,23 +726,25 @@ static int watchpoint_fault_on_uaccess(struct pt_regs *regs,
 static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 			       struct pt_regs *regs)
 {
-	int i, access;
-	u32 val, ctrl_reg, alignment_mask;
+	int i, access, closest_match = 0;
+	u32 min_dist = -1, dist;
+	u32 val, ctrl_reg;
 	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
 	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = this_cpu_ptr(wp_on_reg);
 
+	/*
+	 * Find all watchpoints that match the reported address. If no exact
+	 * match is found. Attribute the hit to the closest watchpoint.
+	 */
+	rcu_read_lock();
 	for (i = 0; i < core_num_wrps; ++i) {
-		rcu_read_lock();
-
 		wp = slots[i];
-
 		if (wp == NULL)
-			goto unlock;
+			continue;
 
-		info = counter_arch_bp(wp);
 		/*
 		 * The DFAR is an unknown value on debug architectures prior
 		 * to 7.1. Since we only allow a single watchpoint on these
@@ -717,33 +753,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 */
 		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
 			BUG_ON(i > 0);
+			info = counter_arch_bp(wp);
 			info->trigger = wp->attr.bp_addr;
 		} else {
-			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
-				alignment_mask = 0x7;
-			else
-				alignment_mask = 0x3;
-
-			/* Check if the watchpoint value matches. */
-			val = read_wb_reg(ARM_BASE_WVR + i);
-			if (val != (addr & ~alignment_mask))
-				goto unlock;
-
-			/* Possible match, check the byte address select. */
-			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
-			decode_ctrl_reg(ctrl_reg, &ctrl);
-			if (!((1 << (addr & alignment_mask)) & ctrl.len))
-				goto unlock;
-
 			/* Check that the access type matches. */
 			if (debug_exception_updates_fsr()) {
 				access = (fsr & ARM_FSR_ACCESS_MASK) ?
 					  HW_BREAKPOINT_W : HW_BREAKPOINT_R;
 				if (!(access & hw_breakpoint_type(wp)))
-					goto unlock;
+					continue;
 			}
 
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			dist = get_distance_from_watchpoint(addr, val, &ctrl);
+			if (dist < min_dist) {
+				min_dist = dist;
+				closest_match = i;
+			}
+			/* Is this an exact match? */
+			if (dist != 0)
+				continue;
+
 			/* We have a winner. */
+			info = counter_arch_bp(wp);
 			info->trigger = addr;
 		}
 
@@ -765,13 +799,23 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 * we can single-step over the watchpoint trigger.
 		 */
 		if (!is_default_overflow_handler(wp))
-			goto unlock;
-
+			continue;
 step:
 		enable_single_step(wp, instruction_pointer(regs));
-unlock:
-		rcu_read_unlock();
 	}
+
+	if (min_dist > 0 && min_dist != -1) {
+		/* No exact match found. */
+		wp = slots[closest_match];
+		info = counter_arch_bp(wp);
+		info->trigger = addr;
+		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+		perf_bp_event(wp, regs);
+		if (is_default_overflow_handler(wp))
+			enable_single_step(wp, instruction_pointer(regs));
+	}
+
+	rcu_read_unlock();
 }
 
 static void watchpoint_single_step_handler(unsigned long pc)
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 10768869..a20ba12 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -306,7 +306,7 @@ static struct davinci_nand_pdata da830_evm_nand_pdata = {
 	.core_chipsel	= 1,
 	.parts		= da830_evm_nand_partitions,
 	.nr_parts	= ARRAY_SIZE(da830_evm_nand_partitions),
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.ecc_bits	= 4,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.bbt_td		= &da830_evm_nand_bbt_main_descr,
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 6751292..4280126 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -239,7 +239,7 @@ static struct davinci_nand_pdata da850_evm_nandflash_data = {
 	.core_chipsel	= 1,
 	.parts		= da850_evm_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(da850_evm_nandflash_partition),
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.ecc_bits	= 4,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.timing		= &da850_evm_nandflash_timing,
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index 5113273..3c5a9e3 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -82,7 +82,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.mask_chipsel		= BIT(14),
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
-	.ecc_mode		= NAND_ECC_HW,
+	.engine_type		= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.bbt_options		= NAND_BBT_USE_FLASH,
 	.ecc_bits		= 4,
 };
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index b9e9950..e475b21 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -76,7 +76,8 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.mask_chipsel		= BIT(14),
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
-	.ecc_mode		= NAND_ECC_HW_SYNDROME,
+	.engine_type		= NAND_ECC_ENGINE_TYPE_ON_HOST,
+	.ecc_placement		= NAND_ECC_PLACEMENT_INTERLEAVED,
 	.ecc_bits		= 4,
 	.bbt_options		= NAND_BBT_USE_FLASH,
 };
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 2328b15..bdf31eb 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -146,7 +146,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.mask_chipsel		= BIT(14),
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
-	.ecc_mode		= NAND_ECC_HW,
+	.engine_type		= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.bbt_options		= NAND_BBT_USE_FLASH,
 	.ecc_bits		= 4,
 };
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index a5d3708..bcb3c40 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -162,7 +162,7 @@ static struct davinci_nand_pdata davinci_evm_nandflash_data = {
 	.core_chipsel	= 0,
 	.parts		= davinci_evm_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_evm_nandflash_partition),
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.timing		= &davinci_evm_nandflash_timing,
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index dd7d60f..8319a60 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -91,7 +91,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.mask_ale 		= 0x40000,
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
-	.ecc_mode		= NAND_ECC_HW,
+	.engine_type		= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.ecc_bits		= 1,
 	.options		= 0,
 };
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 3382b93..5205008 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -432,7 +432,7 @@ static struct davinci_nand_pdata mityomapl138_nandflash_data = {
 	.core_chipsel	= 1,
 	.parts		= mityomapl138_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(mityomapl138_nandflash_partition),
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.options	= NAND_BUSWIDTH_16,
 	.ecc_bits	= 1, /* 4 bit mode is not supported with 16 bit NAND */
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 6cf46bb..b4843f6 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -90,7 +90,7 @@ static struct davinci_nand_pdata davinci_ntosd2_nandflash_data = {
 	.core_chipsel	= 0,
 	.parts		= davinci_ntosd2_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_ntosd2_nandflash_partition),
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 };
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index 6c79039..88df801 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -206,7 +206,7 @@ static struct davinci_nand_pdata omapl138_hawk_nandflash_data = {
 	.core_chipsel	= 1,
 	.parts		= omapl138_hawk_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(omapl138_hawk_nandflash_partition),
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 	.ecc_bits	= 4,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.options	= NAND_BUSWIDTH_16,
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 3d2c108..4317097 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -369,6 +369,15 @@ static struct pxaficp_platform_data tosa_ficp_platform_data = {
 /*
  * Tosa AC IN
  */
+static struct gpiod_lookup_table tosa_power_gpiod_table = {
+	.dev_id = "gpio-charger",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_AC_IN,
+			    NULL, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static char *tosa_ac_supplied_to[] = {
 	"main-battery",
 	"backup-battery",
@@ -378,8 +387,6 @@ static char *tosa_ac_supplied_to[] = {
 static struct gpio_charger_platform_data tosa_power_data = {
 	.name			= "charger",
 	.type			= POWER_SUPPLY_TYPE_MAINS,
-	.gpio			= TOSA_GPIO_AC_IN,
-	.gpio_active_low	= 1,
 	.supplied_to		= tosa_ac_supplied_to,
 	.num_supplicants	= ARRAY_SIZE(tosa_ac_supplied_to),
 };
@@ -951,6 +958,7 @@ static void __init tosa_init(void)
 	clk_add_alias("CLK_CK3P6MI", tc6393xb_device.name, "GPIO11_CLK", NULL);
 
 	gpiod_add_lookup_table(&tosa_udc_gpiod_table);
+	gpiod_add_lookup_table(&tosa_power_gpiod_table);
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
diff --git a/arch/arm/mach-s3c24xx/common-smdk.c b/arch/arm/mach-s3c24xx/common-smdk.c
index 75064df..121646a 100644
--- a/arch/arm/mach-s3c24xx/common-smdk.c
+++ b/arch/arm/mach-s3c24xx/common-smdk.c
@@ -191,7 +191,7 @@ static struct s3c2410_platform_nand smdk_nand_info = {
 	.twrph1		= 20,
 	.nr_sets	= ARRAY_SIZE(smdk_nand_sets),
 	.sets		= smdk_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 /* devices we initialise */
diff --git a/arch/arm/mach-s3c24xx/mach-anubis.c b/arch/arm/mach-s3c24xx/mach-anubis.c
index 072966d..2832624 100644
--- a/arch/arm/mach-s3c24xx/mach-anubis.c
+++ b/arch/arm/mach-s3c24xx/mach-anubis.c
@@ -218,7 +218,7 @@ static struct s3c2410_platform_nand __initdata anubis_nand_info = {
 	.nr_sets	= ARRAY_SIZE(anubis_nand_sets),
 	.sets		= anubis_nand_sets,
 	.select_chip	= anubis_nand_select,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 /* IDE channels */
diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c
index 58c5ef3..04dedeb 100644
--- a/arch/arm/mach-s3c24xx/mach-at2440evb.c
+++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c
@@ -109,7 +109,7 @@ static struct s3c2410_platform_nand __initdata at2440evb_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(at2440evb_nand_sets),
 	.sets		= at2440evb_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 /* DM9000AEP 10/100 ethernet controller */
diff --git a/arch/arm/mach-s3c24xx/mach-bast.c b/arch/arm/mach-s3c24xx/mach-bast.c
index a7c3955..6465eab 100644
--- a/arch/arm/mach-s3c24xx/mach-bast.c
+++ b/arch/arm/mach-s3c24xx/mach-bast.c
@@ -294,7 +294,7 @@ static struct s3c2410_platform_nand __initdata bast_nand_info = {
 	.nr_sets	= ARRAY_SIZE(bast_nand_sets),
 	.sets		= bast_nand_sets,
 	.select_chip	= bast_nand_select,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 /* DM9000 */
diff --git a/arch/arm/mach-s3c24xx/mach-gta02.c b/arch/arm/mach-s3c24xx/mach-gta02.c
index 526fd09..7327481 100644
--- a/arch/arm/mach-s3c24xx/mach-gta02.c
+++ b/arch/arm/mach-s3c24xx/mach-gta02.c
@@ -417,7 +417,7 @@ static struct s3c2410_platform_nand __initdata gta02_nand_info = {
 	.twrph1		= 15,
 	.nr_sets	= ARRAY_SIZE(gta02_nand_sets),
 	.sets		= gta02_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index 885e8f1..8233dcf 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -228,7 +228,7 @@ static struct s3c2410_platform_nand __initdata jive_nand_info = {
 	.twrph1		= 40,
 	.sets		= jive_nand_sets,
 	.nr_sets	= ARRAY_SIZE(jive_nand_sets),
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static int __init jive_mtdset(char *options)
diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c
index 2357494..057dcba 100644
--- a/arch/arm/mach-s3c24xx/mach-mini2440.c
+++ b/arch/arm/mach-s3c24xx/mach-mini2440.c
@@ -296,7 +296,7 @@ static struct s3c2410_platform_nand mini2440_nand_info __initdata = {
 	.nr_sets	= ARRAY_SIZE(mini2440_nand_sets),
 	.sets		= mini2440_nand_sets,
 	.ignore_unset_ecc = 1,
-	.ecc_mode	= NAND_ECC_HW,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_ON_HOST,
 };
 
 /* DM9000AEP 10/100 ethernet controller */
diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c
index ee3630c..1574488 100644
--- a/arch/arm/mach-s3c24xx/mach-osiris.c
+++ b/arch/arm/mach-s3c24xx/mach-osiris.c
@@ -234,7 +234,7 @@ static struct s3c2410_platform_nand __initdata osiris_nand_info = {
 	.nr_sets	= ARRAY_SIZE(osiris_nand_sets),
 	.sets		= osiris_nand_sets,
 	.select_chip	= osiris_nand_select,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 /* PCMCIA control and configuration */
diff --git a/arch/arm/mach-s3c24xx/mach-qt2410.c b/arch/arm/mach-s3c24xx/mach-qt2410.c
index ff9e319..f3131d9 100644
--- a/arch/arm/mach-s3c24xx/mach-qt2410.c
+++ b/arch/arm/mach-s3c24xx/mach-qt2410.c
@@ -287,7 +287,7 @@ static struct s3c2410_platform_nand __initdata qt2410_nand_info = {
 	.twrph1		= 20,
 	.nr_sets	= ARRAY_SIZE(qt2410_nand_sets),
 	.sets		= qt2410_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 /* UDC */
diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c
index e9806bf..3645b9c 100644
--- a/arch/arm/mach-s3c24xx/mach-rx1950.c
+++ b/arch/arm/mach-s3c24xx/mach-rx1950.c
@@ -620,7 +620,7 @@ static struct s3c2410_platform_nand rx1950_nand_info = {
 	.twrph1 = 15,
 	.nr_sets = ARRAY_SIZE(rx1950_nand_sets),
 	.sets = rx1950_nand_sets,
-	.ecc_mode = NAND_ECC_SOFT,
+	.engine_type = NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static struct s3c2410_udc_mach_info rx1950_udc_cfg __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-rx3715.c b/arch/arm/mach-s3c24xx/mach-rx3715.c
index 995f1ff..017010d 100644
--- a/arch/arm/mach-s3c24xx/mach-rx3715.c
+++ b/arch/arm/mach-s3c24xx/mach-rx3715.c
@@ -158,7 +158,7 @@ static struct s3c2410_platform_nand __initdata rx3715_nand_info = {
 	.twrph1		= 15,
 	.nr_sets	= ARRAY_SIZE(rx3715_nand_sets),
 	.sets		= rx3715_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static struct platform_device *rx3715_devices[] __initdata = {
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index d76b28b..c5fa215 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -112,7 +112,7 @@ static struct s3c2410_platform_nand __initdata vstms_nand_info = {
 	.twrph1		= 20,
 	.nr_sets	= ARRAY_SIZE(vstms_nand_sets),
 	.sets		= vstms_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static struct platform_device *vstms_devices[] __initdata = {
diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c
index e708021..0d9acaf 100644
--- a/arch/arm/mach-s3c64xx/mach-hmt.c
+++ b/arch/arm/mach-s3c64xx/mach-hmt.c
@@ -199,7 +199,7 @@ static struct s3c2410_platform_nand hmt_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(hmt_nand_sets),
 	.sets		= hmt_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static struct gpio_led hmt_leds[] = {
diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c
index 0dd36ae..6fbb578 100644
--- a/arch/arm/mach-s3c64xx/mach-mini6410.c
+++ b/arch/arm/mach-s3c64xx/mach-mini6410.c
@@ -136,7 +136,7 @@ static struct s3c2410_platform_nand mini6410_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(mini6410_nand_sets),
 	.sets		= mini6410_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static struct s3c_fb_pd_win mini6410_lcd_type0_fb_win = {
diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c
index 0ff88b6..1e98e53 100644
--- a/arch/arm/mach-s3c64xx/mach-real6410.c
+++ b/arch/arm/mach-s3c64xx/mach-real6410.c
@@ -188,7 +188,7 @@ static struct s3c2410_platform_nand real6410_nand_info = {
 	.twrph1		= 40,
 	.nr_sets	= ARRAY_SIZE(real6410_nand_sets),
 	.sets		= real6410_nand_sets,
-	.ecc_mode       = NAND_ECC_SOFT,
+	.engine_type	= NAND_ECC_ENGINE_TYPE_SOFT,
 };
 
 static struct platform_device *real6410_devices[] __initdata = {
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 3cc2b71..bd3a52f 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -30,6 +30,7 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/power/gpio-charger.h>
 
 #include <video/sa1100fb.h>
@@ -131,16 +132,23 @@ static struct irda_platform_data collie_ir_data = {
 /*
  * Collie AC IN
  */
+static struct gpiod_lookup_table collie_power_gpiod_table = {
+	.dev_id = "gpio-charger",
+	.table = {
+		GPIO_LOOKUP("gpio", COLLIE_GPIO_AC_IN,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static char *collie_ac_supplied_to[] = {
 	"main-battery",
 	"backup-battery",
 };
 
-
 static struct gpio_charger_platform_data collie_power_data = {
 	.name			= "charger",
 	.type			= POWER_SUPPLY_TYPE_MAINS,
-	.gpio			= COLLIE_GPIO_AC_IN,
 	.supplied_to		= collie_ac_supplied_to,
 	.num_supplicants	= ARRAY_SIZE(collie_ac_supplied_to),
 };
@@ -386,6 +394,8 @@ static void __init collie_init(void)
 
 	platform_scoop_config = &collie_pcmcia_config;
 
+	gpiod_add_lookup_table(&collie_power_gpiod_table);
+
 	ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	if (ret) {
 		printk(KERN_WARNING "collie: Unable to register LoCoMo device\n");
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 12c26eb..43d91bf 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1249,20 +1249,28 @@ static void __init l2c310_of_parse(const struct device_node *np,
 
 	ret = of_property_read_u32(np, "prefetch-data", &val);
 	if (ret == 0) {
-		if (val)
+		if (val) {
 			prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
-		else
+			*aux_val |= L310_PREFETCH_CTRL_DATA_PREFETCH;
+		} else {
 			prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+			*aux_val &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+		}
+		*aux_mask &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
 	} else if (ret != -EINVAL) {
 		pr_err("L2C-310 OF prefetch-data property value is missing\n");
 	}
 
 	ret = of_property_read_u32(np, "prefetch-instr", &val);
 	if (ret == 0) {
-		if (val)
+		if (val) {
 			prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
-		else
+			*aux_val |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
+		} else {
 			prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+			*aux_val &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+		}
+		*aux_mask &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
 	} else if (ret != -EINVAL) {
 		pr_err("L2C-310 OF prefetch-instr property value is missing\n");
 	}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 698cc74..ab69250 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -17,7 +17,6 @@
 
 #include <asm/cp15.h>
 #include <asm/cputype.h>
-#include <asm/sections.h>
 #include <asm/cachetype.h>
 #include <asm/fixmap.h>
 #include <asm/sections.h>
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 171077c..d056a54 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -453,3 +453,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 3b85959..b3b2019 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		440
+#define __NR_compat_syscalls		441
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 2a3ad9b..107f08e 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -887,6 +887,8 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 81901c5..c89bd5f 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -40,7 +40,7 @@
 endif
 obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
 
-obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
+obj-$(CONFIG_ELF_CORE)		+= elfcore.o
 
 # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
 CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 4799c96..b96ed8b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -360,3 +360,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 93bbb74..c830705 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -31,6 +31,7 @@
 	select NO_DMA if !MMU && !COLDFIRE
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
+	select UACCESS_MEMCPY if !MMU
 	select VIRT_TO_BUS
 
 config CPU_BIG_ENDIAN
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index 9ef4ec0..59f7dfe 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -554,7 +554,7 @@ static struct platform_device mcf_edma = {
 };
 #endif /* IS_ENABLED(CONFIG_MCF_EDMA) */
 
-#if IS_ENABLED(CONFIG_MMC)
+#ifdef MCFSDHC_BASE
 static struct mcf_esdhc_platform_data mcf_esdhc_data = {
 	.max_bus_width = 4,
 	.cd_type = ESDHC_CD_NONE,
@@ -579,7 +579,7 @@ static struct platform_device mcf_esdhc = {
 	.resource		= mcf_esdhc_resources,
 	.dev.platform_data	= &mcf_esdhc_data,
 };
-#endif /* IS_ENABLED(CONFIG_MMC) */
+#endif /* MCFSDHC_BASE */
 
 static struct platform_device *mcf_devices[] __initdata = {
 	&mcf_uart,
@@ -613,7 +613,7 @@ static struct platform_device *mcf_devices[] __initdata = {
 #if IS_ENABLED(CONFIG_MCF_EDMA)
 	&mcf_edma,
 #endif
-#if IS_ENABLED(CONFIG_MMC)
+#ifdef MCFSDHC_BASE
 	&mcf_esdhc,
 #endif
 };
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index e896466..f98208c 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -1,7 +1,397 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifdef __uClinux__
-#include <asm/uaccess_no.h>
-#else
-#include <asm/uaccess_mm.h>
-#endif
+#ifndef __M68K_UACCESS_H
+#define __M68K_UACCESS_H
+
+#ifdef CONFIG_MMU
+
+/*
+ * User space memory access functions
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/segment.h>
 #include <asm/extable.h>
+
+/* We let the MMU do all checking */
+static inline int access_ok(const void __user *addr,
+			    unsigned long size)
+{
+	return 1;
+}
+
+/*
+ * Not all varients of the 68k family support the notion of address spaces.
+ * The traditional 680x0 parts do, and they use the sfc/dfc registers and
+ * the "moves" instruction to access user space from kernel space. Other
+ * family members like ColdFire don't support this, and only have a single
+ * address space, and use the usual "move" instruction for user space access.
+ *
+ * Outside of this difference the user space access functions are the same.
+ * So lets keep the code simple and just define in what we need to use.
+ */
+#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
+#define	MOVES	"moves"
+#else
+#define	MOVES	"move"
+#endif
+
+extern int __put_user_bad(void);
+extern int __get_user_bad(void);
+
+#define __put_user_asm(res, x, ptr, bwl, reg, err)	\
+asm volatile ("\n"					\
+	"1:	"MOVES"."#bwl"	%2,%1\n"		\
+	"2:\n"						\
+	"	.section .fixup,\"ax\"\n"		\
+	"	.even\n"				\
+	"10:	moveq.l	%3,%0\n"			\
+	"	jra 2b\n"				\
+	"	.previous\n"				\
+	"\n"						\
+	"	.section __ex_table,\"a\"\n"		\
+	"	.align	4\n"				\
+	"	.long	1b,10b\n"			\
+	"	.long	2b,10b\n"			\
+	"	.previous"				\
+	: "+d" (res), "=m" (*(ptr))			\
+	: #reg (x), "i" (err))
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ */
+
+#define __put_user(x, ptr)						\
+({									\
+	typeof(*(ptr)) __pu_val = (x);					\
+	int __pu_err = 0;						\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof (*(ptr))) {					\
+	case 1:								\
+		__put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT);	\
+		break;							\
+	case 2:								\
+		__put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT);	\
+		break;							\
+	case 4:								\
+		__put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT);	\
+		break;							\
+	case 8:								\
+ 	    {								\
+ 		const void __user *__pu_ptr = (ptr);			\
+		asm volatile ("\n"					\
+			"1:	"MOVES".l	%2,(%1)+\n"		\
+			"2:	"MOVES".l	%R2,(%1)\n"		\
+			"3:\n"						\
+			"	.section .fixup,\"ax\"\n"		\
+			"	.even\n"				\
+			"10:	movel %3,%0\n"				\
+			"	jra 3b\n"				\
+			"	.previous\n"				\
+			"\n"						\
+			"	.section __ex_table,\"a\"\n"		\
+			"	.align 4\n"				\
+			"	.long 1b,10b\n"				\
+			"	.long 2b,10b\n"				\
+			"	.long 3b,10b\n"				\
+			"	.previous"				\
+			: "+d" (__pu_err), "+a" (__pu_ptr)		\
+			: "r" (__pu_val), "i" (-EFAULT)			\
+			: "memory");					\
+		break;							\
+	    }								\
+	default:							\
+		__pu_err = __put_user_bad();				\
+		break;							\
+	}								\
+	__pu_err;							\
+})
+#define put_user(x, ptr)	__put_user(x, ptr)
+
+
+#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({		\
+	type __gu_val;							\
+	asm volatile ("\n"						\
+		"1:	"MOVES"."#bwl"	%2,%1\n"			\
+		"2:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.even\n"					\
+		"10:	move.l	%3,%0\n"				\
+		"	sub.l	%1,%1\n"				\
+		"	jra	2b\n"					\
+		"	.previous\n"					\
+		"\n"							\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	4\n"					\
+		"	.long	1b,10b\n"				\
+		"	.previous"					\
+		: "+d" (res), "=&" #reg (__gu_val)			\
+		: "m" (*(ptr)), "i" (err));				\
+	(x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val;	\
+})
+
+#define __get_user(x, ptr)						\
+({									\
+	int __gu_err = 0;						\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+		__get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT);	\
+		break;							\
+	case 2:								\
+		__get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT);	\
+		break;							\
+	case 4:								\
+		__get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT);	\
+		break;							\
+	case 8: {							\
+		const void __user *__gu_ptr = (ptr);			\
+		union {							\
+			u64 l;						\
+			__typeof__(*(ptr)) t;				\
+		} __gu_val;						\
+		asm volatile ("\n"					\
+			"1:	"MOVES".l	(%2)+,%1\n"		\
+			"2:	"MOVES".l	(%2),%R1\n"		\
+			"3:\n"						\
+			"	.section .fixup,\"ax\"\n"		\
+			"	.even\n"				\
+			"10:	move.l	%3,%0\n"			\
+			"	sub.l	%1,%1\n"			\
+			"	sub.l	%R1,%R1\n"			\
+			"	jra	3b\n"				\
+			"	.previous\n"				\
+			"\n"						\
+			"	.section __ex_table,\"a\"\n"		\
+			"	.align	4\n"				\
+			"	.long	1b,10b\n"			\
+			"	.long	2b,10b\n"			\
+			"	.previous"				\
+			: "+d" (__gu_err), "=&r" (__gu_val.l),		\
+			  "+a" (__gu_ptr)				\
+			: "i" (-EFAULT)					\
+			: "memory");					\
+		(x) = __gu_val.t;					\
+		break;							\
+	}								\
+	default:							\
+		__gu_err = __get_user_bad();				\
+		break;							\
+	}								\
+	__gu_err;							\
+})
+#define get_user(x, ptr) __get_user(x, ptr)
+
+unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n);
+unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n);
+
+#define __suffix0
+#define __suffix1 b
+#define __suffix2 w
+#define __suffix4 l
+
+#define ____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
+	asm volatile ("\n"						\
+		"1:	"MOVES"."#s1"	(%2)+,%3\n"			\
+		"	move."#s1"	%3,(%1)+\n"			\
+		"	.ifnc	\""#s2"\",\"\"\n"			\
+		"2:	"MOVES"."#s2"	(%2)+,%3\n"			\
+		"	move."#s2"	%3,(%1)+\n"			\
+		"	.ifnc	\""#s3"\",\"\"\n"			\
+		"3:	"MOVES"."#s3"	(%2)+,%3\n"			\
+		"	move."#s3"	%3,(%1)+\n"			\
+		"	.endif\n"					\
+		"	.endif\n"					\
+		"4:\n"							\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	4\n"					\
+		"	.long	1b,10f\n"				\
+		"	.ifnc	\""#s2"\",\"\"\n"			\
+		"	.long	2b,20f\n"				\
+		"	.ifnc	\""#s3"\",\"\"\n"			\
+		"	.long	3b,30f\n"				\
+		"	.endif\n"					\
+		"	.endif\n"					\
+		"	.previous\n"					\
+		"\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.even\n"					\
+		"10:	addq.l #"#n1",%0\n"				\
+		"	.ifnc	\""#s2"\",\"\"\n"			\
+		"20:	addq.l #"#n2",%0\n"				\
+		"	.ifnc	\""#s3"\",\"\"\n"			\
+		"30:	addq.l #"#n3",%0\n"				\
+		"	.endif\n"					\
+		"	.endif\n"					\
+		"	jra	4b\n"					\
+		"	.previous\n"					\
+		: "+d" (res), "+&a" (to), "+a" (from), "=&d" (tmp)	\
+		: : "memory")
+
+#define ___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
+	____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)
+#define __constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3)	\
+	___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3,  \
+					__suffix##n1, __suffix##n2, __suffix##n3)
+
+static __always_inline unsigned long
+__constant_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long res = 0, tmp;
+
+	switch (n) {
+	case 1:
+		__constant_copy_from_user_asm(res, to, from, tmp, 1, 0, 0);
+		break;
+	case 2:
+		__constant_copy_from_user_asm(res, to, from, tmp, 2, 0, 0);
+		break;
+	case 3:
+		__constant_copy_from_user_asm(res, to, from, tmp, 2, 1, 0);
+		break;
+	case 4:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 0, 0);
+		break;
+	case 5:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 1, 0);
+		break;
+	case 6:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 0);
+		break;
+	case 7:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 1);
+		break;
+	case 8:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 0);
+		break;
+	case 9:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 1);
+		break;
+	case 10:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 2);
+		break;
+	case 12:
+		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 4);
+		break;
+	default:
+		/* we limit the inlined version to 3 moves */
+		return __generic_copy_from_user(to, from, n);
+	}
+
+	return res;
+}
+
+#define __constant_copy_to_user_asm(res, to, from, tmp, n, s1, s2, s3)	\
+	asm volatile ("\n"						\
+		"	move."#s1"	(%2)+,%3\n"			\
+		"11:	"MOVES"."#s1"	%3,(%1)+\n"			\
+		"12:	move."#s2"	(%2)+,%3\n"			\
+		"21:	"MOVES"."#s2"	%3,(%1)+\n"			\
+		"22:\n"							\
+		"	.ifnc	\""#s3"\",\"\"\n"			\
+		"	move."#s3"	(%2)+,%3\n"			\
+		"31:	"MOVES"."#s3"	%3,(%1)+\n"			\
+		"32:\n"							\
+		"	.endif\n"					\
+		"4:\n"							\
+		"\n"							\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	4\n"					\
+		"	.long	11b,5f\n"				\
+		"	.long	12b,5f\n"				\
+		"	.long	21b,5f\n"				\
+		"	.long	22b,5f\n"				\
+		"	.ifnc	\""#s3"\",\"\"\n"			\
+		"	.long	31b,5f\n"				\
+		"	.long	32b,5f\n"				\
+		"	.endif\n"					\
+		"	.previous\n"					\
+		"\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.even\n"					\
+		"5:	moveq.l	#"#n",%0\n"				\
+		"	jra	4b\n"					\
+		"	.previous\n"					\
+		: "+d" (res), "+a" (to), "+a" (from), "=&d" (tmp)	\
+		: : "memory")
+
+static __always_inline unsigned long
+__constant_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	unsigned long res = 0, tmp;
+
+	switch (n) {
+	case 1:
+		__put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
+		break;
+	case 2:
+		__put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
+		break;
+	case 3:
+		__constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
+		break;
+	case 4:
+		__put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4);
+		break;
+	case 5:
+		__constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,);
+		break;
+	case 6:
+		__constant_copy_to_user_asm(res, to, from, tmp, 6, l, w,);
+		break;
+	case 7:
+		__constant_copy_to_user_asm(res, to, from, tmp, 7, l, w, b);
+		break;
+	case 8:
+		__constant_copy_to_user_asm(res, to, from, tmp, 8, l, l,);
+		break;
+	case 9:
+		__constant_copy_to_user_asm(res, to, from, tmp, 9, l, l, b);
+		break;
+	case 10:
+		__constant_copy_to_user_asm(res, to, from, tmp, 10, l, l, w);
+		break;
+	case 12:
+		__constant_copy_to_user_asm(res, to, from, tmp, 12, l, l, l);
+		break;
+	default:
+		/* limit the inlined version to 3 moves */
+		return __generic_copy_to_user(to, from, n);
+	}
+
+	return res;
+}
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (__builtin_constant_p(n))
+		return __constant_copy_from_user(to, from, n);
+	return __generic_copy_from_user(to, from, n);
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (__builtin_constant_p(n))
+		return __constant_copy_to_user(to, from, n);
+	return __generic_copy_to_user(to, from, n);
+}
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+
+#define user_addr_max() \
+	(uaccess_kernel() ? ~0UL : TASK_SIZE)
+
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
+unsigned long __clear_user(void __user *to, unsigned long n);
+
+#define clear_user	__clear_user
+
+#else /* !CONFIG_MMU */
+#include <asm-generic/uaccess.h>
+#endif
+
+#endif /* _M68K_UACCESS_H */
diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h
deleted file mode 100644
index 9ae9f8d..0000000
--- a/arch/m68k/include/asm/uaccess_mm.h
+++ /dev/null
@@ -1,390 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __M68K_UACCESS_H
-#define __M68K_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/segment.h>
-
-/* We let the MMU do all checking */
-static inline int access_ok(const void __user *addr,
-			    unsigned long size)
-{
-	return 1;
-}
-
-/*
- * Not all varients of the 68k family support the notion of address spaces.
- * The traditional 680x0 parts do, and they use the sfc/dfc registers and
- * the "moves" instruction to access user space from kernel space. Other
- * family members like ColdFire don't support this, and only have a single
- * address space, and use the usual "move" instruction for user space access.
- *
- * Outside of this difference the user space access functions are the same.
- * So lets keep the code simple and just define in what we need to use.
- */
-#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
-#define	MOVES	"moves"
-#else
-#define	MOVES	"move"
-#endif
-
-extern int __put_user_bad(void);
-extern int __get_user_bad(void);
-
-#define __put_user_asm(res, x, ptr, bwl, reg, err)	\
-asm volatile ("\n"					\
-	"1:	"MOVES"."#bwl"	%2,%1\n"		\
-	"2:\n"						\
-	"	.section .fixup,\"ax\"\n"		\
-	"	.even\n"				\
-	"10:	moveq.l	%3,%0\n"			\
-	"	jra 2b\n"				\
-	"	.previous\n"				\
-	"\n"						\
-	"	.section __ex_table,\"a\"\n"		\
-	"	.align	4\n"				\
-	"	.long	1b,10b\n"			\
-	"	.long	2b,10b\n"			\
-	"	.previous"				\
-	: "+d" (res), "=m" (*(ptr))			\
-	: #reg (x), "i" (err))
-
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- */
-
-#define __put_user(x, ptr)						\
-({									\
-	typeof(*(ptr)) __pu_val = (x);					\
-	int __pu_err = 0;						\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof (*(ptr))) {					\
-	case 1:								\
-		__put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT);	\
-		break;							\
-	case 2:								\
-		__put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT);	\
-		break;							\
-	case 4:								\
-		__put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT);	\
-		break;							\
-	case 8:								\
- 	    {								\
- 		const void __user *__pu_ptr = (ptr);			\
-		asm volatile ("\n"					\
-			"1:	"MOVES".l	%2,(%1)+\n"		\
-			"2:	"MOVES".l	%R2,(%1)\n"		\
-			"3:\n"						\
-			"	.section .fixup,\"ax\"\n"		\
-			"	.even\n"				\
-			"10:	movel %3,%0\n"				\
-			"	jra 3b\n"				\
-			"	.previous\n"				\
-			"\n"						\
-			"	.section __ex_table,\"a\"\n"		\
-			"	.align 4\n"				\
-			"	.long 1b,10b\n"				\
-			"	.long 2b,10b\n"				\
-			"	.long 3b,10b\n"				\
-			"	.previous"				\
-			: "+d" (__pu_err), "+a" (__pu_ptr)		\
-			: "r" (__pu_val), "i" (-EFAULT)			\
-			: "memory");					\
-		break;							\
-	    }								\
-	default:							\
-		__pu_err = __put_user_bad();				\
-		break;							\
-	}								\
-	__pu_err;							\
-})
-#define put_user(x, ptr)	__put_user(x, ptr)
-
-
-#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({		\
-	type __gu_val;							\
-	asm volatile ("\n"						\
-		"1:	"MOVES"."#bwl"	%2,%1\n"			\
-		"2:\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.even\n"					\
-		"10:	move.l	%3,%0\n"				\
-		"	sub.l	%1,%1\n"				\
-		"	jra	2b\n"					\
-		"	.previous\n"					\
-		"\n"							\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	4\n"					\
-		"	.long	1b,10b\n"				\
-		"	.previous"					\
-		: "+d" (res), "=&" #reg (__gu_val)			\
-		: "m" (*(ptr)), "i" (err));				\
-	(x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val;	\
-})
-
-#define __get_user(x, ptr)						\
-({									\
-	int __gu_err = 0;						\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		__get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT);	\
-		break;							\
-	case 2:								\
-		__get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT);	\
-		break;							\
-	case 4:								\
-		__get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT);	\
-		break;							\
-	case 8: {							\
-		const void __user *__gu_ptr = (ptr);			\
-		union {							\
-			u64 l;						\
-			__typeof__(*(ptr)) t;				\
-		} __gu_val;						\
-		asm volatile ("\n"					\
-			"1:	"MOVES".l	(%2)+,%1\n"		\
-			"2:	"MOVES".l	(%2),%R1\n"		\
-			"3:\n"						\
-			"	.section .fixup,\"ax\"\n"		\
-			"	.even\n"				\
-			"10:	move.l	%3,%0\n"			\
-			"	sub.l	%1,%1\n"			\
-			"	sub.l	%R1,%R1\n"			\
-			"	jra	3b\n"				\
-			"	.previous\n"				\
-			"\n"						\
-			"	.section __ex_table,\"a\"\n"		\
-			"	.align	4\n"				\
-			"	.long	1b,10b\n"			\
-			"	.long	2b,10b\n"			\
-			"	.previous"				\
-			: "+d" (__gu_err), "=&r" (__gu_val.l),		\
-			  "+a" (__gu_ptr)				\
-			: "i" (-EFAULT)					\
-			: "memory");					\
-		(x) = __gu_val.t;					\
-		break;							\
-	}								\
-	default:							\
-		__gu_err = __get_user_bad();				\
-		break;							\
-	}								\
-	__gu_err;							\
-})
-#define get_user(x, ptr) __get_user(x, ptr)
-
-unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n);
-unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n);
-
-#define __suffix0
-#define __suffix1 b
-#define __suffix2 w
-#define __suffix4 l
-
-#define ____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
-	asm volatile ("\n"						\
-		"1:	"MOVES"."#s1"	(%2)+,%3\n"			\
-		"	move."#s1"	%3,(%1)+\n"			\
-		"	.ifnc	\""#s2"\",\"\"\n"			\
-		"2:	"MOVES"."#s2"	(%2)+,%3\n"			\
-		"	move."#s2"	%3,(%1)+\n"			\
-		"	.ifnc	\""#s3"\",\"\"\n"			\
-		"3:	"MOVES"."#s3"	(%2)+,%3\n"			\
-		"	move."#s3"	%3,(%1)+\n"			\
-		"	.endif\n"					\
-		"	.endif\n"					\
-		"4:\n"							\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	4\n"					\
-		"	.long	1b,10f\n"				\
-		"	.ifnc	\""#s2"\",\"\"\n"			\
-		"	.long	2b,20f\n"				\
-		"	.ifnc	\""#s3"\",\"\"\n"			\
-		"	.long	3b,30f\n"				\
-		"	.endif\n"					\
-		"	.endif\n"					\
-		"	.previous\n"					\
-		"\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.even\n"					\
-		"10:	addq.l #"#n1",%0\n"				\
-		"	.ifnc	\""#s2"\",\"\"\n"			\
-		"20:	addq.l #"#n2",%0\n"				\
-		"	.ifnc	\""#s3"\",\"\"\n"			\
-		"30:	addq.l #"#n3",%0\n"				\
-		"	.endif\n"					\
-		"	.endif\n"					\
-		"	jra	4b\n"					\
-		"	.previous\n"					\
-		: "+d" (res), "+&a" (to), "+a" (from), "=&d" (tmp)	\
-		: : "memory")
-
-#define ___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)\
-	____constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3, s1, s2, s3)
-#define __constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3)	\
-	___constant_copy_from_user_asm(res, to, from, tmp, n1, n2, n3,  \
-					__suffix##n1, __suffix##n2, __suffix##n3)
-
-static __always_inline unsigned long
-__constant_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long res = 0, tmp;
-
-	switch (n) {
-	case 1:
-		__constant_copy_from_user_asm(res, to, from, tmp, 1, 0, 0);
-		break;
-	case 2:
-		__constant_copy_from_user_asm(res, to, from, tmp, 2, 0, 0);
-		break;
-	case 3:
-		__constant_copy_from_user_asm(res, to, from, tmp, 2, 1, 0);
-		break;
-	case 4:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 0, 0);
-		break;
-	case 5:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 1, 0);
-		break;
-	case 6:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 0);
-		break;
-	case 7:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 2, 1);
-		break;
-	case 8:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 0);
-		break;
-	case 9:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 1);
-		break;
-	case 10:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 2);
-		break;
-	case 12:
-		__constant_copy_from_user_asm(res, to, from, tmp, 4, 4, 4);
-		break;
-	default:
-		/* we limit the inlined version to 3 moves */
-		return __generic_copy_from_user(to, from, n);
-	}
-
-	return res;
-}
-
-#define __constant_copy_to_user_asm(res, to, from, tmp, n, s1, s2, s3)	\
-	asm volatile ("\n"						\
-		"	move."#s1"	(%2)+,%3\n"			\
-		"11:	"MOVES"."#s1"	%3,(%1)+\n"			\
-		"12:	move."#s2"	(%2)+,%3\n"			\
-		"21:	"MOVES"."#s2"	%3,(%1)+\n"			\
-		"22:\n"							\
-		"	.ifnc	\""#s3"\",\"\"\n"			\
-		"	move."#s3"	(%2)+,%3\n"			\
-		"31:	"MOVES"."#s3"	%3,(%1)+\n"			\
-		"32:\n"							\
-		"	.endif\n"					\
-		"4:\n"							\
-		"\n"							\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	4\n"					\
-		"	.long	11b,5f\n"				\
-		"	.long	12b,5f\n"				\
-		"	.long	21b,5f\n"				\
-		"	.long	22b,5f\n"				\
-		"	.ifnc	\""#s3"\",\"\"\n"			\
-		"	.long	31b,5f\n"				\
-		"	.long	32b,5f\n"				\
-		"	.endif\n"					\
-		"	.previous\n"					\
-		"\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.even\n"					\
-		"5:	moveq.l	#"#n",%0\n"				\
-		"	jra	4b\n"					\
-		"	.previous\n"					\
-		: "+d" (res), "+a" (to), "+a" (from), "=&d" (tmp)	\
-		: : "memory")
-
-static __always_inline unsigned long
-__constant_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	unsigned long res = 0, tmp;
-
-	switch (n) {
-	case 1:
-		__put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
-		break;
-	case 2:
-		__put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
-		break;
-	case 3:
-		__constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
-		break;
-	case 4:
-		__put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4);
-		break;
-	case 5:
-		__constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,);
-		break;
-	case 6:
-		__constant_copy_to_user_asm(res, to, from, tmp, 6, l, w,);
-		break;
-	case 7:
-		__constant_copy_to_user_asm(res, to, from, tmp, 7, l, w, b);
-		break;
-	case 8:
-		__constant_copy_to_user_asm(res, to, from, tmp, 8, l, l,);
-		break;
-	case 9:
-		__constant_copy_to_user_asm(res, to, from, tmp, 9, l, l, b);
-		break;
-	case 10:
-		__constant_copy_to_user_asm(res, to, from, tmp, 10, l, l, w);
-		break;
-	case 12:
-		__constant_copy_to_user_asm(res, to, from, tmp, 12, l, l, l);
-		break;
-	default:
-		/* limit the inlined version to 3 moves */
-		return __generic_copy_to_user(to, from, n);
-	}
-
-	return res;
-}
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	if (__builtin_constant_p(n))
-		return __constant_copy_from_user(to, from, n);
-	return __generic_copy_from_user(to, from, n);
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	if (__builtin_constant_p(n))
-		return __constant_copy_to_user(to, from, n);
-	return __generic_copy_to_user(to, from, n);
-}
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#define user_addr_max() \
-	(uaccess_kernel() ? ~0UL : TASK_SIZE)
-
-extern long strncpy_from_user(char *dst, const char __user *src, long count);
-extern __must_check long strnlen_user(const char __user *str, long n);
-
-unsigned long __clear_user(void __user *to, unsigned long n);
-
-#define clear_user	__clear_user
-
-#endif /* _M68K_UACCESS_H */
diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h
deleted file mode 100644
index dcfb693..0000000
--- a/arch/m68k/include/asm/uaccess_no.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __M68KNOMMU_UACCESS_H
-#define __M68KNOMMU_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <linux/string.h>
-
-#include <asm/segment.h>
-
-#define access_ok(addr,size)	_access_ok((unsigned long)(addr),(size))
-
-/*
- * It is not enough to just have access_ok check for a real RAM address.
- * This would disallow the case of code/ro-data running XIP in flash/rom.
- * Ideally we would check the possible flash ranges too, but that is
- * currently not so easy.
- */
-static inline int _access_ok(unsigned long addr, unsigned long size)
-{
-	return 1;
-}
-
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- */
-
-#define put_user(x, ptr)				\
-({							\
-    int __pu_err = 0;					\
-    typeof(*(ptr)) __pu_val = (x);			\
-    switch (sizeof (*(ptr))) {				\
-    case 1:						\
-	__put_user_asm(__pu_err, __pu_val, ptr, b);	\
-	break;						\
-    case 2:						\
-	__put_user_asm(__pu_err, __pu_val, ptr, w);	\
-	break;						\
-    case 4:						\
-	__put_user_asm(__pu_err, __pu_val, ptr, l);	\
-	break;						\
-    case 8:						\
-	memcpy((void __force *)ptr, &__pu_val, sizeof(*(ptr))); \
-	break;						\
-    default:						\
-	__pu_err = __put_user_bad();			\
-	break;						\
-    }							\
-    __pu_err;						\
-})
-#define __put_user(x, ptr) put_user(x, ptr)
-
-extern int __put_user_bad(void);
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-
-#define __ptr(x) ((unsigned long __user *)(x))
-
-#define __put_user_asm(err,x,ptr,bwl)				\
-	__asm__ ("move" #bwl " %0,%1"				\
-		: /* no outputs */						\
-		:"d" (x),"m" (*__ptr(ptr)) : "memory")
-
-#define get_user(x, ptr)					\
-({								\
-    int __gu_err = 0;						\
-    switch (sizeof(*(ptr))) {					\
-    case 1:							\
-	__get_user_asm(__gu_err, x, ptr, b, "=d");		\
-	break;							\
-    case 2:							\
-	__get_user_asm(__gu_err, x, ptr, w, "=r");		\
-	break;							\
-    case 4:							\
-	__get_user_asm(__gu_err, x, ptr, l, "=r");		\
-	break;							\
-    case 8: {							\
-	union {							\
-	    u64 l;						\
-	    __typeof__(*(ptr)) t;				\
-	} __gu_val;						\
-	memcpy(&__gu_val.l, (const void __force *)ptr, sizeof(__gu_val.l)); \
-	(x) = __gu_val.t;					\
-	break;							\
-    }								\
-    default:							\
-	__gu_err = __get_user_bad();				\
-	break;							\
-    }								\
-    __gu_err;							\
-})
-#define __get_user(x, ptr) get_user(x, ptr)
-
-extern int __get_user_bad(void);
-
-#define __get_user_asm(err,x,ptr,bwl,reg)			\
-	__asm__ ("move" #bwl " %1,%0"				\
-		 : "=d" (x)					\
-		 : "m" (*__ptr(ptr)))
-
-static inline unsigned long
-raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	memcpy(to, (__force const void *)from, n);
-	return 0;
-}
-
-static inline unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	memcpy((__force void *)to, from, n);
-	return 0;
-}
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-static inline long
-strncpy_from_user(char *dst, const char *src, long count)
-{
-	char *tmp;
-	strncpy(dst, src, count);
-	for (tmp = dst; *tmp && count > 0; tmp++, count--)
-		;
-	return(tmp - dst); /* DAVIDM should we count a NUL ?  check getname */
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-static inline long strnlen_user(const char *src, long n)
-{
-	return(strlen(src) + 1); /* DAVIDM make safer */
-}
-
-/*
- * Zero Userspace
- */
-
-static inline unsigned long
-__clear_user(void *to, unsigned long n)
-{
-	memset(to, 0, n);
-	return 0;
-}
-
-#define	clear_user(to,n)	__clear_user(to,n)
-
-#endif /* _M68KNOMMU_UACCESS_H */
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index a98fca9..a85f59b 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -920,7 +920,8 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 	err |= __put_user(0x70004e40 + (__NR_sigreturn << 16),
 			  (long __user *)(frame->retcode));
 #else
-	err |= __put_user((void *) ret_from_user_signal, &frame->pretcode);
+	err |= __put_user((long) ret_from_user_signal,
+			  (long __user *) &frame->pretcode);
 #endif
 
 	if (err)
@@ -1004,7 +1005,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	err |= __put_user(0x4e40, (short __user *)(frame->retcode + 4));
 #endif
 #else
-	err |= __put_user((void *) ret_from_user_rt_signal, &frame->pretcode);
+	err |= __put_user((long) ret_from_user_rt_signal,
+			  (long __user *) &frame->pretcode);
 #endif /* CONFIG_MMU */
 
 	if (err)
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 81fc799..625fb6d 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -439,3 +439,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index b4e2639..aae729c 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -445,3 +445,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cf72a02..32817c9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -378,3 +378,4 @@
 437	n32	openat2				sys_openat2
 438	n32	pidfd_getfd			sys_pidfd_getfd
 439	n32	faccessat2			sys_faccessat2
+440	n32	process_madvise			sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 557f995..9e4ea3c 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -354,3 +354,4 @@
 437	n64	openat2				sys_openat2
 438	n64	pidfd_getfd			sys_pidfd_getfd
 439	n64	faccessat2			sys_faccessat2
+440	n64	process_madvise			sys_process_madvise
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a17aab5..29f5f28 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -427,3 +427,4 @@
 437	o32	openat2				sys_openat2
 438	o32	pidfd_getfd			sys_pidfd_getfd
 439	o32	faccessat2			sys_faccessat2
+440	o32	process_madvise			sys_process_madvise
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index ae3dab3..38c63e5 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 9d7fb4c..1275dae 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -529,3 +529,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b7821ac..483fc55 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -38,6 +38,7 @@
 	select GENERIC_ARCH_TOPOLOGY if SMP
 	select GENERIC_ATOMIC64 if !64BIT
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_EARLY_IOREMAP
 	select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
 	select GENERIC_IOREMAP
 	select GENERIC_IRQ_MULTI_HANDLER
@@ -388,6 +389,28 @@
 
 endchoice
 
+config EFI_STUB
+	bool
+
+config EFI
+	bool "UEFI runtime support"
+	depends on OF
+	select LIBFDT
+	select UCS2_STRING
+	select EFI_PARAMS_FROM_FDT
+	select EFI_STUB
+	select EFI_GENERIC_STUB
+	select EFI_RUNTIME_WRAPPERS
+	select RISCV_ISA_C
+	depends on MMU
+	default y
+	help
+	  This option provides support for runtime services provided
+	  by UEFI firmware (such as non-volatile variables, realtime
+	  clock, and platform reset). A UEFI stub is also provided to
+	  allow the kernel to be booted as an EFI application. This
+	  is only useful on systems that have UEFI firmware.
+
 endmenu
 
 config BUILTIN_DTB
@@ -400,3 +423,5 @@
 source "kernel/power/Kconfig"
 
 endmenu
+
+source "drivers/firmware/Kconfig"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index fb6e37d..10df59f 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -80,6 +80,7 @@
 core-y += arch/riscv/
 
 libs-y += arch/riscv/lib/
+libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 PHONY += vdso_install
 vdso_install:
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index d58c93e..d222d35 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -130,3 +130,4 @@
 # CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_MEMTEST=y
 # CONFIG_SYSFS_SYSCALL is not set
+CONFIG_EFI=y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 3d9410b..59dd7be 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+generic-y += early_ioremap.h
 generic-y += extable.h
 generic-y += flat.h
 generic-y += kvm_para.h
diff --git a/arch/riscv/include/asm/cacheinfo.h b/arch/riscv/include/asm/cacheinfo.h
index 5d9662e..d1a3652 100644
--- a/arch/riscv/include/asm/cacheinfo.h
+++ b/arch/riscv/include/asm/cacheinfo.h
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 SiFive
+ */
 
 #ifndef _ASM_RISCV_CACHEINFO_H
 #define _ASM_RISCV_CACHEINFO_H
@@ -11,5 +14,7 @@ struct riscv_cacheinfo_ops {
 };
 
 void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops);
+uintptr_t get_cache_size(u32 level, enum cache_type type);
+uintptr_t get_cache_geometry(u32 level, enum cache_type type);
 
 #endif /* _ASM_RISCV_CACHEINFO_H */
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
new file mode 100644
index 0000000..7542282
--- /dev/null
+++ b/arch/riscv/include/asm/efi.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/csr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/ptrace.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+#else
+#define efi_init()
+#endif
+
+int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+
+#define arch_efi_call_virt_setup()      efi_virtmap_load()
+#define arch_efi_call_virt_teardown()   efi_virtmap_unload()
+
+#define arch_efi_call_virt(p, f, args...) p->f(args)
+
+#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
+
+/* on RISC-V, the FDT may be located anywhere in system RAM */
+static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr)
+{
+	return ULONG_MAX;
+}
+
+/* Load initrd at enough distance from DRAM start */
+static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
+{
+	return image_addr + SZ_256M;
+}
+
+#define alloc_screen_info(x...)		(&screen_info)
+
+static inline void free_screen_info(struct screen_info *si)
+{
+}
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
+
+void efi_virtmap_load(void);
+void efi_virtmap_unload(void);
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index d83a4ef..5c725e1 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -11,6 +11,7 @@
 #include <uapi/asm/elf.h>
 #include <asm/auxvec.h>
 #include <asm/byteorder.h>
+#include <asm/cacheinfo.h>
 
 /*
  * These are used to set parameters in the core dumps.
@@ -61,6 +62,18 @@ extern unsigned long elf_hwcap;
 do {								\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
 		(elf_addr_t)current->mm->context.vdso);		\
+	NEW_AUX_ENT(AT_L1I_CACHESIZE,				\
+		get_cache_size(1, CACHE_TYPE_INST));		\
+	NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY,			\
+		get_cache_geometry(1, CACHE_TYPE_INST));	\
+	NEW_AUX_ENT(AT_L1D_CACHESIZE,				\
+		get_cache_size(1, CACHE_TYPE_DATA));		\
+	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY,			\
+		get_cache_geometry(1, CACHE_TYPE_DATA));	\
+	NEW_AUX_ENT(AT_L2_CACHESIZE,				\
+		get_cache_size(2, CACHE_TYPE_UNIFIED));		\
+	NEW_AUX_ENT(AT_L2_CACHEGEOMETRY,			\
+		get_cache_geometry(2, CACHE_TYPE_UNIFIED));	\
 } while (0)
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
 struct linux_binprm;
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 1ff075a..54cbf07 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -22,14 +22,24 @@
  */
 enum fixed_addresses {
 	FIX_HOLE,
-#define FIX_FDT_SIZE	SZ_1M
-	FIX_FDT_END,
-	FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
 	FIX_PTE,
 	FIX_PMD,
 	FIX_TEXT_POKE1,
 	FIX_TEXT_POKE0,
 	FIX_EARLYCON_MEM_BASE,
+
+	__end_of_permanent_fixed_addresses,
+	/*
+	 * Temporary boot-time mappings, used by early_ioremap(),
+	 * before ioremap() is functional.
+	 */
+#define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS	7
+#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 3835c32..c025a74 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/pgtable.h>
 #include <asm/mmiowb.h>
+#include <asm/early_ioremap.h>
 
 /*
  * MMIO access functions are separated out to break dependency cycles
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 967eacb..dabcf2c 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -20,6 +20,8 @@ typedef struct {
 #endif
 } mm_context_t;
 
+void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
+			       phys_addr_t sz, pgprot_t prot);
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_MMU_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index eaea1f7..183f1f4 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -100,6 +100,10 @@
 
 #define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)
+#define PAGE_KERNEL_READ	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
+#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)
+#define PAGE_KERNEL_READ_EXEC	__pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
+					 | _PAGE_EXEC)
 
 #define PAGE_TABLE		__pgprot(_PAGE_TABLE)
 
@@ -464,6 +468,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
 #define kern_addr_valid(addr)   (1) /* FIXME */
 
 extern void *dtb_early_va;
+extern uintptr_t dtb_early_pa;
 void setup_bootmem(void);
 void paging_init(void);
 
diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
new file mode 100644
index 0000000..3a9971b
--- /dev/null
+++ b/arch/riscv/include/asm/sections.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef __ASM_SECTIONS_H
+#define __ASM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char _start[];
+extern char _start_kernel[];
+
+#endif /* __ASM_SECTIONS_H */
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index d86cb17..32c73ba 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -10,4 +10,28 @@
 /* vDSO location */
 #define AT_SYSINFO_EHDR 33
 
+/*
+ * The set of entries below represent more extensive information
+ * about the caches, in the form of two entry per cache type,
+ * one entry containing the cache size in bytes, and the other
+ * containing the cache line size in bytes in the bottom 16 bits
+ * and the cache associativity in the next 16 bits.
+ *
+ * The associativity is such that if N is the 16-bit value, the
+ * cache is N way set associative. A value if 0xffff means fully
+ * associative, a value of 1 means directly mapped.
+ *
+ * For all these fields, a value of 0 means that the information
+ * is not known.
+ */
+#define AT_L1I_CACHESIZE	40
+#define AT_L1I_CACHEGEOMETRY	41
+#define AT_L1D_CACHESIZE	42
+#define AT_L1D_CACHEGEOMETRY	43
+#define AT_L2_CACHESIZE		44
+#define AT_L2_CACHEGEOMETRY	45
+
+/* entries in ARCH_DLINFO */
+#define AT_VECTOR_SIZE_ARCH	7
+
 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index dc93710..fa896c5 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -55,4 +55,6 @@
 
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 
+obj-$(CONFIG_EFI)		+= efi.o
+
 clean:
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index bd0f122..de59dd4 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -3,7 +3,6 @@
  * Copyright (C) 2017 SiFive
  */
 
-#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -25,12 +24,84 @@ cache_get_priv_group(struct cacheinfo *this_leaf)
 	return NULL;
 }
 
-static void ci_leaf_init(struct cacheinfo *this_leaf,
-			 struct device_node *node,
-			 enum cache_type type, unsigned int level)
+static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id());
+	struct cacheinfo *this_leaf;
+	int index;
+
+	for (index = 0; index < this_cpu_ci->num_leaves; index++) {
+		this_leaf = this_cpu_ci->info_list + index;
+		if (this_leaf->level == level && this_leaf->type == type)
+			return this_leaf;
+	}
+
+	return NULL;
+}
+
+uintptr_t get_cache_size(u32 level, enum cache_type type)
+{
+	struct cacheinfo *this_leaf = get_cacheinfo(level, type);
+
+	return this_leaf ? this_leaf->size : 0;
+}
+
+uintptr_t get_cache_geometry(u32 level, enum cache_type type)
+{
+	struct cacheinfo *this_leaf = get_cacheinfo(level, type);
+
+	return this_leaf ? (this_leaf->ways_of_associativity << 16 |
+			    this_leaf->coherency_line_size) :
+			   0;
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
+			 unsigned int level, unsigned int size,
+			 unsigned int sets, unsigned int line_size)
 {
 	this_leaf->level = level;
 	this_leaf->type = type;
+	this_leaf->size = size;
+	this_leaf->number_of_sets = sets;
+	this_leaf->coherency_line_size = line_size;
+
+	/*
+	 * If the cache is fully associative, there is no need to
+	 * check the other properties.
+	 */
+	if (sets == 1)
+		return;
+
+	/*
+	 * Set the ways number for n-ways associative, make sure
+	 * all properties are big than zero.
+	 */
+	if (sets > 0 && size > 0 && line_size > 0)
+		this_leaf->ways_of_associativity = (size / sets) / line_size;
+}
+
+static void fill_cacheinfo(struct cacheinfo **this_leaf,
+			   struct device_node *node, unsigned int level)
+{
+	unsigned int size, sets, line_size;
+
+	if (!of_property_read_u32(node, "cache-size", &size) &&
+	    !of_property_read_u32(node, "cache-block-size", &line_size) &&
+	    !of_property_read_u32(node, "cache-sets", &sets)) {
+		ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size);
+	}
+
+	if (!of_property_read_u32(node, "i-cache-size", &size) &&
+	    !of_property_read_u32(node, "i-cache-sets", &sets) &&
+	    !of_property_read_u32(node, "i-cache-block-size", &line_size)) {
+		ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size);
+	}
+
+	if (!of_property_read_u32(node, "d-cache-size", &size) &&
+	    !of_property_read_u32(node, "d-cache-sets", &sets) &&
+	    !of_property_read_u32(node, "d-cache-block-size", &line_size)) {
+		ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size);
+	}
 }
 
 static int __init_cache_level(unsigned int cpu)
@@ -83,29 +154,24 @@ static int __populate_cache_leaves(unsigned int cpu)
 	struct device_node *prev = NULL;
 	int levels = 1, level = 1;
 
-	if (of_property_read_bool(np, "cache-size"))
-		ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
-	if (of_property_read_bool(np, "i-cache-size"))
-		ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
-	if (of_property_read_bool(np, "d-cache-size"))
-		ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+	/* Level 1 caches in cpu node */
+	fill_cacheinfo(&this_leaf, np, level);
 
+	/* Next level caches in cache nodes */
 	prev = np;
 	while ((np = of_find_next_cache_node(np))) {
 		of_node_put(prev);
 		prev = np;
+
 		if (!of_device_is_compatible(np, "cache"))
 			break;
 		if (of_property_read_u32(np, "cache-level", &level))
 			break;
 		if (level <= levels)
 			break;
-		if (of_property_read_bool(np, "cache-size"))
-			ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
-		if (of_property_read_bool(np, "i-cache-size"))
-			ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
-		if (of_property_read_bool(np, "d-cache-size"))
-			ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+
+		fill_cacheinfo(&this_leaf, np, level);
+
 		levels = level;
 	}
 	of_node_put(np);
diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
new file mode 100644
index 0000000..8e733aa
--- /dev/null
+++ b/arch/riscv/kernel/efi-header.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Adapted from arch/arm64/kernel/efi-header.S
+ */
+
+#include <linux/pe.h>
+#include <linux/sizes.h>
+
+	.macro	__EFI_PE_HEADER
+	.long	PE_MAGIC
+coff_header:
+#ifdef CONFIG_64BIT
+	.short	IMAGE_FILE_MACHINE_RISCV64		// Machine
+#else
+	.short	IMAGE_FILE_MACHINE_RISCV32		// Machine
+#endif
+	.short	section_count				// NumberOfSections
+	.long	0 					// TimeDateStamp
+	.long	0					// PointerToSymbolTable
+	.long	0					// NumberOfSymbols
+	.short	section_table - optional_header		// SizeOfOptionalHeader
+	.short	IMAGE_FILE_DEBUG_STRIPPED | \
+		IMAGE_FILE_EXECUTABLE_IMAGE | \
+		IMAGE_FILE_LINE_NUMS_STRIPPED		// Characteristics
+
+optional_header:
+#ifdef CONFIG_64BIT
+	.short	PE_OPT_MAGIC_PE32PLUS			// PE32+ format
+#else
+	.short	PE_OPT_MAGIC_PE32			// PE32 format
+#endif
+	.byte	0x02					// MajorLinkerVersion
+	.byte	0x14					// MinorLinkerVersion
+	.long	__pecoff_text_end - efi_header_end	// SizeOfCode
+	.long	__pecoff_data_virt_size			// SizeOfInitializedData
+	.long	0					// SizeOfUninitializedData
+	.long	__efistub_efi_pe_entry - _start		// AddressOfEntryPoint
+	.long	efi_header_end - _start			// BaseOfCode
+#ifdef CONFIG_32BIT
+	.long  __pecoff_text_end - _start		// BaseOfData
+#endif
+
+extra_header_fields:
+	.quad	0					// ImageBase
+	.long	PECOFF_SECTION_ALIGNMENT		// SectionAlignment
+	.long	PECOFF_FILE_ALIGNMENT			// FileAlignment
+	.short	0					// MajorOperatingSystemVersion
+	.short	0					// MinorOperatingSystemVersion
+	.short	LINUX_EFISTUB_MAJOR_VERSION		// MajorImageVersion
+	.short	LINUX_EFISTUB_MINOR_VERSION		// MinorImageVersion
+	.short	0					// MajorSubsystemVersion
+	.short	0					// MinorSubsystemVersion
+	.long	0					// Win32VersionValue
+
+	.long	_end - _start				// SizeOfImage
+
+	// Everything before the kernel image is considered part of the header
+	.long	efi_header_end - _start			// SizeOfHeaders
+	.long	0					// CheckSum
+	.short	IMAGE_SUBSYSTEM_EFI_APPLICATION		// Subsystem
+	.short	0					// DllCharacteristics
+	.quad	0					// SizeOfStackReserve
+	.quad	0					// SizeOfStackCommit
+	.quad	0					// SizeOfHeapReserve
+	.quad	0					// SizeOfHeapCommit
+	.long	0					// LoaderFlags
+	.long	(section_table - .) / 8			// NumberOfRvaAndSizes
+
+	.quad	0					// ExportTable
+	.quad	0					// ImportTable
+	.quad	0					// ResourceTable
+	.quad	0					// ExceptionTable
+	.quad	0					// CertificationTable
+	.quad	0					// BaseRelocationTable
+
+	// Section table
+section_table:
+	.ascii	".text\0\0\0"
+	.long	__pecoff_text_end - efi_header_end	// VirtualSize
+	.long	efi_header_end - _start			// VirtualAddress
+	.long	__pecoff_text_end - efi_header_end	// SizeOfRawData
+	.long	efi_header_end - _start			// PointerToRawData
+
+	.long	0					// PointerToRelocations
+	.long	0					// PointerToLineNumbers
+	.short	0					// NumberOfRelocations
+	.short	0					// NumberOfLineNumbers
+	.long	IMAGE_SCN_CNT_CODE | \
+		IMAGE_SCN_MEM_READ | \
+		IMAGE_SCN_MEM_EXECUTE			// Characteristics
+
+	.ascii	".data\0\0\0"
+	.long	__pecoff_data_virt_size			// VirtualSize
+	.long	__pecoff_text_end - _start		// VirtualAddress
+	.long	__pecoff_data_raw_size			// SizeOfRawData
+	.long	__pecoff_text_end - _start		// PointerToRawData
+
+	.long	0					// PointerToRelocations
+	.long	0					// PointerToLineNumbers
+	.short	0					// NumberOfRelocations
+	.short	0					// NumberOfLineNumbers
+	.long	IMAGE_SCN_CNT_INITIALIZED_DATA | \
+		IMAGE_SCN_MEM_READ | \
+		IMAGE_SCN_MEM_WRITE			// Characteristics
+
+	.set	section_count, (. - section_table) / 40
+
+	.balign	0x1000
+efi_header_end:
+	.endm
diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
new file mode 100644
index 0000000..0241592
--- /dev/null
+++ b/arch/riscv/kernel/efi.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Adapted from arch/arm64/kernel/efi.c
+ */
+
+#include <linux/efi.h>
+#include <linux/init.h>
+
+#include <asm/efi.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-bits.h>
+
+/*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set. Also take the new (optional) RO/XP bits into account.
+ */
+static __init pgprot_t efimem_to_pgprot_map(efi_memory_desc_t *md)
+{
+	u64 attr = md->attribute;
+	u32 type = md->type;
+
+	if (type == EFI_MEMORY_MAPPED_IO)
+		return PAGE_KERNEL;
+
+	/* R-- */
+	if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+	    (EFI_MEMORY_XP | EFI_MEMORY_RO))
+		return PAGE_KERNEL_READ;
+
+	/* R-X */
+	if (attr & EFI_MEMORY_RO)
+		return PAGE_KERNEL_READ_EXEC;
+
+	/* RW- */
+	if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
+	     EFI_MEMORY_XP) ||
+	    type != EFI_RUNTIME_SERVICES_CODE)
+		return PAGE_KERNEL;
+
+	/* RWX */
+	return PAGE_KERNEL_EXEC;
+}
+
+int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+{
+	pgprot_t prot = __pgprot(pgprot_val(efimem_to_pgprot_map(md)) &
+				~(_PAGE_GLOBAL));
+	int i;
+
+	/* RISC-V maps one page at a time */
+	for (i = 0; i < md->num_pages; i++)
+		create_pgd_mapping(mm->pgd, md->virt_addr + i * PAGE_SIZE,
+				   md->phys_addr + i * PAGE_SIZE,
+				   PAGE_SIZE, prot);
+	return 0;
+}
+
+static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
+{
+	efi_memory_desc_t *md = data;
+	pte_t pte = READ_ONCE(*ptep);
+	unsigned long val;
+
+	if (md->attribute & EFI_MEMORY_RO) {
+		val = pte_val(pte) & ~_PAGE_WRITE;
+		val = pte_val(pte) | _PAGE_READ;
+		pte = __pte(val);
+	}
+	if (md->attribute & EFI_MEMORY_XP) {
+		val = pte_val(pte) & ~_PAGE_EXEC;
+		pte = __pte(val);
+	}
+	set_pte(ptep, pte);
+
+	return 0;
+}
+
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+				       efi_memory_desc_t *md)
+{
+	BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
+	       md->type != EFI_RUNTIME_SERVICES_DATA);
+
+	/*
+	 * Calling apply_to_page_range() is only safe on regions that are
+	 * guaranteed to be mapped down to pages. Since we are only called
+	 * for regions that have been mapped using efi_create_mapping() above
+	 * (and this is checked by the generic Memory Attributes table parsing
+	 * routines), there is no need to check that again here.
+	 */
+	return apply_to_page_range(mm, md->virt_addr,
+				   md->num_pages << EFI_PAGE_SHIFT,
+				   set_permissions, md);
+}
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 0a4e81b..11e2a4f 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -3,7 +3,6 @@
  * Copyright (C) 2012 Regents of the University of California
  */
 
-#include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/asm.h>
 #include <linux/init.h>
@@ -13,6 +12,7 @@
 #include <asm/csr.h>
 #include <asm/hwcap.h>
 #include <asm/image.h>
+#include "efi-header.S"
 
 __HEAD
 ENTRY(_start)
@@ -22,10 +22,18 @@
 	 * Do not modify it without modifying the structure and all bootloaders
 	 * that expects this header format!!
 	 */
+#ifdef CONFIG_EFI
+	/*
+	 * This instruction decodes to "MZ" ASCII required by UEFI.
+	 */
+	c.li s4,-13
+	j _start_kernel
+#else
 	/* jump to start kernel */
 	j _start_kernel
 	/* reserved */
 	.word 0
+#endif
 	.balign 8
 #if __riscv_xlen == 64
 	/* Image load offset(2MB) from start of RAM */
@@ -43,7 +51,14 @@
 	.ascii RISCV_IMAGE_MAGIC
 	.balign 4
 	.ascii RISCV_IMAGE_MAGIC2
+#ifdef CONFIG_EFI
+	.word pe_head_start - _start
+pe_head_start:
+
+	__EFI_PE_HEADER
+#else
 	.word 0
+#endif
 
 .align 2
 #ifdef CONFIG_MMU
@@ -259,7 +274,6 @@
 #endif
 	/* Start the kernel */
 	call soc_early_init
-	call parse_dtb
 	tail start_kernel
 
 .Lsecondary_start:
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index 105fb04..b48dda3 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
 extern void *__cpu_up_stack_pointer[];
 extern void *__cpu_up_task_pointer[];
 
-void __init parse_dtb(void);
-
 #endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h
new file mode 100644
index 0000000..8c212ef
--- /dev/null
+++ b/arch/riscv/kernel/image-vars.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ * Linker script variables to be set after section resolution, as
+ * ld.lld does not like variables assigned before SECTIONS is processed.
+ * Based on arch/arm64/kerne/image-vars.h
+ */
+#ifndef __RISCV_KERNEL_IMAGE_VARS_H
+#define __RISCV_KERNEL_IMAGE_VARS_H
+
+#ifndef LINKER_SCRIPT
+#error This file should only be included in vmlinux.lds.S
+#endif
+
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at another offset than they were
+ * linked at. The routines below are all implemented in assembler in a
+ * position independent manner
+ */
+__efistub_memcmp		= memcmp;
+__efistub_memchr		= memchr;
+__efistub_memcpy		= memcpy;
+__efistub_memmove		= memmove;
+__efistub_memset		= memset;
+__efistub_strlen		= strlen;
+__efistub_strnlen		= strnlen;
+__efistub_strcmp		= strcmp;
+__efistub_strncmp		= strncmp;
+__efistub_strrchr		= strrchr;
+
+#ifdef CONFIG_KASAN
+__efistub___memcpy		= memcpy;
+__efistub___memmove		= memmove;
+__efistub___memset		= memset;
+#endif
+
+__efistub__start		= _start;
+__efistub__start_kernel		= _start_kernel;
+__efistub__end			= _end;
+__efistub__edata		= _edata;
+__efistub_screen_info		= screen_info;
+
+#endif
+
+#endif /* __RISCV_KERNEL_IMAGE_VARS_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 2c6dd32..4c96ac1 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -17,19 +17,22 @@
 #include <linux/sched/task.h>
 #include <linux/swiotlb.h>
 #include <linux/smp.h>
+#include <linux/efi.h>
 
 #include <asm/cpu_ops.h>
+#include <asm/early_ioremap.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
 #include <asm/thread_info.h>
 #include <asm/kasan.h>
+#include <asm/efi.h>
 
 #include "head.h"
 
-#ifdef CONFIG_DUMMY_CONSOLE
-struct screen_info screen_info = {
+#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI)
+struct screen_info screen_info __section(.data) = {
 	.orig_video_lines	= 30,
 	.orig_video_cols	= 80,
 	.orig_video_mode	= 0,
@@ -48,8 +51,9 @@ atomic_t hart_lottery __section(.sdata);
 unsigned long boot_cpu_hartid;
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
-void __init parse_dtb(void)
+static void __init parse_dtb(void)
 {
+	/* Early scan of device tree from init memory */
 	if (early_init_dt_scan(dtb_early_va))
 		return;
 
@@ -62,6 +66,7 @@ void __init parse_dtb(void)
 
 void __init setup_arch(char **cmdline_p)
 {
+	parse_dtb();
 	init_mm.start_code = (unsigned long) _stext;
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
@@ -69,14 +74,19 @@ void __init setup_arch(char **cmdline_p)
 
 	*cmdline_p = boot_command_line;
 
+	early_ioremap_setup();
 	parse_early_param();
 
+	efi_init();
 	setup_bootmem();
 	paging_init();
 #if IS_ENABLED(CONFIG_BUILTIN_DTB)
 	unflatten_and_copy_device_tree();
 #else
-	unflatten_device_tree();
+	if (early_init_dt_verify(__va(dtb_early_pa)))
+		unflatten_device_tree();
+	else
+		pr_err("No DTB found in kernel mappings\n");
 #endif
 
 #ifdef CONFIG_SWIOTLB
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 67db80e..3ffbd6c 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -10,6 +10,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/set_memory.h>
+#include "image-vars.h"
 
 #include <linux/sizes.h>
 OUTPUT_ARCH(riscv)
@@ -17,6 +18,9 @@
 
 jiffies = jiffies_64;
 
+PECOFF_SECTION_ALIGNMENT = 0x1000;
+PECOFF_FILE_ALIGNMENT = 0x200;
+
 SECTIONS
 {
 	/* Beginning of code and text segment */
@@ -66,6 +70,11 @@
 		_etext = .;
 	}
 
+#ifdef CONFIG_EFI
+	. = ALIGN(PECOFF_SECTION_ALIGNMENT);
+	__pecoff_text_end = .;
+#endif
+
 	INIT_DATA_SECTION(16)
 
 	/* Start of data section */
@@ -84,16 +93,26 @@
 	.sdata : {
 		__global_pointer$ = . + 0x800;
 		*(.sdata*)
-		/* End of data section */
-		_edata = .;
 	}
 
+#ifdef CONFIG_EFI
+	.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+	__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
+#endif
+
+	/* End of data section */
+	_edata = .;
+
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
 	.rel.dyn : {
 		*(.rel.dyn*)
 	}
 
+#ifdef CONFIG_EFI
+	. = ALIGN(PECOFF_SECTION_ALIGNMENT);
+	__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
+#endif
 	_end = .;
 
 	STABS_DEBUG
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 716d64e..1359e21 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -19,6 +19,167 @@
 
 #include "../kernel/head.h"
 
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+	/* Are we prepared to handle this kernel fault? */
+	if (fixup_exception(regs))
+		return;
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
+		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		"paging request", addr);
+	die(regs, "Oops");
+	do_exit(SIGKILL);
+}
+
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return the userspace
+		 * (which will retry the fault, or kill us if we got oom-killed).
+		 */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		pagefault_out_of_memory();
+		return;
+	} else if (fault & VM_FAULT_SIGBUS) {
+		/* Kernel mode? Handle exceptions or die */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+		return;
+	}
+	BUG();
+}
+
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+	/*
+	 * Something tried to access memory that isn't in our memory map.
+	 * Fix it, but check if it's kernel or user first.
+	 */
+	mmap_read_unlock(mm);
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		do_trap(regs, SIGSEGV, code, addr);
+		return;
+	}
+
+	no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	p4d_t *p4d, *p4d_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte_k;
+	int index;
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs))
+		return do_trap(regs, SIGSEGV, code, addr);
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "tsk->active_mm->pgd" here.
+	 * We might be inside an interrupt in the middle
+	 * of a task switch.
+	 */
+	index = pgd_index(addr);
+	pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pgd(pgd, *pgd_k);
+
+	p4d = p4d_offset(pgd, addr);
+	p4d_k = p4d_offset(pgd_k, addr);
+	if (!p4d_present(*p4d_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	pud = pud_offset(p4d, addr);
+	pud_k = pud_offset(p4d_k, addr);
+	if (!pud_present(*pud_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * Since the vmalloc area is global, it is unnecessary
+	 * to copy individual PTEs
+	 */
+	pmd = pmd_offset(pud, addr);
+	pmd_k = pmd_offset(pud_k, addr);
+	if (!pmd_present(*pmd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pmd(pmd, *pmd_k);
+
+	/*
+	 * Make sure the actual PTE exists as well to
+	 * catch kernel vmalloc-area accesses to non-mapped
+	 * addresses. If we don't do this, this will just
+	 * silently loop forever.
+	 */
+	pte_k = pte_offset_kernel(pmd_k, addr);
+	if (!pte_present(*pte_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * The kernel assumes that TLBs don't cache invalid
+	 * entries, but in RISC-V, SFENCE.VMA specifies an
+	 * ordering constraint, not a cache flush; it is
+	 * necessary even after writing invalid entries.
+	 */
+	local_flush_tlb_page(addr);
+}
+
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+	switch (cause) {
+	case EXC_INST_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_EXEC)) {
+			return true;
+		}
+		break;
+	case EXC_LOAD_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_READ)) {
+			return true;
+		}
+		break;
+	case EXC_STORE_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_WRITE)) {
+			return true;
+		}
+		break;
+	default:
+		panic("%s: unhandled cause %lu", __func__, cause);
+	}
+	return false;
+}
+
 /*
  * This routine handles page faults.  It determines the address and the
  * problem, and then passes it off to one of the appropriate routines.
@@ -48,8 +209,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 	 * only copy the information from the master page table,
 	 * nothing more.
 	 */
-	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
-		goto vmalloc_fault;
+	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+		vmalloc_fault(regs, code, addr);
+		return;
+	}
 
 	/* Enable interrupts if they were enabled in the parent context. */
 	if (likely(regs->status & SR_PIE))
@@ -59,25 +222,37 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 	 * If we're in an interrupt, have no user context, or are running
 	 * in an atomic region, then we must not take the fault.
 	 */
-	if (unlikely(faulthandler_disabled() || !mm))
-		goto no_context;
+	if (unlikely(faulthandler_disabled() || !mm)) {
+		no_context(regs, addr);
+		return;
+	}
 
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 
+	if (cause == EXC_STORE_PAGE_FAULT)
+		flags |= FAULT_FLAG_WRITE;
+	else if (cause == EXC_INST_PAGE_FAULT)
+		flags |= FAULT_FLAG_INSTRUCTION;
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, addr);
-	if (unlikely(!vma))
-		goto bad_area;
+	if (unlikely(!vma)) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
 	if (likely(vma->vm_start <= addr))
 		goto good_area;
-	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
-		goto bad_area;
-	if (unlikely(expand_stack(vma, addr)))
-		goto bad_area;
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
+	if (unlikely(expand_stack(vma, addr))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
 
 	/*
 	 * Ok, we have a good vm_area for this memory access, so
@@ -86,22 +261,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 good_area:
 	code = SEGV_ACCERR;
 
-	switch (cause) {
-	case EXC_INST_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_EXEC))
-			goto bad_area;
-		break;
-	case EXC_LOAD_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_READ))
-			goto bad_area;
-		break;
-	case EXC_STORE_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		flags |= FAULT_FLAG_WRITE;
-		break;
-	default:
-		panic("%s: unhandled cause %lu", __func__, cause);
+	if (unlikely(access_error(cause, vma))) {
+		bad_area(regs, mm, code, addr);
+		return;
 	}
 
 	/*
@@ -119,144 +281,22 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 	if (fault_signal_pending(fault, regs))
 		return;
 
+	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+		flags |= FAULT_FLAG_TRIED;
+
+		/*
+		 * No need to mmap_read_unlock(mm) as we would
+		 * have already released it in __lock_page_or_retry
+		 * in mm/filemap.c.
+		 */
+		goto retry;
+	}
+
+	mmap_read_unlock(mm);
+
 	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
-	}
-
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_RETRY) {
-			flags |= FAULT_FLAG_TRIED;
-
-			/*
-			 * No need to mmap_read_unlock(mm) as we would
-			 * have already released it in __lock_page_or_retry
-			 * in mm/filemap.c.
-			 */
-			goto retry;
-		}
-	}
-
-	mmap_read_unlock(mm);
-	return;
-
-	/*
-	 * Something tried to access memory that isn't in our memory map.
-	 * Fix it, but check if it's kernel or user first.
-	 */
-bad_area:
-	mmap_read_unlock(mm);
-	/* User mode accesses just cause a SIGSEGV */
-	if (user_mode(regs)) {
-		do_trap(regs, SIGSEGV, code, addr);
+		mm_fault_error(regs, addr, fault);
 		return;
 	}
-
-no_context:
-	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
-		return;
-
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
-	 */
-	bust_spinlocks(1);
-	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
-		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
-		"paging request", addr);
-	die(regs, "Oops");
-	do_exit(SIGKILL);
-
-	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed).
-	 */
-out_of_memory:
-	mmap_read_unlock(mm);
-	if (!user_mode(regs))
-		goto no_context;
-	pagefault_out_of_memory();
 	return;
-
-do_sigbus:
-	mmap_read_unlock(mm);
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
-	do_trap(regs, SIGBUS, BUS_ADRERR, addr);
-	return;
-
-vmalloc_fault:
-	{
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		p4d_t *p4d, *p4d_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-		int index;
-
-		/* User mode accesses just cause a SIGSEGV */
-		if (user_mode(regs))
-			return do_trap(regs, SIGSEGV, code, addr);
-
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk->active_mm->pgd" here.
-		 * We might be inside an interrupt in the middle
-		 * of a task switch.
-		 */
-		index = pgd_index(addr);
-		pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-		set_pgd(pgd, *pgd_k);
-
-		p4d = p4d_offset(pgd, addr);
-		p4d_k = p4d_offset(pgd_k, addr);
-		if (!p4d_present(*p4d_k))
-			goto no_context;
-
-		pud = pud_offset(p4d, addr);
-		pud_k = pud_offset(p4d_k, addr);
-		if (!pud_present(*pud_k))
-			goto no_context;
-
-		/*
-		 * Since the vmalloc area is global, it is unnecessary
-		 * to copy individual PTEs
-		 */
-		pmd = pmd_offset(pud, addr);
-		pmd_k = pmd_offset(pud_k, addr);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
-
-		/*
-		 * Make sure the actual PTE exists as well to
-		 * catch kernel vmalloc-area accesses to non-mapped
-		 * addresses. If we don't do this, this will just
-		 * silently loop forever.
-		 */
-		pte_k = pte_offset_kernel(pmd_k, addr);
-		if (!pte_present(*pte_k))
-			goto no_context;
-
-		/*
-		 * The kernel assumes that TLBs don't cache invalid
-		 * entries, but in RISC-V, SFENCE.VMA specifies an
-		 * ordering constraint, not a cache flush; it is
-		 * necessary even after writing invalid entries.
-		 */
-		local_flush_tlb_page(addr);
-
-		return;
-	}
 }
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 1dc8930..ea933b7 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -28,7 +28,18 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
 EXPORT_SYMBOL(empty_zero_page);
 
 extern char _start[];
-void *dtb_early_va;
+#define DTB_EARLY_BASE_VA      PGDIR_SIZE
+void *dtb_early_va __initdata;
+uintptr_t dtb_early_pa __initdata;
+
+struct pt_alloc_ops {
+	pte_t *(*get_pte_virt)(phys_addr_t pa);
+	phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+	pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+	phys_addr_t (*alloc_pmd)(uintptr_t va);
+#endif
+};
 
 static void __init zone_sizes_init(void)
 {
@@ -141,8 +152,6 @@ static void __init setup_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-static phys_addr_t dtb_early_pa __initdata;
-
 void __init setup_bootmem(void)
 {
 	phys_addr_t mem_size = 0;
@@ -194,6 +203,8 @@ void __init setup_bootmem(void)
 }
 
 #ifdef CONFIG_MMU
+static struct pt_alloc_ops pt_ops;
+
 unsigned long va_pa_offset;
 EXPORT_SYMBOL(va_pa_offset);
 unsigned long pfn_base;
@@ -202,7 +213,6 @@ EXPORT_SYMBOL(pfn_base);
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
-static bool mmu_enabled;
 
 #define MAX_EARLY_MAPPING_SIZE	SZ_128M
 
@@ -224,27 +234,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
 	local_flush_tlb_page(addr);
 }
 
-static pte_t *__init get_pte_virt(phys_addr_t pa)
+static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
 {
-	if (mmu_enabled) {
-		clear_fixmap(FIX_PTE);
-		return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
-	} else {
-		return (pte_t *)((uintptr_t)pa);
-	}
+	return (pte_t *)((uintptr_t)pa);
 }
 
-static phys_addr_t __init alloc_pte(uintptr_t va)
+static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
+{
+	clear_fixmap(FIX_PTE);
+	return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
+}
+
+static inline pte_t *get_pte_virt_late(phys_addr_t pa)
+{
+	return (pte_t *) __va(pa);
+}
+
+static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
 {
 	/*
 	 * We only create PMD or PGD early mappings so we
 	 * should never reach here with MMU disabled.
 	 */
-	BUG_ON(!mmu_enabled);
+	BUG();
+}
 
+static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
+{
 	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
 }
 
+static phys_addr_t alloc_pte_late(uintptr_t va)
+{
+	unsigned long vaddr;
+
+	vaddr = __get_free_page(GFP_KERNEL);
+	if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
+		BUG();
+	return __pa(vaddr);
+}
+
 static void __init create_pte_mapping(pte_t *ptep,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
@@ -269,28 +298,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
 #endif
 pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
 
-static pmd_t *__init get_pmd_virt(phys_addr_t pa)
+static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
 {
-	if (mmu_enabled) {
-		clear_fixmap(FIX_PMD);
-		return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
-	} else {
-		return (pmd_t *)((uintptr_t)pa);
-	}
+	/* Before MMU is enabled */
+	return (pmd_t *)((uintptr_t)pa);
 }
 
-static phys_addr_t __init alloc_pmd(uintptr_t va)
+static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
+{
+	clear_fixmap(FIX_PMD);
+	return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+}
+
+static pmd_t *get_pmd_virt_late(phys_addr_t pa)
+{
+	return (pmd_t *) __va(pa);
+}
+
+static phys_addr_t __init alloc_pmd_early(uintptr_t va)
 {
 	uintptr_t pmd_num;
 
-	if (mmu_enabled)
-		return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
-
 	pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
 	BUG_ON(pmd_num >= NUM_EARLY_PMDS);
 	return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
 }
 
+static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
+{
+	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pmd_late(uintptr_t va)
+{
+	unsigned long vaddr;
+
+	vaddr = __get_free_page(GFP_KERNEL);
+	BUG_ON(!vaddr);
+	return __pa(vaddr);
+}
+
 static void __init create_pmd_mapping(pmd_t *pmdp,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
@@ -306,34 +353,34 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
 	}
 
 	if (pmd_none(pmdp[pmd_idx])) {
-		pte_phys = alloc_pte(va);
+		pte_phys = pt_ops.alloc_pte(va);
 		pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
-		ptep = get_pte_virt(pte_phys);
+		ptep = pt_ops.get_pte_virt(pte_phys);
 		memset(ptep, 0, PAGE_SIZE);
 	} else {
 		pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
-		ptep = get_pte_virt(pte_phys);
+		ptep = pt_ops.get_pte_virt(pte_phys);
 	}
 
 	create_pte_mapping(ptep, va, pa, sz, prot);
 }
 
 #define pgd_next_t		pmd_t
-#define alloc_pgd_next(__va)	alloc_pmd(__va)
-#define get_pgd_next_virt(__pa)	get_pmd_virt(__pa)
+#define alloc_pgd_next(__va)	pt_ops.alloc_pmd(__va)
+#define get_pgd_next_virt(__pa)	pt_ops.get_pmd_virt(__pa)
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		fixmap_pmd
 #else
 #define pgd_next_t		pte_t
-#define alloc_pgd_next(__va)	alloc_pte(__va)
-#define get_pgd_next_virt(__pa)	get_pte_virt(__pa)
+#define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
+#define get_pgd_next_virt(__pa)	pt_ops.get_pte_virt(__pa)
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
 	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		fixmap_pte
 #endif
 
-static void __init create_pgd_mapping(pgd_t *pgdp,
+void __init create_pgd_mapping(pgd_t *pgdp,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
 {
@@ -389,10 +436,13 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
-	uintptr_t va, end_va;
+	uintptr_t va, pa, end_va;
 	uintptr_t load_pa = (uintptr_t)(&_start);
 	uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
 	uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+#ifndef __PAGETABLE_PMD_FOLDED
+	pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#endif
 
 	va_pa_offset = PAGE_OFFSET - load_pa;
 	pfn_base = PFN_DOWN(load_pa);
@@ -408,6 +458,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	BUG_ON((load_pa % map_size) != 0);
 	BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
 
+	pt_ops.alloc_pte = alloc_pte_early;
+	pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+	pt_ops.alloc_pmd = alloc_pmd_early;
+	pt_ops.get_pmd_virt = get_pmd_virt_early;
+#endif
 	/* Setup early PGD for fixmap */
 	create_pgd_mapping(early_pg_dir, FIXADDR_START,
 			   (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
@@ -438,17 +494,44 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 				   load_pa + (va - PAGE_OFFSET),
 				   map_size, PAGE_KERNEL_EXEC);
 
-	/* Create fixed mapping for early FDT parsing */
-	end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
-	for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
-		create_pte_mapping(fixmap_pte, va,
-				   dtb_pa + (va - __fix_to_virt(FIX_FDT)),
-				   PAGE_SIZE, PAGE_KERNEL);
-
-	/* Save pointer to DTB for early FDT parsing */
-	dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
-	/* Save physical address for memblock reservation */
+	/* Create two consecutive PGD mappings for FDT early scan */
+	pa = dtb_pa & ~(PGDIR_SIZE - 1);
+	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+			   pa, PGDIR_SIZE, PAGE_KERNEL);
+	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
+			   pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
+	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
 	dtb_early_pa = dtb_pa;
+
+	/*
+	 * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
+	 * range can not span multiple pmds.
+	 */
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	/*
+	 * Early ioremap fixmap is already created as it lies within first 2MB
+	 * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
+	 * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
+	 * the user if not.
+	 */
+	fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
+	fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
+	if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
+		WARN_ON(1);
+		pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
+			pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
+		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
+#endif
 }
 
 static void __init setup_vm_final(void)
@@ -457,9 +540,16 @@ static void __init setup_vm_final(void)
 	phys_addr_t pa, start, end;
 	u64 i;
 
-	/* Set mmu_enabled flag */
-	mmu_enabled = true;
-
+	/**
+	 * MMU is enabled at this point. But page table setup is not complete yet.
+	 * fixmap page table alloc functions should be used at this point
+	 */
+	pt_ops.alloc_pte = alloc_pte_fixmap;
+	pt_ops.get_pte_virt = get_pte_virt_fixmap;
+#ifndef __PAGETABLE_PMD_FOLDED
+	pt_ops.alloc_pmd = alloc_pmd_fixmap;
+	pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
+#endif
 	/* Setup swapper PGD for fixmap */
 	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
 			   __pa_symbol(fixmap_pgd_next),
@@ -488,6 +578,14 @@ static void __init setup_vm_final(void)
 	/* Move to swapper page table */
 	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
 	local_flush_tlb_all();
+
+	/* generic page allocation functions must be used to setup page table */
+	pt_ops.alloc_pte = alloc_pte_late;
+	pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+	pt_ops.alloc_pmd = alloc_pmd_late;
+	pt_ops.get_pmd_virt = get_pmd_virt_late;
+#endif
 }
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 0831c2e..ace74de 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2019 SiFive
  */
 
+#include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -49,6 +50,14 @@ struct addr_marker {
 	const char *name;
 };
 
+/* Private information for debugfs */
+struct ptd_mm_info {
+	struct mm_struct		*mm;
+	const struct addr_marker	*markers;
+	unsigned long base_addr;
+	unsigned long end;
+};
+
 static struct addr_marker address_markers[] = {
 #ifdef CONFIG_KASAN
 	{KASAN_SHADOW_START,	"Kasan shadow start"},
@@ -68,6 +77,28 @@ static struct addr_marker address_markers[] = {
 	{-1, NULL},
 };
 
+static struct ptd_mm_info kernel_ptd_info = {
+	.mm		= &init_mm,
+	.markers	= address_markers,
+	.base_addr	= KERN_VIRT_START,
+	.end		= ULONG_MAX,
+};
+
+#ifdef CONFIG_EFI
+static struct addr_marker efi_addr_markers[] = {
+		{ 0,		"UEFI runtime start" },
+		{ SZ_1G,	"UEFI runtime end" },
+		{ -1,		NULL }
+};
+
+static struct ptd_mm_info efi_ptd_info = {
+	.mm		= &efi_mm,
+	.markers	= efi_addr_markers,
+	.base_addr	= 0,
+	.end		= SZ_2G,
+};
+#endif
+
 /* Page Table Entry */
 struct prot_bits {
 	u64 mask;
@@ -245,22 +276,22 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr,
 	}
 }
 
-static void ptdump_walk(struct seq_file *s)
+static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
 {
 	struct pg_state st = {
 		.seq = s,
-		.marker = address_markers,
+		.marker = pinfo->markers,
 		.level = -1,
 		.ptdump = {
 			.note_page = note_page,
 			.range = (struct ptdump_range[]) {
-				{KERN_VIRT_START, ULONG_MAX},
+				{pinfo->base_addr, pinfo->end},
 				{0, 0}
 			}
 		}
 	};
 
-	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
 }
 
 void ptdump_check_wx(void)
@@ -293,7 +324,7 @@ void ptdump_check_wx(void)
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-	ptdump_walk(m);
+	ptdump_walk(m, m->private);
 
 	return 0;
 }
@@ -308,8 +339,13 @@ static int ptdump_init(void)
 		for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
 			pg_level[i].mask |= pte_bits[j].mask;
 
-	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+	debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
 			    &ptdump_fops);
+#ifdef CONFIG_EFI
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
+		debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
+				    &ptdump_fops);
+#endif
 
 	return 0;
 }
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 1c3b481..28c16800 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -442,3 +442,4 @@
 437  common	openat2			sys_openat2			sys_openat2
 438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
 439  common	faccessat2		sys_faccessat2			sys_faccessat2
+440  common	process_madvise		sys_process_madvise		sys_process_madvise
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index ae0a00b..7837384 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -442,3 +442,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 37ec52b..7816026 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -485,3 +485,4 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index d49f471..1618721 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -62,12 +62,12 @@
 
 source "arch/$(HEADER_ARCH)/um/Kconfig"
 
-config FORBID_STATIC_LINK
-	bool
+config MAY_HAVE_RUNTIME_DEPS
+        bool
 
 config STATIC_LINK
 	bool "Force a static link"
-	depends on !FORBID_STATIC_LINK
+	depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS
 	help
 	  This option gives you the ability to force a static link of UML.
 	  Normally, UML is linked as a shared binary.  This is inconvenient for
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 9160ead..2e7b8e0 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -234,7 +234,7 @@
 config UML_NET_VECTOR
 	bool "Vector I/O high performance network devices"
 	depends on UML_NET
-	select FORBID_STATIC_LINK
+	select MAY_HAVE_RUNTIME_DEPS
 	help
 	This User-Mode Linux network driver uses multi-message send
 	and receive functions. The host running the UML guest must have
@@ -246,7 +246,7 @@
 config UML_NET_VDE
 	bool "VDE transport (obsolete)"
 	depends on UML_NET
-	select FORBID_STATIC_LINK
+	select MAY_HAVE_RUNTIME_DEPS
 	help
 	This User-Mode Linux network transport allows one or more running
 	UMLs on a single host to communicate with each other and also
@@ -294,7 +294,7 @@
 config UML_NET_PCAP
 	bool "pcap transport (obsolete)"
 	depends on UML_NET
-	select FORBID_STATIC_LINK
+	select MAY_HAVE_RUNTIME_DEPS
 	help
 	The pcap transport makes a pcap packet stream on the host look
 	like an ethernet device inside UML.  This is useful for making
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index 3695821..785baed 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -7,6 +7,7 @@
  */
 
 #include <stdint.h>
+#include <string.h>
 #include <unistd.h>
 #include <errno.h>
 #include <sys/types.h>
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index bbd2063..52ddda3 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -32,7 +32,7 @@ static int pcap_user_init(void *data, void *dev)
 	return 0;
 }
 
-static int pcap_open(void *data)
+static int pcap_user_open(void *data)
 {
 	struct pcap_data *pri = data;
 	__u32 netmask;
@@ -44,14 +44,14 @@ static int pcap_open(void *data)
 	if (pri->filter != NULL) {
 		err = dev_netmask(pri->dev, &netmask);
 		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n");
+			printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
 			return -EIO;
 		}
 
 		pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
 					UM_GFP_KERNEL);
 		if (pri->compiled == NULL) {
-			printk(UM_KERN_ERR "pcap_open : kmalloc failed\n");
+			printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
 			return -ENOMEM;
 		}
 
@@ -59,14 +59,14 @@ static int pcap_open(void *data)
 				   (struct bpf_program *) pri->compiled,
 				   pri->filter, pri->optimize, netmask);
 		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_open : pcap_compile failed - "
+			printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
 			       "'%s'\n", pcap_geterr(pri->pcap));
 			goto out;
 		}
 
 		err = pcap_setfilter(pri->pcap, pri->compiled);
 		if (err < 0) {
-			printk(UM_KERN_ERR "pcap_open : pcap_setfilter "
+			printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
 			       "failed - '%s'\n", pcap_geterr(pri->pcap));
 			goto out;
 		}
@@ -127,7 +127,7 @@ int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
 
 const struct net_user_info pcap_user_info = {
 	.init		= pcap_user_init,
-	.open		= pcap_open,
+	.open		= pcap_user_open,
 	.close	 	= NULL,
 	.remove	 	= pcap_remove,
 	.add_address	= NULL,
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index 8016d32..482a19c 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -9,7 +9,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <string.h>
-#include <sys/termios.h>
+#include <termios.h>
 #include <sys/wait.h>
 #include <net_user.h>
 #include <os.h>
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 8735c46..555203e 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1403,7 +1403,7 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
 		kfree(vp->bpf->filter);
 		vp->bpf->filter = NULL;
 	} else {
-		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
+		vp->bpf = kmalloc(sizeof(struct sock_fprog), GFP_ATOMIC);
 		if (vp->bpf == NULL) {
 			netdev_err(dev, "failed to allocate memory for firmware\n");
 			goto flash_fail;
@@ -1415,7 +1415,7 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
 	if (request_firmware(&fw, efl->data, &vdevice->pdev.dev))
 		goto flash_fail;
 
-	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	vp->bpf->filter = kmemdup(fw->data, fw->size, GFP_ATOMIC);
 	if (!vp->bpf->filter)
 		goto free_buffer;
 
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index c4a0f26..bae5322 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -18,9 +18,7 @@
 #include <fcntl.h>
 #include <sys/socket.h>
 #include <sys/un.h>
-#include <net/ethernet.h>
 #include <netinet/ip.h>
-#include <netinet/ether.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <sys/wait.h>
@@ -39,6 +37,7 @@
 #define ID_MAX 2
 
 #define TOKEN_IFNAME "ifname"
+#define TOKEN_SCRIPT "ifup"
 
 #define TRANS_RAW "raw"
 #define TRANS_RAW_LEN strlen(TRANS_RAW)
@@ -55,6 +54,9 @@
 
 #define MAX_UN_LEN 107
 
+static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char *template = "tapXXXXXX";
+
 /* This is very ugly and brute force lookup, but it is done
  * only once at initialization so not worth doing hashes or
  * anything more intelligent
@@ -191,16 +193,21 @@ static int create_raw_fd(char *iface, int flags, int proto)
 	return err;
 }
 
+
 static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
 {
-	int fd = -1;
+	int fd = -1, i;
 	char *iface;
 	struct vector_fds *result = NULL;
+	bool dynamic = false;
+	char dynamic_ifname[IFNAMSIZ];
+	char *argv[] = {NULL, NULL, NULL, NULL};
 
 	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
 	if (iface == NULL) {
-		printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
-		goto tap_cleanup;
+		dynamic = true;
+		iface = dynamic_ifname;
+		srand(getpid());
 	}
 
 	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
@@ -214,14 +221,30 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
 	result->remote_addr_size = 0;
 
 	/* TAP */
+	do {
+		if (dynamic) {
+			strcpy(iface, template);
+			for (i = 0; i < strlen(iface); i++) {
+				if (iface[i] == 'X') {
+					iface[i] = padchar[rand() % strlen(padchar)];
+				}
+			}
+		}
+		fd = create_tap_fd(iface);
+		if ((fd < 0) && (!dynamic)) {
+			printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
+			goto tap_cleanup;
+		}
+		result->tx_fd = fd;
+		result->rx_fd = fd;
+	} while (fd < 0);
 
-	fd = create_tap_fd(iface);
-	if (fd < 0) {
-		printk(UM_KERN_ERR "uml_tap: failed to create tun interface\n");
-		goto tap_cleanup;
+	argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+	if (argv[0]) {
+		argv[1] = iface;
+		run_helper(NULL, NULL, argv);
 	}
-	result->tx_fd = fd;
-	result->rx_fd = fd;
+
 	return result;
 tap_cleanup:
 	printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd);
@@ -233,6 +256,7 @@ static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec)
 {
 	char *iface;
 	struct vector_fds *result = NULL;
+	char *argv[] = {NULL, NULL, NULL, NULL};
 
 	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
 	if (iface == NULL) {
@@ -266,6 +290,12 @@ static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec)
 			"uml_tap: failed to create paired raw socket: %i\n", result->rx_fd);
 		goto hybrid_cleanup;
 	}
+
+	argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+	if (argv[0]) {
+		argv[1] = iface;
+		run_helper(NULL, NULL, argv);
+	}
 	return result;
 hybrid_cleanup:
 	printk(UM_KERN_ERR "user_init_hybrid: init failed");
@@ -332,7 +362,7 @@ static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id)
 	}
 	switch (id) {
 	case ID_BESS:
-		if (connect(fd, remote_addr, sizeof(struct sockaddr_un)) < 0) {
+		if (connect(fd, (const struct sockaddr *) remote_addr, sizeof(struct sockaddr_un)) < 0) {
 			printk(UM_KERN_ERR "bess open:cannot connect to %s %i", remote_addr->sun_path, -errno);
 			goto unix_cleanup;
 		}
@@ -399,8 +429,7 @@ static struct vector_fds *user_init_fd_fds(struct arglist *ifspec)
 fd_cleanup:
 	if (fd >= 0)
 		os_close_file(fd);
-	if (result != NULL)
-		kfree(result);
+	kfree(result);
 	return NULL;
 }
 
@@ -410,6 +439,7 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
 	int err = -ENOMEM;
 	char *iface;
 	struct vector_fds *result = NULL;
+	char *argv[] = {NULL, NULL, NULL, NULL};
 
 	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
 	if (iface == NULL)
@@ -432,6 +462,11 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
 		result->remote_addr = NULL;
 		result->remote_addr_size = 0;
 	}
+	argv[0] = uml_vector_fetch_arg(ifspec, TOKEN_SCRIPT);
+	if (argv[0]) {
+		argv[1] = iface;
+		run_helper(NULL, NULL, argv);
+	}
 	return result;
 raw_cleanup:
 	printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
@@ -789,10 +824,12 @@ void *uml_vector_user_bpf(char *filename)
 		return false;
 	}
 	bpf_prog = uml_kmalloc(sizeof(struct sock_fprog), UM_GFP_KERNEL);
-	if (bpf_prog != NULL) {
-		bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
-		bpf_prog->filter = NULL;
+	if (bpf_prog == NULL) {
+		printk(KERN_ERR "Failed to allocate bpf prog buffer");
+		return NULL;
 	}
+	bpf_prog->len = statbuf.st_size / sizeof(struct sock_filter);
+	bpf_prog->filter = NULL;
 	ffd = os_open_file(filename, of_read(OPENFLAGS()), 0);
 	if (ffd < 0) {
 		printk(KERN_ERR "Error %d opening bpf file", -errno);
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index 10c99e0..d1cffc2 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -35,14 +35,14 @@ int write_sigio_irq(int fd)
 }
 
 /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
-static DEFINE_SPINLOCK(sigio_spinlock);
+static DEFINE_MUTEX(sigio_mutex);
 
 void sigio_lock(void)
 {
-	spin_lock(&sigio_spinlock);
+	mutex_lock(&sigio_mutex);
 }
 
 void sigio_unlock(void)
 {
-	spin_unlock(&sigio_spinlock);
+	mutex_unlock(&sigio_mutex);
 }
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index acbc879..7452f70 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -47,12 +47,10 @@ void show_stack(struct task_struct *task, unsigned long *stack,
 		if (kstack_end(stack))
 			break;
 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-			printk("%s\n", loglvl);
+			pr_cont("\n");
 		pr_cont(" %08lx", *stack++);
 	}
-	printk("%s\n", loglvl);
 
 	printk("%sCall Trace:\n", loglvl);
 	dump_trace(current, &stackops, (void *)loglvl);
-	printk("%s\n", loglvl);
 }
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 25eaa6a..3d109ff 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -70,13 +70,17 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
 	 * read of the message and write of the ACK.
 	 */
 	if (mode != TTMH_READ) {
+		bool disabled = irqs_disabled();
+
+		BUG_ON(mode == TTMH_IDLE && !disabled);
+
+		if (disabled)
+			local_irq_enable();
 		while (os_poll(1, &time_travel_ext_fd) != 0) {
-			if (mode == TTMH_IDLE) {
-				BUG_ON(!irqs_disabled());
-				local_irq_enable();
-				local_irq_disable();
-			}
+			/* nothing */
 		}
+		if (disabled)
+			local_irq_disable();
 	}
 
 	ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
@@ -102,6 +106,7 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
 		break;
 	}
 
+	resp.seq = msg->seq;
 	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
 }
 
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 9e16078..1d7558d 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -97,7 +97,7 @@ static int remove_files_and_dir(char *dir)
 	while ((ent = readdir(directory)) != NULL) {
 		if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
 			continue;
-		len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1;
+		len = strlen(dir) + strlen("/") + strlen(ent->d_name) + 1;
 		if (len > sizeof(file)) {
 			ret = -E2BIG;
 			goto out;
@@ -135,7 +135,7 @@ static int remove_files_and_dir(char *dir)
  */
 static inline int is_umdir_used(char *dir)
 {
-	char pid[sizeof("nnnnn\0")], *end, *file;
+	char pid[sizeof("nnnnnnnnn")], *end, *file;
 	int dead, fd, p, n, err;
 	size_t filelen;
 
@@ -217,10 +217,10 @@ static int umdir_take_if_dead(char *dir)
 
 static void __init create_pid_file(void)
 {
-	char pid[sizeof("nnnnn\0")], *file;
+	char pid[sizeof("nnnnnnnnn")], *file;
 	int fd, n;
 
-	n = strlen(uml_dir) + UMID_LEN + sizeof("/pid\0");
+	n = strlen(uml_dir) + UMID_LEN + sizeof("/pid");
 	file = malloc(n);
 	if (!file)
 		return;
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index ecf2f39..0732742 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -10,7 +10,7 @@
 #include <signal.h>
 #include <string.h>
 #include <termios.h>
-#include <wait.h>
+#include <sys/wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
 #include <init.h>
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9b6931f..0d0667a 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -444,3 +444,4 @@
 437	i386	openat2			sys_openat2
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
+440	i386	process_madvise		sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 3478096..1f47e24 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,6 +361,7 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	process_madvise		sys_process_madvise
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index a84a141..8443a67 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -229,7 +229,8 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
 		return;
 
 	idx = srcu_read_lock(&head->track_srcu);
-	hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+				srcu_read_lock_held(&head->track_srcu))
 		if (n->track_write)
 			n->track_write(vcpu, gpa, new, bytes, n);
 	srcu_read_unlock(&head->track_srcu, idx);
@@ -254,7 +255,8 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 		return;
 
 	idx = srcu_read_lock(&head->track_srcu);
-	hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+				srcu_read_lock_held(&head->track_srcu))
 		if (n->track_flush_slot)
 			n->track_flush_slot(kvm, slot, n);
 	srcu_read_unlock(&head->track_srcu, idx);
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 09a085b..1401899 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -52,14 +52,6 @@ static const int reg_offsets[] =
 
 int putreg(struct task_struct *child, int regno, unsigned long value)
 {
-#ifdef TIF_IA32
-	/*
-	 * Some code in the 64bit emulation may not be 64bit clean.
-	 * Don't take any chances.
-	 */
-	if (test_tsk_thread_flag(child, TIF_IA32))
-		value &= 0xffffffff;
-#endif
 	switch (regno) {
 	case R8:
 	case R9:
@@ -137,10 +129,7 @@ int poke_user(struct task_struct *child, long addr, long data)
 unsigned long getreg(struct task_struct *child, int regno)
 {
 	unsigned long mask = ~0UL;
-#ifdef TIF_IA32
-	if (test_tsk_thread_flag(child, TIF_IA32))
-		mask = 0xffffffff;
-#endif
+
 	switch (regno) {
 	case R8:
 	case R9:
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index c51dd83..bae6155 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -2,7 +2,7 @@
 #include <stdio.h>
 #include <stddef.h>
 #include <signal.h>
-#include <sys/poll.h>
+#include <poll.h>
 #include <sys/mman.h>
 #include <sys/user.h>
 #define __FRAME_OFFSETS
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 4988e19..1e681bf 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -25,6 +25,7 @@
 static struct gnttab_vm_area {
 	struct vm_struct *area;
 	pte_t **ptes;
+	int idx;
 } gnttab_shared_vm_area, gnttab_status_vm_area;
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
@@ -90,19 +91,31 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 	}
 }
 
+static int gnttab_apply(pte_t *pte, unsigned long addr, void *data)
+{
+	struct gnttab_vm_area *area = data;
+
+	area->ptes[area->idx++] = pte;
+	return 0;
+}
+
 static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
 {
 	area->ptes = kmalloc_array(nr_frames, sizeof(*area->ptes), GFP_KERNEL);
 	if (area->ptes == NULL)
 		return -ENOMEM;
-
-	area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
-	if (area->area == NULL) {
-		kfree(area->ptes);
-		return -ENOMEM;
-	}
-
+	area->area = get_vm_area(PAGE_SIZE * nr_frames, VM_IOREMAP);
+	if (!area->area)
+		goto out_free_ptes;
+	if (apply_to_page_range(&init_mm, (unsigned long)area->area->addr,
+			PAGE_SIZE * nr_frames, gnttab_apply, area))
+		goto out_free_vm_area;
 	return 0;
+out_free_vm_area:
+	free_vm_area(area->area);
+out_free_ptes:
+	kfree(area->ptes);
+	return -ENOMEM;
 }
 
 static void arch_gnttab_vfree(struct gnttab_vm_area *area)
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 6276e3c..b070f27 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -410,3 +410,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index adfc935..501e9da 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -201,7 +201,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
 
 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
-static int do_block_io_op(struct xen_blkif_ring *ring);
+static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
 static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 				struct blkif_request *req,
 				struct pending_req *pending_req);
@@ -612,6 +612,8 @@ int xen_blkif_schedule(void *arg)
 	struct xen_vbd *vbd = &blkif->vbd;
 	unsigned long timeout;
 	int ret;
+	bool do_eoi;
+	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
 
 	set_freezable();
 	while (!kthread_should_stop()) {
@@ -636,16 +638,23 @@ int xen_blkif_schedule(void *arg)
 		if (timeout == 0)
 			goto purge_gnt_list;
 
+		do_eoi = ring->waiting_reqs;
+
 		ring->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		ret = do_block_io_op(ring);
+		ret = do_block_io_op(ring, &eoi_flags);
 		if (ret > 0)
 			ring->waiting_reqs = 1;
 		if (ret == -EACCES)
 			wait_event_interruptible(ring->shutdown_wq,
 						 kthread_should_stop());
 
+		if (do_eoi && !ring->waiting_reqs) {
+			xen_irq_lateeoi(ring->irq, eoi_flags);
+			eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
+		}
+
 purge_gnt_list:
 		if (blkif->vbd.feature_gnt_persistent &&
 		    time_after(jiffies, ring->next_lru)) {
@@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio *bio)
  * and transmute  it to the block API to hand it over to the proper block disk.
  */
 static int
-__do_block_io_op(struct xen_blkif_ring *ring)
+__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
 {
 	union blkif_back_rings *blk_rings = &ring->blk_rings;
 	struct blkif_request req;
@@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring *ring)
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
 			break;
 
+		/* We've seen a request, so clear spurious eoi flag. */
+		*eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
 		if (kthread_should_stop()) {
 			more_to_do = 1;
 			break;
@@ -1202,13 +1214,13 @@ __do_block_io_op(struct xen_blkif_ring *ring)
 }
 
 static int
-do_block_io_op(struct xen_blkif_ring *ring)
+do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
 {
 	union blkif_back_rings *blk_rings = &ring->blk_rings;
 	int more_to_do;
 
 	do {
-		more_to_do = __do_block_io_op(ring);
+		more_to_do = __do_block_io_op(ring, eoi_flags);
 		if (more_to_do)
 			break;
 
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index b9aa5d1..5e7c36d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -246,9 +246,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
 	if (req_prod - rsp_prod > size)
 		goto fail;
 
-	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
-						    xen_blkif_be_int, 0,
-						    "blkif-backend", ring);
+	err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
+			evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
 	if (err < 0)
 		goto fail;
 	ring->irq = err;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e84070b..edc279b 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -46,7 +46,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock);
 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
 		pgoff_t *pgoff)
 {
-	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
+	sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
+	phys_addr_t phys_off = (start_sect + sector) * 512;
 
 	if (pgoff)
 		*pgoff = PHYS_PFN(phys_off);
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index da1887f..36ec1f7 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -106,7 +106,7 @@
 
 config EFI_ARMSTUB_DTB_LOADER
 	bool "Enable the DTB loader"
-	depends on EFI_GENERIC_STUB
+	depends on EFI_GENERIC_STUB && !RISCV
 	default y
 	help
 	  Select this config option to add support for the dtb= command
@@ -123,6 +123,7 @@
 	bool "Enable the command line initrd loader" if !X86
 	depends on EFI_STUB && (EFI_GENERIC_STUB || X86)
 	default y
+	depends on !RISCV
 	help
 	  Select this config option to add support for the initrd= command
 	  line parameter, allowing an initrd that resides on the same volume
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index e8da782..d6ca2da 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -36,6 +36,8 @@
 arm-obj-$(CONFIG_EFI)			:= efi-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
 obj-$(CONFIG_ARM64)			+= $(arm-obj-y)
+riscv-obj-$(CONFIG_EFI)			:= efi-init.o riscv-runtime.o
+obj-$(CONFIG_RISCV)			+= $(riscv-obj-y)
 obj-$(CONFIG_EFI_CAPSULE_LOADER)	+= capsule-loader.o
 obj-$(CONFIG_EFI_EARLYCON)		+= earlycon.o
 obj-$(CONFIG_UEFI_CPER_ARM)		+= cper-arm.o
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 039a9ac..8a94388 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -23,6 +23,8 @@
 cflags-$(CONFIG_ARM)		:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
 				   -fno-builtin -fpic \
 				   $(call cc-option,-mno-single-pic-base)
+cflags-$(CONFIG_RISCV)		:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
+				   -fpic
 
 cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt
 
@@ -64,6 +66,7 @@
 lib-$(CONFIG_ARM)		+= arm32-stub.o
 lib-$(CONFIG_ARM64)		+= arm64-stub.o
 lib-$(CONFIG_X86)		+= x86-stub.o
+lib-$(CONFIG_RISCV)		+= riscv-stub.o
 CFLAGS_arm32-stub.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 # Even when -mbranch-protection=none is set, Clang will generate a
@@ -112,6 +115,13 @@
 				   --prefix-symbols=__efistub_
 STUBCOPY_RELOC-$(CONFIG_ARM64)	:= R_AARCH64_ABS
 
+# For RISC-V, we don't need anything special other than arm64. Keep all the
+# symbols in .init section and make sure that no absolute symbols references
+# doesn't exist.
+STUBCOPY_FLAGS-$(CONFIG_RISCV)	+= --prefix-alloc-sections=.init \
+				   --prefix-symbols=__efistub_
+STUBCOPY_RELOC-$(CONFIG_RISCV)	:= R_RISCV_HI20
+
 $(obj)/%.stub.o: $(obj)/%.o FORCE
 	$(call if_changed,stubcopy)
 
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 311a168..914a343 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -17,7 +17,10 @@
 
 /*
  * This is the base address at which to start allocating virtual memory ranges
- * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use
+ * for UEFI Runtime Services.
+ *
+ * For ARM/ARM64:
+ * This is in the low TTBR0 range so that we can use
  * any allocation we choose, and eliminate the risk of a conflict after kexec.
  * The value chosen is the largest non-zero power of 2 suitable for this purpose
  * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can
@@ -25,6 +28,12 @@
  * Since 32-bit ARM could potentially execute with a 1G/3G user/kernel split,
  * map everything below 1 GB. (512 MB is a reasonable upper bound for the
  * entire footprint of the UEFI runtime services memory regions)
+ *
+ * For RISC-V:
+ * There is no specific reason for which, this address (512MB) can't be used
+ * EFI runtime virtual address for RISC-V. It also helps to use EFI runtime
+ * services on both RV32/RV64. Keep the same runtime virtual address for RISC-V
+ * as well to minimize the code churn.
  */
 #define EFI_RT_VIRTUAL_BASE	SZ_512M
 #define EFI_RT_VIRTUAL_SIZE	SZ_512M
diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c
new file mode 100644
index 0000000..380e4e2
--- /dev/null
+++ b/drivers/firmware/efi/libstub/riscv-stub.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/efi.h>
+#include <linux/libfdt.h>
+
+#include <asm/efi.h>
+#include <asm/sections.h>
+
+#include "efistub.h"
+
+/*
+ * RISC-V requires the kernel image to placed 2 MB aligned base for 64 bit and
+ * 4MB for 32 bit.
+ */
+#ifdef CONFIG_64BIT
+#define MIN_KIMG_ALIGN		SZ_2M
+#else
+#define MIN_KIMG_ALIGN		SZ_4M
+#endif
+
+typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
+
+static u32 hartid;
+
+static u32 get_boot_hartid_from_fdt(void)
+{
+	const void *fdt;
+	int chosen_node, len;
+	const fdt32_t *prop;
+
+	fdt = get_efi_config_table(DEVICE_TREE_GUID);
+	if (!fdt)
+		return U32_MAX;
+
+	chosen_node = fdt_path_offset(fdt, "/chosen");
+	if (chosen_node < 0)
+		return U32_MAX;
+
+	prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len);
+	if (!prop || len != sizeof(u32))
+		return U32_MAX;
+
+	return fdt32_to_cpu(*prop);
+}
+
+efi_status_t check_platform_features(void)
+{
+	hartid = get_boot_hartid_from_fdt();
+	if (hartid == U32_MAX) {
+		efi_err("/chosen/boot-hartid missing or invalid!\n");
+		return EFI_UNSUPPORTED;
+	}
+	return EFI_SUCCESS;
+}
+
+void __noreturn efi_enter_kernel(unsigned long entrypoint, unsigned long fdt,
+				 unsigned long fdt_size)
+{
+	unsigned long stext_offset = _start_kernel - _start;
+	unsigned long kernel_entry = entrypoint + stext_offset;
+	jump_kernel_func jump_kernel = (jump_kernel_func)kernel_entry;
+
+	/*
+	 * Jump to real kernel here with following constraints.
+	 * 1. MMU should be disabled.
+	 * 2. a0 should contain hartid
+	 * 3. a1 should DT address
+	 */
+	csr_write(CSR_SATP, 0);
+	jump_kernel(hartid, fdt);
+}
+
+efi_status_t handle_kernel_image(unsigned long *image_addr,
+				 unsigned long *image_size,
+				 unsigned long *reserve_addr,
+				 unsigned long *reserve_size,
+				 efi_loaded_image_t *image)
+{
+	unsigned long kernel_size = 0;
+	unsigned long preferred_addr;
+	efi_status_t status;
+
+	kernel_size = _edata - _start;
+	*image_addr = (unsigned long)_start;
+	*image_size = kernel_size + (_end - _edata);
+
+	/*
+	 * RISC-V kernel maps PAGE_OFFSET virtual address to the same physical
+	 * address where kernel is booted. That's why kernel should boot from
+	 * as low as possible to avoid wastage of memory. Currently, dram_base
+	 * is occupied by the firmware. So the preferred address for kernel to
+	 * boot is next aligned address. If preferred address is not available,
+	 * relocate_kernel will fall back to efi_low_alloc_above to allocate
+	 * lowest possible memory region as long as the address and size meets
+	 * the alignment constraints.
+	 */
+	preferred_addr = MIN_KIMG_ALIGN;
+	status = efi_relocate_kernel(image_addr, kernel_size, *image_size,
+				     preferred_addr, MIN_KIMG_ALIGN, 0x0);
+
+	if (status != EFI_SUCCESS) {
+		efi_err("Failed to relocate kernel\n");
+		*image_size = 0;
+	}
+	return status;
+}
diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c
new file mode 100644
index 0000000..d28e715
--- /dev/null
+++ b/drivers/firmware/efi/riscv-runtime.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Extensible Firmware Interface
+ *
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ * Adapted from drivers/firmware/efi/arm-runtime.c
+ *
+ */
+
+#include <linux/dmi.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/mm_types.h>
+#include <linux/preempt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pgtable.h>
+
+#include <asm/cacheflush.h>
+#include <asm/efi.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+
+static bool __init efi_virtmap_init(void)
+{
+	efi_memory_desc_t *md;
+
+	efi_mm.pgd = pgd_alloc(&efi_mm);
+	mm_init_cpumask(&efi_mm);
+	init_new_context(NULL, &efi_mm);
+
+	for_each_efi_memory_desc(md) {
+		phys_addr_t phys = md->phys_addr;
+		int ret;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0)
+			return false;
+
+		ret = efi_create_mapping(&efi_mm, md);
+		if (ret) {
+			pr_warn("  EFI remap %pa: failed to create mapping (%d)\n",
+				&phys, ret);
+			return false;
+		}
+	}
+
+	if (efi_memattr_apply_permissions(&efi_mm, efi_set_mapping_permissions))
+		return false;
+
+	return true;
+}
+
+/*
+ * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
+ * non-early mapping of the UEFI system table and virtual mappings for all
+ * EFI_MEMORY_RUNTIME regions.
+ */
+static int __init riscv_enable_runtime_services(void)
+{
+	u64 mapsize;
+
+	if (!efi_enabled(EFI_BOOT)) {
+		pr_info("EFI services will not be available.\n");
+		return 0;
+	}
+
+	efi_memmap_unmap();
+
+	mapsize = efi.memmap.desc_size * efi.memmap.nr_map;
+
+	if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) {
+		pr_err("Failed to remap EFI memory map\n");
+		return 0;
+	}
+
+	if (efi_soft_reserve_enabled()) {
+		efi_memory_desc_t *md;
+
+		for_each_efi_memory_desc(md) {
+			int md_size = md->num_pages << EFI_PAGE_SHIFT;
+			struct resource *res;
+
+			if (!(md->attribute & EFI_MEMORY_SP))
+				continue;
+
+			res = kzalloc(sizeof(*res), GFP_KERNEL);
+			if (WARN_ON(!res))
+				break;
+
+			res->start	= md->phys_addr;
+			res->end	= md->phys_addr + md_size - 1;
+			res->name	= "Soft Reserved";
+			res->flags	= IORESOURCE_MEM;
+			res->desc	= IORES_DESC_SOFT_RESERVED;
+
+			insert_resource(&iomem_resource, res);
+		}
+	}
+
+	if (efi_runtime_disabled()) {
+		pr_info("EFI runtime services will be disabled.\n");
+		return 0;
+	}
+
+	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
+		pr_info("EFI runtime services access via paravirt.\n");
+		return 0;
+	}
+
+	pr_info("Remapping and enabling EFI services.\n");
+
+	if (!efi_virtmap_init()) {
+		pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
+		return -ENOMEM;
+	}
+
+	/* Set up runtime services function pointers */
+	efi_native_runtime_setup();
+	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+	return 0;
+}
+early_initcall(riscv_enable_runtime_services);
+
+void efi_virtmap_load(void)
+{
+	preempt_disable();
+	switch_mm(current->active_mm, &efi_mm, NULL);
+}
+
+void efi_virtmap_unload(void)
+{
+	switch_mm(&efi_mm, current->active_mm, NULL);
+	preempt_enable();
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 495c3d7..f3b7287 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -68,6 +68,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 
 	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
 	mutex_init(&adev->vcn.vcn_pg_lock);
+	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
 	atomic_set(&adev->vcn.total_submission_cnt, 0);
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
 		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
@@ -237,6 +238,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 	}
 
 	release_firmware(adev->vcn.fw);
+	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
 	mutex_destroy(&adev->vcn.vcn_pg_lock);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 7a9b804..1769115 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -220,6 +220,7 @@ struct amdgpu_vcn {
 	struct amdgpu_vcn_inst	 inst[AMDGPU_MAX_VCN_INSTANCES];
 	struct amdgpu_vcn_reg	 internal;
 	struct mutex		 vcn_pg_lock;
+	struct mutex		vcn1_jpeg1_workaround;
 	atomic_t		 total_submission_cnt;
 
 	unsigned	harvest_config;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index bc30028..c600b61 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -33,6 +33,7 @@
 
 static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
 
 static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
 {
@@ -564,8 +565,8 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
 	.insert_start = jpeg_v1_0_decode_ring_insert_start,
 	.insert_end = jpeg_v1_0_decode_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = vcn_v1_0_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
+	.begin_use = jpeg_v1_0_ring_begin_use,
+	.end_use = vcn_v1_0_ring_end_use,
 	.emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
 	.emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
@@ -586,3 +587,22 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
 {
 	adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
 }
+
+static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct	amdgpu_device *adev = ring->adev;
+	bool	set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+	int		cnt = 0;
+
+	mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+
+	if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
+		DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
+
+	for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+		if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
+			DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
+	}
+
+	vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 73699ea..86e1ef7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -54,6 +54,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
 				int inst_idx, struct dpg_pause_state *new_state);
 
 static void vcn_v1_0_idle_work_handler(struct work_struct *work);
+static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -1804,10 +1805,23 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
 	}
 }
 
-void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct	amdgpu_device *adev = ring->adev;
+	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+	mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+
+	if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec))
+		DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
+
+	vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
+
+}
+
+void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
 {
 	struct amdgpu_device *adev = ring->adev;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
 
 	if (set_clocks) {
 		amdgpu_gfx_off_ctrl(adev, false);
@@ -1844,6 +1858,12 @@ void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
 	}
 }
 
+void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+	mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+}
+
 static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
 	.name = "vcn_v1_0",
 	.early_init = vcn_v1_0_early_init,
@@ -1891,7 +1911,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.insert_end = vcn_v1_0_dec_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = vcn_v1_0_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
+	.end_use = vcn_v1_0_ring_end_use,
 	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
 	.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
@@ -1923,7 +1943,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.insert_end = vcn_v1_0_enc_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = vcn_v1_0_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
+	.end_use = vcn_v1_0_ring_end_use,
 	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
 	.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
index f67d739..1f1cc7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -24,7 +24,8 @@
 #ifndef __VCN_V1_0_H__
 #define __VCN_V1_0_H__
 
-void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring);
+void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks);
 
 extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index d298152..5e2254b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1426,5 +1426,5 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
  */
 void kfd_destroy_crat_image(void *crat_image)
 {
-	kfree(crat_image);
+	kvfree(crat_image);
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 9c1e003..34f6369 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -149,6 +149,8 @@ struct amdgpu_dm_backlight_caps {
  * @cached_state: Caches device atomic state for suspend/resume
  * @cached_dc_state: Cached state of content streams
  * @compressor: Frame buffer compression buffer. See &struct dm_comressor_info
+ * @force_timing_sync: set via debugfs. When set, indicates that all connected
+ *		       displays will be forced to synchronize.
  */
 struct amdgpu_display_manager {
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index db741e4..eee19ed 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -647,7 +647,7 @@ static void try_disable_dsc(struct drm_atomic_state *state,
 	for (i = 0; i < count; i++) {
 		if (vars[i].dsc_enabled
 				&& vars[i].bpp_x16 == params[i].bw_range.max_target_bpp_x16
-				&& !params[i].clock_force_enable == DSC_CLK_FORCE_DEFAULT) {
+				&& params[i].clock_force_enable == DSC_CLK_FORCE_DEFAULT) {
 			kbps_increase[i] = params[i].bw_range.stream_kbps - params[i].bw_range.max_kbps;
 			tried[i] = false;
 			remaining_to_try += 1;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 2a725a5..1eb29c3 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -848,7 +848,7 @@ static void disable_vbios_mode_if_required(
 		struct dc *dc,
 		struct dc_state *context)
 {
-	unsigned int i;
+	unsigned int i, j;
 
 	/* check if timing_changed, disable stream*/
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -872,10 +872,10 @@ static void disable_vbios_mode_if_required(
 
 			enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
 			if (enc_inst != ENGINE_ID_UNKNOWN) {
-				for (i = 0; i < dc->res_pool->stream_enc_count; i++) {
-					if (dc->res_pool->stream_enc[i]->id == enc_inst) {
-						tg_inst = dc->res_pool->stream_enc[i]->funcs->dig_source_otg(
-							dc->res_pool->stream_enc[i]);
+				for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
+					if (dc->res_pool->stream_enc[j]->id == enc_inst) {
+						tg_inst = dc->res_pool->stream_enc[j]->funcs->dig_source_otg(
+							dc->res_pool->stream_enc[j]);
 						break;
 					}
 				}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 3bf8be4..1e8919b 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -2883,7 +2883,7 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
 		if (hwmgr->is_kicker)
 			switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
 		else
-			switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+			switch_limit_us = data->is_memory_gddr5 ? 200 : 150;
 		break;
 	case CHIP_VEGAM:
 		switch_limit_us = 30;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 9427939..fc4f95f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -417,6 +417,9 @@ static int smu_early_init(void *handle)
 	smu->pm_enabled = !!amdgpu_dpm;
 	smu->is_apu = false;
 	mutex_init(&smu->mutex);
+	mutex_init(&smu->smu_baco.mutex);
+	smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+	smu->smu_baco.platform_support = false;
 
 	return smu_set_funcs(adev);
 }
@@ -795,10 +798,6 @@ static int smu_sw_init(void *handle)
 	bitmap_zero(smu->smu_feature.enabled, SMU_FEATURE_MAX);
 	bitmap_zero(smu->smu_feature.allowed, SMU_FEATURE_MAX);
 
-	mutex_init(&smu->smu_baco.mutex);
-	smu->smu_baco.state = SMU_BACO_STATE_EXIT;
-	smu->smu_baco.platform_support = false;
-
 	mutex_init(&smu->sensor_lock);
 	mutex_init(&smu->metrics_lock);
 	mutex_init(&smu->message_lock);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 11fe9ff7..d6808f6 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -806,30 +806,27 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
 struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
 				       struct page **pages, unsigned int nr_pages)
 {
-	struct sg_table *sg = NULL;
+	struct sg_table *sg;
+	struct scatterlist *sge;
 	size_t max_segment = 0;
-	int ret;
 
 	sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
-	if (!sg) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!sg)
+		return ERR_PTR(-ENOMEM);
 
 	if (dev)
 		max_segment = dma_max_mapping_size(dev->dev);
 	if (max_segment == 0 || max_segment > SCATTERLIST_MAX_SEGMENT)
 		max_segment = SCATTERLIST_MAX_SEGMENT;
-	ret = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0,
+	sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0,
 					  nr_pages << PAGE_SHIFT,
-					  max_segment, GFP_KERNEL);
-	if (ret)
-		goto out;
-
+					  max_segment,
+					  NULL, 0, GFP_KERNEL);
+	if (IS_ERR(sge)) {
+		kfree(sg);
+		sg = ERR_CAST(sge);
+	}
 	return sg;
-out:
-	kfree(sg);
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(drm_prime_pages_to_sg);
 
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 9afa5c4..1e1cb24 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -25,6 +25,7 @@
 	select CRC32
 	select SND_HDA_I915 if SND_HDA_CORE
 	select CEC_CORE if CEC_NOTIFIER
+	select VMAP_PFN
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 4d06178..cdcb7b1 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2742,7 +2742,7 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
 	u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
 	int rate = 0;
 
-	if (type == INTEL_OUTPUT_HDMI) {
+	if (type != INTEL_OUTPUT_HDMI) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
 		rate = intel_dp->link_rate;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 631b433..a1fba7e 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -4093,8 +4093,7 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state)
 int skl_check_plane_surface(struct intel_plane_state *plane_state)
 {
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
-	int ret;
-	bool needs_aux = false;
+	int ret, i;
 
 	ret = intel_plane_compute_gtt(plane_state);
 	if (ret)
@@ -4108,7 +4107,6 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 	 * it.
 	 */
 	if (is_ccs_modifier(fb->modifier)) {
-		needs_aux = true;
 		ret = skl_check_ccs_aux_surface(plane_state);
 		if (ret)
 			return ret;
@@ -4116,20 +4114,15 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 
 	if (intel_format_info_is_yuv_semiplanar(fb->format,
 						fb->modifier)) {
-		needs_aux = true;
 		ret = skl_check_nv12_aux_surface(plane_state);
 		if (ret)
 			return ret;
 	}
 
-	if (!needs_aux) {
-		int i;
-
-		for (i = 1; i < fb->format->num_planes; i++) {
-			plane_state->color_plane[i].offset = ~0xfff;
-			plane_state->color_plane[i].x = 0;
-			plane_state->color_plane[i].y = 0;
-		}
+	for (i = fb->format->num_planes; i < ARRAY_SIZE(plane_state->color_plane); i++) {
+		plane_state->color_plane[i].offset = ~0xfff;
+		plane_state->color_plane[i].x = 0;
+		plane_state->color_plane[i].y = 0;
 	}
 
 	ret = skl_check_main_surface(plane_state);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index d6eeefa..f60ca6d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -162,8 +162,6 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
 {
 	if (is_vmalloc_addr(ptr))
 		vunmap(ptr);
-	else
-		kunmap(kmap_to_page(ptr));
 }
 
 struct sg_table *
@@ -234,34 +232,21 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static inline pte_t iomap_pte(resource_size_t base,
-			      dma_addr_t offset,
-			      pgprot_t prot)
-{
-	return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot));
-}
-
 /* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
-				 enum i915_map_type type)
+static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
+		enum i915_map_type type)
 {
-	unsigned long n_pte = obj->base.size >> PAGE_SHIFT;
-	struct sg_table *sgt = obj->mm.pages;
-	pte_t *stack[32], **mem;
-	struct vm_struct *area;
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i;
+	struct page *stack[32], **pages = stack, *page;
+	struct sgt_iter iter;
 	pgprot_t pgprot;
+	void *vaddr;
 
-	if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC)
-		return NULL;
-
-	if (GEM_WARN_ON(type == I915_MAP_WC &&
-			!static_cpu_has(X86_FEATURE_PAT)))
-		return NULL;
-
-	/* A single page can always be kmapped */
-	if (n_pte == 1 && type == I915_MAP_WB) {
-		struct page *page = sg_page(sgt->sgl);
-
+	switch (type) {
+	default:
+		MISSING_CASE(type);
+		fallthrough;	/* to use PAGE_KERNEL anyway */
+	case I915_MAP_WB:
 		/*
 		 * On 32b, highmem using a finite set of indirect PTE (i.e.
 		 * vmap) to provide virtual mappings of the high pages.
@@ -277,33 +262,10 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 		 * forever.
 		 *
 		 * So if the page is beyond the 32b boundary, make an explicit
-		 * vmap. On 64b, this check will be optimised away as we can
-		 * directly kmap any page on the system.
+		 * vmap.
 		 */
-		if (!PageHighMem(page))
-			return kmap(page);
-	}
-
-	mem = stack;
-	if (n_pte > ARRAY_SIZE(stack)) {
-		/* Too big for stack -- allocate temporary array instead */
-		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
-		if (!mem)
-			return NULL;
-	}
-
-	area = alloc_vm_area(obj->base.size, mem);
-	if (!area) {
-		if (mem != stack)
-			kvfree(mem);
-		return NULL;
-	}
-
-	switch (type) {
-	default:
-		MISSING_CASE(type);
-		fallthrough;	/* to use PAGE_KERNEL anyway */
-	case I915_MAP_WB:
+		if (n_pages == 1 && !PageHighMem(sg_page(obj->mm.pages->sgl)))
+			return page_address(sg_page(obj->mm.pages->sgl));
 		pgprot = PAGE_KERNEL;
 		break;
 	case I915_MAP_WC:
@@ -311,30 +273,50 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj,
 		break;
 	}
 
-	if (i915_gem_object_has_struct_page(obj)) {
-		struct sgt_iter iter;
-		struct page *page;
-		pte_t **ptes = mem;
-
-		for_each_sgt_page(page, iter, sgt)
-			**ptes++ = mk_pte(page, pgprot);
-	} else {
-		resource_size_t iomap;
-		struct sgt_iter iter;
-		pte_t **ptes = mem;
-		dma_addr_t addr;
-
-		iomap = obj->mm.region->iomap.base;
-		iomap -= obj->mm.region->region.start;
-
-		for_each_sgt_daddr(addr, iter, sgt)
-			**ptes++ = iomap_pte(iomap, addr, pgprot);
+	if (n_pages > ARRAY_SIZE(stack)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return NULL;
 	}
 
-	if (mem != stack)
-		kvfree(mem);
+	i = 0;
+	for_each_sgt_page(page, iter, obj->mm.pages)
+		pages[i++] = page;
+	vaddr = vmap(pages, n_pages, 0, pgprot);
+	if (pages != stack)
+		kvfree(pages);
+	return vaddr;
+}
 
-	return area->addr;
+static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
+		enum i915_map_type type)
+{
+	resource_size_t iomap = obj->mm.region->iomap.base -
+		obj->mm.region->region.start;
+	unsigned long n_pfn = obj->base.size >> PAGE_SHIFT;
+	unsigned long stack[32], *pfns = stack, i;
+	struct sgt_iter iter;
+	dma_addr_t addr;
+	void *vaddr;
+
+	if (type != I915_MAP_WC)
+		return NULL;
+
+	if (n_pfn > ARRAY_SIZE(stack)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL);
+		if (!pfns)
+			return NULL;
+	}
+
+	i = 0;
+	for_each_sgt_daddr(addr, iter, obj->mm.pages)
+		pfns[i++] = (iomap + addr) >> PAGE_SHIFT;
+	vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO));
+	if (pfns != stack)
+		kvfree(pfns);
+	return vaddr;
 }
 
 /* get, pin, and map the pages of the object into kernel space */
@@ -386,7 +368,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 
 	if (!ptr) {
-		ptr = i915_gem_object_map(obj, type);
+		if (GEM_WARN_ON(type == I915_MAP_WC &&
+				!static_cpu_has(X86_FEATURE_PAT)))
+			ptr = NULL;
+		else if (i915_gem_object_has_struct_page(obj))
+			ptr = i915_gem_object_map_page(obj, type);
+		else
+			ptr = i915_gem_object_map_pfn(obj, type);
 		if (!ptr) {
 			err = -ENOMEM;
 			goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12b3007..f2eaed6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 	unsigned int max_segment = i915_sg_segment_size();
 	struct sg_table *st;
 	unsigned int sg_page_sizes;
+	struct scatterlist *sg;
 	int ret;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);
 
 alloc_table:
-	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
-					  0, num_pages << PAGE_SHIFT,
-					  max_segment,
-					  GFP_KERNEL);
-	if (ret) {
+	sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
+					 num_pages << PAGE_SHIFT, max_segment,
+					 NULL, 0, GFP_KERNEL);
+	if (IS_ERR(sg)) {
 		kfree(st);
-		return ERR_PTR(ret);
+		return ERR_CAST(sg);
 	}
 
 	ret = i915_gem_gtt_prepare_pages(obj, st);
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 43c7acb..f011ea4 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -49,80 +49,40 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
 	return file;
 }
 
-static size_t shmem_npte(struct file *file)
-{
-	return file->f_mapping->host->i_size >> PAGE_SHIFT;
-}
-
-static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
-{
-	unsigned long pfn;
-
-	vunmap(ptr);
-
-	for (pfn = 0; pfn < n_pte; pfn++) {
-		struct page *page;
-
-		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
-						   GFP_KERNEL);
-		if (!WARN_ON(IS_ERR(page))) {
-			put_page(page);
-			put_page(page);
-		}
-	}
-}
-
 void *shmem_pin_map(struct file *file)
 {
-	const size_t n_pte = shmem_npte(file);
-	pte_t *stack[32], **ptes, **mem;
-	struct vm_struct *area;
-	unsigned long pfn;
+	struct page **pages;
+	size_t n_pages, i;
+	void *vaddr;
 
-	mem = stack;
-	if (n_pte > ARRAY_SIZE(stack)) {
-		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
-		if (!mem)
-			return NULL;
-	}
-
-	area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
-	if (!area) {
-		if (mem != stack)
-			kvfree(mem);
+	n_pages = file->f_mapping->host->i_size >> PAGE_SHIFT;
+	pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
 		return NULL;
-	}
 
-	ptes = mem;
-	for (pfn = 0; pfn < n_pte; pfn++) {
-		struct page *page;
-
-		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
-						   GFP_KERNEL);
-		if (IS_ERR(page))
+	for (i = 0; i < n_pages; i++) {
+		pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i,
+						       GFP_KERNEL);
+		if (IS_ERR(pages[i]))
 			goto err_page;
-
-		**ptes++ = mk_pte(page,  PAGE_KERNEL);
 	}
 
-	if (mem != stack)
-		kvfree(mem);
-
+	vaddr = vmap(pages, n_pages, VM_MAP_PUT_PAGES, PAGE_KERNEL);
+	if (!vaddr)
+		goto err_page;
 	mapping_set_unevictable(file->f_mapping);
-	return area->addr;
-
+	return vaddr;
 err_page:
-	if (mem != stack)
-		kvfree(mem);
-
-	__shmem_unpin_map(file, area->addr, pfn);
+	while (--i >= 0)
+		put_page(pages[i]);
+	kvfree(pages);
 	return NULL;
 }
 
 void shmem_unpin_map(struct file *file, void *ptr)
 {
 	mapping_clear_unevictable(file->f_mapping);
-	__shmem_unpin_map(file, ptr, shmem_npte(file));
+	vfree(ptr);
 }
 
 static int __shmem_rw(struct file *file, loff_t off,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 7f03104..73116ec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -432,6 +432,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 	int ret = 0;
 	static size_t sgl_size;
 	static size_t sgt_size;
+	struct scatterlist *sg;
 
 	if (vmw_tt->mapped)
 		return 0;
@@ -454,13 +455,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 		if (unlikely(ret != 0))
 			return ret;
 
-		ret = __sg_alloc_table_from_pages
-			(&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
-			 (unsigned long) vsgt->num_pages << PAGE_SHIFT,
-			 dma_get_max_seg_size(dev_priv->dev->dev),
-			 GFP_KERNEL);
-		if (unlikely(ret != 0))
+		sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+				vsgt->num_pages, 0,
+				(unsigned long) vsgt->num_pages << PAGE_SHIFT,
+				dma_get_max_seg_size(dev_priv->dev->dev),
+				NULL, 0, GFP_KERNEL);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
 			goto out_sg_alloc_fail;
+		}
 
 		if (vsgt->num_pages > vmw_tt->sgt.orig_nents) {
 			uint64_t over_alloc =
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 97f2e29..1c6b78a 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -1373,7 +1373,9 @@ static int i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev,
 	enum i3c_addr_slot_status status;
 	int ret;
 
-	if (dev->info.dyn_addr != old_dyn_addr) {
+	if (dev->info.dyn_addr != old_dyn_addr &&
+	    (!dev->boardinfo ||
+	     dev->info.dyn_addr != dev->boardinfo->init_dyn_addr)) {
 		status = i3c_bus_get_addr_slot_status(&master->bus,
 						      dev->info.dyn_addr);
 		if (status != I3C_ADDR_SLOT_FREE)
@@ -1432,33 +1434,49 @@ static void i3c_master_detach_i2c_dev(struct i2c_dev_desc *dev)
 		master->ops->detach_i2c_dev(dev);
 }
 
-static void i3c_master_pre_assign_dyn_addr(struct i3c_dev_desc *dev)
+static int i3c_master_early_i3c_dev_add(struct i3c_master_controller *master,
+					  struct i3c_dev_boardinfo *boardinfo)
 {
-	struct i3c_master_controller *master = i3c_dev_get_master(dev);
+	struct i3c_device_info info = {
+		.static_addr = boardinfo->static_addr,
+	};
+	struct i3c_dev_desc *i3cdev;
 	int ret;
 
-	if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr ||
-	    !dev->boardinfo->static_addr)
-		return;
+	i3cdev = i3c_master_alloc_i3c_dev(master, &info);
+	if (IS_ERR(i3cdev))
+		return -ENOMEM;
 
-	ret = i3c_master_setdasa_locked(master, dev->info.static_addr,
-					dev->boardinfo->init_dyn_addr);
+	i3cdev->boardinfo = boardinfo;
+
+	ret = i3c_master_attach_i3c_dev(master, i3cdev);
 	if (ret)
-		return;
+		goto err_free_dev;
 
-	dev->info.dyn_addr = dev->boardinfo->init_dyn_addr;
-	ret = i3c_master_reattach_i3c_dev(dev, 0);
+	ret = i3c_master_setdasa_locked(master, i3cdev->info.static_addr,
+					i3cdev->boardinfo->init_dyn_addr);
+	if (ret)
+		goto err_detach_dev;
+
+	i3cdev->info.dyn_addr = i3cdev->boardinfo->init_dyn_addr;
+	ret = i3c_master_reattach_i3c_dev(i3cdev, 0);
 	if (ret)
 		goto err_rstdaa;
 
-	ret = i3c_master_retrieve_dev_info(dev);
+	ret = i3c_master_retrieve_dev_info(i3cdev);
 	if (ret)
 		goto err_rstdaa;
 
-	return;
+	return 0;
 
 err_rstdaa:
-	i3c_master_rstdaa_locked(master, dev->boardinfo->init_dyn_addr);
+	i3c_master_rstdaa_locked(master, i3cdev->boardinfo->init_dyn_addr);
+err_detach_dev:
+	i3c_master_detach_i3c_dev(i3cdev);
+err_free_dev:
+	i3c_master_free_i3c_dev(i3cdev);
+
+	return ret;
 }
 
 static void
@@ -1625,8 +1643,8 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master)
  * This function is following all initialisation steps described in the I3C
  * specification:
  *
- * 1. Attach I2C and statically defined I3C devs to the master so that the
- *    master can fill its internal device table appropriately
+ * 1. Attach I2C devs to the master so that the master can fill its internal
+ *    device table appropriately
  *
  * 2. Call &i3c_master_controller_ops->bus_init() method to initialize
  *    the master controller. That's usually where the bus mode is selected
@@ -1638,8 +1656,10 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master)
  *
  * 4. Disable all slave events.
  *
- * 5. Pre-assign dynamic addresses requested by the FW with SETDASA for I3C
- *    devices that have a static address
+ * 5. Reserve address slots for I3C devices with init_dyn_addr. And if devices
+ *    also have static_addr, try to pre-assign dynamic addresses requested by
+ *    the FW with SETDASA and attach corresponding statically defined I3C
+ *    devices to the master.
  *
  * 6. Do a DAA (Dynamic Address Assignment) to assign dynamic addresses to all
  *    remaining I3C devices
@@ -1653,7 +1673,6 @@ static int i3c_master_bus_init(struct i3c_master_controller *master)
 	enum i3c_addr_slot_status status;
 	struct i2c_dev_boardinfo *i2cboardinfo;
 	struct i3c_dev_boardinfo *i3cboardinfo;
-	struct i3c_dev_desc *i3cdev;
 	struct i2c_dev_desc *i2cdev;
 	int ret;
 
@@ -1685,34 +1704,6 @@ static int i3c_master_bus_init(struct i3c_master_controller *master)
 			goto err_detach_devs;
 		}
 	}
-	list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) {
-		struct i3c_device_info info = {
-			.static_addr = i3cboardinfo->static_addr,
-		};
-
-		if (i3cboardinfo->init_dyn_addr) {
-			status = i3c_bus_get_addr_slot_status(&master->bus,
-						i3cboardinfo->init_dyn_addr);
-			if (status != I3C_ADDR_SLOT_FREE) {
-				ret = -EBUSY;
-				goto err_detach_devs;
-			}
-		}
-
-		i3cdev = i3c_master_alloc_i3c_dev(master, &info);
-		if (IS_ERR(i3cdev)) {
-			ret = PTR_ERR(i3cdev);
-			goto err_detach_devs;
-		}
-
-		i3cdev->boardinfo = i3cboardinfo;
-
-		ret = i3c_master_attach_i3c_dev(master, i3cdev);
-		if (ret) {
-			i3c_master_free_i3c_dev(i3cdev);
-			goto err_detach_devs;
-		}
-	}
 
 	/*
 	 * Now execute the controller specific ->bus_init() routine, which
@@ -1749,11 +1740,43 @@ static int i3c_master_bus_init(struct i3c_master_controller *master)
 		goto err_bus_cleanup;
 
 	/*
-	 * Pre-assign dynamic address and retrieve device information if
-	 * needed.
+	 * Reserve init_dyn_addr first, and then try to pre-assign dynamic
+	 * address and retrieve device information if needed.
+	 * In case pre-assign dynamic address fails, setting dynamic address to
+	 * the requested init_dyn_addr is retried after DAA is done in
+	 * i3c_master_add_i3c_dev_locked().
 	 */
-	i3c_bus_for_each_i3cdev(&master->bus, i3cdev)
-		i3c_master_pre_assign_dyn_addr(i3cdev);
+	list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) {
+
+		/*
+		 * We don't reserve a dynamic address for devices that
+		 * don't explicitly request one.
+		 */
+		if (!i3cboardinfo->init_dyn_addr)
+			continue;
+
+		ret = i3c_bus_get_addr_slot_status(&master->bus,
+						   i3cboardinfo->init_dyn_addr);
+		if (ret != I3C_ADDR_SLOT_FREE) {
+			ret = -EBUSY;
+			goto err_rstdaa;
+		}
+
+		i3c_bus_set_addr_slot_status(&master->bus,
+					     i3cboardinfo->init_dyn_addr,
+					     I3C_ADDR_SLOT_I3C_DEV);
+
+		/*
+		 * Only try to create/attach devices that have a static
+		 * address. Other devices will be created/attached when
+		 * DAA happens, and the requested dynamic address will
+		 * be set using SETNEWDA once those devices become
+		 * addressable.
+		 */
+
+		if (i3cboardinfo->static_addr)
+			i3c_master_early_i3c_dev_add(master, i3cboardinfo);
+	}
 
 	ret = i3c_master_do_daa(master);
 	if (ret)
@@ -1782,6 +1805,21 @@ static void i3c_master_bus_cleanup(struct i3c_master_controller *master)
 	i3c_master_detach_free_devs(master);
 }
 
+static void i3c_master_attach_boardinfo(struct i3c_dev_desc *i3cdev)
+{
+	struct i3c_master_controller *master = i3cdev->common.master;
+	struct i3c_dev_boardinfo *i3cboardinfo;
+
+	list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) {
+		if (i3cdev->info.pid != i3cboardinfo->pid)
+			continue;
+
+		i3cdev->boardinfo = i3cboardinfo;
+		i3cdev->info.static_addr = i3cboardinfo->static_addr;
+		return;
+	}
+}
+
 static struct i3c_dev_desc *
 i3c_master_search_i3c_dev_duplicate(struct i3c_dev_desc *refdev)
 {
@@ -1837,10 +1875,10 @@ int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master,
 	if (ret)
 		goto err_detach_dev;
 
+	i3c_master_attach_boardinfo(newdev);
+
 	olddev = i3c_master_search_i3c_dev_duplicate(newdev);
 	if (olddev) {
-		newdev->boardinfo = olddev->boardinfo;
-		newdev->info.static_addr = olddev->info.static_addr;
 		newdev->dev = olddev->dev;
 		if (newdev->dev)
 			newdev->dev->desc = newdev;
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c
index 3fee8bd..3f22269 100644
--- a/drivers/i3c/master/i3c-master-cdns.c
+++ b/drivers/i3c/master/i3c-master-cdns.c
@@ -1635,8 +1635,10 @@ static int cdns_i3c_master_probe(struct platform_device *pdev)
 	master->ibi.slots = devm_kcalloc(&pdev->dev, master->ibi.num_slots,
 					 sizeof(*master->ibi.slots),
 					 GFP_KERNEL);
-	if (!master->ibi.slots)
+	if (!master->ibi.slots) {
+		ret = -ENOMEM;
 		goto err_disable_sysclk;
+	}
 
 	writel(IBIR_THR(1), master->regs + CMD_IBI_THR_CTRL);
 	writel(MST_INT_IBIR_THR, master->regs + MST_IER);
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 91b0233..32a5143 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -48,6 +48,7 @@
 	depends on INFINIBAND_USER_MEM
 	select MMU_NOTIFIER
 	select INTERVAL_TREE
+	select HMM_MIRROR
 	default y
 	help
 	  On demand paging support for the InfiniBand subsystem.
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 24cb71a..ccf2670 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -17,7 +17,7 @@
 ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
 ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
 
-ib_cm-y :=			cm.o
+ib_cm-y :=			cm.o cm_trace.o
 
 iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o
 
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439..0abce00 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work)
 	req->callback = NULL;
 
 	spin_lock_bh(&lock);
+	/*
+	 * Although the work will normally have been canceled by the workqueue,
+	 * it can still be requeued as long as it is on the req_list.
+	 */
+	cancel_delayed_work(&req->work);
 	if (!list_empty(&req->list)) {
-		/*
-		 * Although the work will normally have been canceled by the
-		 * workqueue, it can still be requeued as long as it is on the
-		 * req_list.
-		 */
-		cancel_delayed_work(&req->work);
 		list_del_init(&req->list);
 		kfree(req);
 	}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 5a76611..8017c40 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
 }
 
 static const char * const gid_type_str[] = {
+	/* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+	 * user space compatibility reasons.
+	 */
 	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
+	[IB_GID_TYPE_ROCE]	= "IB/RoCE v1",
 	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
 };
 
@@ -1220,7 +1224,7 @@ EXPORT_SYMBOL(ib_get_cached_port_state);
 const struct ib_gid_attr *
 rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
 {
-	const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+	const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
 	struct ib_gid_table *table;
 	unsigned long flags;
 
@@ -1244,6 +1248,67 @@ rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
 EXPORT_SYMBOL(rdma_get_gid_attr);
 
 /**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ * @num_entries: Updated to the number of entries that were successfully read.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+			     struct ib_uverbs_gid_entry *entries,
+			     size_t max_entries)
+{
+	const struct ib_gid_attr *gid_attr;
+	ssize_t num_entries = 0, ret;
+	struct ib_gid_table *table;
+	unsigned int port_num, i;
+	struct net_device *ndev;
+	unsigned long flags;
+
+	rdma_for_each_port(device, port_num) {
+		if (!rdma_ib_or_roce(device, port_num))
+			continue;
+
+		table = rdma_gid_table(device, port_num);
+		read_lock_irqsave(&table->rwlock, flags);
+		for (i = 0; i < table->sz; i++) {
+			if (!is_gid_entry_valid(table->data_vec[i]))
+				continue;
+			if (num_entries >= max_entries) {
+				ret = -EINVAL;
+				goto err;
+			}
+
+			gid_attr = &table->data_vec[i]->attr;
+
+			memcpy(&entries->gid, &gid_attr->gid,
+			       sizeof(gid_attr->gid));
+			entries->gid_index = gid_attr->index;
+			entries->port_num = gid_attr->port_num;
+			entries->gid_type = gid_attr->gid_type;
+			ndev = rcu_dereference_protected(
+				gid_attr->ndev,
+				lockdep_is_held(&table->rwlock));
+			if (ndev)
+				entries->netdev_ifindex = ndev->ifindex;
+
+			num_entries++;
+			entries++;
+		}
+		read_unlock_irqrestore(&table->rwlock, flags);
+	}
+
+	return num_entries;
+err:
+	read_unlock_irqrestore(&table->rwlock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
+
+/**
  * rdma_put_gid_attr - Release reference to the GID attribute
  * @attr:		Pointer to the GID attribute whose reference
  *			needs to be released.
@@ -1299,7 +1364,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
 	struct ib_gid_table_entry *entry =
 			container_of(attr, struct ib_gid_table_entry, attr);
 	struct ib_device *device = entry->attr.device;
-	struct net_device *ndev = ERR_PTR(-ENODEV);
+	struct net_device *ndev = ERR_PTR(-EINVAL);
 	u8 port_num = entry->attr.port_num;
 	struct ib_gid_table *table;
 	unsigned long flags;
@@ -1311,8 +1376,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
 	valid = is_gid_entry_valid(table->data_vec[attr->index]);
 	if (valid) {
 		ndev = rcu_dereference(attr->ndev);
-		if (!ndev ||
-		    (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+		if (!ndev)
 			ndev = ERR_PTR(-ENODEV);
 	}
 	read_unlock_irqrestore(&table->rwlock, flags);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index fbc28f1..5740d1b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -27,6 +27,7 @@
 #include <rdma/ib_cm.h>
 #include "cm_msgs.h"
 #include "core_priv.h"
+#include "cm_trace.h"
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
@@ -201,7 +202,6 @@ static struct attribute *cm_counter_default_attrs[] = {
 struct cm_port {
 	struct cm_device *cm_dev;
 	struct ib_mad_agent *mad_agent;
-	struct kobject port_obj;
 	u8 port_num;
 	struct list_head cm_priv_prim_list;
 	struct list_head cm_priv_altr_list;
@@ -1563,6 +1563,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 	cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
 	cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
 
+	trace_icm_send_req(&cm_id_priv->id);
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
 	if (ret) {
@@ -1610,6 +1611,9 @@ static int cm_issue_rej(struct cm_port *port,
 		IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
 	}
 
+	trace_icm_issue_rej(
+		IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+		IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		cm_free_msg(msg);
@@ -1961,6 +1965,7 @@ static void cm_dup_req_handler(struct cm_work *work,
 	}
 	spin_unlock_irq(&cm_id_priv->lock);
 
+	trace_icm_send_dup_req(&cm_id_priv->id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
@@ -2124,8 +2129,7 @@ static int cm_req_handler(struct cm_work *work)
 
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
-		pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
-			 be32_to_cpu(cm_id_priv->id.local_id));
+		trace_icm_no_listener_err(&cm_id_priv->id);
 		cm_id_priv->id.state = IB_CM_IDLE;
 		ret = -EINVAL;
 		goto destroy;
@@ -2274,8 +2278,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id->state != IB_CM_REQ_RCVD &&
 	    cm_id->state != IB_CM_MRA_REQ_SENT) {
-		pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
-			 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+		trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -2289,6 +2292,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
 	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
 
+	trace_icm_send_rep(cm_id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2348,8 +2352,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id->state != IB_CM_REP_RCVD &&
 	    cm_id->state != IB_CM_MRA_REP_SENT) {
-		pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
-			 be32_to_cpu(cm_id->local_id), cm_id->state);
+		trace_icm_send_cm_rtu_err(cm_id);
 		ret = -EINVAL;
 		goto error;
 	}
@@ -2361,6 +2364,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
 		      private_data, private_data_len);
 
+	trace_icm_send_rtu(cm_id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2442,6 +2446,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
 		goto unlock;
 	spin_unlock_irq(&cm_id_priv->lock);
 
+	trace_icm_send_dup_rep(&cm_id_priv->id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
@@ -2465,7 +2470,7 @@ static int cm_rep_handler(struct cm_work *work)
 		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
 	if (!cm_id_priv) {
 		cm_dup_rep_handler(work);
-		pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+		trace_icm_remote_no_priv_err(
 			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 		return -EINVAL;
 	}
@@ -2479,11 +2484,10 @@ static int cm_rep_handler(struct cm_work *work)
 		break;
 	default:
 		ret = -EINVAL;
-		pr_debug(
-			"%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
-			__func__, cm_id_priv->id.state,
+		trace_icm_rep_unknown_err(
 			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
-			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+			cm_id_priv->id.state);
 		spin_unlock_irq(&cm_id_priv->lock);
 		goto error;
 	}
@@ -2500,7 +2504,7 @@ static int cm_rep_handler(struct cm_work *work)
 		spin_unlock(&cm.lock);
 		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
-		pr_debug("%s: Failed to insert remote id %d\n", __func__,
+		trace_icm_insert_failed_err(
 			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 		goto error;
 	}
@@ -2517,9 +2521,8 @@ static int cm_rep_handler(struct cm_work *work)
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
 			     NULL, 0);
 		ret = -EINVAL;
-		pr_debug(
-			"%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
-			__func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+		trace_icm_staleconn_err(
+			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
 			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
 
 		if (cur_cm_id_priv) {
@@ -2646,9 +2649,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
 		return -EINVAL;
 
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
-		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-			 be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_dreq_skipped(&cm_id_priv->id);
 		return -EINVAL;
 	}
 
@@ -2667,6 +2668,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
 	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
 
+	trace_icm_send_dreq(&cm_id_priv->id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_enter_timewait(cm_id_priv);
@@ -2722,10 +2724,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
 		return -EINVAL;
 
 	if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
-		pr_debug(
-			"%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
-			__func__, be32_to_cpu(cm_id_priv->id.local_id),
-			cm_id_priv->id.state);
+		trace_icm_send_drep_err(&cm_id_priv->id);
 		kfree(private_data);
 		return -EINVAL;
 	}
@@ -2740,6 +2739,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
 		       private_data, private_data_len);
 
+	trace_icm_send_drep(&cm_id_priv->id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_free_msg(msg);
@@ -2789,6 +2789,9 @@ static int cm_issue_drep(struct cm_port *port,
 	IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
 		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 
+	trace_icm_issue_drep(
+		IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+		IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		cm_free_msg(msg);
@@ -2810,9 +2813,8 @@ static int cm_dreq_handler(struct cm_work *work)
 		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
 				counter[CM_DREQ_COUNTER]);
 		cm_issue_drep(work->port, work->mad_recv_wc);
-		pr_debug(
-			"%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
-			__func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+		trace_icm_no_priv_err(
+			IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
 			IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
 		return -EINVAL;
 	}
@@ -2858,9 +2860,7 @@ static int cm_dreq_handler(struct cm_work *work)
 				counter[CM_DREQ_COUNTER]);
 		goto unlock;
 	default:
-		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_dreq_unknown_err(&cm_id_priv->id);
 		goto unlock;
 	}
 	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2945,12 +2945,11 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
 			      state);
 		break;
 	default:
-		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-			 be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_send_unknown_rej_err(&cm_id_priv->id);
 		return -EINVAL;
 	}
 
+	trace_icm_send_rej(&cm_id_priv->id, reason);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_free_msg(msg);
@@ -3060,9 +3059,7 @@ static int cm_rej_handler(struct cm_work *work)
 		}
 		fallthrough;
 	default:
-		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_rej_unknown_err(&cm_id_priv->id);
 		spin_unlock_irq(&cm_id_priv->lock);
 		goto out;
 	}
@@ -3118,9 +3115,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 		}
 		fallthrough;
 	default:
-		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_send_mra_unknown_err(&cm_id_priv->id);
 		ret = -EINVAL;
 		goto error1;
 	}
@@ -3133,6 +3128,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      msg_response, service_timeout,
 			      private_data, private_data_len);
+		trace_icm_send_mra(cm_id);
 		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
@@ -3229,9 +3225,7 @@ static int cm_mra_handler(struct cm_work *work)
 				counter[CM_MRA_COUNTER]);
 		fallthrough;
 	default:
-		pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_mra_unknown_err(&cm_id_priv->id);
 		goto out;
 	}
 
@@ -3505,10 +3499,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	if (cm_id->state == IB_CM_IDLE)
+	if (cm_id->state == IB_CM_IDLE) {
+		trace_icm_send_sidr_req(&cm_id_priv->id);
 		ret = ib_post_send_mad(msg, NULL);
-	else
+	} else {
 		ret = -EINVAL;
+	}
 
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3670,6 +3666,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
 
 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
 			   param);
+	trace_icm_send_sidr_rep(&cm_id_priv->id);
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_free_msg(msg);
@@ -3767,8 +3764,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
 	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
 		goto discard;
 
-	pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
-			     state, ib_wc_status_msg(wc_status));
+	trace_icm_mad_send_err(state, wc_status);
 	switch (state) {
 	case IB_CM_REQ_SENT:
 	case IB_CM_MRA_REQ_RCVD:
@@ -3891,7 +3887,7 @@ static void cm_work_handler(struct work_struct *_work)
 		ret = cm_timewait_handler(work);
 		break;
 	default:
-		pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+		trace_icm_handler_err(work->cm_event.event);
 		ret = -EINVAL;
 		break;
 	}
@@ -3927,8 +3923,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
 		ret = -EISCONN;
 		break;
 	default:
-		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
-			 be32_to_cpu(cm_id->local_id), cm_id->state);
+		trace_icm_establish_err(cm_id);
 		ret = -EINVAL;
 		break;
 	}
@@ -4125,9 +4120,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 		ret = 0;
 		break;
 	default:
-		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_qp_init_err(&cm_id_priv->id);
 		ret = -EINVAL;
 		break;
 	}
@@ -4175,9 +4168,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
 		ret = 0;
 		break;
 	default:
-		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_qp_rtr_err(&cm_id_priv->id);
 		ret = -EINVAL;
 		break;
 	}
@@ -4237,9 +4228,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
 		ret = 0;
 		break;
 	default:
-		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
-			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
-			 cm_id_priv->id.state);
+		trace_icm_qp_rts_err(&cm_id_priv->id);
 		ret = -EINVAL;
 		break;
 	}
@@ -4295,20 +4284,6 @@ static struct kobj_type cm_counter_obj_type = {
 	.default_attrs = cm_counter_default_attrs
 };
 
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
-	if (mode)
-		*mode = 0666;
-	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
-}
-
-struct class cm_class = {
-	.owner   = THIS_MODULE,
-	.name    = "infiniband_cm",
-	.devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
 static int cm_create_port_fs(struct cm_port *port)
 {
 	int i, ret;
@@ -4511,12 +4486,6 @@ static int __init ib_cm_init(void)
 	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
 	INIT_LIST_HEAD(&cm.timewait_list);
 
-	ret = class_register(&cm_class);
-	if (ret) {
-		ret = -ENOMEM;
-		goto error1;
-	}
-
 	cm.wq = alloc_workqueue("ib_cm", 0, 1);
 	if (!cm.wq) {
 		ret = -ENOMEM;
@@ -4531,8 +4500,6 @@ static int __init ib_cm_init(void)
 error3:
 	destroy_workqueue(cm.wq);
 error2:
-	class_unregister(&cm_class);
-error1:
 	return ret;
 }
 
@@ -4553,7 +4520,6 @@ static void __exit ib_cm_cleanup(void)
 		kfree(timewait_info);
 	}
 
-	class_unregister(&cm_class);
 	WARN_ON(!xa_empty(&cm.local_id_table));
 }
 
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 0000000..8f3482f
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 0000000..e9d2826
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/events/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST					\
+	ib_cm_state(IDLE)					\
+	ib_cm_state(LISTEN)					\
+	ib_cm_state(REQ_SENT)					\
+	ib_cm_state(REQ_RCVD)					\
+	ib_cm_state(MRA_REQ_SENT)				\
+	ib_cm_state(MRA_REQ_RCVD)				\
+	ib_cm_state(REP_SENT)					\
+	ib_cm_state(REP_RCVD)					\
+	ib_cm_state(MRA_REP_SENT)				\
+	ib_cm_state(MRA_REP_RCVD)				\
+	ib_cm_state(ESTABLISHED)				\
+	ib_cm_state(DREQ_SENT)					\
+	ib_cm_state(DREQ_RCVD)					\
+	ib_cm_state(TIMEWAIT)					\
+	ib_cm_state(SIDR_REQ_SENT)				\
+	ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef  ib_cm_state
+#undef  ib_cm_state_end
+#define ib_cm_state(x)		TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef  ib_cm_state
+#undef  ib_cm_state_end
+#define ib_cm_state(x)		{ IB_CM_##x, #x },
+#define ib_cm_state_end(x)	{ IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+		__print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST					\
+	ib_cm_lap_state(LAP_UNINIT)				\
+	ib_cm_lap_state(LAP_IDLE)				\
+	ib_cm_lap_state(LAP_SENT)				\
+	ib_cm_lap_state(LAP_RCVD)				\
+	ib_cm_lap_state(MRA_LAP_SENT)				\
+	ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef  ib_cm_lap_state
+#undef  ib_cm_lap_state_end
+#define ib_cm_lap_state(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef  ib_cm_lap_state
+#undef  ib_cm_lap_state_end
+#define ib_cm_lap_state(x)	{ IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x)	{ IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+		__print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST					\
+	ib_cm_rej_reason(REJ_NO_QP)				\
+	ib_cm_rej_reason(REJ_NO_EEC)				\
+	ib_cm_rej_reason(REJ_NO_RESOURCES)			\
+	ib_cm_rej_reason(REJ_TIMEOUT)				\
+	ib_cm_rej_reason(REJ_UNSUPPORTED)			\
+	ib_cm_rej_reason(REJ_INVALID_COMM_ID)			\
+	ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE)		\
+	ib_cm_rej_reason(REJ_INVALID_SERVICE_ID)		\
+	ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE)		\
+	ib_cm_rej_reason(REJ_STALE_CONN)			\
+	ib_cm_rej_reason(REJ_RDC_NOT_EXIST)			\
+	ib_cm_rej_reason(REJ_INVALID_GID)			\
+	ib_cm_rej_reason(REJ_INVALID_LID)			\
+	ib_cm_rej_reason(REJ_INVALID_SL)			\
+	ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS)		\
+	ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT)			\
+	ib_cm_rej_reason(REJ_INVALID_PACKET_RATE)		\
+	ib_cm_rej_reason(REJ_INVALID_ALT_GID)			\
+	ib_cm_rej_reason(REJ_INVALID_ALT_LID)			\
+	ib_cm_rej_reason(REJ_INVALID_ALT_SL)			\
+	ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS)		\
+	ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT)		\
+	ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE)		\
+	ib_cm_rej_reason(REJ_PORT_CM_REDIRECT)			\
+	ib_cm_rej_reason(REJ_PORT_REDIRECT)			\
+	ib_cm_rej_reason(REJ_INVALID_MTU)			\
+	ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES)	\
+	ib_cm_rej_reason(REJ_CONSUMER_DEFINED)			\
+	ib_cm_rej_reason(REJ_INVALID_RNR_RETRY)			\
+	ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID)		\
+	ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION)		\
+	ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL)		\
+	ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL)		\
+	ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef  ib_cm_rej_reason
+#undef  ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef  ib_cm_rej_reason
+#undef  ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x)	{ IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x)	{ IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+		__print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+	TP_PROTO(
+		const struct ib_cm_id *cm_id
+	),
+
+	TP_ARGS(cm_id),
+
+	TP_STRUCT__entry(
+		__field(const void *, cm_id)	/* for eBPF scripts */
+		__field(unsigned int, local_id)
+		__field(unsigned int, remote_id)
+		__field(unsigned long, state)
+		__field(unsigned long, lap_state)
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = cm_id;
+		__entry->local_id = be32_to_cpu(cm_id->local_id);
+		__entry->remote_id = be32_to_cpu(cm_id->remote_id);
+		__entry->state = cm_id->state;
+		__entry->lap_state = cm_id->lap_state;
+	),
+
+	TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+		__entry->local_id, __entry->remote_id,
+		show_ib_cm_state(__entry->state),
+		show_ib_cm_lap_state(__entry->lap_state)
+	)
+);
+
+#define DEFINE_CM_SEND_EVENT(name)					\
+		DEFINE_EVENT(icm_id_class,				\
+				icm_send_##name,				\
+				TP_PROTO(				\
+					const struct ib_cm_id *cm_id	\
+				),					\
+				TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+	TP_PROTO(
+		const struct ib_cm_id *cm_id,
+		enum ib_cm_rej_reason reason
+	),
+
+	TP_ARGS(cm_id, reason),
+
+	TP_STRUCT__entry(
+		__field(const void *, cm_id)
+		__field(u32, local_id)
+		__field(u32, remote_id)
+		__field(unsigned long, state)
+		__field(unsigned long, reason)
+	),
+
+	TP_fast_assign(
+		__entry->cm_id = cm_id;
+		__entry->local_id = be32_to_cpu(cm_id->local_id);
+		__entry->remote_id = be32_to_cpu(cm_id->remote_id);
+		__entry->state = cm_id->state;
+		__entry->reason = reason;
+	),
+
+	TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+		__entry->local_id, __entry->remote_id,
+		show_ib_cm_state(__entry->state),
+		show_ib_cm_rej_reason(__entry->reason)
+	)
+);
+
+#define DEFINE_CM_ERR_EVENT(name)					\
+		DEFINE_EVENT(icm_id_class,				\
+				icm_##name##_err,			\
+				TP_PROTO(				\
+					const struct ib_cm_id *cm_id	\
+				),					\
+				TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(send_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class,						\
+	icm_dreq_skipped,						\
+	TP_PROTO(							\
+		const struct ib_cm_id *cm_id				\
+	),								\
+	TP_ARGS(cm_id)							\
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+	TP_PROTO(
+		unsigned int local_id,
+		unsigned int remote_id
+	),
+
+	TP_ARGS(local_id, remote_id),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, local_id)
+		__field(unsigned int, remote_id)
+	),
+
+	TP_fast_assign(
+		__entry->local_id = local_id;
+		__entry->remote_id = remote_id;
+	),
+
+	TP_printk("local_id=%u remote_id=%u",
+		__entry->local_id, __entry->remote_id
+	)
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name)					\
+		DEFINE_EVENT(icm_local_class,				\
+				icm_##name,				\
+				TP_PROTO(				\
+					unsigned int local_id,			\
+					unsigned int remote_id			\
+				),					\
+				TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+	TP_PROTO(
+		u32 remote_id
+	),
+
+	TP_ARGS(remote_id),
+
+	TP_STRUCT__entry(
+		__field(u32, remote_id)
+	),
+
+	TP_fast_assign(
+		__entry->remote_id = remote_id;
+	),
+
+	TP_printk("remote_id=%u",
+		__entry->remote_id
+	)
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name)					\
+		DEFINE_EVENT(icm_remote_class,				\
+				icm_##name,				\
+				TP_PROTO(				\
+					u32 remote_id			\
+				),					\
+				TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+	TP_PROTO(
+		__be32 local_id,
+		enum ib_cm_state state
+	),
+
+	TP_ARGS(local_id, state),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, local_id)
+		__field(unsigned long, state)
+	),
+
+	TP_fast_assign(
+		__entry->local_id = be32_to_cpu(local_id);
+		__entry->state = state;
+	),
+
+	TP_printk("local_id=%u state=%s",
+		__entry->local_id, show_ib_cm_state(__entry->state)
+	)
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+	TP_PROTO(
+		unsigned int local_id,
+		unsigned int remote_id,
+		enum ib_cm_state state
+	),
+
+	TP_ARGS(local_id, remote_id, state),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, local_id)
+		__field(unsigned int, remote_id)
+		__field(unsigned long, state)
+	),
+
+	TP_fast_assign(
+		__entry->local_id = local_id;
+		__entry->remote_id = remote_id;
+		__entry->state = state;
+	),
+
+	TP_printk("local_id=%u remote_id=%u state=%s",
+		__entry->local_id, __entry->remote_id,
+		show_ib_cm_state(__entry->state)
+	)
+);
+
+TRACE_EVENT(icm_handler_err,
+	TP_PROTO(
+		enum ib_cm_event_type event
+	),
+
+	TP_ARGS(event),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, event)
+	),
+
+	TP_fast_assign(
+		__entry->event = event;
+	),
+
+	TP_printk("unhandled event=%s",
+		rdma_show_ib_cm_event(__entry->event)
+	)
+);
+
+TRACE_EVENT(icm_mad_send_err,
+	TP_PROTO(
+		enum ib_cm_state state,
+		enum ib_wc_status wc_status
+	),
+
+	TP_ARGS(state, wc_status),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, state)
+		__field(unsigned long, wc_status)
+	),
+
+	TP_fast_assign(
+		__entry->state = state;
+		__entry->wc_status = wc_status;
+	),
+
+	TP_printk("state=%s completion status=%s",
+		show_ib_cm_state(__entry->state),
+		rdma_show_wc_status(__entry->wc_status)
+	)
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5888311..7c2ab1f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -68,6 +68,9 @@ static const char * const cma_events[] = {
 	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
 };
 
+static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
+			 union ib_gid *mgid);
+
 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 {
 	size_t index = event;
@@ -301,6 +304,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
 	if (!rdma_is_port_valid(cma_dev->device, port))
 		return -EINVAL;
 
+	if (default_gid_type == IB_GID_TYPE_IB &&
+	    rdma_protocol_roce_eth_encap(cma_dev->device, port))
+		default_gid_type = IB_GID_TYPE_ROCE;
+
 	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
 
 	if (!(supported_gids & 1 << default_gid_type))
@@ -345,13 +352,10 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
 
 struct cma_multicast {
 	struct rdma_id_private *id_priv;
-	union {
-		struct ib_sa_multicast *ib;
-	} multicast;
+	struct ib_sa_multicast *sa_mc;
 	struct list_head	list;
 	void			*context;
 	struct sockaddr_storage	addr;
-	struct kref		mcref;
 	u8			join_state;
 };
 
@@ -363,18 +367,6 @@ struct cma_work {
 	struct rdma_cm_event	event;
 };
 
-struct cma_ndev_work {
-	struct work_struct	work;
-	struct rdma_id_private	*id;
-	struct rdma_cm_event	event;
-};
-
-struct iboe_mcast_work {
-	struct work_struct	 work;
-	struct rdma_id_private	*id;
-	struct cma_multicast	*mc;
-};
-
 union cma_ip_addr {
 	struct in6_addr ip6;
 	struct {
@@ -404,23 +396,21 @@ struct cma_req_info {
 	u16 pkey;
 };
 
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&id_priv->lock, flags);
-	ret = (id_priv->state == comp);
-	spin_unlock_irqrestore(&id_priv->lock, flags);
-	return ret;
-}
-
 static int cma_comp_exch(struct rdma_id_private *id_priv,
 			 enum rdma_cm_state comp, enum rdma_cm_state exch)
 {
 	unsigned long flags;
 	int ret;
 
+	/*
+	 * The FSM uses a funny double locking where state is protected by both
+	 * the handler_mutex and the spinlock. State is not allowed to change
+	 * away from a handler_mutex protected value without also holding
+	 * handler_mutex.
+	 */
+	if (comp == RDMA_CM_CONNECT)
+		lockdep_assert_held(&id_priv->handler_mutex);
+
 	spin_lock_irqsave(&id_priv->lock, flags);
 	if ((ret = (id_priv->state == comp)))
 		id_priv->state = exch;
@@ -467,10 +457,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 	id_priv->id.route.addr.dev_addr.transport =
 		rdma_node_get_transport(cma_dev->device->node_type);
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
-	if (id_priv->res.kern_name)
-		rdma_restrack_kadd(&id_priv->res);
-	else
-		rdma_restrack_uadd(&id_priv->res);
+	rdma_restrack_add(&id_priv->res);
+
 	trace_cm_id_attach(id_priv, cma_dev->device);
 }
 
@@ -483,14 +471,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 					  rdma_start_port(cma_dev->device)];
 }
 
-static inline void release_mc(struct kref *kref)
-{
-	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
-	kfree(mc->multicast.ib);
-	kfree(mc);
-}
-
 static void cma_release_dev(struct rdma_id_private *id_priv)
 {
 	mutex_lock(&lock);
@@ -844,10 +824,10 @@ static void cma_id_put(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-struct rdma_cm_id *__rdma_create_id(struct net *net,
-				    rdma_cm_event_handler event_handler,
-				    void *context, enum rdma_ucm_port_space ps,
-				    enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+		 void *context, enum rdma_ucm_port_space ps,
+		 enum ib_qp_type qp_type, const struct rdma_id_private *parent)
 {
 	struct rdma_id_private *id_priv;
 
@@ -855,8 +835,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	rdma_restrack_set_task(&id_priv->res, caller);
-	id_priv->res.type = RDMA_RESTRACK_CM_ID;
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
@@ -876,9 +854,45 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 	id_priv->id.route.addr.dev_addr.net = get_net(net);
 	id_priv->seq_num &= 0x00ffffff;
 
-	return &id_priv->id;
+	rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+	if (parent)
+		rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+	return id_priv;
 }
-EXPORT_SYMBOL(__rdma_create_id);
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+			void *context, enum rdma_ucm_port_space ps,
+			enum ib_qp_type qp_type, const char *caller)
+{
+	struct rdma_id_private *ret;
+
+	ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+	if (IS_ERR(ret))
+		return ERR_CAST(ret);
+
+	rdma_restrack_set_name(&ret->res, caller);
+	return &ret->id;
+}
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+				       void *context,
+				       enum rdma_ucm_port_space ps,
+				       enum ib_qp_type qp_type)
+{
+	struct rdma_id_private *ret;
+
+	ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+			       ps, qp_type, NULL);
+	if (IS_ERR(ret))
+		return ERR_CAST(ret);
+
+	rdma_restrack_set_name(&ret->res, NULL);
+	return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
 
 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
@@ -1783,19 +1797,30 @@ static void cma_release_port(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
-				    struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+		       struct cma_multicast *mc)
 {
-	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-	struct net_device *ndev = NULL;
+	if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+		ib_sa_free_multicast(mc->sa_mc);
 
-	if (dev_addr->bound_dev_if)
-		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
-	if (ndev) {
-		cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
-		dev_put(ndev);
+	if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+		struct rdma_dev_addr *dev_addr =
+			&id_priv->id.route.addr.dev_addr;
+		struct net_device *ndev = NULL;
+
+		if (dev_addr->bound_dev_if)
+			ndev = dev_get_by_index(dev_addr->net,
+						dev_addr->bound_dev_if);
+		if (ndev) {
+			union ib_gid mgid;
+
+			cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
+				     &mgid);
+			cma_igmp_send(ndev, &mgid, false);
+			dev_put(ndev);
+		}
 	}
-	kref_put(&mc->mcref, release_mc);
+	kfree(mc);
 }
 
 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -1803,16 +1828,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 	struct cma_multicast *mc;
 
 	while (!list_empty(&id_priv->mc_list)) {
-		mc = container_of(id_priv->mc_list.next,
-				  struct cma_multicast, list);
+		mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+				      list);
 		list_del(&mc->list);
-		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
-				      id_priv->id.port_num)) {
-			ib_sa_free_multicast(mc->multicast.ib);
-			kfree(mc);
-		} else {
-			cma_leave_roce_mc_group(id_priv, mc);
-		}
+		destroy_mc(id_priv, mc);
 	}
 }
 
@@ -1821,7 +1840,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 {
 	cma_cancel_operation(id_priv, state);
 
-	rdma_restrack_del(&id_priv->res);
 	if (id_priv->cma_dev) {
 		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
 			if (id_priv->cm_id.ib)
@@ -1847,6 +1865,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
 
 	put_net(id_priv->id.route.addr.dev_addr.net);
+	rdma_restrack_del(&id_priv->res);
 	kfree(id_priv);
 }
 
@@ -1949,13 +1968,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
 {
 	struct rdma_id_private *id_priv = cm_id->context;
 	struct rdma_cm_event event = {};
+	enum rdma_cm_state state;
 	int ret;
 
 	mutex_lock(&id_priv->handler_mutex);
+	state = READ_ONCE(id_priv->state);
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-	     id_priv->state != RDMA_CM_CONNECT) ||
+	     state != RDMA_CM_CONNECT) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-	     id_priv->state != RDMA_CM_DISCONNECT))
+	     state != RDMA_CM_DISCONNECT))
 		goto out;
 
 	switch (ib_event->event) {
@@ -1965,7 +1986,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
 		event.status = -ETIMEDOUT;
 		break;
 	case IB_CM_REP_RECEIVED:
-		if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+		if (state == RDMA_CM_CONNECT &&
 		    (id_priv->id.qp_type != IB_QPT_UD)) {
 			trace_cm_send_mra(id_priv);
 			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
@@ -2043,14 +2064,15 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
 	int ret;
 
 	listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
-	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
-			    listen_id->event_handler, listen_id->context,
-			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
-			    listen_id_priv->res.kern_name);
-	if (IS_ERR(id))
+	id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+				   listen_id->event_handler, listen_id->context,
+				   listen_id->ps,
+				   ib_event->param.req_rcvd.qp_type,
+				   listen_id_priv);
+	if (IS_ERR(id_priv))
 		return NULL;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
+	id = &id_priv->id;
 	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
 			      (struct sockaddr *)&id->route.addr.dst_addr,
 			      listen_id, ib_event, ss_family, service_id))
@@ -2104,13 +2126,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
 	int ret;
 
 	listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
-	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
-			      listen_id->ps, IB_QPT_UD,
-			      listen_id_priv->res.kern_name);
-	if (IS_ERR(id))
+	id_priv = __rdma_create_id(net, listen_id->event_handler,
+				   listen_id->context, listen_id->ps, IB_QPT_UD,
+				   listen_id_priv);
+	if (IS_ERR(id_priv))
 		return NULL;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
+	id = &id_priv->id;
 	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
 			      (struct sockaddr *)&id->route.addr.dst_addr,
 			      listen_id, ib_event, ss_family,
@@ -2184,7 +2206,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 	}
 
 	mutex_lock(&listen_id->handler_mutex);
-	if (listen_id->state != RDMA_CM_LISTEN) {
+	if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
 		ret = -ECONNABORTED;
 		goto err_unlock;
 	}
@@ -2226,8 +2248,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 		goto net_dev_put;
 	}
 
-	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
-	    (conn_id->id.qp_type != IB_QPT_UD)) {
+	if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+	    conn_id->id.qp_type != IB_QPT_UD) {
 		trace_cm_send_mra(cm_id->context);
 		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
 	}
@@ -2288,7 +2310,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
 	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != RDMA_CM_CONNECT)
+	if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
 		goto out;
 
 	switch (iw_event->event) {
@@ -2346,7 +2368,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 			       struct iw_cm_event *iw_event)
 {
-	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
 	struct rdma_cm_event event = {};
 	int ret = -ECONNABORTED;
@@ -2362,20 +2383,18 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	listen_id = cm_id->context;
 
 	mutex_lock(&listen_id->handler_mutex);
-	if (listen_id->state != RDMA_CM_LISTEN)
+	if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
 		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
-	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
-				     listen_id->id.event_handler,
-				     listen_id->id.context,
-				     RDMA_PS_TCP, IB_QPT_RC,
-				     listen_id->res.kern_name);
-	if (IS_ERR(new_cm_id)) {
+	conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+				   listen_id->id.event_handler,
+				   listen_id->id.context, RDMA_PS_TCP,
+				   IB_QPT_RC, listen_id);
+	if (IS_ERR(conn_id)) {
 		ret = -ENOMEM;
 		goto out;
 	}
-	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
 	conn_id->state = RDMA_CM_CONNECT;
 
@@ -2480,7 +2499,6 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 			      struct cma_device *cma_dev)
 {
 	struct rdma_id_private *dev_id_priv;
-	struct rdma_cm_id *id;
 	struct net *net = id_priv->id.route.addr.dev_addr.net;
 	int ret;
 
@@ -2489,13 +2507,12 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
 		return;
 
-	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
-			      id_priv->id.qp_type, id_priv->res.kern_name);
-	if (IS_ERR(id))
+	dev_id_priv =
+		__rdma_create_id(net, cma_listen_handler, id_priv,
+				 id_priv->id.ps, id_priv->id.qp_type, id_priv);
+	if (IS_ERR(dev_id_priv))
 		return;
 
-	dev_id_priv = container_of(id, struct rdma_id_private, id);
-
 	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
 	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
 	       rdma_addr_size(cma_src_addr(id_priv)));
@@ -2508,7 +2525,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	dev_id_priv->tos_set = id_priv->tos_set;
 	dev_id_priv->tos = id_priv->tos;
 
-	ret = rdma_listen(id, id_priv->backlog);
+	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
 	if (ret)
 		dev_warn(&cma_dev->device->dev,
 			 "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
@@ -2647,8 +2664,13 @@ static void cma_work_handler(struct work_struct *_work)
 	struct rdma_id_private *id_priv = work->id;
 
 	mutex_lock(&id_priv->handler_mutex);
-	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+	if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+	    READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
 		goto out_unlock;
+	if (work->old_state != 0 || work->new_state != 0) {
+		if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+			goto out_unlock;
+	}
 
 	if (cma_cm_event_handler(id_priv, &work->event)) {
 		cma_id_put(id_priv);
@@ -2660,29 +2682,8 @@ static void cma_work_handler(struct work_struct *_work)
 	mutex_unlock(&id_priv->handler_mutex);
 	cma_id_put(id_priv);
 out_free:
-	kfree(work);
-}
-
-static void cma_ndev_work_handler(struct work_struct *_work)
-{
-	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
-	struct rdma_id_private *id_priv = work->id;
-
-	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state == RDMA_CM_DESTROYING ||
-	    id_priv->state == RDMA_CM_DEVICE_REMOVAL)
-		goto out_unlock;
-
-	if (cma_cm_event_handler(id_priv, &work->event)) {
-		cma_id_put(id_priv);
-		destroy_id_handler_unlock(id_priv);
-		goto out_free;
-	}
-
-out_unlock:
-	mutex_unlock(&id_priv->handler_mutex);
-	cma_id_put(id_priv);
-out_free:
+	if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+		rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
 	kfree(work);
 }
 
@@ -3240,32 +3241,54 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 	return rdma_bind_addr(id, src_addr);
 }
 
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      const struct sockaddr *dst_addr, unsigned long timeout_ms)
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
+ * ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+			       struct sockaddr *src_addr,
+			       const struct sockaddr *dst_addr)
 {
-	struct rdma_id_private *id_priv;
 	int ret;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
 	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
-	if (id_priv->state == RDMA_CM_IDLE) {
-		ret = cma_bind_addr(id, src_addr, dst_addr);
-		if (ret) {
-			memset(cma_dst_addr(id_priv), 0,
-			       rdma_addr_size(dst_addr));
-			return ret;
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+		/* For a well behaved ULP state will be RDMA_CM_IDLE */
+		ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+		if (ret)
+			goto err_dst;
+		if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+					   RDMA_CM_ADDR_QUERY))) {
+			ret = -EINVAL;
+			goto err_dst;
 		}
 	}
 
 	if (cma_family(id_priv) != dst_addr->sa_family) {
-		memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_state;
 	}
+	return 0;
 
-	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
-		memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
-		return -EINVAL;
-	}
+err_state:
+	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+err_dst:
+	memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+	return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+		      const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
+	int ret;
+
+	ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+	if (ret)
+		return ret;
 
 	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
@@ -3297,7 +3320,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	spin_lock_irqsave(&id_priv->lock, flags);
-	if (reuse || id_priv->state == RDMA_CM_IDLE) {
+	if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+	    id_priv->state == RDMA_CM_IDLE) {
 		id_priv->reuseaddr = reuse;
 		ret = 0;
 	} else {
@@ -3491,8 +3515,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 		if (id_priv == cur_id)
 			continue;
 
-		if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
-		    cur_id->reuseaddr)
+		if (reuseaddr && cur_id->reuseaddr)
 			continue;
 
 		cur_addr = cma_src_addr(cur_id);
@@ -3533,18 +3556,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
 	return ret;
 }
 
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
-	struct rdma_bind_list *bind_list = id_priv->bind_list;
-	int ret = 0;
-
-	mutex_lock(&lock);
-	if (bind_list->owners.first->next)
-		ret = cma_check_port(bind_list, id_priv, 0);
-	mutex_unlock(&lock);
-	return ret;
-}
-
 static enum rdma_ucm_port_space
 cma_select_inet_ps(struct rdma_id_private *id_priv)
 {
@@ -3638,22 +3649,31 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
 
 int rdma_listen(struct rdma_cm_id *id, int backlog)
 {
-	struct rdma_id_private *id_priv;
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
 	int ret;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
-	if (id_priv->state == RDMA_CM_IDLE) {
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+		/* For a well behaved ULP state will be RDMA_CM_IDLE */
 		id->route.addr.src_addr.ss_family = AF_INET;
 		ret = rdma_bind_addr(id, cma_src_addr(id_priv));
 		if (ret)
 			return ret;
+		if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+					   RDMA_CM_LISTEN)))
+			return -EINVAL;
 	}
 
-	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
-		return -EINVAL;
-
+	/*
+	 * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+	 * any more, and has to be unique in the bind list.
+	 */
 	if (id_priv->reuseaddr) {
-		ret = cma_bind_listen(id_priv);
+		mutex_lock(&lock);
+		ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+		if (!ret)
+			id_priv->reuseaddr = 0;
+		mutex_unlock(&lock);
 		if (ret)
 			goto err;
 	}
@@ -3678,6 +3698,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	return 0;
 err:
 	id_priv->backlog = 0;
+	/*
+	 * All the failure paths that lead here will not allow the req_handler's
+	 * to have run.
+	 */
 	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
 	return ret;
 }
@@ -3732,7 +3756,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	return 0;
 err2:
-	rdma_restrack_del(&id_priv->res);
 	if (id_priv->cma_dev)
 		cma_release_dev(id_priv);
 err1:
@@ -3781,7 +3804,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	int ret;
 
 	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != RDMA_CM_CONNECT)
+	if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
 		goto out;
 
 	switch (ib_event->event) {
@@ -4017,12 +4040,15 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
-	struct rdma_id_private *id_priv;
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
 	int ret;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
-		return -EINVAL;
+	mutex_lock(&id_priv->handler_mutex);
+	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
 
 	if (!id->qp) {
 		id_priv->qp_num = conn_param->qp_num;
@@ -4039,11 +4065,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	else
 		ret = -ENOSYS;
 	if (ret)
-		goto err;
-
+		goto err_state;
+	mutex_unlock(&id_priv->handler_mutex);
 	return 0;
-err:
+err_state:
 	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
+err_unlock:
+	mutex_unlock(&id_priv->handler_mutex);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_connect);
@@ -4155,17 +4183,33 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
 	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
 }
 
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
-		  const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection.  This must be
+ *   provided if accepting a connection request.  If accepting a connection
+ *   response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request.  It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
-	struct rdma_id_private *id_priv;
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
 	int ret;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
+	lockdep_assert_held(&id_priv->handler_mutex);
 
-	rdma_restrack_set_task(&id_priv->res, caller);
-
-	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+	if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
 		return -EINVAL;
 
 	if (!id->qp && conn_param) {
@@ -4203,10 +4247,10 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 	rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
 	return ret;
 }
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
 
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
-		      const char *caller, struct rdma_ucm_ece *ece)
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		    struct rdma_ucm_ece *ece)
 {
 	struct rdma_id_private *id_priv =
 		container_of(id, struct rdma_id_private, id);
@@ -4214,9 +4258,27 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 	id_priv->ece.vendor_id = ece->vendor_id;
 	id_priv->ece.attr_mod = ece->attr_mod;
 
-	return __rdma_accept(id, conn_param, caller);
+	return rdma_accept(id, conn_param);
 }
-EXPORT_SYMBOL(__rdma_accept_ece);
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
+
+	mutex_lock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
+
+	mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
 
 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
@@ -4299,63 +4361,66 @@ int rdma_disconnect(struct rdma_cm_id *id)
 }
 EXPORT_SYMBOL(rdma_disconnect);
 
-static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+			      struct ib_sa_multicast *multicast,
+			      struct rdma_cm_event *event,
+			      struct cma_multicast *mc)
 {
-	struct rdma_id_private *id_priv;
-	struct cma_multicast *mc = multicast->context;
-	struct rdma_cm_event event = {};
-	int ret = 0;
-
-	id_priv = mc->id_priv;
-	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
-	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
-		goto out;
+	struct rdma_dev_addr *dev_addr;
+	enum ib_gid_type gid_type;
+	struct net_device *ndev;
 
 	if (!status)
 		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
 	else
 		pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
 				     status);
-	mutex_lock(&id_priv->qp_mutex);
-	if (!status && id_priv->id.qp) {
-		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
-					 be16_to_cpu(multicast->rec.mlid));
-		if (status)
-			pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
-					     status);
+
+	event->status = status;
+	event->param.ud.private_data = mc->context;
+	if (status) {
+		event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+		return;
 	}
-	mutex_unlock(&id_priv->qp_mutex);
 
-	event.status = status;
-	event.param.ud.private_data = mc->context;
-	if (!status) {
-		struct rdma_dev_addr *dev_addr =
-			&id_priv->id.route.addr.dev_addr;
-		struct net_device *ndev =
-			dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
-		enum ib_gid_type gid_type =
-			id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
-			rdma_start_port(id_priv->cma_dev->device)];
+	dev_addr = &id_priv->id.route.addr.dev_addr;
+	ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+	gid_type =
+		id_priv->cma_dev
+			->default_gid_type[id_priv->id.port_num -
+					   rdma_start_port(
+						   id_priv->cma_dev->device)];
 
-		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
-		ret = ib_init_ah_from_mcmember(id_priv->id.device,
-					       id_priv->id.port_num,
-					       &multicast->rec,
-					       ndev, gid_type,
-					       &event.param.ud.ah_attr);
-		if (ret)
-			event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+	event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+	if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+				     &multicast->rec, ndev, gid_type,
+				     &event->param.ud.ah_attr)) {
+		event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+		goto out;
+	}
 
-		event.param.ud.qp_num = 0xFFFFFF;
-		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
-		if (ndev)
-			dev_put(ndev);
-	} else
-		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+	event->param.ud.qp_num = 0xFFFFFF;
+	event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
 
+out:
+	if (ndev)
+		dev_put(ndev);
+}
+
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+	struct cma_multicast *mc = multicast->context;
+	struct rdma_id_private *id_priv = mc->id_priv;
+	struct rdma_cm_event event = {};
+	int ret = 0;
+
+	mutex_lock(&id_priv->handler_mutex);
+	if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+	    READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
+		goto out;
+
+	cma_make_mc_event(status, id_priv, multicast, &event, mc);
 	ret = cma_cm_event_handler(id_priv, &event);
-
 	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
 	if (ret) {
 		destroy_id_handler_unlock(id_priv);
@@ -4445,23 +4510,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 			     IB_SA_MCMEMBER_REC_MTU |
 			     IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
-	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
-						id_priv->id.port_num, &rec,
-						comp_mask, GFP_KERNEL,
-						cma_ib_mc_handler, mc);
-	return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
-	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
-	struct cma_multicast *mc = mw->mc;
-	struct ib_sa_multicast *m = mc->multicast.ib;
-
-	mc->multicast.ib->context = mc;
-	cma_ib_mc_handler(0, m);
-	kref_put(&mc->mcref, release_mc);
-	kfree(mw);
+	mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+					 id_priv->id.port_num, &rec, comp_mask,
+					 GFP_KERNEL, cma_ib_mc_handler, mc);
+	return PTR_ERR_OR_ZERO(mc->sa_mc);
 }
 
 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
@@ -4496,52 +4548,47 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 				   struct cma_multicast *mc)
 {
-	struct iboe_mcast_work *work;
+	struct cma_work *work;
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	int err = 0;
 	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
 	struct net_device *ndev = NULL;
+	struct ib_sa_multicast ib;
 	enum ib_gid_type gid_type;
 	bool send_only;
 
 	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
 
-	if (cma_zero_addr((struct sockaddr *)&mc->addr))
+	if (cma_zero_addr(addr))
 		return -EINVAL;
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (!work)
 		return -ENOMEM;
 
-	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
-	if (!mc->multicast.ib) {
-		err = -ENOMEM;
-		goto out1;
-	}
-
 	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
 		   rdma_start_port(id_priv->cma_dev->device)];
-	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
+	cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
 
-	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+	ib.rec.pkey = cpu_to_be16(0xffff);
 	if (id_priv->id.ps == RDMA_PS_UDP)
-		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+		ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
 
 	if (dev_addr->bound_dev_if)
 		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 	if (!ndev) {
 		err = -ENODEV;
-		goto out2;
+		goto err_free;
 	}
-	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
-	mc->multicast.ib->rec.hop_limit = 1;
-	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+	ib.rec.rate = iboe_get_rate(ndev);
+	ib.rec.hop_limit = 1;
+	ib.rec.mtu = iboe_get_mtu(ndev->mtu);
 
 	if (addr->sa_family == AF_INET) {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
-			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+			ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
 			if (!send_only) {
-				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+				err = cma_igmp_send(ndev, &ib.rec.mgid,
 						    true);
 			}
 		}
@@ -4550,24 +4597,22 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 			err = -ENOTSUPP;
 	}
 	dev_put(ndev);
-	if (err || !mc->multicast.ib->rec.mtu) {
+	if (err || !ib.rec.mtu) {
 		if (!err)
 			err = -EINVAL;
-		goto out2;
+		goto err_free;
 	}
 	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
-		    &mc->multicast.ib->rec.port_gid);
+		    &ib.rec.port_gid);
 	work->id = id_priv;
-	work->mc = mc;
-	INIT_WORK(&work->work, iboe_mcast_work_handler);
-	kref_get(&mc->mcref);
+	INIT_WORK(&work->work, cma_work_handler);
+	cma_make_mc_event(0, id_priv, &ib, &work->event, mc);
+	/* Balances with cma_id_put() in cma_work_handler */
+	cma_id_get(id_priv);
 	queue_work(cma_wq, &work->work);
-
 	return 0;
 
-out2:
-	kfree(mc->multicast.ib);
-out1:
+err_free:
 	kfree(work);
 	return err;
 }
@@ -4575,19 +4620,21 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 			u8 join_state, void *context)
 {
-	struct rdma_id_private *id_priv;
+	struct rdma_id_private *id_priv =
+		container_of(id, struct rdma_id_private, id);
 	struct cma_multicast *mc;
 	int ret;
 
-	if (!id->device)
+	/* Not supported for kernel QPs */
+	if (WARN_ON(id->qp))
 		return -EINVAL;
 
-	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
-	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+	/* ULP is calling this wrong. */
+	if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+			    READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
 		return -EINVAL;
 
-	mc = kmalloc(sizeof *mc, GFP_KERNEL);
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 	if (!mc)
 		return -ENOMEM;
 
@@ -4597,7 +4644,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	mc->join_state = join_state;
 
 	if (rdma_protocol_roce(id->device, id->port_num)) {
-		kref_init(&mc->mcref);
 		ret = cma_iboe_join_multicast(id_priv, mc);
 		if (ret)
 			goto out_err;
@@ -4629,25 +4675,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 	id_priv = container_of(id, struct rdma_id_private, id);
 	spin_lock_irq(&id_priv->lock);
 	list_for_each_entry(mc, &id_priv->mc_list, list) {
-		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
-			list_del(&mc->list);
-			spin_unlock_irq(&id_priv->lock);
+		if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+			continue;
+		list_del(&mc->list);
+		spin_unlock_irq(&id_priv->lock);
 
-			if (id->qp)
-				ib_detach_mcast(id->qp,
-						&mc->multicast.ib->rec.mgid,
-						be16_to_cpu(mc->multicast.ib->rec.mlid));
-
-			BUG_ON(id_priv->cma_dev->device != id->device);
-
-			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
-				ib_sa_free_multicast(mc->multicast.ib);
-				kfree(mc);
-			} else if (rdma_protocol_roce(id->device, id->port_num)) {
-				cma_leave_roce_mc_group(id_priv, mc);
-			}
-			return;
-		}
+		WARN_ON(id_priv->cma_dev->device != id->device);
+		destroy_mc(id_priv, mc);
+		return;
 	}
 	spin_unlock_irq(&id_priv->lock);
 }
@@ -4656,7 +4691,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
 static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr;
-	struct cma_ndev_work *work;
+	struct cma_work *work;
 
 	dev_addr = &id_priv->id.route.addr.dev_addr;
 
@@ -4669,7 +4704,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
 		if (!work)
 			return -ENOMEM;
 
-		INIT_WORK(&work->work, cma_ndev_work_handler);
+		INIT_WORK(&work->work, cma_work_handler);
 		work->id = id_priv;
 		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
 		cma_id_get(id_priv);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 3c1e2ca..7ec4af2 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -123,16 +123,17 @@ static ssize_t default_roce_mode_store(struct config_item *item,
 {
 	struct cma_device *cma_dev;
 	struct cma_dev_port_group *group;
-	int gid_type = ib_cache_gid_parse_type_str(buf);
+	int gid_type;
 	ssize_t ret;
 
-	if (gid_type < 0)
-		return -EINVAL;
-
 	ret = cma_configfs_params_get(item, &cma_dev, &group);
 	if (ret)
 		return ret;
 
+	gid_type = ib_cache_gid_parse_type_str(buf);
+	if (gid_type < 0)
+		return -EINVAL;
+
 	ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
 
 	cma_configfs_params_put(cma_dev);
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index e6e20c3..e452642 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -17,46 +17,6 @@
 #include <linux/tracepoint.h>
 #include <trace/events/rdma.h>
 
-/*
- * enum ib_cm_event_type, from include/rdma/ib_cm.h
- */
-#define IB_CM_EVENT_LIST			\
-	ib_cm_event(REQ_ERROR)			\
-	ib_cm_event(REQ_RECEIVED)		\
-	ib_cm_event(REP_ERROR)			\
-	ib_cm_event(REP_RECEIVED)		\
-	ib_cm_event(RTU_RECEIVED)		\
-	ib_cm_event(USER_ESTABLISHED)		\
-	ib_cm_event(DREQ_ERROR)			\
-	ib_cm_event(DREQ_RECEIVED)		\
-	ib_cm_event(DREP_RECEIVED)		\
-	ib_cm_event(TIMEWAIT_EXIT)		\
-	ib_cm_event(MRA_RECEIVED)		\
-	ib_cm_event(REJ_RECEIVED)		\
-	ib_cm_event(LAP_ERROR)			\
-	ib_cm_event(LAP_RECEIVED)		\
-	ib_cm_event(APR_RECEIVED)		\
-	ib_cm_event(SIDR_REQ_ERROR)		\
-	ib_cm_event(SIDR_REQ_RECEIVED)		\
-	ib_cm_event_end(SIDR_REP_RECEIVED)
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x)		TRACE_DEFINE_ENUM(IB_CM_##x);
-#define ib_cm_event_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
-
-IB_CM_EVENT_LIST
-
-#undef ib_cm_event
-#undef ib_cm_event_end
-
-#define ib_cm_event(x)		{ IB_CM_##x, #x },
-#define ib_cm_event_end(x)	{ IB_CM_##x, #x }
-
-#define rdma_show_ib_cm_event(x) \
-		__print_symbolic(x, IB_CM_EVENT_LIST)
-
 
 DECLARE_EVENT_CLASS(cma_fsm_class,
 	TP_PROTO(
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index a1e6a67..e84b0fe 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,6 +44,7 @@
 #include <rdma/ib_mad.h>
 #include <rdma/restrack.h>
 #include "mad_priv.h"
+#include "restrack.h"
 
 /* Total number of ports combined across all struct ib_devices's */
 #define RDMA_MAX_PORTS 8192
@@ -352,6 +353,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
 	INIT_LIST_HEAD(&qp->rdma_mrs);
 	INIT_LIST_HEAD(&qp->sig_mrs);
 
+	rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
 	/*
 	 * We don't track XRC QPs for now, because they don't have PD
 	 * and more importantly they are created internaly by driver,
@@ -359,14 +361,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
 	 */
 	is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
 	if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
-		qp->res.type = RDMA_RESTRACK_QP;
-		if (uobj)
-			rdma_restrack_uadd(&qp->res);
-		else
-			rdma_restrack_kadd(&qp->res);
-	} else
-		qp->res.valid = false;
-
+		rdma_restrack_parent_name(&qp->res, &pd->res);
+		rdma_restrack_add(&qp->res);
+	}
 	return qp;
 }
 
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 6361668..e4ff0d3 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -80,8 +80,9 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
 
 	counter->device    = dev;
 	counter->port      = port;
-	counter->res.type  = RDMA_RESTRACK_COUNTER;
-	counter->stats     = dev->ops.counter_alloc_stats(counter);
+
+	rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+	counter->stats = dev->ops.counter_alloc_stats(counter);
 	if (!counter->stats)
 		goto err_stats;
 
@@ -107,6 +108,7 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
 	mutex_unlock(&port_counter->lock);
 	kfree(counter->stats);
 err_stats:
+	rdma_restrack_put(&counter->res);
 	kfree(counter);
 	return NULL;
 }
@@ -248,13 +250,8 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
 static void rdma_counter_res_add(struct rdma_counter *counter,
 				 struct ib_qp *qp)
 {
-	if (rdma_is_kernel_res(&qp->res)) {
-		rdma_restrack_set_task(&counter->res, qp->res.kern_name);
-		rdma_restrack_kadd(&counter->res);
-	} else {
-		rdma_restrack_attach_task(&counter->res, qp->res.task);
-		rdma_restrack_uadd(&counter->res);
-	}
+	rdma_restrack_parent_name(&counter->res, &qp->res);
+	rdma_restrack_add(&counter->res);
 }
 
 static void counter_release(struct kref *kref)
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a92fc3f..12ebacf 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -197,24 +197,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 }
 
 /**
- * __ib_alloc_cq_user - allocate a completion queue
+ * __ib_alloc_cq        allocate a completion queue
  * @dev:		device to allocate the CQ for
  * @private:		driver private data, accessible from cq->cq_context
  * @nr_cqe:		number of CQEs to allocate
  * @comp_vector:	HCA completion vectors for this CQ
  * @poll_ctx:		context to poll the CQ from.
  * @caller:		module owner name.
- * @udata:		Valid user data or NULL for kernel object
  *
  * This is the proper interface to allocate a CQ for in-kernel users. A
  * CQ allocated with this interface will automatically be polled from the
  * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
  * to use this CQ abstraction.
  */
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
-				 int nr_cqe, int comp_vector,
-				 enum ib_poll_context poll_ctx,
-				 const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+			    int comp_vector, enum ib_poll_context poll_ctx,
+			    const char *caller)
 {
 	struct ib_cq_init_attr cq_attr = {
 		.cqe		= nr_cqe,
@@ -237,15 +235,13 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
 	if (!cq->wc)
 		goto out_free_cq;
 
-	cq->res.type = RDMA_RESTRACK_CQ;
-	rdma_restrack_set_task(&cq->res, caller);
+	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+	rdma_restrack_set_name(&cq->res, caller);
 
 	ret = dev->ops.create_cq(cq, &cq_attr, NULL);
 	if (ret)
 		goto out_free_wc;
 
-	rdma_restrack_kadd(&cq->res);
-
 	rdma_dim_init(cq);
 
 	switch (cq->poll_ctx) {
@@ -271,21 +267,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
 		goto out_destroy_cq;
 	}
 
+	rdma_restrack_add(&cq->res);
 	trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
 	return cq;
 
 out_destroy_cq:
 	rdma_dim_destroy(cq);
-	rdma_restrack_del(&cq->res);
-	cq->device->ops.destroy_cq(cq, udata);
+	cq->device->ops.destroy_cq(cq, NULL);
 out_free_wc:
+	rdma_restrack_put(&cq->res);
 	kfree(cq->wc);
 out_free_cq:
 	kfree(cq);
 	trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
 	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
 
 /**
  * __ib_alloc_cq_any - allocate a completion queue
@@ -310,18 +307,19 @@ struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
 			atomic_inc_return(&counter) %
 			min_t(int, dev->num_comp_vectors, num_online_cpus());
 
-	return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
-				  caller, NULL);
+	return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+			     caller);
 }
 EXPORT_SYMBOL(__ib_alloc_cq_any);
 
 /**
- * ib_free_cq_user - free a completion queue
+ * ib_free_cq - free a completion queue
  * @cq:		completion queue to free.
- * @udata:	User data or NULL for kernel object
  */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
 {
+	int ret;
+
 	if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
 		return;
 	if (WARN_ON_ONCE(cq->cqe_used))
@@ -343,12 +341,13 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
 
 	rdma_dim_destroy(cq);
 	trace_cq_free(cq);
+	ret = cq->device->ops.destroy_cq(cq, NULL);
+	WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
 	rdma_restrack_del(&cq->res);
-	cq->device->ops.destroy_cq(cq, udata);
 	kfree(cq->wc);
 	kfree(cq);
 }
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_free_cq);
 
 void ib_cq_pool_init(struct ib_device *dev)
 {
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 23ee65a..a3b1fc8 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1177,58 +1177,23 @@ static int assign_name(struct ib_device *device, const char *name)
 	return ret;
 }
 
-static void setup_dma_device(struct ib_device *device)
+static void setup_dma_device(struct ib_device *device,
+			     struct device *dma_device)
 {
-	struct device *parent = device->dev.parent;
-
-	WARN_ON_ONCE(device->dma_device);
-
-#ifdef CONFIG_DMA_OPS
-	if (device->dev.dma_ops) {
-		/*
-		 * The caller provided custom DMA operations. Copy the
-		 * DMA-related fields that are used by e.g. dma_alloc_coherent()
-		 * into device->dev.
-		 */
-		device->dma_device = &device->dev;
-		if (!device->dev.dma_mask) {
-			if (parent)
-				device->dev.dma_mask = parent->dma_mask;
-			else
-				WARN_ON_ONCE(true);
-		}
-		if (!device->dev.coherent_dma_mask) {
-			if (parent)
-				device->dev.coherent_dma_mask =
-					parent->coherent_dma_mask;
-			else
-				WARN_ON_ONCE(true);
-		}
-	} else
-#endif /* CONFIG_DMA_OPS */
-	{
-		/*
-		 * The caller did not provide custom DMA operations. Use the
-		 * DMA mapping operations of the parent device.
-		 */
-		WARN_ON_ONCE(!parent);
-		device->dma_device = parent;
+	/*
+	 * If the caller does not provide a DMA capable device then the IB
+	 * device will be used. In this case the caller should fully setup the
+	 * ibdev for DMA. This usually means using dma_virt_ops.
+	 */
+#ifdef CONFIG_DMA_VIRT_OPS
+	if (!dma_device) {
+		device->dev.dma_ops = &dma_virt_ops;
+		dma_device = &device->dev;
 	}
-
-	if (!device->dev.dma_parms) {
-		if (parent) {
-			/*
-			 * The caller did not provide DMA parameters, so
-			 * 'parent' probably represents a PCI device. The PCI
-			 * core sets the maximum segment size to 64
-			 * KB. Increase this parameter to 2 GB.
-			 */
-			device->dev.dma_parms = parent->dma_parms;
-			dma_set_max_seg_size(device->dma_device, SZ_2G);
-		} else {
-			WARN_ON_ONCE(true);
-		}
-	}
+#endif
+	WARN_ON(!dma_device);
+	device->dma_device = dma_device;
+	WARN_ON(!device->dma_device->dma_parms);
 }
 
 /*
@@ -1241,7 +1206,6 @@ static int setup_device(struct ib_device *device)
 	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 	int ret;
 
-	setup_dma_device(device);
 	ib_device_check_mandatory(device);
 
 	ret = setup_port_data(device);
@@ -1354,7 +1318,10 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
  * ib_register_device - Register an IB device with IB core
  * @device: Device to register
  * @name: unique string device name. This may include a '%' which will
- * cause a unique index to be added to the passed device name.
+ * 	  cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ *	        device will be used. In this case the caller should fully
+ *		setup the ibdev for DMA. This usually means using dma_virt_ops.
  *
  * Low-level drivers use ib_register_device() to register their
  * devices with the IB core.  All registered clients will receive a
@@ -1365,7 +1332,8 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
  * asynchronously then the device pointer may become freed as soon as this
  * function returns.
  */
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+		       struct device *dma_device)
 {
 	int ret;
 
@@ -1373,6 +1341,7 @@ int ib_register_device(struct ib_device *device, const char *name)
 	if (ret)
 		return ret;
 
+	setup_dma_device(device, dma_device);
 	ret = setup_device(device);
 	if (ret)
 		return ret;
@@ -2697,7 +2666,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_OBJ_SIZE(dev_ops, ib_ah);
 	SET_OBJ_SIZE(dev_ops, ib_counters);
 	SET_OBJ_SIZE(dev_ops, ib_cq);
+	SET_OBJ_SIZE(dev_ops, ib_mw);
 	SET_OBJ_SIZE(dev_ops, ib_pd);
+	SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
 	SET_OBJ_SIZE(dev_ops, ib_srq);
 	SET_OBJ_SIZE(dev_ops, ib_ucontext);
 	SET_OBJ_SIZE(dev_ops, ib_xrcd);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 6d3ed7c..ffe11b0 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -130,17 +130,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	lockdep_assert_held(&ufile->hw_destroy_rwsem);
 	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
 
-	if (reason == RDMA_REMOVE_ABORT_HWOBJ) {
-		reason = RDMA_REMOVE_ABORT;
-		ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
-								attrs);
-		/*
-		 * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see
-		 * ib_is_destroy_retryable, cleanup_retryable == false here.
-		 */
-		WARN_ON(ret);
-	}
-
 	if (reason == RDMA_REMOVE_ABORT) {
 		WARN_ON(!list_empty(&uobj->list));
 		WARN_ON(!uobj->context);
@@ -674,11 +663,22 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
 			      bool hw_obj_valid)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
+	int ret;
 
-	uverbs_destroy_uobject(uobj,
-			       hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ :
-					      RDMA_REMOVE_ABORT,
-			       attrs);
+	if (hw_obj_valid) {
+		ret = uobj->uapi_object->type_class->destroy_hw(
+			uobj, RDMA_REMOVE_ABORT, attrs);
+		/*
+		 * If the driver couldn't destroy the object then go ahead and
+		 * commit it. Leaking objects that can't be destroyed is only
+		 * done during FD close after the driver has a few more tries to
+		 * destroy it.
+		 */
+		if (WARN_ON(ret))
+			return rdma_alloc_commit_uobject(uobj, attrs);
+	}
+
+	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
 
 	/* Matches the down_read in rdma_alloc_begin_uobject */
 	up_read(&ufile->hw_destroy_rwsem);
@@ -889,14 +889,14 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
 	if (!ufile->ucontext)
 		goto done;
 
-	ufile->ucontext->closing = true;
 	ufile->ucontext->cleanup_retryable = true;
 	while (!list_empty(&ufile->uobjects))
 		if (__uverbs_cleanup_ufile(ufile, reason)) {
 			/*
 			 * No entry was cleaned-up successfully during this
-			 * iteration
+			 * iteration. It is a driver bug to fail destruction.
 			 */
+			WARN_ON(!list_empty(&ufile->uobjects));
 			break;
 		}
 
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 62fbb0a..4aeeaae 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -123,32 +123,6 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
 }
 EXPORT_SYMBOL(rdma_restrack_count);
 
-static void set_kern_name(struct rdma_restrack_entry *res)
-{
-	struct ib_pd *pd;
-
-	switch (res->type) {
-	case RDMA_RESTRACK_QP:
-		pd = container_of(res, struct ib_qp, res)->pd;
-		if (!pd) {
-			WARN_ONCE(true, "XRC QPs are not supported\n");
-			/* Survive, despite the programmer's error */
-			res->kern_name = " ";
-		}
-		break;
-	case RDMA_RESTRACK_MR:
-		pd = container_of(res, struct ib_mr, res)->pd;
-		break;
-	default:
-		/* Other types set kern_name directly */
-		pd = NULL;
-		break;
-	}
-
-	if (pd)
-		res->kern_name = pd->res.kern_name;
-}
-
 static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 {
 	switch (res->type) {
@@ -173,36 +147,77 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 	}
 }
 
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
-			    const char *caller)
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res:  resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+				      struct task_struct *task)
+{
+	if (WARN_ON_ONCE(!task))
+		return;
+
+	if (res->task)
+		put_task_struct(res->task);
+	get_task_struct(task);
+	res->task = task;
+	res->user = true;
+}
+
+/**
+ * rdma_restrack_set_name() - set the task for this resource
+ * @res:  resource entry
+ * @caller: kernel name, the current task will be used if the caller is NULL.
+ */
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
 {
 	if (caller) {
 		res->kern_name = caller;
 		return;
 	}
 
-	if (res->task)
-		put_task_struct(res->task);
-	get_task_struct(current);
-	res->task = current;
+	rdma_restrack_attach_task(res, current);
 }
-EXPORT_SYMBOL(rdma_restrack_set_task);
+EXPORT_SYMBOL(rdma_restrack_set_name);
 
 /**
- * rdma_restrack_attach_task() - attach the task onto this resource
- * @res:  resource entry
- * @task: the task to attach, the current task will be used if it is NULL.
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
  */
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
-			       struct task_struct *task)
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+			       const struct rdma_restrack_entry *parent)
 {
-	if (res->task)
-		put_task_struct(res->task);
-	get_task_struct(task);
-	res->task = task;
+	if (rdma_is_kernel_res(parent))
+		dst->kern_name = parent->kern_name;
+	else
+		rdma_restrack_attach_task(dst, parent->task);
 }
+EXPORT_SYMBOL(rdma_restrack_parent_name);
 
-static void rdma_restrack_add(struct rdma_restrack_entry *res)
+/**
+ * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface
+ * to release memory in fully automatic way.
+ * @res - Entry to initialize
+ * @type - REstrack type
+ */
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+		       enum rdma_restrack_type type)
+{
+	kref_init(&res->kref);
+	init_completion(&res->comp);
+	res->type = type;
+}
+EXPORT_SYMBOL(rdma_restrack_new);
+
+/**
+ * rdma_restrack_add() - add object to the reource tracking database
+ * @res:  resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
 	struct rdma_restrack_root *rt;
@@ -213,8 +228,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
 
 	rt = &dev->res[res->type];
 
-	kref_init(&res->kref);
-	init_completion(&res->comp);
 	if (res->type == RDMA_RESTRACK_QP) {
 		/* Special case to ensure that LQPN points to right QP */
 		struct ib_qp *qp = container_of(res, struct ib_qp, res);
@@ -236,38 +249,7 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
 	if (!ret)
 		res->valid = true;
 }
-
-/**
- * rdma_restrack_kadd() - add kernel object to the reource tracking database
- * @res:  resource entry
- */
-void rdma_restrack_kadd(struct rdma_restrack_entry *res)
-{
-	res->task = NULL;
-	set_kern_name(res);
-	res->user = false;
-	rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_kadd);
-
-/**
- * rdma_restrack_uadd() - add user object to the reource tracking database
- * @res:  resource entry
- */
-void rdma_restrack_uadd(struct rdma_restrack_entry *res)
-{
-	if ((res->type != RDMA_RESTRACK_CM_ID) &&
-	    (res->type != RDMA_RESTRACK_COUNTER))
-		res->task = NULL;
-
-	if (!res->task)
-		rdma_restrack_set_task(res, NULL);
-	res->kern_name = NULL;
-
-	res->user = true;
-	rdma_restrack_add(res);
-}
-EXPORT_SYMBOL(rdma_restrack_uadd);
+EXPORT_SYMBOL(rdma_restrack_add);
 
 int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
 {
@@ -305,6 +287,10 @@ static void restrack_release(struct kref *kref)
 	struct rdma_restrack_entry *res;
 
 	res = container_of(kref, struct rdma_restrack_entry, kref);
+	if (res->task) {
+		put_task_struct(res->task);
+		res->task = NULL;
+	}
 	complete(&res->comp);
 }
 
@@ -314,14 +300,23 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
 }
 EXPORT_SYMBOL(rdma_restrack_put);
 
+/**
+ * rdma_restrack_del() - delete object from the reource tracking database
+ * @res:  resource entry
+ */
 void rdma_restrack_del(struct rdma_restrack_entry *res)
 {
 	struct rdma_restrack_entry *old;
 	struct rdma_restrack_root *rt;
 	struct ib_device *dev;
 
-	if (!res->valid)
-		goto out;
+	if (!res->valid) {
+		if (res->task) {
+			put_task_struct(res->task);
+			res->task = NULL;
+		}
+		return;
+	}
 
 	dev = res_to_dev(res);
 	if (WARN_ON(!dev))
@@ -330,16 +325,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	rt = &dev->res[res->type];
 
 	old = xa_erase(&rt->xa, res->id);
+	if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
+		return;
 	WARN_ON(old != res);
 	res->valid = false;
 
 	rdma_restrack_put(res);
 	wait_for_completion(&res->comp);
-
-out:
-	if (res->task) {
-		put_task_struct(res->task);
-		res->task = NULL;
-	}
 }
 EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index d084e5f8..6a04fc4 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -25,6 +25,12 @@ struct rdma_restrack_root {
 
 int rdma_restrack_init(struct ib_device *dev);
 void rdma_restrack_clean(struct ib_device *dev);
-void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
-			       struct task_struct *task);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+		       enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+			    const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+			       const struct rdma_restrack_entry *parent);
 #endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c11e505..914cdde 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -59,7 +59,7 @@ struct ib_port {
 	struct gid_attr_group *gid_attr_group;
 	struct attribute_group gid_group;
 	struct attribute_group *pkey_group;
-	struct attribute_group *pma_table;
+	const struct attribute_group *pma_table;
 	struct attribute_group *hw_stats_ag;
 	struct rdma_hw_stats   *hw_stats;
 	u8                     port_num;
@@ -387,7 +387,8 @@ static ssize_t _show_port_gid_attr(
 
 	gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
 	if (IS_ERR(gid_attr))
-		return PTR_ERR(gid_attr);
+		/* -EINVAL is returned for user space compatibility reasons. */
+		return -EINVAL;
 
 	ret = print(gid_attr, buf);
 	rdma_put_gid_attr(gid_attr);
@@ -653,17 +654,17 @@ static struct attribute *pma_attrs_noietf[] = {
 	NULL
 };
 
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
 	.name  = "counters",
 	.attrs  = pma_attrs
 };
 
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
 	.name  = "counters",
 	.attrs  = pma_attrs_ext
 };
 
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
 	.name  = "counters",
 	.attrs  = pma_attrs_noietf
 };
@@ -778,8 +779,8 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
  * Figure out which counter table to use depending on
  * the device capabilities.
  */
-static struct attribute_group *get_counter_table(struct ib_device *dev,
-						 int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+						       int port_num)
 {
 	struct ib_class_port_info cpi;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 1d184ea..ffe2563 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -80,7 +80,6 @@ struct ucma_file {
 	struct list_head	ctx_list;
 	struct list_head	event_list;
 	wait_queue_head_t	poll_wait;
-	struct workqueue_struct	*close_wq;
 };
 
 struct ucma_context {
@@ -88,7 +87,7 @@ struct ucma_context {
 	struct completion	comp;
 	refcount_t		ref;
 	int			events_reported;
-	int			backlog;
+	atomic_t		backlog;
 
 	struct ucma_file	*file;
 	struct rdma_cm_id	*cm_id;
@@ -96,11 +95,6 @@ struct ucma_context {
 	u64			uid;
 
 	struct list_head	list;
-	struct list_head	mc_list;
-	/* mark that device is in process of destroying the internal HW
-	 * resources, protected by the ctx_table lock
-	 */
-	int			closing;
 	/* sync between removal event and id destroy, protected by file mut */
 	int			destroying;
 	struct work_struct	close_work;
@@ -113,23 +107,22 @@ struct ucma_multicast {
 
 	u64			uid;
 	u8			join_state;
-	struct list_head	list;
 	struct sockaddr_storage	addr;
 };
 
 struct ucma_event {
 	struct ucma_context	*ctx;
+	struct ucma_context	*conn_req_ctx;
 	struct ucma_multicast	*mc;
 	struct list_head	list;
-	struct rdma_cm_id	*cm_id;
 	struct rdma_ucm_event_resp resp;
-	struct work_struct	close_work;
 };
 
 static DEFINE_XARRAY_ALLOC(ctx_table);
 static DEFINE_XARRAY_ALLOC(multicast_table);
 
 static const struct file_operations ucma_fops;
+static int __destroy_id(struct ucma_context *ctx);
 
 static inline struct ucma_context *_ucma_find_context(int id,
 						      struct ucma_file *file)
@@ -139,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
 	ctx = xa_load(&ctx_table, id);
 	if (!ctx)
 		ctx = ERR_PTR(-ENOENT);
-	else if (ctx->file != file || !ctx->cm_id)
+	else if (ctx->file != file)
 		ctx = ERR_PTR(-EINVAL);
 	return ctx;
 }
@@ -150,12 +143,9 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 
 	xa_lock(&ctx_table);
 	ctx = _ucma_find_context(id, file);
-	if (!IS_ERR(ctx)) {
-		if (ctx->closing)
-			ctx = ERR_PTR(-EIO);
-		else
-			refcount_inc(&ctx->ref);
-	}
+	if (!IS_ERR(ctx))
+		if (!refcount_inc_not_zero(&ctx->ref))
+			ctx = ERR_PTR(-ENXIO);
 	xa_unlock(&ctx_table);
 	return ctx;
 }
@@ -183,14 +173,6 @@ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
 	return ctx;
 }
 
-static void ucma_close_event_id(struct work_struct *work)
-{
-	struct ucma_event *uevent_close =  container_of(work, struct ucma_event, close_work);
-
-	rdma_destroy_id(uevent_close->cm_id);
-	kfree(uevent_close);
-}
-
 static void ucma_close_id(struct work_struct *work)
 {
 	struct ucma_context *ctx =  container_of(work, struct ucma_context, close_work);
@@ -203,6 +185,14 @@ static void ucma_close_id(struct work_struct *work)
 	wait_for_completion(&ctx->comp);
 	/* No new events will be generated after destroying the id. */
 	rdma_destroy_id(ctx->cm_id);
+
+	/*
+	 * At this point ctx->ref is zero so the only place the ctx can be is in
+	 * a uevent or in __destroy_id(). Since the former doesn't touch
+	 * ctx->cm_id and the latter sync cancels this, there is no races with
+	 * this store.
+	 */
+	ctx->cm_id = NULL;
 }
 
 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -216,39 +206,23 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 	INIT_WORK(&ctx->close_work, ucma_close_id);
 	refcount_set(&ctx->ref, 1);
 	init_completion(&ctx->comp);
-	INIT_LIST_HEAD(&ctx->mc_list);
+	/* So list_del() will work if we don't do ucma_finish_ctx() */
+	INIT_LIST_HEAD(&ctx->list);
 	ctx->file = file;
 	mutex_init(&ctx->mutex);
 
-	if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
-		goto error;
-
-	list_add_tail(&ctx->list, &file->ctx_list);
+	if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+		kfree(ctx);
+		return NULL;
+	}
 	return ctx;
-
-error:
-	kfree(ctx);
-	return NULL;
 }
 
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_finish_ctx(struct ucma_context *ctx)
 {
-	struct ucma_multicast *mc;
-
-	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
-	if (!mc)
-		return NULL;
-
-	mc->ctx = ctx;
-	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
-		goto error;
-
-	list_add_tail(&mc->list, &ctx->mc_list);
-	return mc;
-
-error:
-	kfree(mc);
-	return NULL;
+	lockdep_assert_held(&ctx->file->mut);
+	list_add_tail(&ctx->list, &ctx->file->ctx_list);
+	xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
 }
 
 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
@@ -280,10 +254,15 @@ static void ucma_copy_ud_event(struct ib_device *device,
 	dst->qkey = src->qkey;
 }
 
-static void ucma_set_event_context(struct ucma_context *ctx,
-				   struct rdma_cm_event *event,
-				   struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+					     struct rdma_cm_event *event)
 {
+	struct ucma_event *uevent;
+
+	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+	if (!uevent)
+		return NULL;
+
 	uevent->ctx = ctx;
 	switch (event->event) {
 	case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -298,64 +277,10 @@ static void ucma_set_event_context(struct ucma_context *ctx,
 		uevent->resp.id = ctx->id;
 		break;
 	}
-}
-
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
-{
-	struct ucma_context *ctx = cm_id->context;
-	struct ucma_event *con_req_eve;
-	int event_found = 0;
-
-	if (ctx->destroying)
-		return;
-
-	/* only if context is pointing to cm_id that it owns it and can be
-	 * queued to be closed, otherwise that cm_id is an inflight one that
-	 * is part of that context event list pending to be detached and
-	 * reattached to its new context as part of ucma_get_event,
-	 * handled separately below.
-	 */
-	if (ctx->cm_id == cm_id) {
-		xa_lock(&ctx_table);
-		ctx->closing = 1;
-		xa_unlock(&ctx_table);
-		queue_work(ctx->file->close_wq, &ctx->close_work);
-		return;
-	}
-
-	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
-		if (con_req_eve->cm_id == cm_id &&
-		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-			list_del(&con_req_eve->list);
-			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
-			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
-			event_found = 1;
-			break;
-		}
-	}
-	if (!event_found)
-		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
-}
-
-static int ucma_event_handler(struct rdma_cm_id *cm_id,
-			      struct rdma_cm_event *event)
-{
-	struct ucma_event *uevent;
-	struct ucma_context *ctx = cm_id->context;
-	int ret = 0;
-
-	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
-	if (!uevent)
-		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
-
-	mutex_lock(&ctx->file->mut);
-	uevent->cm_id = cm_id;
-	ucma_set_event_context(ctx, event, uevent);
 	uevent->resp.event = event->event;
 	uevent->resp.status = event->status;
-	if (cm_id->qp_type == IB_QPT_UD)
-		ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
+	if (ctx->cm_id->qp_type == IB_QPT_UD)
+		ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
 				   &event->param.ud);
 	else
 		ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -363,46 +288,84 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 
 	uevent->resp.ece.vendor_id = event->ece.vendor_id;
 	uevent->resp.ece.attr_mod = event->ece.attr_mod;
+	return uevent;
+}
 
-	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-		if (!ctx->backlog) {
-			ret = -ENOMEM;
-			kfree(uevent);
-			goto out;
-		}
-		ctx->backlog--;
-	} else if (!ctx->uid || ctx->cm_id != cm_id) {
-		/*
-		 * We ignore events for new connections until userspace has set
-		 * their context.  This can only happen if an error occurs on a
-		 * new connection before the user accepts it.  This is okay,
-		 * since the accept will just fail later. However, we do need
-		 * to release the underlying HW resources in case of a device
-		 * removal event.
-		 */
-		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
-			ucma_removal_event_handler(cm_id);
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+				      struct rdma_cm_event *event)
+{
+	struct ucma_context *listen_ctx = cm_id->context;
+	struct ucma_context *ctx;
+	struct ucma_event *uevent;
 
-		kfree(uevent);
-		goto out;
+	if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+		return -ENOMEM;
+	ctx = ucma_alloc_ctx(listen_ctx->file);
+	if (!ctx)
+		goto err_backlog;
+	ctx->cm_id = cm_id;
+
+	uevent = ucma_create_uevent(listen_ctx, event);
+	if (!uevent)
+		goto err_alloc;
+	uevent->conn_req_ctx = ctx;
+	uevent->resp.id = ctx->id;
+
+	ctx->cm_id->context = ctx;
+
+	mutex_lock(&ctx->file->mut);
+	ucma_finish_ctx(ctx);
+	list_add_tail(&uevent->list, &ctx->file->event_list);
+	mutex_unlock(&ctx->file->mut);
+	wake_up_interruptible(&ctx->file->poll_wait);
+	return 0;
+
+err_alloc:
+	xa_erase(&ctx_table, ctx->id);
+	kfree(ctx);
+err_backlog:
+	atomic_inc(&listen_ctx->backlog);
+	/* Returning error causes the new ID to be destroyed */
+	return -ENOMEM;
+}
+
+static int ucma_event_handler(struct rdma_cm_id *cm_id,
+			      struct rdma_cm_event *event)
+{
+	struct ucma_event *uevent;
+	struct ucma_context *ctx = cm_id->context;
+
+	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+		return ucma_connect_event_handler(cm_id, event);
+
+	/*
+	 * We ignore events for new connections until userspace has set their
+	 * context.  This can only happen if an error occurs on a new connection
+	 * before the user accepts it.  This is okay, since the accept will just
+	 * fail later. However, we do need to release the underlying HW
+	 * resources in case of a device removal event.
+	 */
+	if (ctx->uid) {
+		uevent = ucma_create_uevent(ctx, event);
+		if (!uevent)
+			return 0;
+
+		mutex_lock(&ctx->file->mut);
+		list_add_tail(&uevent->list, &ctx->file->event_list);
+		mutex_unlock(&ctx->file->mut);
+		wake_up_interruptible(&ctx->file->poll_wait);
 	}
 
-	list_add_tail(&uevent->list, &ctx->file->event_list);
-	wake_up_interruptible(&ctx->file->poll_wait);
-	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
-		ucma_removal_event_handler(cm_id);
-out:
-	mutex_unlock(&ctx->file->mut);
-	return ret;
+	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
+		queue_work(system_unbound_wq, &ctx->close_work);
+	return 0;
 }
 
 static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 			      int in_len, int out_len)
 {
-	struct ucma_context *ctx;
 	struct rdma_ucm_get_event cmd;
 	struct ucma_event *uevent;
-	int ret = 0;
 
 	/*
 	 * Old 32 bit user space does not send the 4 byte padding in the
@@ -429,35 +392,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 		mutex_lock(&file->mut);
 	}
 
-	uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
-	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
-		ctx = ucma_alloc_ctx(file);
-		if (!ctx) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		uevent->ctx->backlog++;
-		ctx->cm_id = uevent->cm_id;
-		ctx->cm_id->context = ctx;
-		uevent->resp.id = ctx->id;
-	}
+	uevent = list_first_entry(&file->event_list, struct ucma_event, list);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &uevent->resp,
 			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
-		ret = -EFAULT;
-		goto done;
+		mutex_unlock(&file->mut);
+		return -EFAULT;
 	}
 
 	list_del(&uevent->list);
 	uevent->ctx->events_reported++;
 	if (uevent->mc)
 		uevent->mc->events_reported++;
-	kfree(uevent);
-done:
+	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+		atomic_inc(&uevent->ctx->backlog);
 	mutex_unlock(&file->mut);
-	return ret;
+
+	kfree(uevent);
+	return 0;
 }
 
 static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -498,58 +451,60 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 	if (ret)
 		return ret;
 
-	mutex_lock(&file->mut);
 	ctx = ucma_alloc_ctx(file);
-	mutex_unlock(&file->mut);
 	if (!ctx)
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	cm_id = __rdma_create_id(current->nsproxy->net_ns,
-				 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+	cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
 	if (IS_ERR(cm_id)) {
 		ret = PTR_ERR(cm_id);
 		goto err1;
 	}
+	ctx->cm_id = cm_id;
 
 	resp.id = ctx->id;
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp))) {
-		ret = -EFAULT;
-		goto err2;
+		xa_erase(&ctx_table, ctx->id);
+		__destroy_id(ctx);
+		return -EFAULT;
 	}
 
-	ctx->cm_id = cm_id;
+	mutex_lock(&file->mut);
+	ucma_finish_ctx(ctx);
+	mutex_unlock(&file->mut);
 	return 0;
 
-err2:
-	rdma_destroy_id(cm_id);
 err1:
 	xa_erase(&ctx_table, ctx->id);
-	mutex_lock(&file->mut);
-	list_del(&ctx->list);
-	mutex_unlock(&file->mut);
 	kfree(ctx);
 	return ret;
 }
 
 static void ucma_cleanup_multicast(struct ucma_context *ctx)
 {
-	struct ucma_multicast *mc, *tmp;
+	struct ucma_multicast *mc;
+	unsigned long index;
 
-	mutex_lock(&ctx->file->mut);
-	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
-		list_del(&mc->list);
-		xa_erase(&multicast_table, mc->id);
+	xa_for_each(&multicast_table, index, mc) {
+		if (mc->ctx != ctx)
+			continue;
+		/*
+		 * At this point mc->ctx->ref is 0 so the mc cannot leave the
+		 * lock on the reader and this is enough serialization
+		 */
+		xa_erase(&multicast_table, index);
 		kfree(mc);
 	}
-	mutex_unlock(&ctx->file->mut);
 }
 
 static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 {
 	struct ucma_event *uevent, *tmp;
 
+	rdma_lock_handler(mc->ctx->cm_id);
+	mutex_lock(&mc->ctx->file->mut);
 	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
 		if (uevent->mc != mc)
 			continue;
@@ -557,6 +512,8 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
 		list_del(&uevent->list);
 		kfree(uevent);
 	}
+	mutex_unlock(&mc->ctx->file->mut);
+	rdma_unlock_handler(mc->ctx->cm_id);
 }
 
 /*
@@ -564,10 +521,6 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
  * this point, no new events will be reported from the hardware. However, we
  * still need to cleanup the UCMA context for this ID. Specifically, there
  * might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing.  We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
  * mutex. After that we release them as needed.
  */
 static int ucma_free_ctx(struct ucma_context *ctx)
@@ -576,31 +529,57 @@ static int ucma_free_ctx(struct ucma_context *ctx)
 	struct ucma_event *uevent, *tmp;
 	LIST_HEAD(list);
 
-
 	ucma_cleanup_multicast(ctx);
 
 	/* Cleanup events not yet reported to the user. */
 	mutex_lock(&ctx->file->mut);
 	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
-		if (uevent->ctx == ctx)
+		if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
 			list_move_tail(&uevent->list, &list);
 	}
 	list_del(&ctx->list);
+	events_reported = ctx->events_reported;
 	mutex_unlock(&ctx->file->mut);
 
+	/*
+	 * If this was a listening ID then any connections spawned from it
+	 * that have not been delivered to userspace are cleaned up too.
+	 * Must be done outside any locks.
+	 */
 	list_for_each_entry_safe(uevent, tmp, &list, list) {
 		list_del(&uevent->list);
-		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
-			rdma_destroy_id(uevent->cm_id);
+		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+		    uevent->conn_req_ctx != ctx)
+			__destroy_id(uevent->conn_req_ctx);
 		kfree(uevent);
 	}
 
-	events_reported = ctx->events_reported;
 	mutex_destroy(&ctx->mutex);
 	kfree(ctx);
 	return events_reported;
 }
 
+static int __destroy_id(struct ucma_context *ctx)
+{
+	/*
+	 * If the refcount is already 0 then ucma_close_id() has already
+	 * destroyed the cm_id, otherwise holding the refcount keeps cm_id
+	 * valid. Prevent queue_work() from being called.
+	 */
+	if (refcount_inc_not_zero(&ctx->ref)) {
+		rdma_lock_handler(ctx->cm_id);
+		ctx->destroying = 1;
+		rdma_unlock_handler(ctx->cm_id);
+		ucma_put_ctx(ctx);
+	}
+
+	cancel_work_sync(&ctx->close_work);
+	/* At this point it's guaranteed that there is no inflight closing task */
+	if (ctx->cm_id)
+		ucma_close_id(&ctx->close_work);
+	return ucma_free_ctx(ctx);
+}
+
 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 			       int in_len, int out_len)
 {
@@ -624,24 +603,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	mutex_lock(&ctx->file->mut);
-	ctx->destroying = 1;
-	mutex_unlock(&ctx->file->mut);
-
-	flush_workqueue(ctx->file->close_wq);
-	/* At this point it's guaranteed that there is no inflight
-	 * closing task */
-	xa_lock(&ctx_table);
-	if (!ctx->closing) {
-		xa_unlock(&ctx_table);
-		ucma_put_ctx(ctx);
-		wait_for_completion(&ctx->comp);
-		rdma_destroy_id(ctx->cm_id);
-	} else {
-		xa_unlock(&ctx_table);
-	}
-
-	resp.events_reported = ucma_free_ctx(ctx);
+	resp.events_reported = __destroy_id(ctx);
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp)))
 		ret = -EFAULT;
@@ -1124,10 +1086,12 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
-		       cmd.backlog : max_backlog;
+	if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+		cmd.backlog = max_backlog;
+	atomic_set(&ctx->backlog, cmd.backlog);
+
 	mutex_lock(&ctx->mutex);
-	ret = rdma_listen(ctx->cm_id, ctx->backlog);
+	ret = rdma_listen(ctx->cm_id, cmd.backlog);
 	mutex_unlock(&ctx->mutex);
 	ucma_put_ctx(ctx);
 	return ret;
@@ -1160,16 +1124,20 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
 
 	if (cmd.conn_param.valid) {
 		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
-		mutex_lock(&file->mut);
 		mutex_lock(&ctx->mutex);
-		ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece);
-		mutex_unlock(&ctx->mutex);
-		if (!ret)
+		rdma_lock_handler(ctx->cm_id);
+		ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+		if (!ret) {
+			/* The uid must be set atomically with the handler */
 			ctx->uid = cmd.uid;
-		mutex_unlock(&file->mut);
+		}
+		rdma_unlock_handler(ctx->cm_id);
+		mutex_unlock(&ctx->mutex);
 	} else {
 		mutex_lock(&ctx->mutex);
-		ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece);
+		rdma_lock_handler(ctx->cm_id);
+		ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+		rdma_unlock_handler(ctx->cm_id);
 		mutex_unlock(&ctx->mutex);
 	}
 	ucma_put_ctx(ctx);
@@ -1482,44 +1450,52 @@ static ssize_t ucma_process_join(struct ucma_file *file,
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	mutex_lock(&file->mut);
-	mc = ucma_alloc_multicast(ctx);
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 	if (!mc) {
 		ret = -ENOMEM;
-		goto err1;
+		goto err_put_ctx;
 	}
+
+	mc->ctx = ctx;
 	mc->join_state = join_state;
 	mc->uid = cmd->uid;
 	memcpy(&mc->addr, addr, cmd->addr_size);
+
+	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+		     GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto err_free_mc;
+	}
+
 	mutex_lock(&ctx->mutex);
 	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
 				  join_state, mc);
 	mutex_unlock(&ctx->mutex);
 	if (ret)
-		goto err2;
+		goto err_xa_erase;
 
 	resp.id = mc->id;
 	if (copy_to_user(u64_to_user_ptr(cmd->response),
 			 &resp, sizeof(resp))) {
 		ret = -EFAULT;
-		goto err3;
+		goto err_leave_multicast;
 	}
 
 	xa_store(&multicast_table, mc->id, mc, 0);
 
-	mutex_unlock(&file->mut);
 	ucma_put_ctx(ctx);
 	return 0;
 
-err3:
+err_leave_multicast:
+	mutex_lock(&ctx->mutex);
 	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+	mutex_unlock(&ctx->mutex);
 	ucma_cleanup_mc_events(mc);
-err2:
+err_xa_erase:
 	xa_erase(&multicast_table, mc->id);
-	list_del(&mc->list);
+err_free_mc:
 	kfree(mc);
-err1:
-	mutex_unlock(&file->mut);
+err_put_ctx:
 	ucma_put_ctx(ctx);
 	return ret;
 }
@@ -1581,7 +1557,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
 	mc = xa_load(&multicast_table, cmd.id);
 	if (!mc)
 		mc = ERR_PTR(-ENOENT);
-	else if (mc->ctx->file != file)
+	else if (READ_ONCE(mc->ctx->file) != file)
 		mc = ERR_PTR(-EINVAL);
 	else if (!refcount_inc_not_zero(&mc->ctx->ref))
 		mc = ERR_PTR(-ENXIO);
@@ -1598,10 +1574,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
 	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
 	mutex_unlock(&mc->ctx->mutex);
 
-	mutex_lock(&mc->ctx->file->mut);
 	ucma_cleanup_mc_events(mc);
-	list_del(&mc->list);
-	mutex_unlock(&mc->ctx->file->mut);
 
 	ucma_put_ctx(mc->ctx);
 	resp.events_reported = mc->events_reported;
@@ -1614,45 +1587,15 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
 	return ret;
 }
 
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
-	/* Acquire mutex's based on pointer comparison to prevent deadlock. */
-	if (file1 < file2) {
-		mutex_lock(&file1->mut);
-		mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
-	} else {
-		mutex_lock(&file2->mut);
-		mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
-	}
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
-	if (file1 < file2) {
-		mutex_unlock(&file2->mut);
-		mutex_unlock(&file1->mut);
-	} else {
-		mutex_unlock(&file1->mut);
-		mutex_unlock(&file2->mut);
-	}
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
-	struct ucma_event *uevent, *tmp;
-
-	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
-		if (uevent->ctx == ctx)
-			list_move_tail(&uevent->list, &file->event_list);
-}
-
 static ssize_t ucma_migrate_id(struct ucma_file *new_file,
 			       const char __user *inbuf,
 			       int in_len, int out_len)
 {
 	struct rdma_ucm_migrate_id cmd;
 	struct rdma_ucm_migrate_resp resp;
+	struct ucma_event *uevent, *tmp;
 	struct ucma_context *ctx;
+	LIST_HEAD(event_list);
 	struct fd f;
 	struct ucma_file *cur_file;
 	int ret = 0;
@@ -1668,40 +1611,53 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
 		ret = -EINVAL;
 		goto file_put;
 	}
+	cur_file = f.file->private_data;
 
 	/* Validate current fd and prevent destruction of id. */
-	ctx = ucma_get_ctx(f.file->private_data, cmd.id);
+	ctx = ucma_get_ctx(cur_file, cmd.id);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
 		goto file_put;
 	}
 
-	cur_file = ctx->file;
-	if (cur_file == new_file) {
-		resp.events_reported = ctx->events_reported;
-		goto response;
-	}
-
+	rdma_lock_handler(ctx->cm_id);
 	/*
-	 * Migrate events between fd's, maintaining order, and avoiding new
-	 * events being added before existing events.
+	 * ctx->file can only be changed under the handler & xa_lock. xa_load()
+	 * must be checked again to ensure the ctx hasn't begun destruction
+	 * since the ucma_get_ctx().
 	 */
-	ucma_lock_files(cur_file, new_file);
 	xa_lock(&ctx_table);
-
-	list_move_tail(&ctx->list, &new_file->ctx_list);
-	ucma_move_events(ctx, new_file);
+	if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+		xa_unlock(&ctx_table);
+		ret = -ENOENT;
+		goto err_unlock;
+	}
 	ctx->file = new_file;
-	resp.events_reported = ctx->events_reported;
-
 	xa_unlock(&ctx_table);
-	ucma_unlock_files(cur_file, new_file);
 
-response:
+	mutex_lock(&cur_file->mut);
+	list_del(&ctx->list);
+	/*
+	 * At this point lock_handler() prevents addition of new uevents for
+	 * this ctx.
+	 */
+	list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+		if (uevent->ctx == ctx)
+			list_move_tail(&uevent->list, &event_list);
+	resp.events_reported = ctx->events_reported;
+	mutex_unlock(&cur_file->mut);
+
+	mutex_lock(&new_file->mut);
+	list_add_tail(&ctx->list, &new_file->ctx_list);
+	list_splice_tail(&event_list, &new_file->event_list);
+	mutex_unlock(&new_file->mut);
+
 	if (copy_to_user(u64_to_user_ptr(cmd.response),
 			 &resp, sizeof(resp)))
 		ret = -EFAULT;
 
+err_unlock:
+	rdma_unlock_handler(ctx->cm_id);
 	ucma_put_ctx(ctx);
 file_put:
 	fdput(f);
@@ -1801,13 +1757,6 @@ static int ucma_open(struct inode *inode, struct file *filp)
 	if (!file)
 		return -ENOMEM;
 
-	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
-						 WQ_MEM_RECLAIM);
-	if (!file->close_wq) {
-		kfree(file);
-		return -ENOMEM;
-	}
-
 	INIT_LIST_HEAD(&file->event_list);
 	INIT_LIST_HEAD(&file->ctx_list);
 	init_waitqueue_head(&file->poll_wait);
@@ -1822,37 +1771,22 @@ static int ucma_open(struct inode *inode, struct file *filp)
 static int ucma_close(struct inode *inode, struct file *filp)
 {
 	struct ucma_file *file = filp->private_data;
-	struct ucma_context *ctx, *tmp;
 
-	mutex_lock(&file->mut);
-	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
-		ctx->destroying = 1;
-		mutex_unlock(&file->mut);
+	/*
+	 * All paths that touch ctx_list or ctx_list starting from write() are
+	 * prevented by this being a FD release function. The list_add_tail() in
+	 * ucma_connect_event_handler() can run concurrently, however it only
+	 * adds to the list *after* a listening ID. By only reading the first of
+	 * the list, and relying on __destroy_id() to block
+	 * ucma_connect_event_handler(), no additional locking is needed.
+	 */
+	while (!list_empty(&file->ctx_list)) {
+		struct ucma_context *ctx = list_first_entry(
+			&file->ctx_list, struct ucma_context, list);
 
 		xa_erase(&ctx_table, ctx->id);
-		flush_workqueue(file->close_wq);
-		/* At that step once ctx was marked as destroying and workqueue
-		 * was flushed we are safe from any inflights handlers that
-		 * might put other closing task.
-		 */
-		xa_lock(&ctx_table);
-		if (!ctx->closing) {
-			xa_unlock(&ctx_table);
-			ucma_put_ctx(ctx);
-			wait_for_completion(&ctx->comp);
-			/* rdma_destroy_id ensures that no event handlers are
-			 * inflight for that id before releasing it.
-			 */
-			rdma_destroy_id(ctx->cm_id);
-		} else {
-			xa_unlock(&ctx_table);
-		}
-
-		ucma_free_ctx(ctx);
-		mutex_lock(&file->mut);
+		__destroy_id(ctx);
 	}
-	mutex_unlock(&file->mut);
-	destroy_workqueue(file->close_wq);
 	kfree(file);
 	return 0;
 }
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 831bff8..e9fecbd 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/count_zeros.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
@@ -60,73 +61,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	sg_free_table(&umem->sg_head);
 }
 
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
-						struct page **page_list,
-						unsigned long npages,
-						unsigned int max_seg_sz,
-						int *nents)
-{
-	unsigned long first_pfn;
-	unsigned long i = 0;
-	bool update_cur_sg = false;
-	bool first = !sg_page(sg);
-
-	/* Check if new page_list is contiguous with end of previous page_list.
-	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
-	 */
-	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
-		       page_to_pfn(page_list[0])))
-		update_cur_sg = true;
-
-	while (i != npages) {
-		unsigned long len;
-		struct page *first_page = page_list[i];
-
-		first_pfn = page_to_pfn(first_page);
-
-		/* Compute the number of contiguous pages we have starting
-		 * at i
-		 */
-		for (len = 0; i != npages &&
-			      first_pfn + len == page_to_pfn(page_list[i]) &&
-			      len < (max_seg_sz >> PAGE_SHIFT);
-		     len++)
-			i++;
-
-		/* Squash N contiguous pages from page_list into current sge */
-		if (update_cur_sg) {
-			if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
-				sg_set_page(sg, sg_page(sg),
-					    sg->length + (len << PAGE_SHIFT),
-					    0);
-				update_cur_sg = false;
-				continue;
-			}
-			update_cur_sg = false;
-		}
-
-		/* Squash N contiguous pages into next sge or first sge */
-		if (!first)
-			sg = sg_next(sg);
-
-		(*nents)++;
-		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
-		first = false;
-	}
-
-	return sg;
-}
-
 /**
  * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
  *
@@ -146,18 +80,28 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 				     unsigned long virt)
 {
 	struct scatterlist *sg;
-	unsigned int best_pg_bit;
 	unsigned long va, pgoff;
 	dma_addr_t mask;
 	int i;
 
+	/* rdma_for_each_block() has a bug if the page size is smaller than the
+	 * page size used to build the umem. For now prevent smaller page sizes
+	 * from being returned.
+	 */
+	pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
+
 	/* At minimum, drivers must support PAGE_SIZE or smaller */
 	if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
 		return 0;
 
-	va = virt;
-	/* max page size not to exceed MR length */
-	mask = roundup_pow_of_two(umem->length);
+	umem->iova = va = virt;
+	/* The best result is the smallest page size that results in the minimum
+	 * number of required pages. Compute the largest page size that could
+	 * work based on VA address bits that don't change.
+	 */
+	mask = pgsz_bitmap &
+	       GENMASK(BITS_PER_LONG - 1,
+		       bits_per((umem->length - 1 + virt) ^ virt));
 	/* offset into first SGL */
 	pgoff = umem->address & ~PAGE_MASK;
 
@@ -175,9 +119,14 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 			mask |= va;
 		pgoff = 0;
 	}
-	best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
 
-	return BIT_ULL(best_pg_bit);
+	/* The mask accumulates 1's in each position where the VA and physical
+	 * address differ, thus the length of trailing 0 is the largest page
+	 * size that can pass the VA through to the physical.
+	 */
+	if (mask)
+		pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+	return rounddown_pow_of_two(pgsz_bitmap);
 }
 EXPORT_SYMBOL(ib_umem_find_best_pgsz);
 
@@ -201,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	struct mm_struct *mm;
 	unsigned long npages;
 	int ret;
-	struct scatterlist *sg;
+	struct scatterlist *sg = NULL;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	/*
@@ -224,6 +173,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	umem->ibdev      = device;
 	umem->length     = size;
 	umem->address    = addr;
+	/*
+	 * Drivers should call ib_umem_find_best_pgsz() to set the iova
+	 * correctly.
+	 */
+	umem->iova = addr;
 	umem->writable   = ib_access_writable(access);
 	umem->owning_mm = mm = current->mm;
 	mmgrab(mm);
@@ -251,15 +205,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
-	if (ret)
-		goto vma;
-
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg = umem->sg_head.sgl;
-
 	while (npages) {
 		cond_resched();
 		ret = pin_user_pages_fast(cur_base,
@@ -271,15 +219,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 			goto umem_release;
 
 		cur_base += ret * PAGE_SIZE;
-		npages   -= ret;
-
-		sg = ib_umem_add_sg_table(sg, page_list, ret,
-			dma_get_max_seg_size(device->dma_device),
-			&umem->sg_nents);
+		npages -= ret;
+		sg = __sg_alloc_table_from_pages(
+			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
+			dma_get_max_seg_size(device->dma_device), sg, npages,
+			GFP_KERNEL);
+		umem->sg_nents = umem->sg_head.nents;
+		if (IS_ERR(sg)) {
+			unpin_user_pages_dirty_lock(page_list, ret, 0);
+			ret = PTR_ERR(sg);
+			goto umem_release;
+		}
 	}
 
-	sg_mark_end(sg);
-
 	if (access & IB_ACCESS_RELAXED_ORDERING)
 		dma_attr |= DMA_ATTR_WEAK_ORDERING;
 
@@ -297,7 +249,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 umem_release:
 	__ib_umem_release(device, umem, 0);
-vma:
 	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
 out:
 	free_page((unsigned long) page_list);
@@ -329,18 +280,6 @@ void ib_umem_release(struct ib_umem *umem)
 }
 EXPORT_SYMBOL(ib_umem_release);
 
-int ib_umem_page_count(struct ib_umem *umem)
-{
-	int i, n = 0;
-	struct scatterlist *sg;
-
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
-		n += sg_dma_len(sg) >> PAGE_SHIFT;
-
-	return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
 /*
  * Copy from the given ib_umem's pages to the given buffer.
  *
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cc6b4be..323f6cf 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -40,6 +40,7 @@
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
 #include <linux/interval_tree.h>
+#include <linux/hmm.h>
 #include <linux/pagemap.h>
 
 #include <rdma/ib_verbs.h>
@@ -60,7 +61,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 		size_t page_size = 1UL << umem_odp->page_shift;
 		unsigned long start;
 		unsigned long end;
-		size_t pages;
+		size_t ndmas, npfns;
 
 		start = ALIGN_DOWN(umem_odp->umem.address, page_size);
 		if (check_add_overflow(umem_odp->umem.address,
@@ -71,20 +72,21 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 		if (unlikely(end < page_size))
 			return -EOVERFLOW;
 
-		pages = (end - start) >> umem_odp->page_shift;
-		if (!pages)
+		ndmas = (end - start) >> umem_odp->page_shift;
+		if (!ndmas)
 			return -EINVAL;
 
-		umem_odp->page_list = kvcalloc(
-			pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
-		if (!umem_odp->page_list)
+		npfns = (end - start) >> PAGE_SHIFT;
+		umem_odp->pfn_list = kvcalloc(
+			npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+		if (!umem_odp->pfn_list)
 			return -ENOMEM;
 
 		umem_odp->dma_list = kvcalloc(
-			pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+			ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
 		if (!umem_odp->dma_list) {
 			ret = -ENOMEM;
-			goto out_page_list;
+			goto out_pfn_list;
 		}
 
 		ret = mmu_interval_notifier_insert(&umem_odp->notifier,
@@ -98,8 +100,8 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 
 out_dma_list:
 	kvfree(umem_odp->dma_list);
-out_page_list:
-	kvfree(umem_odp->page_list);
+out_pfn_list:
+	kvfree(umem_odp->pfn_list);
 	return ret;
 }
 
@@ -276,7 +278,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 		mutex_unlock(&umem_odp->umem_mutex);
 		mmu_interval_notifier_remove(&umem_odp->notifier);
 		kvfree(umem_odp->dma_list);
-		kvfree(umem_odp->page_list);
+		kvfree(umem_odp->pfn_list);
 	}
 	put_pid(umem_odp->tgid);
 	kfree(umem_odp);
@@ -287,87 +289,56 @@ EXPORT_SYMBOL(ib_umem_odp_release);
  * Map for DMA and insert a single page into the on-demand paging page tables.
  *
  * @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
+ * @dma_index: index in the umem to add the dma to.
  * @page: the page struct to map and add.
  * @access_mask: access permissions needed for this page.
  * @current_seq: sequence number for synchronization with invalidations.
  *               the sequence number is taken from
  *               umem_odp->notifiers_seq.
  *
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ * The function returns -EFAULT if the DMA mapping operation fails.
  *
- * The page is released via put_page even if the operation failed. For on-demand
- * pinning, the page is released whenever it isn't stored in the umem.
  */
 static int ib_umem_odp_map_dma_single_page(
 		struct ib_umem_odp *umem_odp,
-		unsigned int page_index,
+		unsigned int dma_index,
 		struct page *page,
-		u64 access_mask,
-		unsigned long current_seq)
+		u64 access_mask)
 {
 	struct ib_device *dev = umem_odp->umem.ibdev;
-	dma_addr_t dma_addr;
-	int ret = 0;
+	dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
 
-	if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
-		ret = -EAGAIN;
-		goto out;
-	}
-	if (!(umem_odp->dma_list[page_index])) {
-		dma_addr =
-			ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
-					DMA_BIDIRECTIONAL);
-		if (ib_dma_mapping_error(dev, dma_addr)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		umem_odp->dma_list[page_index] = dma_addr | access_mask;
-		umem_odp->page_list[page_index] = page;
-		umem_odp->npages++;
-	} else if (umem_odp->page_list[page_index] == page) {
-		umem_odp->dma_list[page_index] |= access_mask;
-	} else {
+	if (*dma_addr) {
 		/*
-		 * This is a race here where we could have done:
-		 *
-		 *         CPU0                             CPU1
-		 *   get_user_pages()
-		 *                                       invalidate()
-		 *                                       page_fault()
-		 *   mutex_lock(umem_mutex)
-		 *    page from GUP != page in ODP
-		 *
-		 * It should be prevented by the retry test above as reading
-		 * the seq number should be reliable under the
-		 * umem_mutex. Thus something is really not working right if
-		 * things get here.
+		 * If the page is already dma mapped it means it went through
+		 * a non-invalidating trasition, like read-only to writable.
+		 * Resync the flags.
 		 */
-		WARN(true,
-		     "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
-		     umem_odp->page_list[page_index], page);
-		ret = -EAGAIN;
+		*dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask;
+		return 0;
 	}
 
-out:
-	put_page(page);
-	return ret;
+	*dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
+				    DMA_BIDIRECTIONAL);
+	if (ib_dma_mapping_error(dev, *dma_addr)) {
+		*dma_addr = 0;
+		return -EFAULT;
+	}
+	umem_odp->npages++;
+	*dma_addr |= access_mask;
+	return 0;
 }
 
 /**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
  *
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem_odp->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
+ * Upon success the ODP MR will be locked to let caller complete its device
+ * page table update.
  *
  * Returns the number of pages mapped in success, negative error code
  * for failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- * An -ENOENT error code indicates that userspace process is being terminated
- * and mm was already destroyed.
  * @umem_odp: the umem to map and pin
  * @user_virt: the address from which we need to map.
  * @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -376,21 +347,19 @@ static int ib_umem_odp_map_dma_single_page(
  *        the return value.
  * @access_mask: bit mask of the requested access permissions for the given
  *               range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- *               invalidations. the sequance number is read from
- *               umem_odp->notifiers_seq before calling this function
+ * @fault: is faulting required for the given range
  */
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
-			      u64 bcnt, u64 access_mask,
-			      unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+				 u64 bcnt, u64 access_mask, bool fault)
+			__acquires(&umem_odp->umem_mutex)
 {
 	struct task_struct *owning_process  = NULL;
 	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
-	struct page       **local_page_list = NULL;
-	u64 page_mask, off;
-	int j, k, ret = 0, start_idx, npages = 0;
-	unsigned int flags = 0, page_shift;
-	phys_addr_t p = 0;
+	int pfn_index, dma_index, ret = 0, start_idx;
+	unsigned int page_shift, hmm_order, pfn_start_idx;
+	unsigned long num_pfns, current_seq;
+	struct hmm_range range = {};
+	unsigned long timeout;
 
 	if (access_mask == 0)
 		return -EINVAL;
@@ -399,15 +368,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 	    user_virt + bcnt > ib_umem_end(umem_odp))
 		return -EFAULT;
 
-	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
-	if (!local_page_list)
-		return -ENOMEM;
-
 	page_shift = umem_odp->page_shift;
-	page_mask = ~(BIT(page_shift) - 1);
-	off = user_virt & (~page_mask);
-	user_virt = user_virt & page_mask;
-	bcnt += off; /* Charge for the first page offset as well. */
 
 	/*
 	 * owning_process is allowed to be NULL, this means somehow the mm is
@@ -420,99 +381,104 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 		goto out_put_task;
 	}
 
-	if (access_mask & ODP_WRITE_ALLOWED_BIT)
-		flags |= FOLL_WRITE;
+	range.notifier = &umem_odp->notifier;
+	range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+	range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+	pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+	num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+	if (fault) {
+		range.default_flags = HMM_PFN_REQ_FAULT;
 
-	start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
-	k = start_idx;
+		if (access_mask & ODP_WRITE_ALLOWED_BIT)
+			range.default_flags |= HMM_PFN_REQ_WRITE;
+	}
 
-	while (bcnt > 0) {
-		const size_t gup_num_pages = min_t(size_t,
-				ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
-				PAGE_SIZE / sizeof(struct page *));
+	range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 
-		mmap_read_lock(owning_mm);
-		/*
-		 * Note: this might result in redundent page getting. We can
-		 * avoid this by checking dma_list to be 0 before calling
-		 * get_user_pages. However, this make the code much more
-		 * complex (and doesn't gain us much performance in most use
-		 * cases).
-		 */
-		npages = get_user_pages_remote(owning_mm,
-				user_virt, gup_num_pages,
-				flags, local_page_list, NULL, NULL);
-		mmap_read_unlock(owning_mm);
+retry:
+	current_seq = range.notifier_seq =
+		mmu_interval_read_begin(&umem_odp->notifier);
 
-		if (npages < 0) {
-			if (npages != -EAGAIN)
-				pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
-			else
-				pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
-			break;
-		}
+	mmap_read_lock(owning_mm);
+	ret = hmm_range_fault(&range);
+	mmap_read_unlock(owning_mm);
+	if (unlikely(ret)) {
+		if (ret == -EBUSY && !time_after(jiffies, timeout))
+			goto retry;
+		goto out_put_mm;
+	}
 
-		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
-		mutex_lock(&umem_odp->umem_mutex);
-		for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
-			if (user_virt & ~page_mask) {
-				p += PAGE_SIZE;
-				if (page_to_phys(local_page_list[j]) != p) {
-					ret = -EFAULT;
-					break;
-				}
-				put_page(local_page_list[j]);
+	start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+	dma_index = start_idx;
+
+	mutex_lock(&umem_odp->umem_mutex);
+	if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
+		mutex_unlock(&umem_odp->umem_mutex);
+		goto retry;
+	}
+
+	for (pfn_index = 0; pfn_index < num_pfns;
+		pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
+
+		if (fault) {
+			/*
+			 * Since we asked for hmm_range_fault() to populate
+			 * pages it shouldn't return an error entry on success.
+			 */
+			WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+			WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+		} else {
+			if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) {
+				WARN_ON(umem_odp->dma_list[dma_index]);
 				continue;
 			}
-
-			ret = ib_umem_odp_map_dma_single_page(
-					umem_odp, k, local_page_list[j],
-					access_mask, current_seq);
-			if (ret < 0) {
-				if (ret != -EAGAIN)
-					pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
-				else
-					pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
-				break;
-			}
-
-			p = page_to_phys(local_page_list[j]);
-			k++;
+			access_mask = ODP_READ_ALLOWED_BIT;
+			if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE)
+				access_mask |= ODP_WRITE_ALLOWED_BIT;
 		}
-		mutex_unlock(&umem_odp->umem_mutex);
 
+		hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+		/* If a hugepage was detected and ODP wasn't set for, the umem
+		 * page_shift will be used, the opposite case is an error.
+		 */
+		if (hmm_order + PAGE_SHIFT < page_shift) {
+			ret = -EINVAL;
+			ibdev_dbg(umem_odp->umem.ibdev,
+				  "%s: un-expected hmm_order %d, page_shift %d\n",
+				  __func__, hmm_order, page_shift);
+			break;
+		}
+
+		ret = ib_umem_odp_map_dma_single_page(
+				umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]),
+				access_mask);
 		if (ret < 0) {
-			/*
-			 * Release pages, remembering that the first page
-			 * to hit an error was already released by
-			 * ib_umem_odp_map_dma_single_page().
-			 */
-			if (npages - (j + 1) > 0)
-				release_pages(&local_page_list[j+1],
-					      npages - (j + 1));
+			ibdev_dbg(umem_odp->umem.ibdev,
+				  "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
 			break;
 		}
 	}
+	/* upon sucesss lock should stay on hold for the callee */
+	if (!ret)
+		ret = dma_index - start_idx;
+	else
+		mutex_unlock(&umem_odp->umem_mutex);
 
-	if (ret >= 0) {
-		if (npages < 0 && k == start_idx)
-			ret = npages;
-		else
-			ret = k - start_idx;
-	}
-
+out_put_mm:
 	mmput(owning_mm);
 out_put_task:
 	if (owning_process)
 		put_task_struct(owning_process);
-	free_page((unsigned long)local_page_list);
 	return ret;
 }
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
 
 void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 				 u64 bound)
 {
+	dma_addr_t dma_addr;
+	dma_addr_t dma;
 	int idx;
 	u64 addr;
 	struct ib_device *dev = umem_odp->umem.ibdev;
@@ -521,20 +487,16 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 
 	virt = max_t(u64, virt, ib_umem_start(umem_odp));
 	bound = min_t(u64, bound, ib_umem_end(umem_odp));
-	/* Note that during the run of this function, the
-	 * notifiers_count of the MR is > 0, preventing any racing
-	 * faults from completion. We might be racing with other
-	 * invalidations, so we must make sure we free each page only
-	 * once. */
 	for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
 		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
-		if (umem_odp->page_list[idx]) {
-			struct page *page = umem_odp->page_list[idx];
-			dma_addr_t dma = umem_odp->dma_list[idx];
-			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+		dma = umem_odp->dma_list[idx];
 
-			WARN_ON(!dma_addr);
+		/* The access flags guaranteed a valid DMA address in case was NULL */
+		if (dma) {
+			unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+			struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
 
+			dma_addr = dma & ODP_DMA_ADDR_MASK;
 			ib_dma_unmap_page(dev, dma_addr,
 					  BIT(umem_odp->page_shift),
 					  DMA_BIDIRECTIONAL);
@@ -551,7 +513,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 				 */
 				set_page_dirty(head_page);
 			}
-			umem_odp->page_list[idx] = NULL;
 			umem_odp->dma_list[idx] = 0;
 			umem_odp->npages--;
 		}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2fbc583..418d133 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -218,10 +218,12 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
 	if (!ucontext)
 		return -ENOMEM;
 
-	ucontext->res.type = RDMA_RESTRACK_CTX;
 	ucontext->device = ib_dev;
 	ucontext->ufile = ufile;
 	xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
+	rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+	rdma_restrack_set_name(&ucontext->res, NULL);
 	attrs->context = ucontext;
 	return 0;
 }
@@ -250,7 +252,7 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
 	if (ret)
 		goto err_uncharge;
 
-	rdma_restrack_uadd(&ucontext->res);
+	rdma_restrack_add(&ucontext->res);
 
 	/*
 	 * Make sure that ib_uverbs_get_ucontext() sees the pointer update
@@ -313,6 +315,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 err_uobj:
 	rdma_alloc_abort_uobject(uobj, attrs, false);
 err_ucontext:
+	rdma_restrack_put(&attrs->context->res);
 	kfree(attrs->context);
 	attrs->context = NULL;
 	return ret;
@@ -439,12 +442,14 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 	pd->device  = ib_dev;
 	pd->uobject = uobj;
 	atomic_set(&pd->usecnt, 0);
-	pd->res.type = RDMA_RESTRACK_PD;
+
+	rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+	rdma_restrack_set_name(&pd->res, NULL);
 
 	ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
 	if (ret)
 		goto err_alloc;
-	rdma_restrack_uadd(&pd->res);
+	rdma_restrack_add(&pd->res);
 
 	uobj->object = pd;
 	uobj_finalize_uobj_create(uobj, attrs);
@@ -453,6 +458,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 	return uverbs_response(attrs, &resp, sizeof(resp));
 
 err_alloc:
+	rdma_restrack_put(&pd->res);
 	kfree(pd);
 err:
 	uobj_alloc_abort(uobj, attrs);
@@ -742,9 +748,11 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
 	mr->sig_attrs = NULL;
 	mr->uobject = uobj;
 	atomic_inc(&pd->usecnt);
-	mr->res.type = RDMA_RESTRACK_MR;
 	mr->iova = cmd.hca_va;
-	rdma_restrack_uadd(&mr->res);
+
+	rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+	rdma_restrack_set_name(&mr->res, NULL);
+	rdma_restrack_add(&mr->res);
 
 	uobj->object = mr;
 	uobj_put_obj_read(pd);
@@ -858,7 +866,7 @@ static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
 static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uverbs_alloc_mw      cmd;
-	struct ib_uverbs_alloc_mw_resp resp;
+	struct ib_uverbs_alloc_mw_resp resp = {};
 	struct ib_uobject             *uobj;
 	struct ib_pd                  *pd;
 	struct ib_mw                  *mw;
@@ -884,15 +892,21 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
 		goto err_put;
 	}
 
-	mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
-	if (IS_ERR(mw)) {
-		ret = PTR_ERR(mw);
+	mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+	if (!mw) {
+		ret = -ENOMEM;
 		goto err_put;
 	}
 
-	mw->device  = pd->device;
-	mw->pd      = pd;
+	mw->device = ib_dev;
+	mw->pd = pd;
 	mw->uobject = uobj;
+	mw->type = cmd.mw_type;
+
+	ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
+	if (ret)
+		goto err_alloc;
+
 	atomic_inc(&pd->usecnt);
 
 	uobj->object = mw;
@@ -903,6 +917,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
 	resp.mw_handle = uobj->id;
 	return uverbs_response(attrs, &resp, sizeof(resp));
 
+err_alloc:
+	kfree(mw);
 err_put:
 	uobj_put_obj_read(pd);
 err_free:
@@ -994,12 +1010,14 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
 	cq->event_handler = ib_uverbs_cq_event_handler;
 	cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
 	atomic_set(&cq->usecnt, 0);
-	cq->res.type = RDMA_RESTRACK_CQ;
+
+	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+	rdma_restrack_set_name(&cq->res, NULL);
 
 	ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
 	if (ret)
 		goto err_free;
-	rdma_restrack_uadd(&cq->res);
+	rdma_restrack_add(&cq->res);
 
 	obj->uevent.uobject.object = cq;
 	obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
@@ -1013,6 +1031,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs,
 	return uverbs_response(attrs, &resp, sizeof(resp));
 
 err_free:
+	rdma_restrack_put(&cq->res);
 	kfree(cq);
 err_file:
 	if (ev_file)
@@ -1237,8 +1256,21 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
 	bool has_sq = true;
 	struct ib_device *ib_dev;
 
-	if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
-		return -EPERM;
+	switch (cmd->qp_type) {
+	case IB_QPT_RAW_PACKET:
+		if (!capable(CAP_NET_RAW))
+			return -EPERM;
+		break;
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+	case IB_QPT_UD:
+	case IB_QPT_XRC_INI:
+	case IB_QPT_XRC_TGT:
+	case IB_QPT_DRIVER:
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
 						 &ib_dev);
@@ -2985,11 +3017,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uverbs_ex_create_rwq_ind_table cmd;
 	struct ib_uverbs_ex_create_rwq_ind_table_resp  resp = {};
-	struct ib_uobject		  *uobj;
+	struct ib_uobject *uobj;
 	int err;
 	struct ib_rwq_ind_table_init_attr init_attr = {};
 	struct ib_rwq_ind_table *rwq_ind_tbl;
-	struct ib_wq	**wqs = NULL;
+	struct ib_wq **wqs = NULL;
 	u32 *wqs_handles = NULL;
 	struct ib_wq	*wq = NULL;
 	int i, num_read_wqs;
@@ -3047,17 +3079,15 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 		goto put_wqs;
 	}
 
-	init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
-	init_attr.ind_tbl = wqs;
-
-	rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
-						       &attrs->driver_udata);
-
-	if (IS_ERR(rwq_ind_tbl)) {
-		err = PTR_ERR(rwq_ind_tbl);
+	rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+	if (!rwq_ind_tbl) {
+		err = -ENOMEM;
 		goto err_uobj;
 	}
 
+	init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+	init_attr.ind_tbl = wqs;
+
 	rwq_ind_tbl->ind_tbl = wqs;
 	rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
 	rwq_ind_tbl->uobject = uobj;
@@ -3065,6 +3095,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 	rwq_ind_tbl->device = ib_dev;
 	atomic_set(&rwq_ind_tbl->usecnt, 0);
 
+	err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+					       &attrs->driver_udata);
+	if (err)
+		goto err_create;
+
 	for (i = 0; i < num_wq_handles; i++)
 		rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
 					UVERBS_LOOKUP_READ);
@@ -3076,6 +3111,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
 	resp.response_length = uverbs_response_length(attrs, sizeof(resp));
 	return uverbs_response(attrs, &resp, sizeof(resp));
 
+err_create:
+	kfree(rwq_ind_tbl);
 err_uobj:
 	uobj_alloc_abort(uobj, attrs);
 put_wqs:
@@ -3232,8 +3269,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
 		goto err_free;
 	}
 
-	flow_id = qp->device->ops.create_flow(
-		qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+	flow_id = qp->device->ops.create_flow(qp, flow_attr,
+					      &attrs->driver_udata);
 
 	if (IS_ERR(flow_id)) {
 		err = PTR_ERR(flow_id);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index a4ba0b8..4bb7c64 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -108,8 +108,11 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
 	int ret;
 
 	ret = mw->device->ops.dealloc_mw(mw);
-	if (!ret)
-		atomic_dec(&pd->usecnt);
+	if (ret)
+		return ret;
+
+	atomic_dec(&pd->usecnt);
+	kfree(mw);
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 08c39cf..0658101 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -81,12 +81,20 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
 {
 	struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
 	struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-	int ret;
+	u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+	int ret, i;
 
-	ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+	if (atomic_read(&rwq_ind_tbl->usecnt))
+		return -EBUSY;
+
+	ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
 	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
+	for (i = 0; i < table_size; i++)
+		atomic_dec(&ind_tbl[i]->usecnt);
+
+	kfree(rwq_ind_tbl);
 	kfree(ind_tbl);
 	return ret;
 }
@@ -122,8 +130,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
 	if (ret)
 		return ret;
 
-	ib_dealloc_pd_user(pd, &attrs->driver_udata);
-	return 0;
+	return ib_dealloc_pd_user(pd, &attrs->driver_udata);
 }
 
 void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index c7e7438..b3c6c06 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -46,7 +46,9 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
 	if (ret)
 		return ret;
 
-	counters->device->ops.destroy_counters(counters);
+	ret = counters->device->ops.destroy_counters(counters);
+	if (ret)
+		return ret;
 	kfree(counters);
 	return 0;
 }
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index b1c7dacc..8dabd05 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -33,6 +33,7 @@
 #include <rdma/uverbs_std_types.h>
 #include "rdma_core.h"
 #include "uverbs.h"
+#include "restrack.h"
 
 static int uverbs_free_cq(struct ib_uobject *uobject,
 			  enum rdma_remove_reason why,
@@ -123,7 +124,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 	cq->event_handler = ib_uverbs_cq_event_handler;
 	cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
 	atomic_set(&cq->usecnt, 0);
-	cq->res.type = RDMA_RESTRACK_CQ;
+
+	rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+	rdma_restrack_set_name(&cq->res, NULL);
 
 	ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
 	if (ret)
@@ -131,7 +134,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 
 	obj->uevent.uobject.object = cq;
 	obj->uevent.uobject.user_handle = user_handle;
-	rdma_restrack_uadd(&cq->res);
+	rdma_restrack_add(&cq->res);
 	uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
 
 	ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
@@ -139,6 +142,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
 	return ret;
 
 err_free:
+	rdma_restrack_put(&cq->res);
 	kfree(cq);
 err_event_file:
 	if (obj->uevent.event_file)
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 75df2094..f367d52 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -3,11 +3,13 @@
  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
  */
 
+#include <linux/overflow.h>
 #include <rdma/uverbs_std_types.h>
 #include "rdma_core.h"
 #include "uverbs.h"
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
 
 /*
  * This ioctl method allows calling any defined write or write_ex
@@ -165,7 +167,8 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
 	resp->subnet_timeout = attr->subnet_timeout;
 	resp->init_type_reply = attr->init_type_reply;
 	resp->active_width = attr->active_width;
-	resp->active_speed = attr->active_speed;
+	/* This ABI needs to be extended to provide any speed more than IB_SPEED_NDR */
+	resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
 	resp->phys_state = attr->phys_state;
 	resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
 }
@@ -265,6 +268,172 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
 	return ucontext->device->ops.query_ucontext(ucontext, attrs);
 }
 
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+				    struct ib_uverbs_gid_entry *entries,
+				    size_t num_entries, size_t user_entry_size)
+{
+	const struct uverbs_attr *attr;
+	void __user *user_entries;
+	size_t copy_len;
+	int ret;
+	int i;
+
+	if (user_entry_size == sizeof(*entries)) {
+		ret = uverbs_copy_to(attrs,
+				     UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+				     entries, sizeof(*entries) * num_entries);
+		return ret;
+	}
+
+	copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+	attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+	for (i = 0; i < num_entries; i++) {
+		if (copy_to_user(user_entries, entries, copy_len))
+			return -EFAULT;
+
+		if (user_entry_size > sizeof(*entries)) {
+			if (clear_user(user_entries + sizeof(*entries),
+				       user_entry_size - sizeof(*entries)))
+				return -EFAULT;
+		}
+
+		entries++;
+		user_entries += user_entry_size;
+	}
+
+	return uverbs_output_written(attrs,
+				     UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_gid_entry *entries;
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
+	size_t user_entry_size;
+	ssize_t num_entries;
+	size_t max_entries;
+	size_t num_bytes;
+	u32 flags;
+	int ret;
+
+	ret = uverbs_get_flags32(&flags, attrs,
+				 UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&user_entry_size, attrs,
+			       UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+	if (ret)
+		return ret;
+
+	max_entries = uverbs_attr_ptr_get_array_size(
+		attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+		user_entry_size);
+	if (max_entries <= 0)
+		return -EINVAL;
+
+	ucontext = ib_uverbs_get_ucontext(attrs);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
+
+	if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes))
+		return -EINVAL;
+
+	entries = uverbs_zalloc(attrs, num_bytes);
+	if (!entries)
+		return -ENOMEM;
+
+	num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+	if (num_entries < 0)
+		return -EINVAL;
+
+	ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+				       user_entry_size);
+	if (ret)
+		return ret;
+
+	ret = uverbs_copy_to(attrs,
+			     UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+			     &num_entries, sizeof(num_entries));
+	return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_gid_entry entry = {};
+	const struct ib_gid_attr *gid_attr;
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
+	struct net_device *ndev;
+	u32 gid_index;
+	u32 port_num;
+	u32 flags;
+	int ret;
+
+	ret = uverbs_get_flags32(&flags, attrs,
+				 UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&port_num, attrs,
+			       UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+	if (ret)
+		return ret;
+
+	ret = uverbs_get_const(&gid_index, attrs,
+			       UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+	if (ret)
+		return ret;
+
+	ucontext = ib_uverbs_get_ucontext(attrs);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
+
+	if (!rdma_is_port_valid(ib_dev, port_num))
+		return -EINVAL;
+
+	if (!rdma_ib_or_roce(ib_dev, port_num))
+		return -EOPNOTSUPP;
<