Merge tag 'core-debugobjects-2023-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull debugobjects fixes from Thomas Gleixner:
"Two fixes for debugobjects:
- Prevent the allocation path from waking up kswapd.
That's a long standing issue due to the GFP_ATOMIC allocation flag.
As debug objects can be invoked from pretty much any context waking
kswapd can end up in arbitrary lock chains versus the waitqueue
lock
- Correct the explicit lockdep wait-type violation in
debug_object_fill_pool()"
* tag 'core-debugobjects-2023-05-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
debugobjects: Don't wake up kswapd from fill_pool()
debugobjects,locking: Annotate debug_object_fill_pool() wait type violation
diff --git a/.clang-format b/.clang-format
index d988e9f..0d1ed87 100644
--- a/.clang-format
+++ b/.clang-format
@@ -520,7 +520,7 @@
- 'of_property_for_each_string'
- 'of_property_for_each_u32'
- 'pci_bus_for_each_resource'
- - 'pci_doe_for_each_off'
+ - 'pci_dev_for_each_resource'
- 'pcl_for_each_chunk'
- 'pcl_for_each_segment'
- 'pcm_for_each_format'
diff --git a/.gitignore b/.gitignore
index 70ec603..7f86e08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,6 +103,7 @@
!.get_maintainer.ignore
!.gitattributes
!.gitignore
+!.kunitconfig
!.mailmap
!.rustfmt.toml
diff --git a/.mailmap b/.mailmap
index e424863..bf076bb 100644
--- a/.mailmap
+++ b/.mailmap
@@ -213,7 +213,10 @@
Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
-Jens Axboe <axboe@suse.de>
+Jens Axboe <axboe@kernel.dk> <axboe@suse.de>
+Jens Axboe <axboe@kernel.dk> <jens.axboe@oracle.com>
+Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
+Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
@@ -232,6 +235,8 @@
John Crispin <john@phrozen.org> <blogic@openwrt.org>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
+<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
+<jon.toppins+linux@gmail.com> <jtoppins@redhat.com>
Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
<josh@joshtriplett.org> <josh@freedesktop.org>
<josh@joshtriplett.org> <josh@kernel.org>
@@ -297,6 +302,8 @@
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
Mathieu Othacehe <m.othacehe@gmail.com>
+Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
+Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
Matthew Wilcox <willy@infradead.org> <mawilcox@linuxonhyperv.com>
@@ -324,6 +331,7 @@
Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
Mayuresh Janorkar <mayur@ti.com>
Michael Buesch <m@bues.ch>
+Michal Simek <michal.simek@amd.com> <michal.simek@xilinx.com>
Michel Dänzer <michel@tungstengraphics.com>
Michel Lespinasse <michel@lespinasse.org>
Michel Lespinasse <michel@lespinasse.org> <walken@google.com>
@@ -356,6 +364,12 @@
Nicolas Saenz Julienne <nsaenz@kernel.org> <nsaenzjulienne@suse.de>
Nicolas Saenz Julienne <nsaenz@kernel.org> <nsaenzjulienne@suse.com>
Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
+Nikolay Aleksandrov <razor@blackwall.org> <naleksan@redhat.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@redhat.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@cumulusnetworks.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@nvidia.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@isovalent.com>
+Oleksandr Natalenko <oleksandr@natalenko.name> <oleksandr@redhat.com>
Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
@@ -371,6 +385,8 @@
Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.ibm.com>
Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.vnet.ibm.com>
Paul E. McKenney <paulmck@kernel.org> <paulmck@us.ibm.com>
+Paul Mackerras <paulus@ozlabs.org> <paulus@samba.org>
+Paul Mackerras <paulus@ozlabs.org> <paulus@au1.ibm.com>
Peter A Jonsson <pj@ludd.ltu.se>
Peter Oruba <peter.oruba@amd.com>
Peter Oruba <peter@oruba.de>
diff --git a/CREDITS b/CREDITS
index 8470591..de7e4db 100644
--- a/CREDITS
+++ b/CREDITS
@@ -229,6 +229,10 @@
S: Notre Dame, Indiana
S: USA
+N: Kai Bankett
+E: chaosman@ontika.net
+D: QNX6 filesystem
+
N: Greg Banks
E: gnb@alphalink.com.au
D: IDT77105 ATM network driver
@@ -886,6 +890,10 @@
D: Several hardware monitoring drivers
S: France
+N: Frank "Jedi/Sector One" Denis
+E: j@pureftpd.org
+D: QNX4 filesystem
+
N: Peter Denison
E: peterd@pnd-pc.demon.co.uk
W: http://www.pnd-pc.demon.co.uk/promise/
@@ -1259,6 +1267,10 @@
N: Adam Fritzler
E: mid@zigamorph.net
+N: Richard "Scuba" A. Frowijn
+E: scuba@wxs.nl
+D: QNX4 filesystem
+
N: Fernando Fuganti
E: fuganti@conectiva.com.br
E: fuganti@netbank.com.br
@@ -1694,6 +1706,10 @@
S: D-69126 Heidelberg
S: Germany
+N: Neil Horman
+M: nhorman@tuxdriver.com
+D: SCTP protocol maintainer.
+
N: Simon Horman
M: horms@verge.net.au
D: Renesas ARM/ARM64 SoC maintainer
@@ -2218,6 +2234,10 @@
D: NUMA support, Slab allocators, Page migration
D: Scalability, Time subsystem
+N: Anders Larsen
+E: al@alarsen.net
+D: QNX4 filesystem
+
N: Paul Laufer
E: paul@laufernet.com
D: Soundblaster driver fixes, ISAPnP quirk
@@ -2494,8 +2514,8 @@
D: cfdisk (curses based disk partitioning program)
N: Mat Martineau
-E: mat@martineau.name
-D: MPTCP subsystem co-maintainer 2020-2023
+E: martineau@kernel.org
+D: MPTCP subsystem co-maintainer
D: Keyctl restricted keyring and Diffie-Hellman UAPI
D: Bluetooth L2CAP ERTM mode and AMP
S: USA
@@ -3459,6 +3479,11 @@
S: Oldenburg
S: Germany
+N: Mathieu Poirier
+E: mathieu.poirier@linaro.org
+D: CoreSight kernel subsystem, Maintainer 2014-2022
+D: Perf tool support for CoreSight
+
N: Robert Schwebel
E: robert@schwebel.de
W: https://www.schwebel.de
diff --git a/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot b/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
deleted file mode 100644
index ed6b52c..0000000
--- a/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
+++ /dev/null
@@ -1,23 +0,0 @@
-What: /sys/fs/selinux/checkreqprot
-Date: April 2005 (predates git)
-KernelVersion: 2.6.12-rc2 (predates git)
-Contact: selinux@vger.kernel.org
-Description:
-
- The selinuxfs "checkreqprot" node allows SELinux to be configured
- to check the protection requested by userspace for mmap/mprotect
- calls instead of the actual protection applied by the kernel.
- This was a compatibility mechanism for legacy userspace and
- for the READ_IMPLIES_EXEC personality flag. However, if set to
- 1, it weakens security by allowing mappings to be made executable
- without authorization by policy. The default value of checkreqprot
- at boot was changed starting in Linux v4.4 to 0 (i.e. check the
- actual protection), and Android and Linux distributions have been
- explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
- initialization for some time. Support for setting checkreqprot to 1
- will be removed no sooner than June 2021, at which point the kernel
- will always cease using checkreqprot internally and will always
- check the actual protections being applied upon mmap/mprotect calls.
- The checkreqprot selinuxfs node will remain for backward compatibility
- but will discard writes of the "0" value and will reject writes of the
- "1" value when this mechanism is removed.
diff --git a/Documentation/ABI/obsolete/sysfs-selinux-disable b/Documentation/ABI/obsolete/sysfs-selinux-disable
deleted file mode 100644
index c340278..0000000
--- a/Documentation/ABI/obsolete/sysfs-selinux-disable
+++ /dev/null
@@ -1,26 +0,0 @@
-What: /sys/fs/selinux/disable
-Date: April 2005 (predates git)
-KernelVersion: 2.6.12-rc2 (predates git)
-Contact: selinux@vger.kernel.org
-Description:
-
- The selinuxfs "disable" node allows SELinux to be disabled at runtime
- prior to a policy being loaded into the kernel. If disabled via this
- mechanism, SELinux will remain disabled until the system is rebooted.
-
- The preferred method of disabling SELinux is via the "selinux=0" boot
- parameter, but the selinuxfs "disable" node was created to make it
- easier for systems with primitive bootloaders that did not allow for
- easy modification of the kernel command line. Unfortunately, allowing
- for SELinux to be disabled at runtime makes it difficult to secure the
- kernel's LSM hooks using the "__ro_after_init" feature.
-
- Thankfully, the need for the SELinux runtime disable appears to be
- gone, the default Kconfig configuration disables this selinuxfs node,
- and only one of the major distributions, Fedora, supports disabling
- SELinux at runtime. Fedora is in the process of removing the
- selinuxfs "disable" node and once that is complete we will start the
- slow process of removing this code from the kernel.
-
- More information on /sys/fs/selinux/disable can be found under the
- CONFIG_SECURITY_SELINUX_DISABLE Kconfig option.
diff --git a/Documentation/ABI/removed/sysfs-selinux-checkreqprot b/Documentation/ABI/removed/sysfs-selinux-checkreqprot
new file mode 100644
index 0000000..f599a0a
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-selinux-checkreqprot
@@ -0,0 +1,26 @@
+What: /sys/fs/selinux/checkreqprot
+Date: April 2005 (predates git)
+KernelVersion: 2.6.12-rc2 (predates git)
+Contact: selinux@vger.kernel.org
+Description:
+
+ REMOVAL UPDATE: The SELinux checkreqprot functionality was removed in
+ March 2023, the original deprecation notice is shown below.
+
+ The selinuxfs "checkreqprot" node allows SELinux to be configured
+ to check the protection requested by userspace for mmap/mprotect
+ calls instead of the actual protection applied by the kernel.
+ This was a compatibility mechanism for legacy userspace and
+ for the READ_IMPLIES_EXEC personality flag. However, if set to
+ 1, it weakens security by allowing mappings to be made executable
+ without authorization by policy. The default value of checkreqprot
+ at boot was changed starting in Linux v4.4 to 0 (i.e. check the
+ actual protection), and Android and Linux distributions have been
+ explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
+ initialization for some time. Support for setting checkreqprot to 1
+ will be removed no sooner than June 2021, at which point the kernel
+ will always cease using checkreqprot internally and will always
+ check the actual protections being applied upon mmap/mprotect calls.
+ The checkreqprot selinuxfs node will remain for backward compatibility
+ but will discard writes of the "0" value and will reject writes of the
+ "1" value when this mechanism is removed.
diff --git a/Documentation/ABI/removed/sysfs-selinux-disable b/Documentation/ABI/removed/sysfs-selinux-disable
new file mode 100644
index 0000000..cb783c6
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-selinux-disable
@@ -0,0 +1,29 @@
+What: /sys/fs/selinux/disable
+Date: April 2005 (predates git)
+KernelVersion: 2.6.12-rc2 (predates git)
+Contact: selinux@vger.kernel.org
+Description:
+
+ REMOVAL UPDATE: The SELinux runtime disable functionality was removed
+ in March 2023, the original deprecation notice is shown below.
+
+ The selinuxfs "disable" node allows SELinux to be disabled at runtime
+ prior to a policy being loaded into the kernel. If disabled via this
+ mechanism, SELinux will remain disabled until the system is rebooted.
+
+ The preferred method of disabling SELinux is via the "selinux=0" boot
+ parameter, but the selinuxfs "disable" node was created to make it
+ easier for systems with primitive bootloaders that did not allow for
+ easy modification of the kernel command line. Unfortunately, allowing
+ for SELinux to be disabled at runtime makes it difficult to secure the
+ kernel's LSM hooks using the "__ro_after_init" feature.
+
+ Thankfully, the need for the SELinux runtime disable appears to be
+ gone, the default Kconfig configuration disables this selinuxfs node,
+ and only one of the major distributions, Fedora, supports disabling
+ SELinux at runtime. Fedora is in the process of removing the
+ selinuxfs "disable" node and once that is complete we will start the
+ slow process of removing this code from the kernel.
+
+ More information on /sys/fs/selinux/disable can be found under the
+ CONFIG_SECURITY_SELINUX_DISABLE Kconfig option.
diff --git a/Documentation/ABI/stable/sysfs-acpi-pmprofile b/Documentation/ABI/stable/sysfs-acpi-pmprofile
index 2d6314f..cd55e42 100644
--- a/Documentation/ABI/stable/sysfs-acpi-pmprofile
+++ b/Documentation/ABI/stable/sysfs-acpi-pmprofile
@@ -2,16 +2,17 @@
Date: 03-Nov-2011
KernelVersion: v3.2
Contact: linux-acpi@vger.kernel.org
-Description: The ACPI pm_profile sysfs interface exports the platform
- power management (and performance) requirement expectations
- as provided by BIOS. The integer value is directly passed as
- retrieved from the FADT ACPI table.
+Description: The ACPI pm_profile sysfs interface exposes the preferred
+ power management (and performance) profile of the platform
+ as provided in the ACPI FADT Preferred_PM_Profile field.
-Values: For possible values see ACPI specification:
- 5.2.9 Fixed ACPI Description Table (FADT)
- Field: Preferred_PM_Profile
+ The integer value is directly passed as retrieved from the FADT.
- Currently these values are defined by spec:
+Values: For the possible values refer to the Preferred_PM_Profile field
+ definition in Table 5.9 "FADT Format", Section 5.2.9 "Fixed ACPI
+ Description Table (FADT)" of the ACPI specification.
+
+ As of ACPI 6.5, the following values are defined:
== =================
0 Unspecified
@@ -22,5 +23,6 @@
5 SOHO Server
6 Appliance PC
7 Performance Server
- >7 Reserved
+ 8 Tablet
+ >8 Reserved
== =================
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 282de36..c57e5b7 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -336,18 +336,11 @@
Date: November 2016
Contact: linux-block@vger.kernel.org
Description:
- [RW] If polling is enabled, this controls what kind of polling
- will be performed. It defaults to -1, which is classic polling.
+ [RW] This was used to control what kind of polling will be
+ performed. It is now fixed to -1, which is classic polling.
In this mode, the CPU will repeatedly ask for completions
- without giving up any time. If set to 0, a hybrid polling mode
- is used, where the kernel will attempt to make an educated guess
- at when the IO will complete. Based on this guess, the kernel
- will put the process issuing IO to sleep for an amount of time,
- before entering a classic poll loop. This mode might be a little
- slower than pure classic polling, but it will be more efficient.
- If set to a value larger than 0, the kernel will put the process
- issuing IO to sleep for this amount of microseconds before
- entering classic polling.
+ without giving up any time.
+ <deprecated>
What: /sys/block/<disk>/queue/io_timeout
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index 3becc9a..534b7a3 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -136,6 +136,22 @@
Also last configuration error overloaded.
Writing to it will clear the status.
+What: /sys/bus/dsa/devices/dsa<m>/iaa_cap
+Date: Sept 14, 2022
+KernelVersion: 6.0.0
+Contact: dmaengine@vger.kernel.org
+Description: IAA (IAX) capability mask. Exported to user space for application
+ consumption. This attribute should only be visible on IAA devices
+ that are version 2 or later.
+
+What: /sys/bus/dsa/devices/dsa<m>/event_log_size
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: The event log size to be configured. Default is 64 entries and
+ occupies 4k size if the evl entry is 64 bytes. It's visible
+ only on platforms that support the capability.
+
What: /sys/bus/dsa/devices/wq<m>.<n>/block_on_fault
Date: Oct 27, 2020
KernelVersion: 5.11.0
@@ -219,6 +235,16 @@
Description: Indicate whether ATS disable is turned on for the workqueue.
0 indicates ATS is on, and 1 indicates ATS is off for the workqueue.
+What: /sys/bus/dsa/devices/wq<m>.<n>/prs_disable
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Controls whether PRS disable is turned on for the workqueue.
+ 0 indicates PRS is on, and 1 indicates PRS is off for the
+ workqueue. This option overrides block_on_fault attribute
+ if set. It's visible only on platforms that support the
+ capability.
+
What: /sys/bus/dsa/devices/wq<m>.<n>/occupancy
Date May 25, 2021
KernelVersion: 5.14.0
@@ -302,3 +328,28 @@
1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of
the max value). It's visible only on platforms that support
the capability.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/dsa<x>\!wq<m>.<n>/file<y>/cr_faults
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the number of Completion Record (CR) faults this application
+ has caused.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/dsa<x>\!wq<m>.<n>/file<y>/cr_fault_failures
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the number of Completion Record (CR) faults failures that this
+ application has caused. The failure counter is incremented when the
+ driver cannot fault in the address for the CR. Typically this is caused
+ by a bad address programmed in the submitted descriptor or a malicious
+ submitter is using bad CR address on purpose.
+
+What: /sys/bus/dsa/devices/wq<m>.<n>/dsa<x>\!wq<m>.<n>/file<y>/pid
+Date: Sept 14, 2022
+KernelVersion: 6.4.0
+Contact: dmaengine@vger.kernel.org
+Description: Show the process id of the application that opened the file. This is
+ helpful information for a monitor daemon that wants to kill the
+ application that opened the file.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index 80b98a4..4feb692 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -76,7 +76,7 @@
KernelVersion: 4.0
Description: Default camera terminal descriptors
- All attributes read only:
+ All attributes read only except bmControls, which is read/write:
======================== ====================================
bmControls bitmap specifying which controls are
@@ -101,7 +101,7 @@
KernelVersion: 4.0
Description: Default processing unit descriptors
- All attributes read only:
+ All attributes read only except bmControls, which is read/write:
=============== ========================================
iProcessing index of string descriptor
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
new file mode 100644
index 0000000..fe61d37
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -0,0 +1,35 @@
+What: /sys/kernel/debug/cxl/memX/inject_poison
+Date: April, 2023
+KernelVersion: v6.4
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Device Physical Address (DPA) is written to this
+ attribute, the memdev driver sends an inject poison command to
+ the device for the specified address. The DPA must be 64-byte
+ aligned and the length of the injected poison is 64-bytes. If
+ successful, the device returns poison when the address is
+ accessed through the CXL.mem bus. Injecting poison adds the
+ address to the device's Poison List and the error source is set
+ to Injected. In addition, the device adds a poison creation
+ event to its internal Informational Event log, updates the
+ Event Status register, and if configured, interrupts the host.
+ It is not an error to inject poison into an address that
+ already has poison present and no error is returned. The
+ inject_poison attribute is only visible for devices supporting
+ the capability.
+
+
+What: /sys/kernel/debug/memX/clear_poison
+Date: April, 2023
+KernelVersion: v6.4
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Device Physical Address (DPA) is written to this
+ attribute, the memdev driver sends a clear poison command to
+ the device for the specified address. Clearing poison removes
+ the address from the device's Poison List and writes 0 (zero)
+ for 64 bytes starting at address. It is not an error to clear
+ poison from an address that does not have poison set. If the
+ device cannot clear poison from the address, -ENXIO is returned.
+ The clear_poison attribute is only visible for devices
+ supporting the capability.
diff --git a/Documentation/ABI/testing/sysfs-bus-cdx b/Documentation/ABI/testing/sysfs-bus-cdx
new file mode 100644
index 0000000..7af477f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-cdx
@@ -0,0 +1,56 @@
+What: /sys/bus/cdx/rescan
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Writing y/1/on to this file will cause rescan of the bus
+ and devices on the CDX bus. Any new devices are scanned and
+ added to the list of Linux devices and any devices removed are
+ also deleted from Linux.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/rescan
+
+What: /sys/bus/cdx/devices/.../vendor
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Vendor ID for this CDX device, in hexadecimal. Vendor ID is
+ 16 bit identifier which is specific to the device manufacturer.
+ Combination of Vendor ID and Device ID identifies a device.
+
+What: /sys/bus/cdx/devices/.../device
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Device ID for this CDX device, in hexadecimal. Device ID is
+ 16 bit identifier to identify a device type within the range
+ of a device manufacturer.
+ Combination of Vendor ID and Device ID identifies a device.
+
+What: /sys/bus/cdx/devices/.../reset
+Date: March 2023
+Contact: nipun.gupta@amd.com
+Description:
+ Writing y/1/on to this file resets the CDX device.
+ On resetting the device, the corresponding driver is notified
+ twice, once before the device is being reset, and again after
+ the reset has been complete.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/.../reset
+
+What: /sys/bus/cdx/devices/.../remove
+Date: March 2023
+Contact: tarak.reddy@amd.com
+Description:
+ Writing y/1/on to this file removes the corresponding
+ device from the CDX bus. If the device is to be reconfigured
+ reconfigured in the Hardware, the device can be removed, so
+ that the device driver does not access the device while it is
+ being reconfigured.
+
+ For example::
+
+ # echo 1 > /sys/bus/cdx/devices/.../remove
diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter
index ff83320b..1417c42 100644
--- a/Documentation/ABI/testing/sysfs-bus-counter
+++ b/Documentation/ABI/testing/sysfs-bus-counter
@@ -1,3 +1,33 @@
+What: /sys/bus/counter/devices/counterX/cascade_counts_enable
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Indicates the cascading of Counts on Counter X.
+
+ Valid attribute values are boolean.
+
+What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Selects the external clock pin for phase counting mode of
+ Counter X.
+
+ MTCLKA-MTCLKB:
+ MTCLKA and MTCLKB pins are selected for the external
+ phase clock.
+
+ MTCLKC-MTCLKD:
+ MTCLKC and MTCLKD pins are selected for the external
+ phase clock.
+
+What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_available
+KernelVersion: 6.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Discrete set of available values for the respective device
+ configuration are listed in this file.
+
What: /sys/bus/counter/devices/counterX/countY/count
KernelVersion: 5.2
Contact: linux-iio@vger.kernel.org
@@ -215,6 +245,8 @@
Description:
This attribute indicates the number of overflows of count Y.
+What: /sys/bus/counter/devices/counterX/cascade_counts_enable_component_id
+What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_component_id
What: /sys/bus/counter/devices/counterX/countY/capture_component_id
What: /sys/bus/counter/devices/counterX/countY/ceiling_component_id
What: /sys/bus/counter/devices/counterX/countY/floor_component_id
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 3acf2f1..48ac0d9 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -415,3 +415,17 @@
1), and checks that the hardware accepts the commit request.
Reading this value indicates whether the region is committed or
not.
+
+
+What: /sys/bus/cxl/devices/memX/trigger_poison_list
+Date: April, 2023
+KernelVersion: v6.4
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a boolean 'true' is written to this attribute the
+ memdev driver retrieves the poison list from the device. The
+ list consists of addresses that are poisoned, or would result
+ in poison if accessed, and the source of the poison. This
+ attribute is only visible for devices supporting the
+ capability. The retrieved errors are logged as kernel
+ events when cxl_poison event tracing is enabled.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 6ba34c0..7140e8e 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1807,8 +1807,8 @@
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no offset etc.) resistance reading that can be processed
- into an ohm value.
+ Raw (unscaled no offset etc.) resistance reading.
+ Units after application of scale and offset are ohms.
What: /sys/bus/iio/devices/iio:deviceX/heater_enable
KernelVersion: 4.1.0
@@ -1894,8 +1894,9 @@
KernelVersion: 4.8
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no offset etc.) electric conductivity reading that
- can be processed to siemens per meter.
+ Raw (unscaled no offset etc.) electric conductivity reading.
+ Units after application of scale and offset are siemens per
+ meter.
What: /sys/bus/iio/devices/iio:deviceX/in_countY_raw
KernelVersion: 4.10
@@ -1951,8 +1952,8 @@
KernelVersion: 4.18
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled) phase difference reading from channel Y
- that can be processed to radians.
+ Raw (unscaled) phase difference reading from channel Y.
+ Units after application of scale and offset are radians.
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm1_input
What: /sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm1_input
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
index 0088aba..5a775b8 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
+++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
@@ -23,3 +23,55 @@
Reading this attribute gives the state of the DbC. It
can be one of the following states: disabled, enabled,
initialized, connected, configured and stalled.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_idVendor
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This dbc_idVendor attribute lets us change the idVendor field
+ presented in the USB device descriptor by this xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB device descriptor change while
+ connected to a USB host.
+ The default value is 0x1d6b (Linux Foundation).
+ It can be any 16-bit integer.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_idProduct
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This dbc_idProduct attribute lets us change the idProduct field
+ presented in the USB device descriptor by this xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB device descriptor change while
+ connected to a USB host.
+ The default value is 0x0010. It can be any 16-bit integer.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_bcdDevice
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This dbc_bcdDevice attribute lets us change the bcdDevice field
+ presented in the USB device descriptor by this xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB device descriptor change while
+ connected to a USB host.
+ The default value is 0x0010. (device rev 0.10)
+ It can be any 16-bit integer.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_bInterfaceProtocol
+Date: March 2023
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This attribute lets us change the bInterfaceProtocol field
+ presented in the USB Interface descriptor by the xhci debug
+ device.
+ Value can only be changed while debug capability (DbC) is in
+ disabled state to prevent USB descriptor change while
+ connected to a USB host.
+ The default value is 1 (GNU Remote Debug command).
+ Other permissible value is 0 which is for vendor defined debug
+ target.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
index ca93c21..fead760 100644
--- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
+++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
@@ -234,8 +234,8 @@
For details, see section `5.10 RAS Internal Error Register Definitions,
Altra Family Soc BMC Interface Specification`.
-What: /sys/bus/platform/devices/smpro-errmon.*/event_[vrd_warn_fault|vrd_hot|dimm_hot]
-KernelVersion: 6.1
+What: /sys/bus/platform/devices/smpro-errmon.*/event_[vrd_warn_fault|vrd_hot|dimm_hot|dimm_2x_refresh]
+KernelVersion: 6.1 (event_[vrd_warn_fault|vrd_hot|dimm_hot]), 6.4 (event_dimm_2x_refresh)
Contact: Quan Nguyen <quan@os.amperecomputing.com>
Description:
(RO) Contains the detail information in case of VRD/DIMM warning/hot events
@@ -258,8 +258,21 @@
+---------------+---------------------------------------------------------------+---------------------+
| DIMM HOT | /sys/bus/platform/devices/smpro-errmon.*/event_dimm_hot | DIMM Hot |
+---------------+---------------------------------------------------------------+---------------------+
+ | DIMM 2X | /sys/bus/platform/devices/smpro-errmon.*/event_dimm_2x_refresh| DIMM 2x refresh rate|
+ | REFRESH RATE | | event in high temp |
+ +---------------+---------------------------------------------------------------+---------------------+
- For more details, see section `5.7 GPI Status Registers,
+ For more details, see section `5.7 GPI Status Registers and 5.9 Memory Error Register Definitions,
+ Altra Family Soc BMC Interface Specification`.
+
+What: /sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
+KernelVersion: 6.4
+Contact: Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+ (RO) The sysfs returns the 2-byte DIMM failure syndrome data for slot
+ 0-15 if it failed to initialize.
+
+ For more details, see section `5.11 Boot Stage Register Definitions,
Altra Family Soc BMC Interface Specification`.
What: /sys/bus/platform/devices/smpro-misc.*/boot_progress
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index 545c2dd..cb172db 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -166,6 +166,23 @@
The file will be present for all speeds of USB devices, and will
always read "no" for USB 1.1 and USB 2.0 devices.
+What: /sys/bus/usb/devices/<INTERFACE>/wireless_status
+Date: February 2023
+Contact: Bastien Nocera <hadess@hadess.net>
+Description:
+ Some USB devices use a USB receiver dongle to communicate
+ wirelessly with their device using proprietary protocols. This
+ attribute allows user-space to know whether the device is
+ connected to its receiver dongle, and, for example, consider
+ the device to be absent when choosing whether to show the
+ device's battery, show a headset in a list of outputs, or show
+ an on-screen keyboard if the only wireless keyboard is
+ turned off.
+ This attribute is not to be used to replace protocol specific
+ statuses available in WWAN, WLAN/Wi-Fi, Bluetooth, etc.
+ If the device does not use a receiver dongle with a wireless
+ device, then this attribute will not exist.
+
What: /sys/bus/usb/devices/.../<hub_interface>/port<X>
Date: August 2012
Contact: Lan Tianyu <tianyu.lan@intel.com>
diff --git a/Documentation/ABI/testing/sysfs-devices-state_synced b/Documentation/ABI/testing/sysfs-devices-state_synced
index 0c922d7..c64636d 100644
--- a/Documentation/ABI/testing/sysfs-devices-state_synced
+++ b/Documentation/ABI/testing/sysfs-devices-state_synced
@@ -21,4 +21,9 @@
at the time the kernel starts are not affected or limited in
any way by sync_state() callbacks.
+ Writing "1" to this file will force a call to the device's
+ sync_state() function if it hasn't been called already. The
+ sync_state() call happens independent of the state of the
+ consumer devices.
+
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 2d6a472..8d7d8f0 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -14,7 +14,9 @@
The power controller will throttle the operating frequency
if the power averaged over a window (typically seconds)
- exceeds this limit.
+ exceeds this limit. A read value of 0 means that the PL1
+ power limit is disabled, writing 0 disables the
+ limit. Writing values > 0 will enable the power limit.
Only supported for particular Intel i915 graphics platforms.
diff --git a/Documentation/ABI/testing/sysfs-driver-zynqmp-fpga b/Documentation/ABI/testing/sysfs-driver-zynqmp-fpga
new file mode 100644
index 0000000..8f93d27
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-zynqmp-fpga
@@ -0,0 +1,73 @@
+What: /sys/bus/platform/drivers/zynqmp_fpga_manager/firmware:zynqmp-firmware:pcap/status
+Date: February 2023
+KernelVersion: 6.4
+Contact: Nava kishore Manne <nava.kishore.manne@amd.com>
+Description: (RO) Read fpga status.
+ Read returns a hexadecimal value that tells the current status
+ of the FPGA device. Each bit position in the status value is
+ described Below(see ug570 chapter 9).
+ https://docs.xilinx.com/v/u/en-US/ug570-ultrascale-configuration
+
+ ====================== ==============================================
+ BIT(0) 0: No CRC error
+ 1: CRC error
+
+ BIT(1) 0: Decryptor security not set
+ 1: Decryptor security set
+
+ BIT(2) 0: MMCMs/PLLs are not locked
+ 1: MMCMs/PLLs are locked
+
+ BIT(3) 0: DCI not matched
+ 1: DCI matched
+
+ BIT(4) 0: Start-up sequence has not finished
+ 1: Start-up sequence has finished
+
+ BIT(5) 0: All I/Os are placed in High-Z state
+ 1: All I/Os behave as configured
+
+ BIT(6) 0: Flip-flops and block RAM are write disabled
+ 1: Flip-flops and block RAM are write enabled
+
+ BIT(7) 0: GHIGH_B_STATUS asserted
+ 1: GHIGH_B_STATUS deasserted
+
+ BIT(8) to BIT(10) Status of the mode pins
+
+ BIT(11) 0: Initialization has not finished
+ 1: Initialization finished
+
+ BIT(12) Value on INIT_B_PIN pin
+
+ BIT(13) 0: Signal not released
+ 1: Signal released
+
+ BIT(14) Value on DONE_PIN pin.
+
+ BIT(15) 0: No IDCODE_ERROR
+ 1: IDCODE_ERROR
+
+ BIT(16) 0: No SECURITY_ERROR
+ 1: SECURITY_ERROR
+
+ BIT(17) System Monitor over-temperature if set
+
+ BIT(18) to BIT(20) Start-up state machine (0 to 7)
+ Phase 0 = 000
+ Phase 1 = 001
+ Phase 2 = 011
+ Phase 3 = 010
+ Phase 4 = 110
+ Phase 5 = 111
+ Phase 6 = 101
+ Phase 7 = 100
+
+ BIT(25) to BIT(26) Indicates the detected bus width
+ 00 = x1
+ 01 = x8
+ 10 = x16
+ 11 = x32
+ ====================== ==============================================
+
+ The other bits are reserved.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 9413274..8140fc9 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -190,12 +190,6 @@
nat entries. By default, 1 is set, which indicates
10 MB / 1 GB RAM.
-What: /sys/fs/f2fs/<disk>/batched_trim_sections
-Date: February 2015
-Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
-Description: Controls the trimming rate in batch mode.
- <deprecated>
-
What: /sys/fs/f2fs/<disk>/cp_interval
Date: October 2015
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
@@ -729,3 +723,20 @@
Date: January 2023
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the weight of last data block age.
+
+What: /sys/fs/f2fs/<disk>/compress_watermark
+Date: February 2023
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: When compress cache is on, it controls free memory watermark
+ in order to limit caching compress page. If free memory is lower
+ than watermark, then deny caching compress page. The value should be in
+ range of (0, 100], by default it was initialized as 20(%).
+
+What: /sys/fs/f2fs/<disk>/compress_percent
+Date: February 2023
+Contact: "Yangtao Li" <frank.li@vivo.com>
+Description: When compress cache is on, it controls cached page
+ percent(compress pages / free_ram) in order to limit caching compress page.
+ If cached page percent exceed threshold, then deny caching compress page.
+ The value should be in range of (0, 100], by default it was initialized
+ as 20(%).
diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index b15af6a..a42d438 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -53,7 +53,6 @@
The default domain type of a group may be modified only when
- - The group has only one device.
- The device in the group is not bound to any device driver.
So, the users must unbind the appropriate driver before
changing the default domain type.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
index d244674a..6041a02 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-ksm
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
@@ -51,3 +51,11 @@
When it is set to 0 only pages from the same node are merged,
otherwise pages from all nodes can be merged together (default).
+
+What: /sys/kernel/mm/ksm/general_profit
+Date: April 2023
+KernelVersion: 6.4
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Measure how effective KSM is.
+ general_profit: how effective is KSM. The formula for the
+ calculation is in Documentation/admin-guide/mm/ksm.rst.
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-ifs b/Documentation/ABI/testing/sysfs-platform-intel-ifs
index 5599198..41b4d5b 100644
--- a/Documentation/ABI/testing/sysfs-platform-intel-ifs
+++ b/Documentation/ABI/testing/sysfs-platform-intel-ifs
@@ -1,3 +1,7 @@
+Device instance to test mapping
+intel_ifs_0 -> Scan Test
+intel_ifs_1 -> Array BIST test
+
What: /sys/devices/virtual/misc/intel_ifs_<N>/run_test
Date: Nov 16 2022
KernelVersion: 6.2
@@ -8,6 +12,7 @@
completes the test for the core containing that thread.
Example: to test the core containing cpu5: echo 5 >
/sys/devices/virtual/misc/intel_ifs_<N>/run_test
+Devices: all
What: /sys/devices/virtual/misc/intel_ifs_<N>/status
Date: Nov 16 2022
@@ -15,21 +20,25 @@
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
Description: The status of the last test. It can be one of "pass", "fail"
or "untested".
+Devices: all
What: /sys/devices/virtual/misc/intel_ifs_<N>/details
Date: Nov 16 2022
KernelVersion: 6.2
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
Description: Additional information regarding the last test. The details file reports
- the hex value of the SCAN_STATUS MSR. Note that the error_code field
+ the hex value of the STATUS MSR for this test. Note that the error_code field
may contain driver defined software code not defined in the Intel SDM.
+Devices: all
What: /sys/devices/virtual/misc/intel_ifs_<N>/image_version
Date: Nov 16 2022
KernelVersion: 6.2
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
-Description: Version (hexadecimal) of loaded IFS binary image. If no scan image
- is loaded reports "none".
+Description: Version (hexadecimal) of loaded IFS test image. If no test image
+ is loaded reports "none". Only present for device instances where a test image
+ is applicable.
+Devices: intel_ifs_0
What: /sys/devices/virtual/misc/intel_ifs_<N>/current_batch
Date: Nov 16 2022
@@ -39,3 +48,5 @@
The number written treated as the 2 digit suffix in the following file name:
/lib/firmware/intel/ifs_<N>/ff-mm-ss-02x.scan
Reading the file will provide the suffix of the currently loaded IFS test image.
+ This file is present only for device instances where a test image is applicable.
+Devices: intel_ifs_0
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
index e79ca22..9b99a81b 100644
--- a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
@@ -68,3 +68,10 @@
Wasted burnt and invalid
Invalid not burnt but marked as valid (error state).
======= ===============================================
+
+What: /sys/bus/platform/devices/MLNXBF04:00/bootfifo
+Date: Apr 2023
+KernelVersion: 6.4
+Contact: "Liming Sun <limings@nvidia.com>"
+Description:
+ The file used to access the BlueField boot fifo.
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index f99d433..a3942b1 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -413,6 +413,35 @@
The /sys/power/suspend_stats/last_failed_step file contains
the last failed step in the suspend/resume path.
+What: /sys/power/suspend_stats/last_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description:
+ The /sys/power/suspend_stats/last_hw_sleep file
+ contains the duration of time spent in a hardware sleep
+ state in the most recent system suspend-resume cycle.
+ This number is measured in microseconds.
+
+What: /sys/power/suspend_stats/total_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description:
+ The /sys/power/suspend_stats/total_hw_sleep file
+ contains the aggregate of time spent in a hardware sleep
+ state since the kernel was booted. This number
+ is measured in microseconds.
+
+What: /sys/power/suspend_stats/max_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <mario.limonciello@amd.com>
+Description:
+ The /sys/power/suspend_stats/max_hw_sleep file
+ contains the maximum amount of time that the hardware can
+ report for time spent in a hardware sleep state. When sleep
+ cycles are longer than this time, the values for
+ 'total_hw_sleep' and 'last_hw_sleep' may not be accurate.
+ This number is measured in microseconds.
+
What: /sys/power/sync_on_suspend
Date: October 2019
Contact: Jonas Meurer <jonas@freesources.org>
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
index bdafeb4..9981d33 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -418,7 +418,6 @@
- drivers/next/e100.c
- drivers/net/e1000
- drivers/net/e1000e
- - drivers/net/ixgb
- drivers/net/ixgbe
- drivers/net/cxgb3
- drivers/net/s2io.c
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
index c9c957c..93d899d5 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -277,7 +277,7 @@
Again, only one request in a given batch need actually carry out a
grace-period operation, which means there must be an efficient way to
-identify which of many concurrent reqeusts will initiate the grace
+identify which of many concurrent requests will initiate the grace
period, and that there be an efficient way for the remaining requests to
wait for that grace period to complete. However, that is the topic of
the next section.
@@ -405,7 +405,7 @@
In earlier implementations, the task requesting the expedited grace
period also drove it to completion. This straightforward approach had
the disadvantage of needing to account for POSIX signals sent to user
-tasks, so more recent implemementations use the Linux kernel's
+tasks, so more recent implementations use the Linux kernel's
workqueues (see Documentation/core-api/workqueue.rst).
The requesting task still does counter snapshotting and funnel-lock
@@ -465,7 +465,7 @@
initialized, which does not happen until some time after the scheduler
spawns the first task. Given that there are parts of the kernel that
really do want to execute grace periods during this mid-boot “dead
-zone”, expedited grace periods must do something else during thie time.
+zone”, expedited grace periods must do something else during this time.
What they do is to fall back to the old practice of requiring that the
requesting task drive the expedited grace period, as was the case before
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
index 7fdf151..5750f12 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -168,7 +168,7 @@
+-----------------------------------------------------------------------+
The approach must be extended to handle one final case, that of waking a
-task blocked in ``synchronize_rcu()``. This task might be affinitied to
+task blocked in ``synchronize_rcu()``. This task might be affined to
a CPU that is not yet aware that the grace period has ended, and thus
might not yet be subject to the grace period's memory ordering.
Therefore, there is an ``smp_mb()`` after the return from
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 588d973..db8f16b3 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -201,7 +201,7 @@
In 2012, Josh Triplett received his Ph.D. with his dissertation
covering RCU-protected resizable hash tables and the relationship
between memory barriers and read-side traversal order: If the updater
-is making changes in the opposite direction from the read-side traveral
+is making changes in the opposite direction from the read-side traversal
order, the updater need only execute a memory-barrier instruction,
but if in the same direction, the updater needs to wait for a grace
period between the individual updates [JoshTriplettPhD]. Also in 2012,
@@ -1245,7 +1245,7 @@
[Viewed September 5, 2005]"
,annotation={
First posting showing how RCU can be safely adapted for
- preemptable RCU read side critical sections.
+ preemptible RCU read side critical sections.
}
}
@@ -1888,7 +1888,7 @@
\url{https://lore.kernel.org/r/20070910183004.GA3299@linux.vnet.ibm.com}
[Viewed October 25, 2007]"
,annotation={
- Final patch for preemptable RCU to -rt. (Later patches were
+ Final patch for preemptible RCU to -rt. (Later patches were
to mainline, eventually incorporated.)
}
}
@@ -2275,7 +2275,7 @@
\url{https://lore.kernel.org/r/20090724001429.GA17374@linux.vnet.ibm.com}
[Viewed August 15, 2009]"
,annotation={
- First posting of simple and fast preemptable RCU.
+ First posting of simple and fast preemptible RCU.
}
}
@@ -2639,7 +2639,7 @@
RCU-protected hash tables, barriers vs. read-side traversal order.
.
If the updater is making changes in the opposite direction from
- the read-side traveral order, the updater need only execute a
+ the read-side traversal order, the updater need only execute a
memory-barrier instruction, but if in the same direction, the
updater needs to wait for a grace period between the individual
updates.
diff --git a/Documentation/RCU/UP.rst b/Documentation/RCU/UP.rst
index 8b20fd4..4060d7a 100644
--- a/Documentation/RCU/UP.rst
+++ b/Documentation/RCU/UP.rst
@@ -107,7 +107,7 @@
Quick Quiz #3:
Why can't synchronize_rcu() return immediately on UP systems running
- preemptable RCU?
+ preemptible RCU?
.. _answer_quick_quiz_up:
@@ -143,7 +143,7 @@
Answer to Quick Quiz #3:
Why can't synchronize_rcu() return immediately on UP systems
- running preemptable RCU?
+ running preemptible RCU?
Because some other task might have been preempted in the middle
of an RCU read-side critical section. If synchronize_rcu()
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index cc361fb..bd3c58c 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -70,7 +70,7 @@
can serve as rcu_read_lock_sched(), but is less readable and
prevents lockdep from detecting locking issues.
- Please not that you *cannot* rely on code known to be built
+ Please note that you *cannot* rely on code known to be built
only in non-preemptible kernels. Such code can and will break,
especially in kernels built with CONFIG_PREEMPT_COUNT=y.
diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst
index 2749f43..69e73a3 100644
--- a/Documentation/RCU/lockdep.rst
+++ b/Documentation/RCU/lockdep.rst
@@ -65,7 +65,7 @@
rcu_access_pointer(p):
Return the value of the pointer and omit all barriers,
but retain the compiler constraints that prevent duplicating
- or coalescsing. This is useful when testing the
+ or coalescing. This is useful when testing the
value of the pointer itself, for example, against NULL.
The rcu_dereference_check() check expression can be any boolean
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
index 0316ba0..b3b6dfa 100644
--- a/Documentation/RCU/torture.rst
+++ b/Documentation/RCU/torture.rst
@@ -216,7 +216,7 @@
rcutorture's module parameters. For example, to test a change to RCU's
CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
This will of course result in the scripting reporting a failure, namely
-the resuling RCU CPU stall warning. As noted above, reducing memory may
+the resulting RCU CPU stall warning. As noted above, reducing memory may
require disabling rcutorture's callback-flooding tests::
kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
@@ -370,5 +370,5 @@
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28-remote \
--duration 24h
-In this case, most of the kvm-again.sh parmeters may be supplied following
+In this case, most of the kvm-again.sh parameters may be supplied following
the pathname of the old run-results directory.
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 2c5563a..8eddef2 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -597,10 +597,10 @@
If the occasional sleep is permitted, the single-argument form may
be used, omitting the rcu_head structure from struct foo.
- kfree_rcu(old_fp);
+ kfree_rcu_mightsleep(old_fp);
-This variant of kfree_rcu() almost never blocks, but might do so by
-invoking synchronize_rcu() in response to memory-allocation failure.
+This variant almost never blocks, but might do so by invoking
+synchronize_rcu() in response to memory-allocation failure.
Again, see checklist.rst for additional rules governing the use of RCU.
diff --git a/Documentation/accel/index.rst b/Documentation/accel/index.rst
index 2b43c9a..e94a016 100644
--- a/Documentation/accel/index.rst
+++ b/Documentation/accel/index.rst
@@ -8,6 +8,7 @@
:maxdepth: 1
introduction
+ qaic/index
.. only:: subproject and html
diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
new file mode 100644
index 0000000..c80d0f1
--- /dev/null
+++ b/Documentation/accel/qaic/aic100.rst
@@ -0,0 +1,510 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+===============================
+ Qualcomm Cloud AI 100 (AIC100)
+===============================
+
+Overview
+========
+
+The Qualcomm Cloud AI 100/AIC100 family of products (including SA9000P - part of
+Snapdragon Ride) are PCIe adapter cards which contain a dedicated SoC ASIC for
+the purpose of efficiently running Artificial Intelligence (AI) Deep Learning
+inference workloads. They are AI accelerators.
+
+The PCIe interface of AIC100 is capable of PCIe Gen4 speeds over eight lanes
+(x8). An individual SoC on a card can have up to 16 NSPs for running workloads.
+Each SoC has an A53 management CPU. On card, there can be up to 32 GB of DDR.
+
+Multiple AIC100 cards can be hosted in a single system to scale overall
+performance. AIC100 cards are multi-user capable and able to execute workloads
+from multiple users in a concurrent manner.
+
+Hardware Description
+====================
+
+An AIC100 card consists of an AIC100 SoC, on-card DDR, and a set of misc
+peripherals (PMICs, etc).
+
+An AIC100 card can either be a PCIe HHHL form factor (a traditional PCIe card),
+or a Dual M.2 card. Both use PCIe to connect to the host system.
+
+As a PCIe endpoint/adapter, AIC100 uses the standard VendorID(VID)/
+DeviceID(DID) combination to uniquely identify itself to the host. AIC100
+uses the standard Qualcomm VID (0x17cb). All AIC100 SKUs use the same
+AIC100 DID (0xa100).
+
+AIC100 does not implement FLR (function level reset).
+
+AIC100 implements MSI but does not implement MSI-X. AIC100 requires 17 MSIs to
+operate (1 for MHI, 16 for the DMA Bridge).
+
+As a PCIe device, AIC100 utilizes BARs to provide host interfaces to the device
+hardware. AIC100 provides 3, 64-bit BARs.
+
+* The first BAR is 4K in size, and exposes the MHI interface to the host.
+
+* The second BAR is 2M in size, and exposes the DMA Bridge interface to the
+ host.
+
+* The third BAR is variable in size based on an individual AIC100's
+ configuration, but defaults to 64K. This BAR currently has no purpose.
+
+From the host perspective, AIC100 has several key hardware components -
+
+* MHI (Modem Host Interface)
+* QSM (QAIC Service Manager)
+* NSPs (Neural Signal Processor)
+* DMA Bridge
+* DDR
+
+MHI
+---
+
+AIC100 has one MHI interface over PCIe. MHI itself is documented at
+Documentation/mhi/index.rst MHI is the mechanism the host uses to communicate
+with the QSM. Except for workload data via the DMA Bridge, all interaction with
+the device occurs via MHI.
+
+QSM
+---
+
+QAIC Service Manager. This is an ARM A53 CPU that runs the primary
+firmware of the card and performs on-card management tasks. It also
+communicates with the host via MHI. Each AIC100 has one of
+these.
+
+NSP
+---
+
+Neural Signal Processor. Each AIC100 has up to 16 of these. These are
+the processors that run the workloads on AIC100. Each NSP is a Qualcomm Hexagon
+(Q6) DSP with HVX and HMX. Each NSP can only run one workload at a time, but
+multiple NSPs may be assigned to a single workload. Since each NSP can only run
+one workload, AIC100 is limited to 16 concurrent workloads. Workload
+"scheduling" is under the purview of the host. AIC100 does not automatically
+timeslice.
+
+DMA Bridge
+----------
+
+The DMA Bridge is custom DMA engine that manages the flow of data
+in and out of workloads. AIC100 has one of these. The DMA Bridge has 16
+channels, each consisting of a set of request/response FIFOs. Each active
+workload is assigned a single DMA Bridge channel. The DMA Bridge exposes
+hardware registers to manage the FIFOs (head/tail pointers), but requires host
+memory to store the FIFOs.
+
+DDR
+---
+
+AIC100 has on-card DDR. In total, an AIC100 can have up to 32 GB of DDR.
+This DDR is used to store workloads, data for the workloads, and is used by the
+QSM for managing the device. NSPs are granted access to sections of the DDR by
+the QSM. The host does not have direct access to the DDR, and must make
+requests to the QSM to transfer data to the DDR.
+
+High-level Use Flow
+===================
+
+AIC100 is a multi-user, programmable accelerator typically used for running
+neural networks in inferencing mode to efficiently perform AI operations.
+AIC100 is not intended for training neural networks. AIC100 can be utilized
+for generic compute workloads.
+
+Assuming a user wants to utilize AIC100, they would follow these steps:
+
+1. Compile the workload into an ELF targeting the NSP(s)
+2. Make requests to the QSM to load the workload and related artifacts into the
+ device DDR
+3. Make a request to the QSM to activate the workload onto a set of idle NSPs
+4. Make requests to the DMA Bridge to send input data to the workload to be
+ processed, and other requests to receive processed output data from the
+ workload.
+5. Once the workload is no longer required, make a request to the QSM to
+ deactivate the workload, thus putting the NSPs back into an idle state.
+6. Once the workload and related artifacts are no longer needed for future
+ sessions, make requests to the QSM to unload the data from DDR. This frees
+ the DDR to be used by other users.
+
+
+Boot Flow
+=========
+
+AIC100 uses a flashless boot flow, derived from Qualcomm MSMs.
+
+When AIC100 is first powered on, it begins executing PBL (Primary Bootloader)
+from ROM. PBL enumerates the PCIe link, and initializes the BHI (Boot Host
+Interface) component of MHI.
+
+Using BHI, the host points PBL to the location of the SBL (Secondary Bootloader)
+image. The PBL pulls the image from the host, validates it, and begins
+execution of SBL.
+
+SBL initializes MHI, and uses MHI to notify the host that the device has entered
+the SBL stage. SBL performs a number of operations:
+
+* SBL initializes the majority of hardware (anything PBL left uninitialized),
+ including DDR.
+* SBL offloads the bootlog to the host.
+* SBL synchronizes timestamps with the host for future logging.
+* SBL uses the Sahara protocol to obtain the runtime firmware images from the
+ host.
+
+Once SBL has obtained and validated the runtime firmware, it brings the NSPs out
+of reset, and jumps into the QSM.
+
+The QSM uses MHI to notify the host that the device has entered the QSM stage
+(AMSS in MHI terms). At this point, the AIC100 device is fully functional, and
+ready to process workloads.
+
+Userspace components
+====================
+
+Compiler
+--------
+
+An open compiler for AIC100 based on upstream LLVM can be found at:
+https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100-cc
+
+Usermode Driver (UMD)
+---------------------
+
+An open UMD that interfaces with the qaic kernel driver can be found at:
+https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100
+
+Sahara loader
+-------------
+
+An open implementation of the Sahara protocol called kickstart can be found at:
+https://github.com/andersson/qdl
+
+MHI Channels
+============
+
+AIC100 defines a number of MHI channels for different purposes. This is a list
+of the defined channels, and their uses.
+
++----------------+---------+----------+----------------------------------------+
+| Channel name | IDs | EEs | Purpose |
++================+=========+==========+========================================+
+| QAIC_LOOPBACK | 0 & 1 | AMSS | Any data sent to the device on this |
+| | | | channel is sent back to the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_SAHARA | 2 & 3 | SBL | Used by SBL to obtain the runtime |
+| | | | firmware from the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_DIAG | 4 & 5 | AMSS | Used to communicate with QSM via the |
+| | | | DIAG protocol. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_SSR | 6 & 7 | AMSS | Used to notify the host of subsystem |
+| | | | restart events, and to offload SSR |
+| | | | crashdumps. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_QDSS | 8 & 9 | AMSS | Used for the Qualcomm Debug Subsystem. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_CONTROL | 10 & 11 | AMSS | Used for the Neural Network Control |
+| | | | (NNC) protocol. This is the primary |
+| | | | channel between host and QSM for |
+| | | | managing workloads. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_LOGGING | 12 & 13 | SBL | Used by the SBL to send the bootlog to |
+| | | | the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_STATUS | 14 & 15 | AMSS | Used to notify the host of Reliability,|
+| | | | Accessibility, Serviceability (RAS) |
+| | | | events. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TELEMETRY | 16 & 17 | AMSS | Used to get/set power/thermal/etc |
+| | | | attributes. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_DEBUG | 18 & 19 | AMSS | Not used. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TIMESYNC | 20 & 21 | SBL/AMSS | Used to synchronize timestamps in the |
+| | | | device side logs with the host time |
+| | | | source. |
++----------------+---------+----------+----------------------------------------+
+
+DMA Bridge
+==========
+
+Overview
+--------
+
+The DMA Bridge is one of the main interfaces to the host from the device
+(the other being MHI). As part of activating a workload to run on NSPs, the QSM
+assigns that network a DMA Bridge channel. A workload's DMA Bridge channel
+(DBC for short) is solely for the use of that workload and is not shared with
+other workloads.
+
+Each DBC is a pair of FIFOs that manage data in and out of the workload. One
+FIFO is the request FIFO. The other FIFO is the response FIFO.
+
+Each DBC contains 4 registers in hardware:
+
+* Request FIFO head pointer (offset 0x0). Read only by the host. Indicates the
+ latest item in the FIFO the device has consumed.
+* Request FIFO tail pointer (offset 0x4). Read/write by the host. Host
+ increments this register to add new items to the FIFO.
+* Response FIFO head pointer (offset 0x8). Read/write by the host. Indicates
+ the latest item in the FIFO the host has consumed.
+* Response FIFO tail pointer (offset 0xc). Read only by the host. Device
+ increments this register to add new items to the FIFO.
+
+The values in each register are indexes in the FIFO. To get the location of the
+FIFO element pointed to by the register: FIFO base address + register * element
+size.
+
+DBC registers are exposed to the host via the second BAR. Each DBC consumes
+4KB of space in the BAR.
+
+The actual FIFOs are backed by host memory. When sending a request to the QSM
+to activate a network, the host must donate memory to be used for the FIFOs.
+Due to internal mapping limitations of the device, a single contiguous chunk of
+memory must be provided per DBC, which hosts both FIFOs. The request FIFO will
+consume the beginning of the memory chunk, and the response FIFO will consume
+the end of the memory chunk.
+
+Request FIFO
+------------
+
+A request FIFO element has the following structure:
+
+.. code-block:: c
+
+ struct request_elem {
+ u16 req_id;
+ u8 seq_id;
+ u8 pcie_dma_cmd;
+ u32 reserved;
+ u64 pcie_dma_source_addr;
+ u64 pcie_dma_dest_addr;
+ u32 pcie_dma_len;
+ u32 reserved;
+ u64 doorbell_addr;
+ u8 doorbell_attr;
+ u8 reserved;
+ u16 reserved;
+ u32 doorbell_data;
+ u32 sem_cmd0;
+ u32 sem_cmd1;
+ u32 sem_cmd2;
+ u32 sem_cmd3;
+ };
+
+Request field descriptions:
+
+req_id
+ request ID. A request FIFO element and a response FIFO element with
+ the same request ID refer to the same command.
+
+seq_id
+ sequence ID within a request. Ignored by the DMA Bridge.
+
+pcie_dma_cmd
+ describes the DMA element of this request.
+
+ * Bit(7) is the force msi flag, which overrides the DMA Bridge MSI logic
+ and generates a MSI when this request is complete, and QSM
+ configures the DMA Bridge to look at this bit.
+ * Bits(6:5) are reserved.
+ * Bit(4) is the completion code flag, and indicates that the DMA Bridge
+ shall generate a response FIFO element when this request is
+ complete.
+ * Bit(3) indicates if this request is a linked list transfer(0) or a bulk
+ transfer(1).
+ * Bit(2) is reserved.
+ * Bits(1:0) indicate the type of transfer. No transfer(0), to device(1),
+ from device(2). Value 3 is illegal.
+
+pcie_dma_source_addr
+ source address for a bulk transfer, or the address of the linked list.
+
+pcie_dma_dest_addr
+ destination address for a bulk transfer.
+
+pcie_dma_len
+ length of the bulk transfer. Note that the size of this field
+ limits transfers to 4G in size.
+
+doorbell_addr
+ address of the doorbell to ring when this request is complete.
+
+doorbell_attr
+ doorbell attributes.
+
+ * Bit(7) indicates if a write to a doorbell is to occur.
+ * Bits(6:2) are reserved.
+ * Bits(1:0) contain the encoding of the doorbell length. 0 is 32-bit,
+ 1 is 16-bit, 2 is 8-bit, 3 is reserved. The doorbell address
+ must be naturally aligned to the specified length.
+
+doorbell_data
+ data to write to the doorbell. Only the bits corresponding to
+ the doorbell length are valid.
+
+sem_cmdN
+ semaphore command.
+
+ * Bit(31) indicates this semaphore command is enabled.
+ * Bit(30) is the to-device DMA fence. Block this request until all
+ to-device DMA transfers are complete.
+ * Bit(29) is the from-device DMA fence. Block this request until all
+ from-device DMA transfers are complete.
+ * Bits(28:27) are reserved.
+ * Bits(26:24) are the semaphore command. 0 is NOP. 1 is init with the
+ specified value. 2 is increment. 3 is decrement. 4 is wait
+ until the semaphore is equal to the specified value. 5 is wait
+ until the semaphore is greater or equal to the specified value.
+ 6 is "P", wait until semaphore is greater than 0, then
+ decrement by 1. 7 is reserved.
+ * Bit(23) is reserved.
+ * Bit(22) is the semaphore sync. 0 is post sync, which means that the
+ semaphore operation is done after the DMA transfer. 1 is
+ presync, which gates the DMA transfer. Only one presync is
+ allowed per request.
+ * Bit(21) is reserved.
+ * Bits(20:16) is the index of the semaphore to operate on.
+ * Bits(15:12) are reserved.
+ * Bits(11:0) are the semaphore value to use in operations.
+
+Overall, a request is processed in 4 steps:
+
+1. If specified, the presync semaphore condition must be true
+2. If enabled, the DMA transfer occurs
+3. If specified, the postsync semaphore conditions must be true
+4. If enabled, the doorbell is written
+
+By using the semaphores in conjunction with the workload running on the NSPs,
+the data pipeline can be synchronized such that the host can queue multiple
+requests of data for the workload to process, but the DMA Bridge will only copy
+the data into the memory of the workload when the workload is ready to process
+the next input.
+
+Response FIFO
+-------------
+
+Once a request is fully processed, a response FIFO element is generated if
+specified in pcie_dma_cmd. The structure of a response FIFO element:
+
+.. code-block:: c
+
+ struct response_elem {
+ u16 req_id;
+ u16 completion_code;
+ };
+
+req_id
+ matches the req_id of the request that generated this element.
+
+completion_code
+ status of this request. 0 is success. Non-zero is an error.
+
+The DMA Bridge will generate a MSI to the host as a reaction to activity in the
+response FIFO of a DBC. The DMA Bridge hardware has an IRQ storm mitigation
+algorithm, where it will only generate a MSI when the response FIFO transitions
+from empty to non-empty (unless force MSI is enabled and triggered). In
+response to this MSI, the host is expected to drain the response FIFO, and must
+take care to handle any race conditions between draining the FIFO, and the
+device inserting elements into the FIFO.
+
+Neural Network Control (NNC) Protocol
+=====================================
+
+The NNC protocol is how the host makes requests to the QSM to manage workloads.
+It uses the QAIC_CONTROL MHI channel.
+
+Each NNC request is packaged into a message. Each message is a series of
+transactions. A passthrough type transaction can contain elements known as
+commands.
+
+QSM requires NNC messages be little endian encoded and the fields be naturally
+aligned. Since there are 64-bit elements in some NNC messages, 64-bit alignment
+must be maintained.
+
+A message contains a header and then a series of transactions. A message may be
+at most 4K in size from QSM to the host. From the host to the QSM, a message
+can be at most 64K (maximum size of a single MHI packet), but there is a
+continuation feature where message N+1 can be marked as a continuation of
+message N. This is used for exceedingly large DMA xfer transactions.
+
+Transaction descriptions
+------------------------
+
+passthrough
+ Allows userspace to send an opaque payload directly to the QSM.
+ This is used for NNC commands. Userspace is responsible for managing
+ the QSM message requirements in the payload.
+
+dma_xfer
+ DMA transfer. Describes an object that the QSM should DMA into the
+ device via address and size tuples.
+
+activate
+ Activate a workload onto NSPs. The host must provide memory to be
+ used by the DBC.
+
+deactivate
+ Deactivate an active workload and return the NSPs to idle.
+
+status
+ Query the QSM about it's NNC implementation. Returns the NNC version,
+ and if CRC is used.
+
+terminate
+ Release a user's resources.
+
+dma_xfer_cont
+ Continuation of a previous DMA transfer. If a DMA transfer
+ cannot be specified in a single message (highly fragmented), this
+ transaction can be used to specify more ranges.
+
+validate_partition
+ Query to QSM to determine if a partition identifier is valid.
+
+Each message is tagged with a user id, and a partition id. The user id allows
+QSM to track resources, and release them when the user goes away (eg the process
+crashes). A partition id identifies the resource partition that QSM manages,
+which this message applies to.
+
+Messages may have CRCs. Messages should have CRCs applied until the QSM
+reports via the status transaction that CRCs are not needed. The QSM on the
+SA9000P requires CRCs for black channel safing.
+
+Subsystem Restart (SSR)
+=======================
+
+SSR is the concept of limiting the impact of an error. An AIC100 device may
+have multiple users, each with their own workload running. If the workload of
+one user crashes, the fallout of that should be limited to that workload and not
+impact other workloads. SSR accomplishes this.
+
+If a particular workload crashes, QSM notifies the host via the QAIC_SSR MHI
+channel. This notification identifies the workload by it's assigned DBC. A
+multi-stage recovery process is then used to cleanup both sides, and get the
+DBC/NSPs into a working state.
+
+When SSR occurs, any state in the workload is lost. Any inputs that were in
+process, or queued by not yet serviced, are lost. The loaded artifacts will
+remain in on-card DDR, but the host will need to re-activate the workload if
+it desires to recover the workload.
+
+Reliability, Accessibility, Serviceability (RAS)
+================================================
+
+AIC100 is expected to be deployed in server systems where RAS ideology is
+applied. Simply put, RAS is the concept of detecting, classifying, and
+reporting errors. While PCIe has AER (Advanced Error Reporting) which factors
+into RAS, AER does not allow for a device to report details about internal
+errors. Therefore, AIC100 implements a custom RAS mechanism. When a RAS event
+occurs, QSM will report the event with appropriate details via the QAIC_STATUS
+MHI channel. A sysadmin may determine that a particular device needs
+additional service based on RAS reports.
+
+Telemetry
+=========
+
+QSM has the ability to report various physical attributes of the device, and in
+some cases, to allow the host to control them. Examples include thermal limits,
+thermal readings, and power readings. These items are communicated via the
+QAIC_TELEMETRY MHI channel.
diff --git a/Documentation/accel/qaic/index.rst b/Documentation/accel/qaic/index.rst
new file mode 100644
index 0000000..ad19b88
--- /dev/null
+++ b/Documentation/accel/qaic/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/qaic Qualcomm Cloud AI driver
+=====================================
+
+The accel/qaic driver supports the Qualcomm Cloud AI machine learning
+accelerator cards.
+
+.. toctree::
+
+ qaic
+ aic100
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
new file mode 100644
index 0000000..72a70ab
--- /dev/null
+++ b/Documentation/accel/qaic/qaic.rst
@@ -0,0 +1,170 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=============
+ QAIC driver
+=============
+
+The QAIC driver is the Kernel Mode Driver (KMD) for the AIC100 family of AI
+accelerator products.
+
+Interrupts
+==========
+
+While the AIC100 DMA Bridge hardware implements an IRQ storm mitigation
+mechanism, it is still possible for an IRQ storm to occur. A storm can happen
+if the workload is particularly quick, and the host is responsive. If the host
+can drain the response FIFO as quickly as the device can insert elements into
+it, then the device will frequently transition the response FIFO from empty to
+non-empty and generate MSIs at a rate equivalent to the speed of the
+workload's ability to process inputs. The lprnet (license plate reader network)
+workload is known to trigger this condition, and can generate in excess of 100k
+MSIs per second. It has been observed that most systems cannot tolerate this
+for long, and will crash due to some form of watchdog due to the overhead of
+the interrupt controller interrupting the host CPU.
+
+To mitigate this issue, the QAIC driver implements specific IRQ handling. When
+QAIC receives an IRQ, it disables that line. This prevents the interrupt
+controller from interrupting the CPU. Then AIC drains the FIFO. Once the FIFO
+is drained, QAIC implements a "last chance" polling algorithm where QAIC will
+sleep for a time to see if the workload will generate more activity. The IRQ
+line remains disabled during this time. If no activity is detected, QAIC exits
+polling mode and reenables the IRQ line.
+
+This mitigation in QAIC is very effective. The same lprnet usecase that
+generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64
+IRQs over 5 minutes while keeping the host system stable, and having the same
+workload throughput performance (within run to run noise variation).
+
+
+Neural Network Control (NNC) Protocol
+=====================================
+
+The implementation of NNC is split between the KMD (QAIC) and UMD. In general
+QAIC understands how to encode/decode NNC wire protocol, and elements of the
+protocol which require kernel space knowledge to process (for example, mapping
+host memory to device IOVAs). QAIC understands the structure of a message, and
+all of the transactions. QAIC does not understand commands (the payload of a
+passthrough transaction).
+
+QAIC handles and enforces the required little endianness and 64-bit alignment,
+to the degree that it can. Since QAIC does not know the contents of a
+passthrough transaction, it relies on the UMD to satisfy the requirements.
+
+The terminate transaction is of particular use to QAIC. QAIC is not aware of
+the resources that are loaded onto a device since the majority of that activity
+occurs within NNC commands. As a result, QAIC does not have the means to
+roll back userspace activity. To ensure that a userspace client's resources
+are fully released in the case of a process crash, or a bug, QAIC uses the
+terminate command to let QSM know when a user has gone away, and the resources
+can be released.
+
+QSM can report a version number of the NNC protocol it supports. This is in the
+form of a Major number and a Minor number.
+
+Major number updates indicate changes to the NNC protocol which impact the
+message format, or transactions (impacts QAIC).
+
+Minor number updates indicate changes to the NNC protocol which impact the
+commands (does not impact QAIC).
+
+uAPI
+====
+
+QAIC defines a number of driver specific IOCTLs as part of the userspace API.
+This section describes those APIs.
+
+DRM_IOCTL_QAIC_MANAGE
+ This IOCTL allows userspace to send a NNC request to the QSM. The call will
+ block until a response is received, or the request has timed out.
+
+DRM_IOCTL_QAIC_CREATE_BO
+ This IOCTL allows userspace to allocate a buffer object (BO) which can send
+ or receive data from a workload. The call will return a GEM handle that
+ represents the allocated buffer. The BO is not usable until it has been
+ sliced (see DRM_IOCTL_QAIC_ATTACH_SLICE_BO).
+
+DRM_IOCTL_QAIC_MMAP_BO
+ This IOCTL allows userspace to prepare an allocated BO to be mmap'd into the
+ userspace process.
+
+DRM_IOCTL_QAIC_ATTACH_SLICE_BO
+ This IOCTL allows userspace to slice a BO in preparation for sending the BO
+ to the device. Slicing is the operation of describing what portions of a BO
+ get sent where to a workload. This requires a set of DMA transfers for the
+ DMA Bridge, and as such, locks the BO to a specific DBC.
+
+DRM_IOCTL_QAIC_EXECUTE_BO
+ This IOCTL allows userspace to submit a set of sliced BOs to the device. The
+ call is non-blocking. Success only indicates that the BOs have been queued
+ to the device, but does not guarantee they have been executed.
+
+DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO
+ This IOCTL operates like DRM_IOCTL_QAIC_EXECUTE_BO, but it allows userspace
+ to shrink the BOs sent to the device for this specific call. If a BO
+ typically has N inputs, but only a subset of those is available, this IOCTL
+ allows userspace to indicate that only the first M bytes of the BO should be
+ sent to the device to minimize data transfer overhead. This IOCTL dynamically
+ recomputes the slicing, and therefore has some processing overhead before the
+ BOs can be queued to the device.
+
+DRM_IOCTL_QAIC_WAIT_BO
+ This IOCTL allows userspace to determine when a particular BO has been
+ processed by the device. The call will block until either the BO has been
+ processed and can be re-queued to the device, or a timeout occurs.
+
+DRM_IOCTL_QAIC_PERF_STATS_BO
+ This IOCTL allows userspace to collect performance statistics on the most
+ recent execution of a BO. This allows userspace to construct an end to end
+ timeline of the BO processing for a performance analysis.
+
+DRM_IOCTL_QAIC_PART_DEV
+ This IOCTL allows userspace to request a duplicate "shadow device". This extra
+ accelN device is associated with a specific partition of resources on the
+ AIC100 device and can be used for limiting a process to some subset of
+ resources.
+
+Userspace Client Isolation
+==========================
+
+AIC100 supports multiple clients. Multiple DBCs can be consumed by a single
+client, and multiple clients can each consume one or more DBCs. Workloads
+may contain sensitive information therefore only the client that owns the
+workload should be allowed to interface with the DBC.
+
+Clients are identified by the instance associated with their open(). A client
+may only use memory they allocate, and DBCs that are assigned to their
+workloads. Attempts to access resources assigned to other clients will be
+rejected.
+
+Module parameters
+=================
+
+QAIC supports the following module parameters:
+
+**datapath_polling (bool)**
+
+Configures QAIC to use a polling thread for datapath events instead of relying
+on the device interrupts. Useful for platforms with broken multiMSI. Must be
+set at QAIC driver initialization. Default is 0 (off).
+
+**mhi_timeout_ms (unsigned int)**
+
+Sets the timeout value for MHI operations in milliseconds (ms). Must be set
+at the time the driver detects a device. Default is 2000 (2 seconds).
+
+**control_resp_timeout_s (unsigned int)**
+
+Sets the timeout value for QSM responses to NNC messages in seconds (s). Must
+be set at the time the driver is sending a request to QSM. Default is 60 (one
+minute).
+
+**wait_exec_default_timeout_ms (unsigned int)**
+
+Sets the default timeout for the wait_exec ioctl in milliseconds (ms). Must be
+set prior to the waic_exec ioctl call. A value specified in the ioctl call
+overrides this for that call. Default is 5000 (5 seconds).
+
+**datapath_poll_interval_us (unsigned int)**
+
+Sets the polling interval in microseconds (us) when datapath polling is active.
+Takes effect at the next polling interval. Default is 100 (100 us).
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index 7103b62..f61c01f 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -16,6 +16,7 @@
e) thrashing
f) direct compact
g) write-protect copy
+h) IRQ/SOFTIRQ
and makes these statistics available to userspace through
the taskstats interface.
@@ -49,7 +50,7 @@
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
-cache, direct compact, write-protect copy etc.
+cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc.
Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -109,17 +110,19 @@
CPU count real total virtual total delay total delay average
8 7000000 6872122 3382277 0.423ms
IO count delay total delay average
- 0 0 0ms
+ 0 0 0.000ms
SWAP count delay total delay average
- 0 0 0ms
+ 0 0 0.000ms
RECLAIM count delay total delay average
- 0 0 0ms
+ 0 0 0.000ms
THRASHING count delay total delay average
- 0 0 0ms
+ 0 0 0.000ms
COMPACT count delay total delay average
- 0 0 0ms
- WPCOPY count delay total delay average
- 0 0 0ms
+ 0 0 0.000ms
+ WPCOPY count delay total delay average
+ 0 0 0.000ms
+ IRQ count delay total delay average
+ 0 0 0.000ms
Get IO accounting for pid 1, it works only with -p::
diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
index 5e40b3f..df6062e 100644
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@@ -105,6 +105,10 @@
after which monitors are most likely not needed and psi averages can be used
instead.
+Unprivileged users can also create monitors, with the only limitation that the
+window size must be a multiple of 2s, in order to prevent excessive resource
+usage.
+
When activated, psi monitor stays active for at least the duration of one
tracking window to avoid repeated activations/deactivations when system is
bouncing in and out of the stall state.
diff --git a/Documentation/admin-guide/blockdev/nbd.rst b/Documentation/admin-guide/blockdev/nbd.rst
index d78dfe5..faf2ac4 100644
--- a/Documentation/admin-guide/blockdev/nbd.rst
+++ b/Documentation/admin-guide/blockdev/nbd.rst
@@ -14,7 +14,7 @@
Unlike NFS, it is possible to put any filesystem on it, etc.
For more information, or to download the nbd-client and nbd-server
-tools, go to http://nbd.sf.net/.
+tools, go to https://github.com/NetworkBlockDevice/nbd.
The nbd kernel module need only be installed on the client
system, as the nbd-server is completely in userspace. In fact,
diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
index 5d844ed..ae646d6 100644
--- a/Documentation/admin-guide/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -719,7 +719,7 @@
cat, rmdir commands from the shell, or their equivalent from C.
- via the C library libcpuset.
- via the C library libcgroup.
- (http://sourceforge.net/projects/libcg/)
+ (https://github.com/libcgroup/libcgroup/)
- via the python application cset.
(http://code.google.com/p/cpuset/)
diff --git a/Documentation/admin-guide/cifs/changes.rst b/Documentation/admin-guide/cifs/changes.rst
index 3147bba..8c42c4d 100644
--- a/Documentation/admin-guide/cifs/changes.rst
+++ b/Documentation/admin-guide/cifs/changes.rst
@@ -5,5 +5,5 @@
See https://wiki.samba.org/index.php/LinuxCIFSKernel for summary
information about fixes/improvements to CIFS/SMB2/SMB3 support (changes
to cifs.ko module) by kernel version (and cifs internal module version).
-This may be easier to read than parsing the output of "git log fs/cifs"
-by release.
+This may be easier to read than parsing the output of
+"git log fs/smb/client" by release.
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index 2e151cd..5f936b4 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -45,7 +45,7 @@
If you have built the CIFS vfs as module (successfully) simply
type ``make modules_install`` (or if you prefer, manually copy the file to
-the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.ko).
+the modules directory e.g. /lib/modules/6.3.0-060300-generic/kernel/fs/smb/client/cifs.ko).
If you have built the CIFS vfs into the kernel itself, follow the instructions
for your distribution on how to install a new kernel (usually you
@@ -66,15 +66,15 @@
and maximum number of simultaneous requests to one server can be configured.
Changing these from their defaults is not recommended. By executing modinfo::
- modinfo kernel/fs/cifs/cifs.ko
+ modinfo <path to cifs.ko>
-on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
+on kernel/fs/smb/client/cifs.ko the list of configuration changes that can be made
at module initialization time (by running insmod cifs.ko) can be seen.
Recommendations
===============
-To improve security the SMB2.1 dialect or later (usually will get SMB3) is now
+To improve security the SMB2.1 dialect or later (usually will get SMB3.1.1) is now
the new default. To use old dialects (e.g. to mount Windows XP) use "vers=1.0"
on mount (or vers=2.0 for Windows Vista). Note that the CIFS (vers=1.0) is
much older and less secure than the default dialect SMB3 which includes
diff --git a/Documentation/admin-guide/device-mapper/dm-flakey.rst b/Documentation/admin-guide/device-mapper/dm-flakey.rst
index 8613873..f7104c0 100644
--- a/Documentation/admin-guide/device-mapper/dm-flakey.rst
+++ b/Documentation/admin-guide/device-mapper/dm-flakey.rst
@@ -39,6 +39,10 @@
If no feature parameters are present, during the periods of
unreliability, all I/O returns errors.
+ error_reads:
+ All read I/O is failed with an error signalled.
+ Write I/O is handled correctly.
+
drop_writes:
All write I/O is silently ignored.
Read I/O is handled correctly.
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index 4c559e0..5740d85 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -489,9 +489,6 @@
multiple of this tuning parameter if the stripe size is not set in the
ext4 superblock
- mb_max_inode_prealloc
- The maximum length of per-inode ext4_prealloc_space list.
-
mb_max_to_scan
The maximum number of extents the multiblock allocator will search to
find the best extent.
diff --git a/Documentation/admin-guide/gpio/sysfs.rst b/Documentation/admin-guide/gpio/sysfs.rst
index ec09ffd..35171d1 100644
--- a/Documentation/admin-guide/gpio/sysfs.rst
+++ b/Documentation/admin-guide/gpio/sysfs.rst
@@ -145,7 +145,7 @@
/* export the GPIO to userspace */
int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
- /* reverse gpio_export() */
+ /* reverse gpiod_export() */
void gpiod_unexport(struct gpio_desc *desc);
/* create a sysfs link to an exported GPIO node */
diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
index f491de7..48ca0bd 100644
--- a/Documentation/admin-guide/hw-vuln/mds.rst
+++ b/Documentation/admin-guide/hw-vuln/mds.rst
@@ -58,7 +58,7 @@
Hyper-Thread attacks are possible.
Deeper technical information is available in the MDS specific x86
-architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
+architecture section: :ref:`Documentation/arch/x86/mds.rst <mds>`.
Attack scenarios
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
index 76673af..014167e 100644
--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -63,7 +63,7 @@
which in turn potentially leaks data stored in the buffers.
More detailed technical information is available in the TAA specific x86
-architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
+architecture section: :ref:`Documentation/arch/x86/tsx_async_abort.rst <tsx_async_abort>`.
Attack scenarios
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 09a563b..43ea3561 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -36,6 +36,7 @@
reporting-issues
reporting-regressions
+ quickly-build-trimmed-linux
bug-hunting
bug-bisect
tainted-kernels
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 86fd884..c18d94f 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -172,7 +172,7 @@
Offset of the free_list's member. This value is used to compute the number
of free pages.
-Each zone has a free_area structure array called free_area[MAX_ORDER].
+Each zone has a free_area structure array called free_area[MAX_ORDER + 1].
The free_list represents a linked list of free page blocks.
(list_head, next|prev)
@@ -189,8 +189,8 @@
information. Makedumpfile gets the start address of the vmalloc region
from this.
-(zone.free_area, MAX_ORDER)
----------------------------
+(zone.free_area, MAX_ORDER + 1)
+-------------------------------
Free areas descriptor. User-space tools use this value to iterate the
free_area ranges. MAX_ORDER is used by the zone buddy allocator.
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 19600c5..1ba8f2a 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -128,10 +128,11 @@
KVM Kernel Virtual Machine support is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
+ LOONGARCH LoongArch architecture is enabled.
LOOP Loopback device support is enabled.
M68k M68k architecture is enabled.
These options have more detailed description inside of
- Documentation/m68k/kernel-options.rst.
+ Documentation/arch/m68k/kernel-options.rst.
MDA MDA console support is enabled.
MIPS MIPS architecture is enabled.
MOUSE Appropriate mouse support is enabled.
@@ -177,7 +178,7 @@
X86-32 X86-32, aka i386 architecture is enabled.
X86-64 X86-64 architecture is enabled.
More X86-64 boot options can be found in
- Documentation/x86/x86_64/boot-options.rst.
+ Documentation/arch/x86/x86_64/boot-options.rst.
X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
X86_UV SGI UV support is enabled.
XEN Xen support is enabled
@@ -192,10 +193,10 @@
Parameters denoted with BOOT are actually interpreted by the boot
loader, and have no meaning to the kernel directly.
Do not modify the syntax of boot loader parameters without extreme
-need or coordination with <Documentation/x86/boot.rst>.
+need or coordination with <Documentation/arch/x86/boot.rst>.
There are also arch-specific kernel-parameters not documented here.
-See for example <Documentation/x86/x86_64/boot-options.rst>.
+See for example <Documentation/arch/x86/x86_64/boot-options.rst>.
Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
a trailing = on the name of any parameter states that that parameter will
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6221a1d..9e5bab2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -339,6 +339,29 @@
This mode requires kvm-amd.avic=1.
(Default when IOMMU HW support is present.)
+ amd_pstate= [X86]
+ disable
+ Do not enable amd_pstate as the default
+ scaling driver for the supported processors
+ passive
+ Use amd_pstate with passive mode as a scaling driver.
+ In this mode autonomous selection is disabled.
+ Driver requests a desired performance level and platform
+ tries to match the same performance level if it is
+ satisfied by guaranteed performance level.
+ active
+ Use amd_pstate_epp driver instance as the scaling driver,
+ driver provides a hint to the hardware if software wants
+ to bias toward performance (0x0) or energy efficiency (0xff)
+ to the CPPC firmware. then CPPC power algorithm will
+ calculate the runtime workload and adjust the realtime cores
+ frequency.
+ guided
+ Activate guided autonomous mode. Driver requests minimum and
+ maximum performance level and the platform autonomously
+ selects a performance level in this range and appropriate
+ to the current workload.
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
@@ -889,15 +912,14 @@
cs89x0_media= [HW,NET]
Format: { rj45 | aui | bnc }
- csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call
- handling. When switched on, additional debug data is
- printed to the console in case a hanging CPU is
- detected, and that CPU is pinged again in order to try
- to resolve the hang situation.
- 0: disable csdlock debugging (default)
- 1: enable basic csdlock debugging (minor impact)
- ext: enable extended csdlock debugging (more impact,
- but more data)
+ csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU
+ function call handling. When switched on,
+ additional debug data is printed to the console
+ in case a hanging CPU is detected, and that
+ CPU is pinged again in order to try to resolve
+ the hang situation. The default value of this
+ option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
+ Kconfig option.
dasd= [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
@@ -929,9 +951,6 @@
debug_objects [KNL] Enable object debugging
- no_debug_objects
- [KNL] Disable object debugging
-
debug_guardpage_minorder=
[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
parameter allows control of the order of pages that will
@@ -1582,6 +1601,20 @@
dependencies. This only applies for fw_devlink=on|rpm.
Format: <bool>
+ fw_devlink.sync_state =
+ [KNL] When all devices that could probe have finished
+ probing, this parameter controls what to do with
+ devices that haven't yet received their sync_state()
+ calls.
+ Format: { strict | timeout }
+ strict -- Default. Continue waiting on consumers to
+ probe successfully.
+ timeout -- Give up waiting on consumers and call
+ sync_state() on any devices that haven't yet
+ received their sync_state() calls after
+ deferred_probe_timeout has expired or by
+ late_initcall() if !CONFIG_MODULES.
+
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)
@@ -2976,7 +3009,7 @@
mce [X86-32] Machine Check Exception
- mce=option [X86-64] See Documentation/x86/x86_64/boot-options.rst
+ mce=option [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
md= [HW] RAID subsystems devices and level
See Documentation/admin-guide/md.rst.
@@ -3184,9 +3217,6 @@
deep - Suspend-To-RAM or equivalent (if supported)
See Documentation/admin-guide/pm/sleep-states.rst.
- meye.*= [HW] Set MotionEye Camera parameters
- See Documentation/admin-guide/media/meye.rst.
-
mfgpt_irq= [IA-32] Specify the IRQ to use for the
Multi-Function General Purpose Timers on AMD Geode
platforms.
@@ -3332,6 +3362,12 @@
specified, <module>.async_probe takes precedence for
the specific module.
+ module.enable_dups_trace
+ [KNL] When CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS is set,
+ this means that duplicate request_module() calls will
+ trigger a WARN_ON() instead of a pr_warn(). Note that
+ if MODULE_DEBUG_AUTOLOAD_DUPS_TRACE is set, WARN_ON()s
+ will always be issued and this option does nothing.
module.sig_enforce
[KNL] When CONFIG_MODULE_SIG is set, this means that
modules without (valid) signatures will fail to load.
@@ -3428,14 +3464,13 @@
1 to enable accounting
Default value is 0.
- nfsaddrs= [NFS] Deprecated. Use ip= instead.
- See Documentation/admin-guide/nfs/nfsroot.rst.
+ nfs.cache_getent=
+ [NFS] sets the pathname to the program which is used
+ to update the NFS client cache entries.
- nfsroot= [NFS] nfs root filesystem for disk-less boxes.
- See Documentation/admin-guide/nfs/nfsroot.rst.
-
- nfsrootdebug [NFS] enable nfsroot debugging messages.
- See Documentation/admin-guide/nfs/nfsroot.rst.
+ nfs.cache_getent_timeout=
+ [NFS] sets the timeout after which an attempt to
+ update a cache entry is deemed to have failed.
nfs.callback_nr_threads=
[NFSv4] set the total number of threads that the
@@ -3446,18 +3481,6 @@
[NFS] set the TCP port on which the NFSv4 callback
channel should listen.
- nfs.cache_getent=
- [NFS] sets the pathname to the program which is used
- to update the NFS client cache entries.
-
- nfs.cache_getent_timeout=
- [NFS] sets the timeout after which an attempt to
- update a cache entry is deemed to have failed.
-
- nfs.idmap_cache_timeout=
- [NFS] set the maximum lifetime for idmapper cache
- entries.
-
nfs.enable_ino64=
[NFS] enable 64-bit inode numbers.
If zero, the NFS client will fake up a 32-bit inode
@@ -3465,6 +3488,10 @@
of returning the full 64-bit number.
The default is to return 64-bit inode numbers.
+ nfs.idmap_cache_timeout=
+ [NFS] set the maximum lifetime for idmapper cache
+ entries.
+
nfs.max_session_cb_slots=
[NFSv4.1] Sets the maximum number of session
slots the client will assign to the callback
@@ -3492,21 +3519,14 @@
will be autodetected by the client, and it will fall
back to using the idmapper.
To turn off this behaviour, set the value to '0'.
+
nfs.nfs4_unique_id=
[NFS4] Specify an additional fixed unique ident-
ification string that NFSv4 clients can insert into
their nfs_client_id4 string. This is typically a
UUID that is generated at system install time.
- nfs.send_implementation_id =
- [NFSv4.1] Send client implementation identification
- information in exchange_id requests.
- If zero, no implementation identification information
- will be sent.
- The default is to send the implementation identification
- information.
-
- nfs.recover_lost_locks =
+ nfs.recover_lost_locks=
[NFSv4] Attempt to recover locks that were lost due
to a lease timeout on the server. Please note that
doing this risks data corruption, since there are
@@ -3518,7 +3538,15 @@
The default parameter value of '0' causes the kernel
not to attempt recovery of lost locks.
- nfs4.layoutstats_timer =
+ nfs.send_implementation_id=
+ [NFSv4.1] Send client implementation identification
+ information in exchange_id requests.
+ If zero, no implementation identification information
+ will be sent.
+ The default is to send the implementation identification
+ information.
+
+ nfs4.layoutstats_timer=
[NFSv4.2] Change the rate at which the kernel sends
layoutstats to the pNFS metadata server.
@@ -3527,19 +3555,11 @@
driver. A non-zero value sets the minimum interval
in seconds between layoutstats transmissions.
- nfsd.inter_copy_offload_enable =
+ nfsd.inter_copy_offload_enable=
[NFSv4.2] When set to 1, the server will support
server-to-server copies for which this server is
the destination of the copy.
- nfsd.nfsd4_ssc_umount_timeout =
- [NFSv4.2] When used as the destination of a
- server-to-server copy, knfsd temporarily mounts
- the source server. It caches the mount in case
- it will be needed again, and discards it if not
- used for the number of milliseconds specified by
- this parameter.
-
nfsd.nfs4_disable_idmapping=
[NFSv4] When set to the default of '1', the NFSv4
server will return only numeric uids and gids to
@@ -3547,6 +3567,22 @@
and gids from such clients. This is intended to ease
migration from NFSv2/v3.
+ nfsd.nfsd4_ssc_umount_timeout=
+ [NFSv4.2] When used as the destination of a
+ server-to-server copy, knfsd temporarily mounts
+ the source server. It caches the mount in case
+ it will be needed again, and discards it if not
+ used for the number of milliseconds specified by
+ this parameter.
+
+ nfsaddrs= [NFS] Deprecated. Use ip= instead.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
+
+ nfsroot= [NFS] nfs root filesystem for disk-less boxes.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
+
+ nfsrootdebug [NFS] enable nfsroot debugging messages.
+ See Documentation/admin-guide/nfs/nfsroot.rst.
nmi_backtrace.backtrace_idle [KNL]
Dump stacks even of idle CPUs in response to an
@@ -3576,37 +3612,12 @@
emulation library even if a 387 maths coprocessor
is present.
- no5lvl [X86-64] Disable 5-level paging mode. Forces
+ no4lvl [RISCV] Disable 4-level and 5-level paging modes. Forces
+ kernel to use 3-level paging instead.
+
+ no5lvl [X86-64,RISCV] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
- nofsgsbase [X86] Disables FSGSBASE instructions.
-
- no_console_suspend
- [HW] Never suspend the console
- Disable suspending of consoles during suspend and
- hibernate operations. Once disabled, debugging
- messages can reach various consoles while the rest
- of the system is being put to sleep (ie, while
- debugging driver suspend/resume hooks). This may
- not work reliably with all consoles, but is known
- to work with serial and VGA consoles.
- To facilitate more flexible debugging, we also add
- console_suspend, a printk module parameter to control
- it. Users could use console_suspend (usually
- /sys/module/printk/parameters/console_suspend) to
- turn on/off it dynamically.
-
- novmcoredd [KNL,KDUMP]
- Disable device dump. Device dump allows drivers to
- append dump data to vmcore so you can collect driver
- specified debug info. Drivers can append the data
- without any limit and this data is stored in memory,
- so this may cause significant memory stress. Disabling
- device dump can help save memory but the driver debug
- data will be no longer available. This parameter
- is only available when CONFIG_PROC_VMCORE_DEVICE_DUMP
- is set.
-
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
caches in the slab allocator. Saves per-node memory,
but will impact performance.
@@ -3623,6 +3634,24 @@
nocache [ARM]
+ no_console_suspend
+ [HW] Never suspend the console
+ Disable suspending of consoles during suspend and
+ hibernate operations. Once disabled, debugging
+ messages can reach various consoles while the rest
+ of the system is being put to sleep (ie, while
+ debugging driver suspend/resume hooks). This may
+ not work reliably with all consoles, but is known
+ to work with serial and VGA consoles.
+ To facilitate more flexible debugging, we also add
+ console_suspend, a printk module parameter to control
+ it. Users could use console_suspend (usually
+ /sys/module/printk/parameters/console_suspend) to
+ turn on/off it dynamically.
+
+ no_debug_objects
+ [KNL] Disable object debugging
+
nodsp [SH] Disable hardware DSP at boot time.
noefi Disable EFI runtime services support.
@@ -3631,14 +3660,6 @@
noexec [IA-64]
- nosmap [PPC]
- Disable SMAP (Supervisor Mode Access Prevention)
- even if it is supported by processor.
-
- nosmep [PPC64s]
- Disable SMEP (Supervisor Mode Execution Prevention)
- even if it is supported by processor.
-
noexec32 [X86-64]
This affects only 32-bit executables.
noexec32=on: enable non-executable mappings (default)
@@ -3646,74 +3667,18 @@
noexec32=off: disable non-executable mappings
read implies executable mappings
+ no_file_caps Tells the kernel not to honor file capabilities. The
+ only way then for a file to be executed with privilege
+ is to be setuid root or executed by root.
+
nofpu [MIPS,SH] Disable hardware FPU at boot time.
+ nofsgsbase [X86] Disables FSGSBASE instructions.
+
nofxsr [BUGS=X86-32] Disables x86 floating point extended
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
- nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
-
- nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings.
-
- nosmt [KNL,S390] Disable symmetric multithreading (SMT).
- Equivalent to smt=1.
-
- [KNL,X86] Disable symmetric multithreading (SMT).
- nosmt=force: Force disable SMT, cannot be undone
- via the sysfs control file.
-
- nospectre_v1 [X86,PPC] Disable mitigations for Spectre Variant 1
- (bounds check bypass). With this option data leaks are
- possible in the system.
-
- nospectre_v2 [X86,PPC_E500,ARM64] Disable all mitigations for
- the Spectre variant 2 (indirect branch prediction)
- vulnerability. System may allow data leaks with this
- option.
-
- nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch
- history injection) vulnerability. System may allow data leaks
- with this option.
-
- nospec_store_bypass_disable
- [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
-
- no_uaccess_flush
- [PPC] Don't flush the L1-D cache after accessing user data.
-
- noxsave [BUGS=X86] Disables x86 extended register state save
- and restore using xsave. The kernel will fallback to
- enabling legacy floating-point and sse state.
-
- noxsaveopt [X86] Disables xsaveopt used in saving x86 extended
- register states. The kernel will fall back to use
- xsave to save the states. By using this parameter,
- performance of saving the states is degraded because
- xsave doesn't support modified optimization while
- xsaveopt supports it on xsaveopt enabled systems.
-
- noxsaves [X86] Disables xsaves and xrstors used in saving and
- restoring x86 extended register state in compacted
- form of xsave area. The kernel will fall back to use
- xsaveopt and xrstor to save and restore the states
- in standard form of xsave area. By using this
- parameter, xsave area per process might occupy more
- memory on xsaves enabled systems.
-
- nohlt [ARM,ARM64,MICROBLAZE,SH] Forces the kernel to busy wait
- in do_idle() and not use the arch_cpu_idle()
- implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
- to be effective. This is useful on platforms where the
- sleep(SH) or wfi(ARM,ARM64) instructions do not work
- correctly or when doing power measurements to evaluate
- the impact of the sleep instructions. This is also
- useful when using JTAG debugger.
-
- no_file_caps Tells the kernel not to honor file capabilities. The
- only way then for a file to be executed with privilege
- is to be setuid root or executed by root.
-
nohalt [IA-64] Tells the kernel not to use the power saving
function PAL_HALT_LIGHT when idle. This increases
power-consumption. On the positive side, it reduces
@@ -3737,6 +3702,19 @@
nohibernate [HIBERNATION] Disable hibernation and resume.
+ nohlt [ARM,ARM64,MICROBLAZE,SH] Forces the kernel to busy wait
+ in do_idle() and not use the arch_cpu_idle()
+ implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
+ to be effective. This is useful on platforms where the
+ sleep(SH) or wfi(ARM,ARM64) instructions do not work
+ correctly or when doing power measurements to evaluate
+ the impact of the sleep instructions. This is also
+ useful when using JTAG debugger.
+
+ nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
+
+ nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings.
+
nohz= [KNL] Boottime enable/disable dynamic ticks
Valid arguments: on, off
Default: on
@@ -3754,16 +3732,6 @@
Note that this argument takes precedence over
the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
- noiotrap [SH] Disables trapped I/O port accesses.
-
- noirqdebug [X86-32] Disables the code which attempts to detect and
- disable unhandled interrupt sources.
-
- no_timer_check [X86,APIC] Disables the code which tests for
- broken timer IRQ sources.
-
- noisapnp [ISAPNP] Disables ISA PnP code.
-
noinitrd [RAM] Tells the kernel not to load any configured
initial RAM disk.
@@ -3775,6 +3743,13 @@
noinvpcid [X86] Disable the INVPCID cpu feature.
+ noiotrap [SH] Disables trapped I/O port accesses.
+
+ noirqdebug [X86-32] Disables the code which attempts to detect and
+ disable unhandled interrupt sources.
+
+ noisapnp [ISAPNP] Disables ISA PnP code.
+
nojitter [IA-64] Disables jitter checking for ITC timers.
nokaslr [KNL]
@@ -3782,18 +3757,10 @@
kernel and module base offset ASLR (Address Space
Layout Randomization).
- no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
-
no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
fault handling.
- no-vmw-sched-clock
- [X86,PV_OPS] Disable paravirtualized VMware scheduler
- clock and use the default one.
-
- no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized
- steal time accounting. steal time is computed, but
- won't influence scheduler behaviour
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
nolapic [X86-32,APIC] Do not enable or use the local APIC.
@@ -3806,10 +3773,6 @@
nomfgpt [X86-32] Disable Multi-Function General Purpose
Timer usage (for AMD Geode machines).
- nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to
- shutdown the other cpus. Instead use the REBOOT_VECTOR
- irq.
-
nomodeset Disable kernel modesetting. Most systems' firmware
sets up a display mode and provides framebuffer memory
for output. With nomodeset, DRM and fbdev drivers will
@@ -3822,6 +3785,10 @@
nomodule Disable module load
+ nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to
+ shutdown the other cpus. Instead use the REBOOT_VECTOR
+ irq.
+
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.
@@ -3830,6 +3797,9 @@
nopku [X86] Disable Memory Protection Keys CPU feature found
in some Intel CPUs.
+ nopti [X86-64]
+ Equivalent to pti=off
+
nopv= [X86,XEN,KVM,HYPER_V,VMWARE]
Disables the PV optimizations forcing the guest to run
as generic guest with no PV drivers. Currently support
@@ -3849,21 +3819,77 @@
noresume [SWSUSP] Disables resume and restores original swap
space.
+ nosbagart [IA-64]
+
no-scroll [VGA] Disables scrollback.
This is required for the Braillex ib80-piezo Braille
reader made by F.H. Papenmeier (Germany).
- nosbagart [IA-64]
-
nosgx [X86-64,SGX] Disables Intel SGX kernel support.
+ nosmap [PPC]
+ Disable SMAP (Supervisor Mode Access Prevention)
+ even if it is supported by processor.
+
+ nosmep [PPC64s]
+ Disable SMEP (Supervisor Mode Execution Prevention)
+ even if it is supported by processor.
+
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
+ nosmt [KNL,S390] Disable symmetric multithreading (SMT).
+ Equivalent to smt=1.
+
+ [KNL,X86] Disable symmetric multithreading (SMT).
+ nosmt=force: Force disable SMT, cannot be undone
+ via the sysfs control file.
+
nosoftlockup [KNL] Disable the soft-lockup detector.
+ nospec_store_bypass_disable
+ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
+ nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch
+ history injection) vulnerability. System may allow data leaks
+ with this option.
+
+ nospectre_v1 [X86,PPC] Disable mitigations for Spectre Variant 1
+ (bounds check bypass). With this option data leaks are
+ possible in the system.
+
+ nospectre_v2 [X86,PPC_E500,ARM64] Disable all mitigations for
+ the Spectre variant 2 (indirect branch prediction)
+ vulnerability. System may allow data leaks with this
+ option.
+
+ no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized
+ steal time accounting. steal time is computed, but
+ won't influence scheduler behaviour
+
nosync [HW,M68K] Disables sync negotiation for all devices.
+ no_timer_check [X86,APIC] Disables the code which tests for
+ broken timer IRQ sources.
+
+ no_uaccess_flush
+ [PPC] Don't flush the L1-D cache after accessing user data.
+
+ novmcoredd [KNL,KDUMP]
+ Disable device dump. Device dump allows drivers to
+ append dump data to vmcore so you can collect driver
+ specified debug info. Drivers can append the data
+ without any limit and this data is stored in memory,
+ so this may cause significant memory stress. Disabling
+ device dump can help save memory but the driver debug
+ data will be no longer available. This parameter
+ is only available when CONFIG_PROC_VMCORE_DEVICE_DUMP
+ is set.
+
+ no-vmw-sched-clock
+ [X86,PV_OPS] Disable paravirtualized VMware scheduler
+ clock and use the default one.
+
nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup).
@@ -3875,6 +3901,25 @@
LEGACY_XAPIC_DISABLED bit set in the
IA32_XAPIC_DISABLE_STATUS MSR.
+ noxsave [BUGS=X86] Disables x86 extended register state save
+ and restore using xsave. The kernel will fallback to
+ enabling legacy floating-point and sse state.
+
+ noxsaveopt [X86] Disables xsaveopt used in saving x86 extended
+ register states. The kernel will fall back to use
+ xsave to save the states. By using this parameter,
+ performance of saving the states is degraded because
+ xsave doesn't support modified optimization while
+ xsaveopt supports it on xsaveopt enabled systems.
+
+ noxsaves [X86] Disables xsaves and xrstors used in saving and
+ restoring x86 extended register state in compacted
+ form of xsave area. The kernel will fall back to use
+ xsaveopt and xrstor to save and restore the states
+ in standard form of xsave area. By using this
+ parameter, xsave area per process might occupy more
+ memory on xsaves enabled systems.
+
nps_mtm_hs_ctr= [KNL,ARC]
This parameter sets the maximum duration, in
cycles, each HW thread of the CTOP can run
@@ -3969,7 +4014,7 @@
[KNL] Minimal page reporting order
Format: <integer>
Adjust the minimal page reporting order. The page
- reporting is disabled when it exceeds (MAX_ORDER-1).
+ reporting is disabled when it exceeds MAX_ORDER.
panic= [KNL] Kernel behaviour on panic: delay <timeout>
timeout > 0: seconds before rebooting
@@ -4410,7 +4455,7 @@
and performance comparison.
pirq= [SMP,APIC] Manual mp-table setup
- See Documentation/x86/i386/IO-APIC.rst.
+ See Documentation/arch/x86/i386/IO-APIC.rst.
plip= [PPT,NET] Parallel port network link
Format: { parport<nr> | timid | 0 }
@@ -4582,9 +4627,6 @@
Not specifying this option is equivalent to pti=auto.
- nopti [X86-64]
- Equivalent to pti=off
-
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
@@ -5591,7 +5633,7 @@
serialnumber [BUGS=X86-32]
- sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst
+ sev=option[,option...] [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
shapers= [NET]
Maximal number of shapers.
@@ -6130,15 +6172,6 @@
later by a loaded module cannot be set this way.
Example: sysctl.vm.swappiness=40
- sysfs.deprecated=0|1 [KNL]
- Enable/disable old style sysfs layout for old udev
- on older distributions. When this option is enabled
- very new udev will not work anymore. When this option
- is disabled (or CONFIG_SYSFS_DEPRECATED not compiled)
- in older udev will not work anymore.
- Default depends on CONFIG_SYSFS_DEPRECATED_V2 set in
- the kernel configuration.
-
sysrq_always_enabled
[KNL]
Ignore sysrq setting - this boot parameter will
@@ -6770,7 +6803,7 @@
Can be used multiple times for multiple devices.
vga= [BOOT,X86-32] Select a particular video mode
- See Documentation/x86/boot.rst and
+ See Documentation/arch/x86/boot.rst and
Documentation/admin-guide/svga.rst.
Use vga=ask for menu.
This is actually a boot loader parameter; the value is
@@ -6933,6 +6966,12 @@
When enabled, memory and cache locality will be
impacted.
+ writecombine= [LOONGARCH] Control the MAT (Memory Access Type) of
+ ioremap_wc().
+
+ on - Enable writecombine, use WUC for ioremap_wc()
+ off - Disable writecombine, use SUC for ioremap_wc()
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
@@ -7059,20 +7098,3 @@
xmon commands.
off xmon is disabled.
- amd_pstate= [X86]
- disable
- Do not enable amd_pstate as the default
- scaling driver for the supported processors
- passive
- Use amd_pstate as a scaling driver, driver requests a
- desired performance on this abstract scale and the power
- management firmware translates the requests into actual
- hardware states (core frequency, data fabric and memory
- clocks etc.)
- active
- Use amd_pstate_epp driver instance as the scaling driver,
- driver provides a hint to the hardware if software wants
- to bias toward performance (0x0) or energy efficiency (0xff)
- to the CPPC firmware. then CPPC power algorithm will
- calculate the runtime workload and adjust the realtime cores
- frequency.
diff --git a/Documentation/admin-guide/media/cec.rst b/Documentation/admin-guide/media/cec.rst
index 14ec3ff..6b30e35 100644
--- a/Documentation/admin-guide/media/cec.rst
+++ b/Documentation/admin-guide/media/cec.rst
@@ -55,6 +55,15 @@
you can control the CEC line through this driver. This supports error
injection as well.
+- cec-gpio and Allwinner A10 (or any other driver that uses the CEC pin
+ framework to drive the CEC pin directly): the CEC pin framework uses
+ high-resolution timers. These timers are affected by NTP daemons that
+ speed up or slow down the clock to sync with the official time. The
+ chronyd server will by default increase or decrease the clock by
+ 1/12th. This will cause the CEC timings to go out of spec. To fix this,
+ add a 'maxslewrate 40000' line to chronyd.conf. This limits the clock
+ frequency change to 1/25th, which keeps the CEC timings within spec.
+
Utilities
=========
@@ -296,69 +305,71 @@
Making a CEC debugger
=====================
-By using a Raspberry Pi 2B/3/4 and some cheap components you can make
+By using a Raspberry Pi 4B and some cheap components you can make
your own low-level CEC debugger.
-Here is a picture of my setup:
-
-https://hverkuil.home.xs4all.nl/rpi3-cec.jpg
-
-It's a Raspberry Pi 3 together with a breadboard and some breadboard wires:
-
-http://www.dx.com/p/diy-40p-male-to-female-male-to-male-female-to-female-dupont-line-wire-3pcs-356089#.WYLOOXWGN7I
-
-Finally on of these HDMI female-female passthrough connectors (full soldering type 1):
+The critical component is one of these HDMI female-female passthrough connectors
+(full soldering type 1):
https://elabbay.myshopify.com/collections/camera/products/hdmi-af-af-v1a-hdmi-type-a-female-to-hdmi-type-a-female-pass-through-adapter-breakout-board?variant=45533926147
-We've tested this and it works up to 4kp30 (297 MHz). The quality is not high
-enough to pass-through 4kp60 (594 MHz).
+The video quality is variable and certainly not enough to pass-through 4kp60
+(594 MHz) video. You might be able to support 4kp30, but more likely you will
+be limited to 1080p60 (148.5 MHz). But for CEC testing that is fine.
-I also added an RTC and a breakout shield:
+You need a breadboard and some breadboard wires:
-https://www.amazon.com/Makerfire%C2%AE-Raspberry-Module-DS1307-Battery/dp/B00ZOXWHK4
+http://www.dx.com/p/diy-40p-male-to-female-male-to-male-female-to-female-dupont-line-wire-3pcs-356089#.WYLOOXWGN7I
-https://www.dx.com/p/raspberry-pi-gpio-expansion-board-breadboard-easy-multiplexing-board-one-to-three-with-screw-for-raspberry-pi-2-3-b-b-2729992.html#.YGRCG0MzZ7I
-
-These two are not needed but they make life a bit easier.
-
-If you want to monitor the HPD line as well, then you need one of these
-level shifters:
+If you want to monitor the HPD and/or 5V lines as well, then you need one of
+these 5V to 3.3V level shifters:
https://www.adafruit.com/product/757
(This is just where I got these components, there are many other places you
can get similar things).
-The CEC pin of the HDMI connector needs to be connected to these pins:
-CE0/IO8 and CE1/IO7 (pull-up GPIOs). The (optional) HPD pin of the HDMI
-connector should be connected (via a level shifter to convert the 5V
-to 3.3V) to these pins: IO17 and IO27. The (optional) 5V pin of the HDMI
-connector should be connected (via a level shifter) to these pins: IO22
-and IO24. Monitoring the HPD an 5V lines is not necessary, but it is helpful.
+The ground pin of the HDMI connector needs to be connected to a ground
+pin of the Raspberry Pi, of course.
-This kernel patch will hook up the cec-gpio driver correctly to
-e.g. ``arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts``::
+The CEC pin of the HDMI connector needs to be connected to these pins:
+GPIO 6 and GPIO 7. The optional HPD pin of the HDMI connector should
+be connected via the level shifter to these pins: GPIO 23 and GPIO 12.
+The optional 5V pin of the HDMI connector should be connected via the
+level shifter to these pins: GPIO 25 and GPIO 22. Monitoring the HPD and
+5V lines is not necessary, but it is helpful.
+
+This device tree addition in ``arch/arm/boot/dts/bcm2711-rpi-4-b.dts``
+will hook up the cec-gpio driver correctly::
+
+ cec@6 {
+ compatible = "cec-gpio";
+ cec-gpios = <&gpio 6 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+ hpd-gpios = <&gpio 23 GPIO_ACTIVE_HIGH>;
+ v5-gpios = <&gpio 25 GPIO_ACTIVE_HIGH>;
+ };
cec@7 {
compatible = "cec-gpio";
cec-gpios = <&gpio 7 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
- hpd-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>;
+ hpd-gpios = <&gpio 12 GPIO_ACTIVE_HIGH>;
v5-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
};
- cec@8 {
- compatible = "cec-gpio";
- cec-gpios = <&gpio 8 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
- hpd-gpios = <&gpio 27 GPIO_ACTIVE_HIGH>;
- v5-gpios = <&gpio 24 GPIO_ACTIVE_HIGH>;
- };
+If you haven't hooked up the HPD and/or 5V lines, then just delete those
+lines.
This dts change will enable two cec GPIO devices: I typically use one to
send/receive CEC commands and the other to monitor. If you monitor using
an unconfigured CEC adapter then it will use GPIO interrupts which makes
monitoring very accurate.
+If you just want to monitor traffic, then a single instance is sufficient.
+The minimum configuration is one HDMI female-female passthrough connector
+and two female-female breadboard wires: one for connecting the HDMI ground
+pin to a ground pin on the Raspberry Pi, and the other to connect the HDMI
+CEC pin to GPIO 6 on the Raspberry Pi.
+
The documentation on how to use the error injection is here: :ref:`cec_pin_error_inj`.
``cec-ctl --monitor-pin`` will do low-level CEC bus sniffing and analysis.
diff --git a/Documentation/admin-guide/media/i2c-cardlist.rst b/Documentation/admin-guide/media/i2c-cardlist.rst
index ef3b5ff..1825a0b 100644
--- a/Documentation/admin-guide/media/i2c-cardlist.rst
+++ b/Documentation/admin-guide/media/i2c-cardlist.rst
@@ -72,17 +72,13 @@
imx334 Sony IMX334 sensor
imx355 Sony IMX355 sensor
imx412 Sony IMX412 sensor
-m5mols Fujitsu M-5MOLS 8MP sensor
mt9m001 mt9m001
-mt9m032 MT9M032 camera sensor
mt9m111 mt9m111, mt9m112 and mt9m131
mt9p031 Aptina MT9P031
-mt9t001 Aptina MT9T001
mt9t112 Aptina MT9T111/MT9T112
mt9v011 Micron mt9v011 sensor
mt9v032 Micron MT9V032 sensor
mt9v111 Aptina MT9V111 sensor
-noon010pc30 Siliconfile NOON010PC30 sensor
ov13858 OmniVision OV13858 sensor
ov13b10 OmniVision OV13B10 sensor
ov2640 OmniVision OV2640 sensor
@@ -109,9 +105,6 @@
s5k4ecgx Samsung S5K4ECGX sensor
s5k5baf Samsung S5K5BAF sensor
s5k6a3 Samsung S5K6A3 sensor
-s5k6aa Samsung S5K6AAFX sensor
-sr030pc30 Siliconfile SR030PC30 sensor
-vs6624 ST VS6624 sensor
============ ==========================================================
Flash devices
@@ -222,7 +215,6 @@
============ ==========================================================
Driver Name
============ ==========================================================
-ad9389b Analog Devices AD9389B encoder
adv7170 Analog Devices ADV7170 video encoder
adv7175 Analog Devices ADV7175 video encoder
adv7343 ADV7343 video encoder
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index eed51a91..7626392 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -20,7 +20,7 @@
is automatically copied if a process later wants to update its
content). The amount of pages that KSM daemon scans in a single pass
and the time between the passes are configured using :ref:`sysfs
-intraface <ksm_sysfs>`
+interface <ksm_sysfs>`
KSM only merges anonymous (private) pages, never pagecache (file) pages.
KSM's merged pages were originally locked into kernel memory, but can now
@@ -157,6 +157,8 @@
The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``:
+general_profit
+ how effective is KSM. The calculation is explained below.
pages_shared
how many shared pages are being used
pages_sharing
@@ -207,7 +209,8 @@
ksm_rmap_items * sizeof(rmap_item).
where ksm_merging_pages is shown under the directory ``/proc/<pid>/``,
- and ksm_rmap_items is shown in ``/proc/<pid>/ksm_stat``.
+ and ksm_rmap_items is shown in ``/proc/<pid>/ksm_stat``. The process profit
+ is also shown in ``/proc/<pid>/ksm_stat`` as ksm_process_profit.
From the perspective of application, a high ratio of ``ksm_rmap_items`` to
``ksm_merging_pages`` means a bad madvise-applied policy, so developers or
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index b5f970d..c8f3802 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -91,9 +91,9 @@
The page is being locked for exclusive access, e.g. by undergoing read/write
IO.
7 - SLAB
- The page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator.
- When compound page is used, SLUB/SLQB will only set this flag on the head
- page; SLOB will not flag it at all.
+ The page is managed by the SLAB/SLUB kernel memory allocator.
+ When compound page is used, either will only set this flag on the head
+ page.
10 - BUDDY
A free memory block managed by the buddy system allocator.
The buddy system organizes free memory in blocks of various orders.
diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst
index 7dc823b..7c304e4 100644
--- a/Documentation/admin-guide/mm/userfaultfd.rst
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@@ -219,6 +219,31 @@
you still need to supply a page when ``UFFDIO_REGISTER_MODE_MISSING`` was
used.
+Userfaultfd write-protect mode currently behave differently on none ptes
+(when e.g. page is missing) over different types of memories.
+
+For anonymous memory, ``ioctl(UFFDIO_WRITEPROTECT)`` will ignore none ptes
+(e.g. when pages are missing and not populated). For file-backed memories
+like shmem and hugetlbfs, none ptes will be write protected just like a
+present pte. In other words, there will be a userfaultfd write fault
+message generated when writing to a missing page on file typed memories,
+as long as the page range was write-protected before. Such a message will
+not be generated on anonymous memories by default.
+
+If the application wants to be able to write protect none ptes on anonymous
+memory, one can pre-populate the memory with e.g. MADV_POPULATE_READ. On
+newer kernels, one can also detect the feature UFFD_FEATURE_WP_UNPOPULATED
+and set the feature bit in advance to make sure none ptes will also be
+write protected even upon anonymous memory.
+
+When using ``UFFDIO_REGISTER_MODE_WP`` in combination with either
+``UFFDIO_REGISTER_MODE_MISSING`` or ``UFFDIO_REGISTER_MODE_MINOR``, when
+resolving missing / minor faults with ``UFFDIO_COPY`` or ``UFFDIO_CONTINUE``
+respectively, it may be desirable for the new page / mapping to be
+write-protected (so future writes will also result in a WP fault). These ioctls
+support a mode flag (``UFFDIO_COPY_MODE_WP`` or ``UFFDIO_CONTINUE_MODE_WP``
+respectively) to configure the mapping this way.
+
QEMU/KVM
========
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 6e5298b..1cf40f6 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -303,13 +303,18 @@
AMD Pstate Driver Operation Modes
=================================
-``amd_pstate`` CPPC has two operation modes: CPPC Autonomous(active) mode and
-CPPC non-autonomous(passive) mode.
-active mode and passive mode can be chosen by different kernel parameters.
-When in Autonomous mode, CPPC ignores requests done in the Desired Performance
-Target register and takes into account only the values set to the Minimum requested
-performance, Maximum requested performance, and Energy Performance Preference
-registers. When Autonomous is disabled, it only considers the Desired Performance Target.
+``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
+non-autonomous (passive) mode and guided autonomous (guided) mode.
+Active/passive/guided mode can be chosen by different kernel parameters.
+
+- In autonomous mode, platform ignores the desired performance level request
+ and takes into account only the values set to the minimum, maximum and energy
+ performance preference registers.
+- In non-autonomous mode, platform gets desired performance level
+ from OS directly through Desired Performance Register.
+- In guided-autonomous mode, platform sets operating performance level
+ autonomously according to the current workload and within the limits set by
+ OS through min and max performance registers.
Active Mode
------------
@@ -338,6 +343,15 @@
processor must provide at least nominal performance requested and go higher if current
operating conditions allow.
+Guided Mode
+-----------
+
+``amd_pstate=guided``
+
+If ``amd_pstate=guided`` is passed to kernel command line option then this mode
+is activated. In this mode, driver requests minimum and maximum performance
+level and the platform autonomously selects a performance level in this range
+and appropriate to the current workload.
User Space Interface in ``sysfs`` - General
===========================================
@@ -358,6 +372,9 @@
"passive"
The driver is functional and in the ``passive mode``
+ "guided"
+ The driver is functional and in the ``guided mode``
+
"disable"
The driver is unregistered and not functional now.
diff --git a/Documentation/admin-guide/quickly-build-trimmed-linux.rst b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
new file mode 100644
index 0000000..f08149b
--- /dev/null
+++ b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
@@ -0,0 +1,1097 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+===========================================
+How to quickly build a trimmed Linux kernel
+===========================================
+
+This guide explains how to swiftly build Linux kernels that are ideal for
+testing purposes, but perfectly fine for day-to-day use, too.
+
+The essence of the process (aka 'TL;DR')
+========================================
+
+*[If you are new to compiling Linux, ignore this TLDR and head over to the next
+section below: it contains a step-by-step guide, which is more detailed, but
+still brief and easy to follow; that guide and its accompanying reference
+section also mention alternatives, pitfalls, and additional aspects, all of
+which might be relevant for you.]*
+
+If your system uses techniques like Secure Boot, prepare it to permit starting
+self-compiled Linux kernels; install compilers and everything else needed for
+building Linux; make sure to have 12 Gigabyte free space in your home directory.
+Now run the following commands to download fresh Linux mainline sources, which
+you then use to configure, build and install your own kernel::
+
+ git clone --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git ~/linux/
+ cd ~/linux/
+ # Hint: if you want to apply patches, do it at this point. See below for details.
+ # Hint: it's recommended to tag your build at this point. See below for details.
+ yes "" | make localmodconfig
+ # Hint: at this point you might want to adjust the build configuration; you'll
+ # have to, if you are running Debian. See below for details.
+ make -j $(nproc --all)
+ # Note: on many commodity distributions the next command suffices, but on Arch
+ # Linux, its derivatives, and some others it does not. See below for details.
+ command -v installkernel && sudo make modules_install install
+ reboot
+
+If you later want to build a newer mainline snapshot, use these commands::
+
+ cd ~/linux/
+ git fetch --depth 1 origin
+ # Note: the next command will discard any changes you did to the code:
+ git checkout --force --detach origin/master
+ # Reminder: if you want to (re)apply patches, do it at this point.
+ # Reminder: you might want to add or modify a build tag at this point.
+ make olddefconfig
+ make -j $(nproc --all)
+ # Reminder: the next command on some distributions does not suffice.
+ command -v installkernel && sudo make modules_install install
+ reboot
+
+Step-by-step guide
+==================
+
+Compiling your own Linux kernel is easy in principle. There are various ways to
+do it. Which of them actually work and is the best depends on the circumstances.
+
+This guide describes a way perfectly suited for those who want to quickly
+install Linux from sources without being bothered by complicated details; the
+goal is to cover everything typically needed on mainstream Linux distributions
+running on commodity PC or server hardware.
+
+The described approach is great for testing purposes, for example to try a
+proposed fix or to check if a problem was already fixed in the latest codebase.
+Nonetheless, kernels built this way are also totally fine for day-to-day use
+while at the same time being easy to keep up to date.
+
+The following steps describe the important aspects of the process; a
+comprehensive reference section later explains each of them in more detail. It
+sometimes also describes alternative approaches, pitfalls, as well as errors
+that might occur at a particular point -- and how to then get things rolling
+again.
+
+..
+ Note: if you see this note, you are reading the text's source file. You
+ might want to switch to a rendered version, as it makes it a lot easier to
+ quickly look something up in the reference section and afterwards jump back
+ to where you left off. Find a the latest rendered version here:
+ https://docs.kernel.org/admin-guide/quickly-build-trimmed-linux.html
+
+.. _backup_sbs:
+
+ * Create a fresh backup and put system repair and restore tools at hand, just
+ to be prepared for the unlikely case of something going sideways.
+
+ [:ref:`details<backup>`]
+
+.. _secureboot_sbs:
+
+ * On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later. The
+ quickest and easiest way to achieve this on commodity x86 systems is to
+ disable such techniques in the BIOS setup utility; alternatively, remove
+ their restrictions through a process initiated by
+ ``mokutil --disable-validation``.
+
+ [:ref:`details<secureboot>`]
+
+.. _buildrequires_sbs:
+
+ * Install all software required to build a Linux kernel. Often you will need:
+ 'bc', 'binutils' ('ld' et al.), 'bison', 'flex', 'gcc', 'git', 'openssl',
+ 'pahole', 'perl', and the development headers for 'libelf' and 'openssl'. The
+ reference section shows how to quickly install those on various popular Linux
+ distributions.
+
+ [:ref:`details<buildrequires>`]
+
+.. _diskspace_sbs:
+
+ * Ensure to have enough free space for building and installing Linux. For the
+ latter 150 Megabyte in /lib/ and 100 in /boot/ are a safe bet. For storing
+ sources and build artifacts 12 Gigabyte in your home directory should
+ typically suffice. If you have less available, be sure to check the reference
+ section for the step that explains adjusting your kernels build
+ configuration: it mentions a trick that reduce the amount of required space
+ in /home/ to around 4 Gigabyte.
+
+ [:ref:`details<diskspace>`]
+
+.. _sources_sbs:
+
+ * Retrieve the sources of the Linux version you intend to build; then change
+ into the directory holding them, as all further commands in this guide are
+ meant to be executed from there.
+
+ *[Note: the following paragraphs describe how to retrieve the sources by
+ partially cloning the Linux stable git repository. This is called a shallow
+ clone. The reference section explains two alternatives:* :ref:`packaged
+ archives<sources_archive>` *and* :ref:`a full git clone<sources_full>` *;
+ prefer the latter, if downloading a lot of data does not bother you, as that
+ will avoid some* :ref:`peculiar characteristics of shallow clones the
+ reference section explains<sources_shallow>` *.]*
+
+ First, execute the following command to retrieve a fresh mainline codebase::
+
+ git clone --no-checkout --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git ~/linux/
+ cd ~/linux/
+
+ If you want to access recent mainline releases and pre-releases, deepen you
+ clone's history to the oldest mainline version you are interested in::
+
+ git fetch --shallow-exclude=v6.0 origin
+
+ In case you want to access a stable/longterm release (say v6.1.5), simply add
+ the branch holding that series; afterwards fetch the history at least up to
+ the mainline version that started the series (v6.1)::
+
+ git remote set-branches --add origin linux-6.1.y
+ git fetch --shallow-exclude=v6.0 origin
+
+ Now checkout the code you are interested in. If you just performed the
+ initial clone, you will be able to check out a fresh mainline codebase, which
+ is ideal for checking whether developers already fixed an issue::
+
+ git checkout --detach origin/master
+
+ If you deepened your clone, you instead of ``origin/master`` can specify the
+ version you deepened to (``v6.0`` above); later releases like ``v6.1`` and
+ pre-release like ``v6.2-rc1`` will work, too. Stable or longterm versions
+ like ``v6.1.5`` work just the same, if you added the appropriate
+ stable/longterm branch as described.
+
+ [:ref:`details<sources>`]
+
+.. _patching_sbs:
+
+ * In case you want to apply a kernel patch, do so now. Often a command like
+ this will do the trick::
+
+ patch -p1 < ../proposed-fix.patch
+
+ If the ``-p1`` is actually needed, depends on how the patch was created; in
+ case it does not apply thus try without it.
+
+ If you cloned the sources with git and anything goes sideways, run ``git
+ reset --hard`` to undo any changes to the sources.
+
+ [:ref:`details<patching>`]
+
+.. _tagging_sbs:
+
+ * If you patched your kernel or have one of the same version installed already,
+ better add a unique tag to the one you are about to build::
+
+ echo "-proposed_fix" > localversion
+
+ Running ``uname -r`` under your kernel later will then print something like
+ '6.1-rc4-proposed_fix'.
+
+ [:ref:`details<tagging>`]
+
+ .. _configuration_sbs:
+
+ * Create the build configuration for your kernel based on an existing
+ configuration.
+
+ If you already prepared such a '.config' file yourself, copy it to
+ ~/linux/ and run ``make olddefconfig``.
+
+ Use the same command, if your distribution or somebody else already tailored
+ your running kernel to your or your hardware's needs: the make target
+ 'olddefconfig' will then try to use that kernel's .config as base.
+
+ Using this make target is fine for everybody else, too -- but you often can
+ save a lot of time by using this command instead::
+
+ yes "" | make localmodconfig
+
+ This will try to pick your distribution's kernel as base, but then disable
+ modules for any features apparently superfluous for your setup. This will
+ reduce the compile time enormously, especially if you are running an
+ universal kernel from a commodity Linux distribution.
+
+ There is a catch: 'localmodconfig' is likely to disable kernel features you
+ did not use since you booted your Linux -- like drivers for currently
+ disconnected peripherals or a virtualization software not haven't used yet.
+ You can reduce or nearly eliminate that risk with tricks the reference
+ section outlines; but when building a kernel just for quick testing purposes
+ it is often negligible if such features are missing. But you should keep that
+ aspect in mind when using a kernel built with this make target, as it might
+ be the reason why something you only use occasionally stopped working.
+
+ [:ref:`details<configuration>`]
+
+.. _configmods_sbs:
+
+ * Check if you might want to or have to adjust some kernel configuration
+ options:
+
+ * Evaluate how you want to handle debug symbols. Enable them, if you later
+ might need to decode a stack trace found for example in a 'panic', 'Oops',
+ 'warning', or 'BUG'; on the other hand disable them, if you are short on
+ storage space or prefer a smaller kernel binary. See the reference section
+ for details on how to do either. If neither applies, it will likely be fine
+ to simply not bother with this. [:ref:`details<configmods_debugsymbols>`]
+
+ * Are you running Debian? Then to avoid known problems by performing
+ additional adjustments explained in the reference section.
+ [:ref:`details<configmods_distros>`].
+
+ * If you want to influence the other aspects of the configuration, do so now
+ by using make targets like 'menuconfig' or 'xconfig'.
+ [:ref:`details<configmods_individual>`].
+
+.. _build_sbs:
+
+ * Build the image and the modules of your kernel::
+
+ make -j $(nproc --all)
+
+ If you want your kernel packaged up as deb, rpm, or tar file, see the
+ reference section for alternatives.
+
+ [:ref:`details<build>`]
+
+.. _install_sbs:
+
+ * Now install your kernel::
+
+ command -v installkernel && sudo make modules_install install
+
+ Often all left for you to do afterwards is a ``reboot``, as many commodity
+ Linux distributions will then create an initramfs (also known as initrd) and
+ an entry for your kernel in your bootloader's configuration; but on some
+ distributions you have to take care of these two steps manually for reasons
+ the reference section explains.
+
+ On a few distributions like Arch Linux and its derivatives the above command
+ does nothing at all; in that case you have to manually install your kernel,
+ as outlined in the reference section.
+
+ If you are running a immutable Linux distribution, check its documentation
+ and the web to find out how to install your own kernel there.
+
+ [:ref:`details<install>`]
+
+.. _another_sbs:
+
+ * To later build another kernel you need similar steps, but sometimes slightly
+ different commands.
+
+ First, switch back into the sources tree::
+
+ cd ~/linux/
+
+ In case you want to build a version from a stable/longterm series you have
+ not used yet (say 6.2.y), tell git to track it::
+
+ git remote set-branches --add origin linux-6.2.y
+
+ Now fetch the latest upstream changes; you again need to specify the earliest
+ version you care about, as git otherwise might retrieve the entire commit
+ history::
+
+ git fetch --shallow-exclude=v6.0 origin
+
+ Now switch to the version you are interested in -- but be aware the command
+ used here will discard any modifications you performed, as they would
+ conflict with the sources you want to checkout::
+
+ git checkout --force --detach origin/master
+
+ At this point you might want to patch the sources again or set/modify a build
+ tag, as explained earlier. Afterwards adjust the build configuration to the
+ new codebase using olddefconfig, which will now adjust the configuration file
+ you prepared earlier using localmodconfig (~/linux/.config) for your next
+ kernel::
+
+ # reminder: if you want to apply patches, do it at this point
+ # reminder: you might want to update your build tag at this point
+ make olddefconfig
+
+ Now build your kernel::
+
+ make -j $(nproc --all)
+
+ Afterwards install the kernel as outlined above::
+
+ command -v installkernel && sudo make modules_install install
+
+ [:ref:`details<another>`]
+
+.. _uninstall_sbs:
+
+ * Your kernel is easy to remove later, as its parts are only stored in two
+ places and clearly identifiable by the kernel's release name. Just ensure to
+ not delete the kernel you are running, as that might render your system
+ unbootable.
+
+ Start by deleting the directory holding your kernel's modules, which is named
+ after its release name -- '6.0.1-foobar' in the following example::
+
+ sudo rm -rf /lib/modules/6.0.1-foobar
+
+ Now try the following command, which on some distributions will delete all
+ other kernel files installed while also removing the kernel's entry from the
+ bootloader configuration::
+
+ command -v kernel-install && sudo kernel-install -v remove 6.0.1-foobar
+
+ If that command does not output anything or fails, see the reference section;
+ do the same if any files named '*6.0.1-foobar*' remain in /boot/.
+
+ [:ref:`details<uninstall>`]
+
+.. _submit_improvements:
+
+Did you run into trouble following any of the above steps that is not cleared up
+by the reference section below? Or do you have ideas how to improve the text?
+Then please take a moment of your time and let the maintainer of this document
+know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while CCing the
+Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is vital to
+improve this document further, which is in everybody's interest, as it will
+enable more people to master the task described here.
+
+Reference section for the step-by-step guide
+============================================
+
+This section holds additional information for each of the steps in the above
+guide.
+
+.. _backup:
+
+Prepare for emergencies
+-----------------------
+
+ *Create a fresh backup and put system repair and restore tools at hand*
+ [:ref:`... <backup_sbs>`]
+
+Remember, you are dealing with computers, which sometimes do unexpected things
+-- especially if you fiddle with crucial parts like the kernel of an operating
+system. That's what you are about to do in this process. Hence, better prepare
+for something going sideways, even if that should not happen.
+
+[:ref:`back to step-by-step guide <backup_sbs>`]
+
+.. _secureboot:
+
+Dealing with techniques like Secure Boot
+----------------------------------------
+
+ *On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later.*
+ [:ref:`... <secureboot_sbs>`]
+
+Many modern systems allow only certain operating systems to start; they thus by
+default will reject booting self-compiled kernels.
+
+You ideally deal with this by making your platform trust your self-built kernels
+with the help of a certificate and signing. How to do that is not described
+here, as it requires various steps that would take the text too far away from
+its purpose; 'Documentation/admin-guide/module-signing.rst' and various web
+sides already explain this in more detail.
+
+Temporarily disabling solutions like Secure Boot is another way to make your own
+Linux boot. On commodity x86 systems it is possible to do this in the BIOS Setup
+utility; the steps to do so are not described here, as they greatly vary between
+machines.
+
+On mainstream x86 Linux distributions there is a third and universal option:
+disable all Secure Boot restrictions for your Linux environment. You can
+initiate this process by running ``mokutil --disable-validation``; this will
+tell you to create a one-time password, which is safe to write down. Now
+restart; right after your BIOS performed all self-tests the bootloader Shim will
+show a blue box with a message 'Press any key to perform MOK management'. Hit
+some key before the countdown exposes. This will open a menu and choose 'Change
+Secure Boot state' there. Shim's 'MokManager' will now ask you to enter three
+randomly chosen characters from the one-time password specified earlier. Once
+you provided them, confirm that you really want to disable the validation.
+Afterwards, permit MokManager to reboot the machine.
+
+[:ref:`back to step-by-step guide <secureboot_sbs>`]
+
+.. _buildrequires:
+
+Install build requirements
+--------------------------
+
+ *Install all software required to build a Linux kernel.*
+ [:ref:`...<buildrequires_sbs>`]
+
+The kernel is pretty stand-alone, but besides tools like the compiler you will
+sometimes need a few libraries to build one. How to install everything needed
+depends on your Linux distribution and the configuration of the kernel you are
+about to build.
+
+Here are a few examples what you typically need on some mainstream
+distributions:
+
+ * Debian, Ubuntu, and derivatives::
+
+ sudo apt install bc binutils bison dwarves flex gcc git make openssl \
+ pahole perl-base libssl-dev libelf-dev
+
+ * Fedora and derivatives::
+
+ sudo dnf install binutils /usr/include/{libelf.h,openssl/pkcs7.h} \
+ /usr/bin/{bc,bison,flex,gcc,git,openssl,make,perl,pahole}
+
+ * openSUSE and derivatives::
+
+ sudo zypper install bc binutils bison dwarves flex gcc git make perl-base \
+ openssl openssl-devel libelf-dev
+
+In case you wonder why these lists include openssl and its development headers:
+they are needed for the Secure Boot support, which many distributions enable in
+their kernel configuration for x86 machines.
+
+Sometimes you will need tools for compression formats like bzip2, gzip, lz4,
+lzma, lzo, xz, or zstd as well.
+
+You might need additional libraries and their development headers in case you
+perform tasks not covered in this guide. For example, zlib will be needed when
+building kernel tools from the tools/ directory; adjusting the build
+configuration with make targets like 'menuconfig' or 'xconfig' will require
+development headers for ncurses or Qt5.
+
+[:ref:`back to step-by-step guide <buildrequires_sbs>`]
+
+.. _diskspace:
+
+Space requirements
+------------------
+
+ *Ensure to have enough free space for building and installing Linux.*
+ [:ref:`... <diskspace_sbs>`]
+
+The numbers mentioned are rough estimates with a big extra charge to be on the
+safe side, so often you will need less.
+
+If you have space constraints, remember to read the reference section when you
+reach the :ref:`section about configuration adjustments' <configmods>`, as
+ensuring debug symbols are disabled will reduce the consumed disk space by quite
+a few gigabytes.
+
+[:ref:`back to step-by-step guide <diskspace_sbs>`]
+
+
+.. _sources:
+
+Download the sources
+--------------------
+
+ *Retrieve the sources of the Linux version you intend to build.*
+ [:ref:`...<sources_sbs>`]
+
+The step-by-step guide outlines how to retrieve Linux' sources using a shallow
+git clone. There is :ref:`more to tell about this method<sources_shallow>` and
+two alternate ways worth describing: :ref:`packaged archives<sources_archive>`
+and :ref:`a full git clone<sources_full>`. And the aspects ':ref:`wouldn't it
+be wiser to use a proper pre-release than the latest mainline code
+<sources_snapshot>`' and ':ref:`how to get an even fresher mainline codebase
+<sources_fresher>`' need elaboration, too.
+
+Note, to keep things simple the commands used in this guide store the build
+artifacts in the source tree. If you prefer to separate them, simply add
+something like ``O=~/linux-builddir/`` to all make calls; also adjust the path
+in all commands that add files or modify any generated (like your '.config').
+
+[:ref:`back to step-by-step guide <sources_sbs>`]
+
+.. _sources_shallow:
+
+Noteworthy characteristics of shallow clones
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The step-by-step guide uses a shallow clone, as it is the best solution for most
+of this document's target audience. There are a few aspects of this approach
+worth mentioning:
+
+ * This document in most places uses ``git fetch`` with ``--shallow-exclude=``
+ to specify the earliest version you care about (or to be precise: its git
+ tag). You alternatively can use the parameter ``--shallow-since=`` to specify
+ an absolute (say ``'2023-07-15'``) or relative (``'12 months'``) date to
+ define the depth of the history you want to download. As a second
+ alternative, you can also specify a certain depth explicitly with a parameter
+ like ``--depth=1``, unless you add branches for stable/longterm kernels.
+
+ * When running ``git fetch``, remember to always specify the oldest version,
+ the time you care about, or an explicit depth as shown in the step-by-step
+ guide. Otherwise you will risk downloading nearly the entire git history,
+ which will consume quite a bit of time and bandwidth while also stressing the
+ servers.
+
+ Note, you do not have to use the same version or date all the time. But when
+ you change it over time, git will deepen or flatten the history to the
+ specified point. That allows you to retrieve versions you initially thought
+ you did not need -- or it will discard the sources of older versions, for
+ example in case you want to free up some disk space. The latter will happen
+ automatically when using ``--shallow-since=`` or
+ ``--depth=``.
+
+ * Be warned, when deepening your clone you might encounter an error like
+ 'fatal: error in object: unshallow cafecaca0c0dacafecaca0c0dacafecaca0c0da'.
+ In that case run ``git repack -d`` and try again``
+
+ * In case you want to revert changes from a certain version (say Linux 6.3) or
+ perform a bisection (v6.2..v6.3), better tell ``git fetch`` to retrieve
+ objects up to three versions earlier (e.g. 6.0): ``git describe`` will then
+ be able to describe most commits just like it would in a full git clone.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_archive:
+
+Downloading the sources using a packages archive
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+People new to compiling Linux often assume downloading an archive via the
+front-page of https://kernel.org is the best approach to retrieve Linux'
+sources. It actually can be, if you are certain to build just one particular
+kernel version without changing any code. Thing is: you might be sure this will
+be the case, but in practice it often will turn out to be a wrong assumption.
+
+That's because when reporting or debugging an issue developers will often ask to
+give another version a try. They also might suggest temporarily undoing a commit
+with ``git revert`` or might provide various patches to try. Sometimes reporters
+will also be asked to use ``git bisect`` to find the change causing a problem.
+These things rely on git or are a lot easier and quicker to handle with it.
+
+A shallow clone also does not add any significant overhead. For example, when
+you use ``git clone --depth=1`` to create a shallow clone of the latest mainline
+codebase git will only retrieve a little more data than downloading the latest
+mainline pre-release (aka 'rc') via the front-page of kernel.org would.
+
+A shallow clone therefore is often the better choice. If you nevertheless want
+to use a packaged source archive, download one via kernel.org; afterwards
+extract its content to some directory and change to the subdirectory created
+during extraction. The rest of the step-by-step guide will work just fine, apart
+from things that rely on git -- but this mainly concerns the section on
+successive builds of other versions.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_full:
+
+Downloading the sources using a full git clone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If downloading and storing a lot of data (~4,4 Gigabyte as of early 2023) is
+nothing that bothers you, instead of a shallow clone perform a full git clone
+instead. You then will avoid the specialties mentioned above and will have all
+versions and individual commits at hand at any time::
+
+ curl -L \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/clone.bundle \
+ -o linux-stable.git.bundle
+ git clone linux-stable.git.bundle ~/linux/
+ rm linux-stable.git.bundle
+ cd ~/linux/
+ git remote set-url origin \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+ git fetch origin
+ git checkout --detach origin/master
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_snapshot:
+
+Proper pre-releases (RCs) vs. latest mainline
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When cloning the sources using git and checking out origin/master, you often
+will retrieve a codebase that is somewhere between the latest and the next
+release or pre-release. This almost always is the code you want when giving
+mainline a shot: pre-releases like v6.1-rc5 are in no way special, as they do
+not get any significant extra testing before being published.
+
+There is one exception: you might want to stick to the latest mainline release
+(say v6.1) before its successor's first pre-release (v6.2-rc1) is out. That is
+because compiler errors and other problems are more likely to occur during this
+time, as mainline then is in its 'merge window': a usually two week long phase,
+in which the bulk of the changes for the next release is merged.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _sources_fresher:
+
+Avoiding the mainline lag
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The explanations for both the shallow clone and the full clone both retrieve the
+code from the Linux stable git repository. That makes things simpler for this
+document's audience, as it allows easy access to both mainline and
+stable/longterm releases. This approach has just one downside:
+
+Changes merged into the mainline repository are only synced to the master branch
+of the Linux stable repository every few hours. This lag most of the time is
+not something to worry about; but in case you really need the latest code, just
+add the mainline repo as additional remote and checkout the code from there::
+
+ git remote add mainline \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ git fetch mainline
+ git checkout --detach mainline/master
+
+When doing this with a shallow clone, remember to call ``git fetch`` with one
+of the parameters described earlier to limit the depth.
+
+[:ref:`back to step-by-step guide <sources_sbs>`] [:ref:`back to section intro <sources>`]
+
+.. _patching:
+
+Patch the sources (optional)
+----------------------------
+
+ *In case you want to apply a kernel patch, do so now.*
+ [:ref:`...<patching_sbs>`]
+
+This is the point where you might want to patch your kernel -- for example when
+a developer proposed a fix and asked you to check if it helps. The step-by-step
+guide already explains everything crucial here.
+
+[:ref:`back to step-by-step guide <patching_sbs>`]
+
+.. _tagging:
+
+Tagging this kernel build (optional, often wise)
+------------------------------------------------
+
+ *If you patched your kernel or already have that kernel version installed,
+ better tag your kernel by extending its release name:*
+ [:ref:`...<tagging_sbs>`]
+
+Tagging your kernel will help avoid confusion later, especially when you patched
+your kernel. Adding an individual tag will also ensure the kernel's image and
+its modules are installed in parallel to any existing kernels.
+
+There are various ways to add such a tag. The step-by-step guide realizes one by
+creating a 'localversion' file in your build directory from which the kernel
+build scripts will automatically pick up the tag. You can later change that file
+to use a different tag in subsequent builds or simply remove that file to dump
+the tag.
+
+[:ref:`back to step-by-step guide <tagging_sbs>`]
+
+.. _configuration:
+
+Define the build configuration for your kernel
+----------------------------------------------
+
+ *Create the build configuration for your kernel based on an existing
+ configuration.* [:ref:`... <configuration_sbs>`]
+
+There are various aspects for this steps that require a more careful
+explanation:
+
+Pitfalls when using another configuration file as base
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Make targets like localmodconfig and olddefconfig share a few common snares you
+want to be aware of:
+
+ * These targets will reuse a kernel build configuration in your build directory
+ (e.g. '~/linux/.config'), if one exists. In case you want to start from
+ scratch you thus need to delete it.
+
+ * The make targets try to find the configuration for your running kernel
+ automatically, but might choose poorly. A line like '# using defaults found
+ in /boot/config-6.0.7-250.fc36.x86_64' or 'using config:
+ '/boot/config-6.0.7-250.fc36.x86_64' tells you which file they picked. If
+ that is not the intended one, simply store it as '~/linux/.config'
+ before using these make targets.
+
+ * Unexpected things might happen if you try to use a config file prepared for
+ one kernel (say v6.0) on an older generation (say v5.15). In that case you
+ might want to use a configuration as base which your distribution utilized
+ when they used that or an slightly older kernel version.
+
+Influencing the configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The make target olddefconfig and the ``yes "" |`` used when utilizing
+localmodconfig will set any undefined build options to their default value. This
+among others will disable many kernel features that were introduced after your
+base kernel was released.
+
+If you want to set these configurations options manually, use ``oldconfig``
+instead of ``olddefconfig`` or omit the ``yes "" |`` when utilizing
+localmodconfig. Then for each undefined configuration option you will be asked
+how to proceed. In case you are unsure what to answer, simply hit 'enter' to
+apply the default value.
+
+Big pitfall when using localmodconfig
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As explained briefly in the step-by-step guide already: with localmodconfig it
+can easily happen that your self-built kernel will lack modules for tasks you
+did not perform before utilizing this make target. That's because those tasks
+require kernel modules that are normally autoloaded when you perform that task
+for the first time; if you didn't perform that task at least once before using
+localmodonfig, the latter will thus assume these modules are superfluous and
+disable them.
+
+You can try to avoid this by performing typical tasks that often will autoload
+additional kernel modules: start a VM, establish VPN connections, loop-mount a
+CD/DVD ISO, mount network shares (CIFS, NFS, ...), and connect all external
+devices (2FA keys, headsets, webcams, ...) as well as storage devices with file
+systems you otherwise do not utilize (btrfs, ext4, FAT, NTFS, XFS, ...). But it
+is hard to think of everything that might be needed -- even kernel developers
+often forget one thing or another at this point.
+
+Do not let that risk bother you, especially when compiling a kernel only for
+testing purposes: everything typically crucial will be there. And if you forget
+something important you can turn on a missing feature later and quickly run the
+commands to compile and install a better kernel.
+
+But if you plan to build and use self-built kernels regularly, you might want to
+reduce the risk by recording which modules your system loads over the course of
+a few weeks. You can automate this with `modprobed-db
+<https://github.com/graysky2/modprobed-db>`_. Afterwards use ``LSMOD=<path>`` to
+point localmodconfig to the list of modules modprobed-db noticed being used::
+
+ yes "" | make LSMOD="${HOME}"/.config/modprobed.db localmodconfig
+
+Remote building with localmodconfig
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you want to use localmodconfig to build a kernel for another machine, run
+``lsmod > lsmod_foo-machine`` on it and transfer that file to your build host.
+Now point the build scripts to the file like this: ``yes "" | make
+LSMOD=~/lsmod_foo-machine localmodconfig``. Note, in this case
+you likely want to copy a base kernel configuration from the other machine over
+as well and place it as .config in your build directory.
+
+[:ref:`back to step-by-step guide <configuration_sbs>`]
+
+.. _configmods:
+
+Adjust build configuration
+--------------------------
+
+ *Check if you might want to or have to adjust some kernel configuration
+ options:*
+
+Depending on your needs you at this point might want or have to adjust some
+kernel configuration options.
+
+.. _configmods_debugsymbols:
+
+Debug symbols
+~~~~~~~~~~~~~
+
+ *Evaluate how you want to handle debug symbols.*
+ [:ref:`...<configmods_sbs>`]
+
+Most users do not need to care about this, it's often fine to leave everything
+as it is; but you should take a closer look at this, if you might need to decode
+a stack trace or want to reduce space consumption.
+
+Having debug symbols available can be important when your kernel throws a
+'panic', 'Oops', 'warning', or 'BUG' later when running, as then you will be
+able to find the exact place where the problem occurred in the code. But
+collecting and embedding the needed debug information takes time and consumes
+quite a bit of space: in late 2022 the build artifacts for a typical x86 kernel
+configured with localmodconfig consumed around 5 Gigabyte of space with debug
+symbols, but less than 1 when they were disabled. The resulting kernel image and
+the modules are bigger as well, which increases load times.
+
+Hence, if you want a small kernel and are unlikely to decode a stack trace
+later, you might want to disable debug symbols to avoid above downsides::
+
+ ./scripts/config --file .config -d DEBUG_INFO \
+ -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -d DEBUG_INFO_DWARF4 \
+ -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
+ make olddefconfig
+
+You on the other hand definitely want to enable them, if there is a decent
+chance that you need to decode a stack trace later (as explained by 'Decode
+failure messages' in Documentation/admin-guide/tainted-kernels.rst in more
+detail)::
+
+ ./scripts/config --file .config -d DEBUG_INFO_NONE -e DEBUG_KERNEL
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS -e KALLSYMS_ALL
+ make olddefconfig
+
+Note, many mainstream distributions enable debug symbols in their kernel
+configurations -- make targets like localmodconfig and olddefconfig thus will
+often pick that setting up.
+
+[:ref:`back to step-by-step guide <configmods_sbs>`]
+
+.. _configmods_distros:
+
+Distro specific adjustments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Are you running* [:ref:`... <configmods_sbs>`]
+
+The following sections help you to avoid build problems that are known to occur
+when following this guide on a few commodity distributions.
+
+**Debian:**
+
+ * Remove a stale reference to a certificate file that would cause your build to
+ fail::
+
+ ./scripts/config --file .config --set-str SYSTEM_TRUSTED_KEYS ''
+
+ Alternatively, download the needed certificate and make that configuration
+ option point to it, as `the Debian handbook explains in more detail
+ <https://debian-handbook.info/browse/stable/sect.kernel-compilation.html>`_
+ -- or generate your own, as explained in
+ Documentation/admin-guide/module-signing.rst.
+
+[:ref:`back to step-by-step guide <configmods_sbs>`]
+
+.. _configmods_individual:
+
+Individual adjustments
+~~~~~~~~~~~~~~~~~~~~~~
+
+ *If you want to influence the other aspects of the configuration, do so
+ now* [:ref:`... <configmods_sbs>`]
+
+You at this point can use a command like ``make menuconfig`` to enable or
+disable certain features using a text-based user interface; to use a graphical
+configuration utilize, use the make target ``xconfig`` or ``gconfig`` instead.
+All of them require development libraries from toolkits they are based on
+(ncurses, Qt5, Gtk2); an error message will tell you if something required is
+missing.
+
+[:ref:`back to step-by-step guide <configmods_sbs>`]
+
+.. _build:
+
+Build your kernel
+-----------------
+
+ *Build the image and the modules of your kernel* [:ref:`... <build_sbs>`]
+
+A lot can go wrong at this stage, but the instructions below will help you help
+yourself. Another subsection explains how to directly package your kernel up as
+deb, rpm or tar file.
+
+Dealing with build errors
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a build error occurs, it might be caused by some aspect of your machine's
+setup that often can be fixed quickly; other times though the problem lies in
+the code and can only be fixed by a developer. A close examination of the
+failure messages coupled with some research on the internet will often tell you
+which of the two it is. To perform such a investigation, restart the build
+process like this::
+
+ make V=1
+
+The ``V=1`` activates verbose output, which might be needed to see the actual
+error. To make it easier to spot, this command also omits the ``-j $(nproc
+--all)`` used earlier to utilize every CPU core in the system for the job -- but
+this parallelism also results in some clutter when failures occur.
+
+After a few seconds the build process should run into the error again. Now try
+to find the most crucial line describing the problem. Then search the internet
+for the most important and non-generic section of that line (say 4 to 8 words);
+avoid or remove anything that looks remotely system-specific, like your username
+or local path names like ``/home/username/linux/``. First try your regular
+internet search engine with that string, afterwards search Linux kernel mailing
+lists via `lore.kernel.org/all/ <https://lore.kernel.org/all/>`_.
+
+This most of the time will find something that will explain what is wrong; quite
+often one of the hits will provide a solution for your problem, too. If you
+do not find anything that matches your problem, try again from a different angle
+by modifying your search terms or using another line from the error messages.
+
+In the end, most trouble you are to run into has likely been encountered and
+reported by others already. That includes issues where the cause is not your
+system, but lies the code. If you run into one of those, you might thus find a
+solution (e.g. a patch) or workaround for your problem, too.
+
+Package your kernel up
+~~~~~~~~~~~~~~~~~~~~~~
+
+The step-by-step guide uses the default make targets (e.g. 'bzImage' and
+'modules' on x86) to build the image and the modules of your kernel, which later
+steps of the guide then install. You instead can also directly build everything
+and directly package it up by using one of the following targets:
+
+ * ``make -j $(nproc --all) bindeb-pkg`` to generate a deb package
+
+ * ``make -j $(nproc --all) binrpm-pkg`` to generate a rpm package
+
+ * ``make -j $(nproc --all) tarbz2-pkg`` to generate a bz2 compressed tarball
+
+This is just a selection of available make targets for this purpose, see
+``make help`` for others. You can also use these targets after running
+``make -j $(nproc --all)``, as they will pick up everything already built.
+
+If you employ the targets to generate deb or rpm packages, ignore the
+step-by-step guide's instructions on installing and removing your kernel;
+instead install and remove the packages using the package utility for the format
+(e.g. dpkg and rpm) or a package management utility build on top of them (apt,
+aptitude, dnf/yum, zypper, ...). Be aware that the packages generated using
+these two make targets are designed to work on various distributions utilizing
+those formats, they thus will sometimes behave differently than your
+distribution's kernel packages.
+
+[:ref:`back to step-by-step guide <build_sbs>`]
+
+.. _install:
+
+Install your kernel
+-------------------
+
+ *Now install your kernel* [:ref:`... <install_sbs>`]
+
+What you need to do after executing the command in the step-by-step guide
+depends on the existence and the implementation of an ``installkernel``
+executable. Many commodity Linux distributions ship such a kernel installer in
+``/sbin/`` that does everything needed, hence there is nothing left for you
+except rebooting. But some distributions contain an installkernel that does
+only part of the job -- and a few lack it completely and leave all the work to
+you.
+
+If ``installkernel`` is found, the kernel's build system will delegate the
+actual installation of your kernel's image and related files to this executable.
+On almost all Linux distributions it will store the image as '/boot/vmlinuz-
+<your kernel's release name>' and put a 'System.map-<your kernel's release
+name>' alongside it. Your kernel will thus be installed in parallel to any
+existing ones, unless you already have one with exactly the same release name.
+
+Installkernel on many distributions will afterwards generate an 'initramfs'
+(often also called 'initrd'), which commodity distributions rely on for booting;
+hence be sure to keep the order of the two make targets used in the step-by-step
+guide, as things will go sideways if you install your kernel's image before its
+modules. Often installkernel will then add your kernel to the bootloader
+configuration, too. You have to take care of one or both of these tasks
+yourself, if your distributions installkernel doesn't handle them.
+
+A few distributions like Arch Linux and its derivatives totally lack an
+installkernel executable. On those just install the modules using the kernel's
+build system and then install the image and the System.map file manually::
+
+ sudo make modules_install
+ sudo install -m 0600 $(make -s image_name) /boot/vmlinuz-$(make -s kernelrelease)
+ sudo install -m 0600 System.map /boot/System.map-$(make -s kernelrelease)
+
+If your distribution boots with the help of an initramfs, now generate one for
+your kernel using the tools your distribution provides for this process.
+Afterwards add your kernel to your bootloader configuration and reboot.
+
+[:ref:`back to step-by-step guide <install_sbs>`]
+
+.. _another:
+
+Another round later
+-------------------
+
+ *To later build another kernel you need similar, but sometimes slightly
+ different commands* [:ref:`... <another_sbs>`]
+
+The process to build later kernels is similar, but at some points slightly
+different. You for example do not want to use 'localmodconfig' for succeeding
+kernel builds, as you already created a trimmed down configuration you want to
+use from now on. Hence instead just use ``oldconfig`` or ``olddefconfig`` to
+adjust your build configurations to the needs of the kernel version you are
+about to build.
+
+If you created a shallow-clone with git, remember what the :ref:`section that
+explained the setup described in more detail <sources>`: you need to use a
+slightly different ``git fetch`` command and when switching to another series
+need to add an additional remote branch.
+
+[:ref:`back to step-by-step guide <another_sbs>`]
+
+.. _uninstall:
+
+Uninstall the kernel later
+--------------------------
+
+ *All parts of your installed kernel are identifiable by its release name and
+ thus easy to remove later.* [:ref:`... <uninstall_sbs>`]
+
+Do not worry installing your kernel manually and thus bypassing your
+distribution's packaging system will totally mess up your machine: all parts of
+your kernel are easy to remove later, as files are stored in two places only and
+normally identifiable by the kernel's release name.
+
+One of the two places is a directory in /lib/modules/, which holds the modules
+for each installed kernel. This directory is named after the kernel's release
+name; hence, to remove all modules for one of your kernels, simply remove its
+modules directory in /lib/modules/.
+
+The other place is /boot/, where typically one to five files will be placed
+during installation of a kernel. All of them usually contain the release name in
+their file name, but how many files and their name depends somewhat on your
+distribution's installkernel executable (:ref:`see above <install>`) and its
+initramfs generator. On some distributions the ``kernel-install`` command
+mentioned in the step-by-step guide will remove all of these files for you --
+and the entry for your kernel in the bootloader configuration at the same time,
+too. On others you have to take care of these steps yourself. The following
+command should interactively remove the two main files of a kernel with the
+release name '6.0.1-foobar'::
+
+ rm -i /boot/{System.map,vmlinuz}-6.0.1-foobar
+
+Now remove the belonging initramfs, which often will be called something like
+``/boot/initramfs-6.0.1-foobar.img`` or ``/boot/initrd.img-6.0.1-foobar``.
+Afterwards check for other files in /boot/ that have '6.0.1-foobar' in their
+name and delete them as well. Now remove the kernel from your bootloader's
+configuration.
+
+Note, be very careful with wildcards like '*' when deleting files or directories
+for kernels manually: you might accidentally remove files of a 6.0.11 kernel
+when all you want is to remove 6.0 or 6.0.1.
+
+[:ref:`back to step-by-step guide <uninstall_sbs>`]
+
+.. _faq:
+
+FAQ
+===
+
+Why does this 'how-to' not work on my system?
+---------------------------------------------
+
+As initially stated, this guide is 'designed to cover everything typically
+needed [to build a kernel] on mainstream Linux distributions running on
+commodity PC or server hardware'. The outlined approach despite this should work
+on many other setups as well. But trying to cover every possible use-case in one
+guide would defeat its purpose, as without such a focus you would need dozens or
+hundreds of constructs along the lines of 'in case you are having <insert
+machine or distro>, you at this point have to do <this and that>
+<instead|additionally>'. Each of which would make the text longer, more
+complicated, and harder to follow.
+
+That being said: this of course is a balancing act. Hence, if you think an
+additional use-case is worth describing, suggest it to the maintainers of this
+document, as :ref:`described above <submit_improvements>`.
+
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/quickly-build-trimmed-linux.rst
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
+
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst
index 7b481b2..8e03751 100644
--- a/Documentation/admin-guide/ras.rst
+++ b/Documentation/admin-guide/ras.rst
@@ -199,7 +199,7 @@
mode).
.. [#f3] For more details about the Machine Check Architecture (MCA),
- please read Documentation/x86/x86_64/machinecheck.rst at the Kernel tree.
+ please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
EDAC - Error Detection And Correction
*************************************
diff --git a/Documentation/admin-guide/serial-console.rst b/Documentation/admin-guide/serial-console.rst
index 58b3283..8c8b94e 100644
--- a/Documentation/admin-guide/serial-console.rst
+++ b/Documentation/admin-guide/serial-console.rst
@@ -33,8 +33,11 @@
9600n8. The maximum baudrate is 115200.
You can specify multiple console= options on the kernel command line.
-Output will appear on all of them. The last device will be used when
-you open ``/dev/console``. So, for example::
+
+The behavior is well defined when each device type is mentioned only once.
+In this case, the output will appear on all requested consoles. And
+the last device will be used when you open ``/dev/console``.
+So, for example::
console=ttyS1,9600 console=tty0
@@ -42,7 +45,34 @@
virtual console, and kernel messages will appear on both the VGA
console and the 2nd serial port (ttyS1 or COM2) at 9600 baud.
-Note that you can only define one console per device type (serial, video).
+The behavior is more complicated when the same device type is defined more
+times. In this case, there are the following two rules:
+
+1. The output will appear only on the first device of each defined type.
+
+2. ``/dev/console`` will be associated with the first registered device.
+ Where the registration order depends on how kernel initializes various
+ subsystems.
+
+ This rule is used also when the last console= parameter is not used
+ for other reasons. For example, because of a typo or because
+ the hardware is not available.
+
+The result might be surprising. For example, the following two command
+lines have the same result:
+
+ console=ttyS1,9600 console=tty0 console=tty1
+ console=tty0 console=ttyS1,9600 console=tty1
+
+The kernel messages are printed only on ``tty0`` and ``ttyS1``. And
+``/dev/console`` gets associated with ``tty0``. It is because kernel
+tries to register graphical consoles before serial ones. It does it
+because of the default behavior when no console device is specified,
+see below.
+
+Note that the last ``console=tty1`` parameter still makes a difference.
+The kernel command line is used also by systemd. It would use the last
+defined ``tty1`` as the login console.
If no console device is specified, the first device found capable of
acting as a system console will be used. At this time, the system
diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst
index 6031495..e3cfffe 100644
--- a/Documentation/admin-guide/syscall-user-dispatch.rst
+++ b/Documentation/admin-guide/syscall-user-dispatch.rst
@@ -73,6 +73,10 @@
can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
Any other value should terminate the program with a SIGSYS.
+Additionally, a tasks syscall user dispatch configuration can be peeked
+and poked via the PTRACE_(GET|SET)_SYSCALL_USER_DISPATCH_CONFIG ptrace
+requests. This is useful for checkpoint/restart software.
+
Security Notes
--------------
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 4b7bfea..d85d90f 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -95,7 +95,7 @@
the value 340 = 0x154.
See the ``type_of_loader`` and ``ext_loader_type`` fields in
-Documentation/x86/boot.rst for additional information.
+Documentation/arch/x86/boot.rst for additional information.
bootloader_version (x86 only)
@@ -105,7 +105,7 @@
file will contain the value 564 = 0x234.
See the ``type_of_loader`` and ``ext_loader_ver`` fields in
-Documentation/x86/boot.rst for additional information.
+Documentation/arch/x86/boot.rst for additional information.
bpf_stats_enabled
diff --git a/Documentation/admin-guide/unicode.rst b/Documentation/admin-guide/unicode.rst
index 290fe83..cba7e50 100644
--- a/Documentation/admin-guide/unicode.rst
+++ b/Documentation/admin-guide/unicode.rst
@@ -3,11 +3,10 @@
Last update: 2005-01-17, version 1.4
-This file is maintained by H. Peter Anvin <unicode@lanana.org> as part
-of the Linux Assigned Names And Numbers Authority (LANANA) project.
-The current version can be found at:
-
- http://www.lanana.org/docs/unicode/admin-guide/unicode.rst
+Note: The original version of this document, which was maintained at
+lanana.org as part of the Linux Assigned Names And Numbers Authority
+(LANANA) project, is no longer existent. So, this version in the
+mainline Linux kernel is now the maintained main document.
Introduction
------------
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index e256141..3a9c041 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -236,13 +236,14 @@
Deprecated Mount Options
========================
-=========================== ================
+============================ ================
Name Removal Schedule
-=========================== ================
+============================ ================
Mounting with V4 filesystem September 2030
+Mounting ascii-ci filesystem September 2030
ikeep/noikeep September 2025
attr2/noattr2 September 2025
-=========================== ================
+============================ ================
Removed Mount Options
diff --git a/Documentation/arch.rst b/Documentation/arch.rst
deleted file mode 100644
index 41a66a8..0000000
--- a/Documentation/arch.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-CPU Architectures
-=================
-
-These books provide programming details about architecture-specific
-implementation.
-
-.. toctree::
- :maxdepth: 2
-
- arc/index
- arm/index
- arm64/index
- ia64/index
- loongarch/index
- m68k/index
- mips/index
- nios2/index
- openrisc/index
- parisc/index
- powerpc/index
- riscv/index
- s390/index
- sh/index
- sparc/index
- x86/index
- xtensa/index
diff --git a/Documentation/arc/arc.rst b/Documentation/arch/arc/arc.rst
similarity index 100%
rename from Documentation/arc/arc.rst
rename to Documentation/arch/arc/arc.rst
diff --git a/Documentation/arc/features.rst b/Documentation/arch/arc/features.rst
similarity index 100%
rename from Documentation/arc/features.rst
rename to Documentation/arch/arc/features.rst
diff --git a/Documentation/arc/index.rst b/Documentation/arch/arc/index.rst
similarity index 100%
rename from Documentation/arc/index.rst
rename to Documentation/arch/arc/index.rst
diff --git a/Documentation/ia64/aliasing.rst b/Documentation/arch/ia64/aliasing.rst
similarity index 100%
rename from Documentation/ia64/aliasing.rst
rename to Documentation/arch/ia64/aliasing.rst
diff --git a/Documentation/ia64/efirtc.rst b/Documentation/arch/ia64/efirtc.rst
similarity index 100%
rename from Documentation/ia64/efirtc.rst
rename to Documentation/arch/ia64/efirtc.rst
diff --git a/Documentation/ia64/err_inject.rst b/Documentation/arch/ia64/err_inject.rst
similarity index 100%
rename from Documentation/ia64/err_inject.rst
rename to Documentation/arch/ia64/err_inject.rst
diff --git a/Documentation/ia64/features.rst b/Documentation/arch/ia64/features.rst
similarity index 100%
rename from Documentation/ia64/features.rst
rename to Documentation/arch/ia64/features.rst
diff --git a/Documentation/ia64/fsys.rst b/Documentation/arch/ia64/fsys.rst
similarity index 100%
rename from Documentation/ia64/fsys.rst
rename to Documentation/arch/ia64/fsys.rst
diff --git a/Documentation/ia64/ia64.rst b/Documentation/arch/ia64/ia64.rst
similarity index 100%
rename from Documentation/ia64/ia64.rst
rename to Documentation/arch/ia64/ia64.rst
diff --git a/Documentation/ia64/index.rst b/Documentation/arch/ia64/index.rst
similarity index 100%
rename from Documentation/ia64/index.rst
rename to Documentation/arch/ia64/index.rst
diff --git a/Documentation/ia64/irq-redir.rst b/Documentation/arch/ia64/irq-redir.rst
similarity index 100%
rename from Documentation/ia64/irq-redir.rst
rename to Documentation/arch/ia64/irq-redir.rst
diff --git a/Documentation/ia64/mca.rst b/Documentation/arch/ia64/mca.rst
similarity index 100%
rename from Documentation/ia64/mca.rst
rename to Documentation/arch/ia64/mca.rst
diff --git a/Documentation/ia64/serial.rst b/Documentation/arch/ia64/serial.rst
similarity index 100%
rename from Documentation/ia64/serial.rst
rename to Documentation/arch/ia64/serial.rst
diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst
new file mode 100644
index 0000000..80ee310
--- /dev/null
+++ b/Documentation/arch/index.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+CPU Architectures
+=================
+
+These books provide programming details about architecture-specific
+implementation.
+
+.. toctree::
+ :maxdepth: 2
+
+ arc/index
+ ../arm/index
+ ../arm64/index
+ ia64/index
+ ../loongarch/index
+ m68k/index
+ ../mips/index
+ nios2/index
+ openrisc/index
+ parisc/index
+ ../powerpc/index
+ ../riscv/index
+ ../s390/index
+ sh/index
+ sparc/index
+ x86/index
+ xtensa/index
diff --git a/Documentation/m68k/buddha-driver.rst b/Documentation/arch/m68k/buddha-driver.rst
similarity index 100%
rename from Documentation/m68k/buddha-driver.rst
rename to Documentation/arch/m68k/buddha-driver.rst
diff --git a/Documentation/m68k/features.rst b/Documentation/arch/m68k/features.rst
similarity index 100%
rename from Documentation/m68k/features.rst
rename to Documentation/arch/m68k/features.rst
diff --git a/Documentation/m68k/index.rst b/Documentation/arch/m68k/index.rst
similarity index 100%
rename from Documentation/m68k/index.rst
rename to Documentation/arch/m68k/index.rst
diff --git a/Documentation/m68k/kernel-options.rst b/Documentation/arch/m68k/kernel-options.rst
similarity index 100%
rename from Documentation/m68k/kernel-options.rst
rename to Documentation/arch/m68k/kernel-options.rst
diff --git a/Documentation/nios2/features.rst b/Documentation/arch/nios2/features.rst
similarity index 100%
rename from Documentation/nios2/features.rst
rename to Documentation/arch/nios2/features.rst
diff --git a/Documentation/nios2/index.rst b/Documentation/arch/nios2/index.rst
similarity index 100%
rename from Documentation/nios2/index.rst
rename to Documentation/arch/nios2/index.rst
diff --git a/Documentation/nios2/nios2.rst b/Documentation/arch/nios2/nios2.rst
similarity index 100%
rename from Documentation/nios2/nios2.rst
rename to Documentation/arch/nios2/nios2.rst
diff --git a/Documentation/openrisc/features.rst b/Documentation/arch/openrisc/features.rst
similarity index 100%
rename from Documentation/openrisc/features.rst
rename to Documentation/arch/openrisc/features.rst
diff --git a/Documentation/openrisc/index.rst b/Documentation/arch/openrisc/index.rst
similarity index 100%
rename from Documentation/openrisc/index.rst
rename to Documentation/arch/openrisc/index.rst
diff --git a/Documentation/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst
similarity index 100%
rename from Documentation/openrisc/openrisc_port.rst
rename to Documentation/arch/openrisc/openrisc_port.rst
diff --git a/Documentation/openrisc/todo.rst b/Documentation/arch/openrisc/todo.rst
similarity index 100%
rename from Documentation/openrisc/todo.rst
rename to Documentation/arch/openrisc/todo.rst
diff --git a/Documentation/parisc/debugging.rst b/Documentation/arch/parisc/debugging.rst
similarity index 100%
rename from Documentation/parisc/debugging.rst
rename to Documentation/arch/parisc/debugging.rst
diff --git a/Documentation/parisc/features.rst b/Documentation/arch/parisc/features.rst
similarity index 100%
rename from Documentation/parisc/features.rst
rename to Documentation/arch/parisc/features.rst
diff --git a/Documentation/parisc/index.rst b/Documentation/arch/parisc/index.rst
similarity index 100%
rename from Documentation/parisc/index.rst
rename to Documentation/arch/parisc/index.rst
diff --git a/Documentation/parisc/registers.rst b/Documentation/arch/parisc/registers.rst
similarity index 100%
rename from Documentation/parisc/registers.rst
rename to Documentation/arch/parisc/registers.rst
diff --git a/Documentation/sh/booting.rst b/Documentation/arch/sh/booting.rst
similarity index 100%
rename from Documentation/sh/booting.rst
rename to Documentation/arch/sh/booting.rst
diff --git a/Documentation/sh/features.rst b/Documentation/arch/sh/features.rst
similarity index 100%
rename from Documentation/sh/features.rst
rename to Documentation/arch/sh/features.rst
diff --git a/Documentation/sh/index.rst b/Documentation/arch/sh/index.rst
similarity index 100%
rename from Documentation/sh/index.rst
rename to Documentation/arch/sh/index.rst
diff --git a/Documentation/sh/new-machine.rst b/Documentation/arch/sh/new-machine.rst
similarity index 100%
rename from Documentation/sh/new-machine.rst
rename to Documentation/arch/sh/new-machine.rst
diff --git a/Documentation/sh/register-banks.rst b/Documentation/arch/sh/register-banks.rst
similarity index 100%
rename from Documentation/sh/register-banks.rst
rename to Documentation/arch/sh/register-banks.rst
diff --git a/Documentation/sparc/adi.rst b/Documentation/arch/sparc/adi.rst
similarity index 100%
rename from Documentation/sparc/adi.rst
rename to Documentation/arch/sparc/adi.rst
diff --git a/Documentation/sparc/console.rst b/Documentation/arch/sparc/console.rst
similarity index 100%
rename from Documentation/sparc/console.rst
rename to Documentation/arch/sparc/console.rst
diff --git a/Documentation/sparc/features.rst b/Documentation/arch/sparc/features.rst
similarity index 100%
rename from Documentation/sparc/features.rst
rename to Documentation/arch/sparc/features.rst
diff --git a/Documentation/sparc/index.rst b/Documentation/arch/sparc/index.rst
similarity index 100%
rename from Documentation/sparc/index.rst
rename to Documentation/arch/sparc/index.rst
diff --git a/Documentation/sparc/oradax/dax-hv-api.txt b/Documentation/arch/sparc/oradax/dax-hv-api.txt
similarity index 100%
rename from Documentation/sparc/oradax/dax-hv-api.txt
rename to Documentation/arch/sparc/oradax/dax-hv-api.txt
diff --git a/Documentation/sparc/oradax/oracle-dax.rst b/Documentation/arch/sparc/oradax/oracle-dax.rst
similarity index 100%
rename from Documentation/sparc/oradax/oracle-dax.rst
rename to Documentation/arch/sparc/oradax/oracle-dax.rst
diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst
similarity index 100%
rename from Documentation/x86/amd-memory-encryption.rst
rename to Documentation/arch/x86/amd-memory-encryption.rst
diff --git a/Documentation/x86/amd_hsmp.rst b/Documentation/arch/x86/amd_hsmp.rst
similarity index 100%
rename from Documentation/x86/amd_hsmp.rst
rename to Documentation/arch/x86/amd_hsmp.rst
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
new file mode 100644
index 0000000..33520ec
--- /dev/null
+++ b/Documentation/arch/x86/boot.rst
@@ -0,0 +1,1443 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+The Linux/x86 Boot Protocol
+===========================
+
+On the x86 platform, the Linux kernel uses a rather complicated boot
+convention. This has evolved partially due to historical aspects, as
+well as the desire in the early days to have the kernel itself be a
+bootable image, the complicated PC memory model and due to changed
+expectations in the PC industry caused by the effective demise of
+real-mode DOS as a mainstream operating system.
+
+Currently, the following versions of the Linux/x86 boot protocol exist.
+
+============= ============================================================
+Old kernels zImage/Image support only. Some very early kernels
+ may not even support a command line.
+
+Protocol 2.00 (Kernel 1.3.73) Added bzImage and initrd support, as
+ well as a formalized way to communicate between the
+ boot loader and the kernel. setup.S made relocatable,
+ although the traditional setup area still assumed
+ writable.
+
+Protocol 2.01 (Kernel 1.3.76) Added a heap overrun warning.
+
+Protocol 2.02 (Kernel 2.4.0-test3-pre3) New command line protocol.
+ Lower the conventional memory ceiling. No overwrite
+ of the traditional setup area, thus making booting
+ safe for systems which use the EBDA from SMM or 32-bit
+ BIOS entry points. zImage deprecated but still
+ supported.
+
+Protocol 2.03 (Kernel 2.4.18-pre1) Explicitly makes the highest possible
+ initrd address available to the bootloader.
+
+Protocol 2.04 (Kernel 2.6.14) Extend the syssize field to four bytes.
+
+Protocol 2.05 (Kernel 2.6.20) Make protected mode kernel relocatable.
+ Introduce relocatable_kernel and kernel_alignment fields.
+
+Protocol 2.06 (Kernel 2.6.22) Added a field that contains the size of
+ the boot command line.
+
+Protocol 2.07 (Kernel 2.6.24) Added paravirtualised boot protocol.
+ Introduced hardware_subarch and hardware_subarch_data
+ and KEEP_SEGMENTS flag in load_flags.
+
+Protocol 2.08 (Kernel 2.6.26) Added crc32 checksum and ELF format
+ payload. Introduced payload_offset and payload_length
+ fields to aid in locating the payload.
+
+Protocol 2.09 (Kernel 2.6.26) Added a field of 64-bit physical
+ pointer to single linked list of struct setup_data.
+
+Protocol 2.10 (Kernel 2.6.31) Added a protocol for relaxed alignment
+ beyond the kernel_alignment added, new init_size and
+ pref_address fields. Added extended boot loader IDs.
+
+Protocol 2.11 (Kernel 3.6) Added a field for offset of EFI handover
+ protocol entry point.
+
+Protocol 2.12 (Kernel 3.8) Added the xloadflags field and extension fields
+ to struct boot_params for loading bzImage and ramdisk
+ above 4G in 64bit.
+
+Protocol 2.13 (Kernel 3.14) Support 32- and 64-bit flags being set in
+ xloadflags to support booting a 64-bit kernel from 32-bit
+ EFI
+
+Protocol 2.14 BURNT BY INCORRECT COMMIT
+ ae7e1238e68f2a472a125673ab506d49158c1889
+ (x86/boot: Add ACPI RSDP address to setup_header)
+ DO NOT USE!!! ASSUME SAME AS 2.13.
+
+Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
+============= ============================================================
+
+.. note::
+ The protocol version number should be changed only if the setup header
+ is changed. There is no need to update the version number if boot_params
+ or kernel_info are changed. Additionally, it is recommended to use
+ xloadflags (in this case the protocol version number should not be
+ updated either) or kernel_info to communicate supported Linux kernel
+ features to the boot loader. Due to very limited space available in
+ the original setup header every update to it should be considered
+ with great care. Starting from the protocol 2.15 the primary way to
+ communicate things to the boot loader is the kernel_info.
+
+
+Memory Layout
+=============
+
+The traditional memory map for the kernel loader, used for Image or
+zImage kernels, typically looks like::
+
+ | |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
+ 09A000 +------------------------+
+ | Command line |
+ | Stack/heap | For use by the kernel real-mode code.
+ 098000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ 090200 +------------------------+
+ | Kernel boot sector | The kernel legacy boot sector.
+ 090000 +------------------------+
+ | Protected-mode kernel | The bulk of the kernel image.
+ 010000 +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
+
+When using bzImage, the protected-mode kernel was relocated to
+0x100000 ("high memory"), and the kernel real-mode block (boot sector,
+setup, and stack/heap) was made relocatable to any address between
+0x10000 and end of low memory. Unfortunately, in protocols 2.00 and
+2.01 the 0x90000+ memory range is still used internally by the kernel;
+the 2.02 protocol resolves that problem.
+
+It is desirable to keep the "memory ceiling" -- the highest point in
+low memory touched by the boot loader -- as low as possible, since
+some newer BIOSes have begun to allocate some rather large amounts of
+memory, called the Extended BIOS Data Area, near the top of low
+memory. The boot loader should use the "INT 12h" BIOS call to verify
+how much low memory is available.
+
+Unfortunately, if INT 12h reports that the amount of memory is too
+low, there is usually nothing the boot loader can do but to report an
+error to the user. The boot loader should therefore be designed to
+take up as little space in low memory as it reasonably can. For
+zImage or old bzImage kernels, which need data written into the
+0x90000 segment, the boot loader should make sure not to use memory
+above the 0x9A000 point; too many BIOSes will break above that point.
+
+For a modern bzImage kernel with boot protocol version >= 2.02, a
+memory layout like the following is suggested::
+
+ ~ ~
+ | Protected-mode kernel |
+ 100000 +------------------------+
+ | I/O memory hole |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Leave as much as possible unused
+ ~ ~
+ | Command line | (Can also be below the X+10000 mark)
+ X+10000 +------------------------+
+ | Stack/heap | For use by the kernel real-mode code.
+ X+08000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ | Kernel boot sector | The kernel legacy boot sector.
+ X +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
+
+ ... where the address X is as low as the design of the boot loader permits.
+
+
+The Real-Mode Kernel Header
+===========================
+
+In the following text, and anywhere in the kernel boot sequence, "a
+sector" refers to 512 bytes. It is independent of the actual sector
+size of the underlying medium.
+
+The first step in loading a Linux kernel should be to load the
+real-mode code (boot sector and setup code) and then examine the
+following header at offset 0x01f1. The real-mode code can total up to
+32K, although the boot loader may choose to load only the first two
+sectors (1K) and then examine the bootup sector size.
+
+The header looks like:
+
+=========== ======== ===================== ============================================
+Offset/Size Proto Name Meaning
+=========== ======== ===================== ============================================
+01F1/1 ALL(1) setup_sects The size of the setup in sectors
+01F2/2 ALL root_flags If set, the root is mounted readonly
+01F4/4 2.04+(2) syssize The size of the 32-bit code in 16-byte paras
+01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only
+01FA/2 ALL vid_mode Video mode control
+01FC/2 ALL root_dev Default root device number
+01FE/2 ALL boot_flag 0xAA55 magic number
+0200/2 2.00+ jump Jump instruction
+0202/4 2.00+ header Magic signature "HdrS"
+0206/2 2.00+ version Boot protocol version supported
+0208/4 2.00+ realmode_swtch Boot loader hook (see below)
+020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
+020E/2 2.00+ kernel_version Pointer to kernel version string
+0210/1 2.00+ type_of_loader Boot loader identifier
+0211/1 2.00+ loadflags Boot protocol option flags
+0212/2 2.00+ setup_move_size Move to high memory size (used with hooks)
+0214/4 2.00+ code32_start Boot loader hook (see below)
+0218/4 2.00+ ramdisk_image initrd load address (set by boot loader)
+021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
+0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
+0224/2 2.01+ heap_end_ptr Free memory after setup end
+0226/1 2.02+(3) ext_loader_ver Extended boot loader version
+0227/1 2.02+(3) ext_loader_type Extended boot loader ID
+0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
+022C/4 2.03+ initrd_addr_max Highest legal initrd address
+0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
+0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
+0235/1 2.10+ min_alignment Minimum alignment, as a power of two
+0236/2 2.12+ xloadflags Boot protocol option flags
+0238/4 2.06+ cmdline_size Maximum size of the kernel command line
+023C/4 2.07+ hardware_subarch Hardware subarchitecture
+0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
+0248/4 2.08+ payload_offset Offset of kernel payload
+024C/4 2.08+ payload_length Length of kernel payload
+0250/8 2.09+ setup_data 64-bit physical pointer to linked list
+ of struct setup_data
+0258/8 2.10+ pref_address Preferred loading address
+0260/4 2.10+ init_size Linear memory required during initialization
+0264/4 2.11+ handover_offset Offset of handover entry point
+0268/4 2.15+ kernel_info_offset Offset of the kernel_info
+=========== ======== ===================== ============================================
+
+.. note::
+ (1) For backwards compatibility, if the setup_sects field contains 0, the
+ real value is 4.
+
+ (2) For boot protocol prior to 2.04, the upper two bytes of the syssize
+ field are unusable, which means the size of a bzImage kernel
+ cannot be determined.
+
+ (3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+
+If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
+the boot protocol version is "old". Loading an old kernel, the
+following parameters should be assumed::
+
+ Image type = zImage
+ initrd not supported
+ Real-mode kernel must be located at 0x90000.
+
+Otherwise, the "version" field contains the protocol version,
+e.g. protocol version 2.01 will contain 0x0201 in this field. When
+setting fields in the header, you must make sure only to set fields
+supported by the protocol version in use.
+
+
+Details of Header Fields
+========================
+
+For each field, some are information from the kernel to the bootloader
+("read"), some are expected to be filled out by the bootloader
+("write"), and some are expected to be read and modified by the
+bootloader ("modify").
+
+All general purpose boot loaders should write the fields marked
+(obligatory). Boot loaders who want to load the kernel at a
+nonstandard address should fill in the fields marked (reloc); other
+boot loaders can ignore those fields.
+
+The byte order of all fields is littleendian (this is x86, after all.)
+
+============ ===========
+Field name: setup_sects
+Type: read
+Offset/size: 0x1f1/1
+Protocol: ALL
+============ ===========
+
+ The size of the setup code in 512-byte sectors. If this field is
+ 0, the real value is 4. The real-mode code consists of the boot
+ sector (always one 512-byte sector) plus the setup code.
+
+============ =================
+Field name: root_flags
+Type: modify (optional)
+Offset/size: 0x1f2/2
+Protocol: ALL
+============ =================
+
+ If this field is nonzero, the root defaults to readonly. The use of
+ this field is deprecated; use the "ro" or "rw" options on the
+ command line instead.
+
+============ ===============================================
+Field name: syssize
+Type: read
+Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL)
+Protocol: 2.04+
+============ ===============================================
+
+ The size of the protected-mode code in units of 16-byte paragraphs.
+ For protocol versions older than 2.04 this field is only two bytes
+ wide, and therefore cannot be trusted for the size of a kernel if
+ the LOAD_HIGH flag is set.
+
+============ ===============
+Field name: ram_size
+Type: kernel internal
+Offset/size: 0x1f8/2
+Protocol: ALL
+============ ===============
+
+ This field is obsolete.
+
+============ ===================
+Field name: vid_mode
+Type: modify (obligatory)
+Offset/size: 0x1fa/2
+============ ===================
+
+ Please see the section on SPECIAL COMMAND LINE OPTIONS.
+
+============ =================
+Field name: root_dev
+Type: modify (optional)
+Offset/size: 0x1fc/2
+Protocol: ALL
+============ =================
+
+ The default root device device number. The use of this field is
+ deprecated, use the "root=" option on the command line instead.
+
+============ =========
+Field name: boot_flag
+Type: read
+Offset/size: 0x1fe/2
+Protocol: ALL
+============ =========
+
+ Contains 0xAA55. This is the closest thing old Linux kernels have
+ to a magic number.
+
+============ =======
+Field name: jump
+Type: read
+Offset/size: 0x200/2
+Protocol: 2.00+
+============ =======
+
+ Contains an x86 jump instruction, 0xEB followed by a signed offset
+ relative to byte 0x202. This can be used to determine the size of
+ the header.
+
+============ =======
+Field name: header
+Type: read
+Offset/size: 0x202/4
+Protocol: 2.00+
+============ =======
+
+ Contains the magic number "HdrS" (0x53726448).
+
+============ =======
+Field name: version
+Type: read
+Offset/size: 0x206/2
+Protocol: 2.00+
+============ =======
+
+ Contains the boot protocol version, in (major << 8)+minor format,
+ e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
+ 10.17.
+
+============ =================
+Field name: realmode_swtch
+Type: modify (optional)
+Offset/size: 0x208/4
+Protocol: 2.00+
+============ =================
+
+ Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
+
+============ =============
+Field name: start_sys_seg
+Type: read
+Offset/size: 0x20c/2
+Protocol: 2.00+
+============ =============
+
+ The load low segment (0x1000). Obsolete.
+
+============ ==============
+Field name: kernel_version
+Type: read
+Offset/size: 0x20e/2
+Protocol: 2.00+
+============ ==============
+
+ If set to a nonzero value, contains a pointer to a NUL-terminated
+ human-readable kernel version number string, less 0x200. This can
+ be used to display the kernel version to the user. This value
+ should be less than (0x200*setup_sects).
+
+ For example, if this value is set to 0x1c00, the kernel version
+ number string can be found at offset 0x1e00 in the kernel file.
+ This is a valid value if and only if the "setup_sects" field
+ contains the value 15 or higher, as::
+
+ 0x1c00 < 15*0x200 (= 0x1e00) but
+ 0x1c00 >= 14*0x200 (= 0x1c00)
+
+ 0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
+
+============ ==================
+Field name: type_of_loader
+Type: write (obligatory)
+Offset/size: 0x210/1
+Protocol: 2.00+
+============ ==================
+
+ If your boot loader has an assigned id (see table below), enter
+ 0xTV here, where T is an identifier for the boot loader and V is
+ a version number. Otherwise, enter 0xFF here.
+
+ For boot loader IDs above T = 0xD, write T = 0xE to this field and
+ write the extended ID minus 0x10 to the ext_loader_type field.
+ Similarly, the ext_loader_ver field can be used to provide more than
+ four bits for the bootloader version.
+
+ For example, for T = 0x15, V = 0x234, write::
+
+ type_of_loader <- 0xE4
+ ext_loader_type <- 0x05
+ ext_loader_ver <- 0x23
+
+ Assigned boot loader ids (hexadecimal):
+
+ == =======================================
+ 0 LILO
+ (0x00 reserved for pre-2.00 bootloader)
+ 1 Loadlin
+ 2 bootsect-loader
+ (0x20, all other values reserved)
+ 3 Syslinux
+ 4 Etherboot/gPXE/iPXE
+ 5 ELILO
+ 7 GRUB
+ 8 U-Boot
+ 9 Xen
+ A Gujin
+ B Qemu
+ C Arcturus Networks uCbootloader
+ D kexec-tools
+ E Extended (see ext_loader_type)
+ F Special (0xFF = undefined)
+ 10 Reserved
+ 11 Minimal Linux Bootloader
+ <http://sebastian-plotz.blogspot.de>
+ 12 OVMF UEFI virtualization stack
+ 13 barebox
+ == =======================================
+
+ Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
+
+============ ===================
+Field name: loadflags
+Type: modify (obligatory)
+Offset/size: 0x211/1
+Protocol: 2.00+
+============ ===================
+
+ This field is a bitmask.
+
+ Bit 0 (read): LOADED_HIGH
+
+ - If 0, the protected-mode code is loaded at 0x10000.
+ - If 1, the protected-mode code is loaded at 0x100000.
+
+ Bit 1 (kernel internal): KASLR_FLAG
+
+ - Used internally by the compressed kernel to communicate
+ KASLR status to kernel proper.
+
+ - If 1, KASLR enabled.
+ - If 0, KASLR disabled.
+
+ Bit 5 (write): QUIET_FLAG
+
+ - If 0, print early messages.
+ - If 1, suppress early messages.
+
+ This requests to the kernel (decompressor and early
+ kernel) to not write early messages that require
+ accessing the display hardware directly.
+
+ Bit 6 (obsolete): KEEP_SEGMENTS
+
+ Protocol: 2.07+
+
+ - This flag is obsolete.
+
+ Bit 7 (write): CAN_USE_HEAP
+
+ Set this bit to 1 to indicate that the value entered in the
+ heap_end_ptr is valid. If this field is clear, some setup code
+ functionality will be disabled.
+
+
+============ ===================
+Field name: setup_move_size
+Type: modify (obligatory)
+Offset/size: 0x212/2
+Protocol: 2.00-2.01
+============ ===================
+
+ When using protocol 2.00 or 2.01, if the real mode kernel is not
+ loaded at 0x90000, it gets moved there later in the loading
+ sequence. Fill in this field if you want additional data (such as
+ the kernel command line) moved in addition to the real-mode kernel
+ itself.
+
+ The unit is bytes starting with the beginning of the boot sector.
+
+ This field is can be ignored when the protocol is 2.02 or higher, or
+ if the real-mode code is loaded at 0x90000.
+
+============ ========================
+Field name: code32_start
+Type: modify (optional, reloc)
+Offset/size: 0x214/4
+Protocol: 2.00+
+============ ========================
+
+ The address to jump to in protected mode. This defaults to the load
+ address of the kernel, and can be used by the boot loader to
+ determine the proper load address.
+
+ This field can be modified for two purposes:
+
+ 1. as a boot loader hook (see Advanced Boot Loader Hooks below.)
+
+ 2. if a bootloader which does not install a hook loads a
+ relocatable kernel at a nonstandard address it will have to modify
+ this field to point to the load address.
+
+============ ==================
+Field name: ramdisk_image
+Type: write (obligatory)
+Offset/size: 0x218/4
+Protocol: 2.00+
+============ ==================
+
+ The 32-bit linear address of the initial ramdisk or ramfs. Leave at
+ zero if there is no initial ramdisk/ramfs.
+
+============ ==================
+Field name: ramdisk_size
+Type: write (obligatory)
+Offset/size: 0x21c/4
+Protocol: 2.00+
+============ ==================
+
+ Size of the initial ramdisk or ramfs. Leave at zero if there is no
+ initial ramdisk/ramfs.
+
+============ ===============
+Field name: bootsect_kludge
+Type: kernel internal
+Offset/size: 0x220/4
+Protocol: 2.00+
+============ ===============
+
+ This field is obsolete.
+
+============ ==================
+Field name: heap_end_ptr
+Type: write (obligatory)
+Offset/size: 0x224/2
+Protocol: 2.01+
+============ ==================
+
+ Set this field to the offset (from the beginning of the real-mode
+ code) of the end of the setup stack/heap, minus 0x0200.
+
+============ ================
+Field name: ext_loader_ver
+Type: write (optional)
+Offset/size: 0x226/1
+Protocol: 2.02+
+============ ================
+
+ This field is used as an extension of the version number in the
+ type_of_loader field. The total version number is considered to be
+ (type_of_loader & 0x0f) + (ext_loader_ver << 4).
+
+ The use of this field is boot loader specific. If not written, it
+ is zero.
+
+ Kernels prior to 2.6.31 did not recognize this field, but it is safe
+ to write for protocol version 2.02 or higher.
+
+============ =====================================================
+Field name: ext_loader_type
+Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
+Offset/size: 0x227/1
+Protocol: 2.02+
+============ =====================================================
+
+ This field is used as an extension of the type number in
+ type_of_loader field. If the type in type_of_loader is 0xE, then
+ the actual type is (ext_loader_type + 0x10).
+
+ This field is ignored if the type in type_of_loader is not 0xE.
+
+ Kernels prior to 2.6.31 did not recognize this field, but it is safe
+ to write for protocol version 2.02 or higher.
+
+============ ==================
+Field name: cmd_line_ptr
+Type: write (obligatory)
+Offset/size: 0x228/4
+Protocol: 2.02+
+============ ==================
+
+ Set this field to the linear address of the kernel command line.
+ The kernel command line can be located anywhere between the end of
+ the setup heap and 0xA0000; it does not have to be located in the
+ same 64K segment as the real-mode code itself.
+
+ Fill in this field even if your boot loader does not support a
+ command line, in which case you can point this to an empty string
+ (or better yet, to the string "auto".) If this field is left at
+ zero, the kernel will assume that your boot loader does not support
+ the 2.02+ protocol.
+
+============ ===============
+Field name: initrd_addr_max
+Type: read
+Offset/size: 0x22c/4
+Protocol: 2.03+
+============ ===============
+
+ The maximum address that may be occupied by the initial
+ ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this
+ field is not present, and the maximum address is 0x37FFFFFF. (This
+ address is defined as the address of the highest safe byte, so if
+ your ramdisk is exactly 131072 bytes long and this field is
+ 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
+
+============ ============================
+Field name: kernel_alignment
+Type: read/modify (reloc)
+Offset/size: 0x230/4
+Protocol: 2.05+ (read), 2.10+ (modify)
+============ ============================
+
+ Alignment unit required by the kernel (if relocatable_kernel is
+ true.) A relocatable kernel that is loaded at an alignment
+ incompatible with the value in this field will be realigned during
+ kernel initialization.
+
+ Starting with protocol version 2.10, this reflects the kernel
+ alignment preferred for optimal performance; it is possible for the
+ loader to modify this field to permit a lesser alignment. See the
+ min_alignment and pref_address field below.
+
+============ ==================
+Field name: relocatable_kernel
+Type: read (reloc)
+Offset/size: 0x234/1
+Protocol: 2.05+
+============ ==================
+
+ If this field is nonzero, the protected-mode part of the kernel can
+ be loaded at any address that satisfies the kernel_alignment field.
+ After loading, the boot loader must set the code32_start field to
+ point to the loaded code, or to a boot loader hook.
+
+============ =============
+Field name: min_alignment
+Type: read (reloc)
+Offset/size: 0x235/1
+Protocol: 2.10+
+============ =============
+
+ This field, if nonzero, indicates as a power of two the minimum
+ alignment required, as opposed to preferred, by the kernel to boot.
+ If a boot loader makes use of this field, it should update the
+ kernel_alignment field with the alignment unit desired; typically::
+
+ kernel_alignment = 1 << min_alignment
+
+ There may be a considerable performance cost with an excessively
+ misaligned kernel. Therefore, a loader should typically try each
+ power-of-two alignment from kernel_alignment down to this alignment.
+
+============ ==========
+Field name: xloadflags
+Type: read
+Offset/size: 0x236/2
+Protocol: 2.12+
+============ ==========
+
+ This field is a bitmask.
+
+ Bit 0 (read): XLF_KERNEL_64
+
+ - If 1, this kernel has the legacy 64-bit entry point at 0x200.
+
+ Bit 1 (read): XLF_CAN_BE_LOADED_ABOVE_4G
+
+ - If 1, kernel/boot_params/cmdline/ramdisk can be above 4G.
+
+ Bit 2 (read): XLF_EFI_HANDOVER_32
+
+ - If 1, the kernel supports the 32-bit EFI handoff entry point
+ given at handover_offset.
+
+ Bit 3 (read): XLF_EFI_HANDOVER_64
+
+ - If 1, the kernel supports the 64-bit EFI handoff entry point
+ given at handover_offset + 0x200.
+
+ Bit 4 (read): XLF_EFI_KEXEC
+
+ - If 1, the kernel supports kexec EFI boot with EFI runtime support.
+
+
+============ ============
+Field name: cmdline_size
+Type: read
+Offset/size: 0x238/4
+Protocol: 2.06+
+============ ============
+
+ The maximum size of the command line without the terminating
+ zero. This means that the command line can contain at most
+ cmdline_size characters. With protocol version 2.05 and earlier, the
+ maximum size was 255.
+
+============ ====================================
+Field name: hardware_subarch
+Type: write (optional, defaults to x86/PC)
+Offset/size: 0x23c/4
+Protocol: 2.07+
+============ ====================================
+
+ In a paravirtualized environment the hardware low level architectural
+ pieces such as interrupt handling, page table handling, and
+ accessing process control registers needs to be done differently.
+
+ This field allows the bootloader to inform the kernel we are in one
+ one of those environments.
+
+ ========== ==============================
+ 0x00000000 The default x86/PC environment
+ 0x00000001 lguest
+ 0x00000002 Xen
+ 0x00000003 Moorestown MID
+ 0x00000004 CE4100 TV Platform
+ ========== ==============================
+
+============ =========================
+Field name: hardware_subarch_data
+Type: write (subarch-dependent)
+Offset/size: 0x240/8
+Protocol: 2.07+
+============ =========================
+
+ A pointer to data that is specific to hardware subarch
+ This field is currently unused for the default x86/PC environment,
+ do not modify.
+
+============ ==============
+Field name: payload_offset
+Type: read
+Offset/size: 0x248/4
+Protocol: 2.08+
+============ ==============
+
+ If non-zero then this field contains the offset from the beginning
+ of the protected-mode code to the payload.
+
+ The payload may be compressed. The format of both the compressed and
+ uncompressed data should be determined using the standard magic
+ numbers. The currently supported compression formats are gzip
+ (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA
+ (magic number 5D 00), XZ (magic number FD 37), LZ4 (magic number
+ 02 21) and ZSTD (magic number 28 B5). The uncompressed payload is
+ currently always ELF (magic number 7F 45 4C 46).
+
+============ ==============
+Field name: payload_length
+Type: read
+Offset/size: 0x24c/4
+Protocol: 2.08+
+============ ==============
+
+ The length of the payload.
+
+============ ===============
+Field name: setup_data
+Type: write (special)
+Offset/size: 0x250/8
+Protocol: 2.09+
+============ ===============
+
+ The 64-bit physical pointer to NULL terminated single linked list of
+ struct setup_data. This is used to define a more extensible boot
+ parameters passing mechanism. The definition of struct setup_data is
+ as follow::
+
+ struct setup_data {
+ u64 next;
+ u32 type;
+ u32 len;
+ u8 data[0];
+ };
+
+ Where, the next is a 64-bit physical pointer to the next node of
+ linked list, the next field of the last node is 0; the type is used
+ to identify the contents of data; the len is the length of data
+ field; the data holds the real payload.
+
+ This list may be modified at a number of points during the bootup
+ process. Therefore, when modifying this list one should always make
+ sure to consider the case where the linked list already contains
+ entries.
+
+ The setup_data is a bit awkward to use for extremely large data objects,
+ both because the setup_data header has to be adjacent to the data object
+ and because it has a 32-bit length field. However, it is important that
+ intermediate stages of the boot process have a way to identify which
+ chunks of memory are occupied by kernel data.
+
+ Thus setup_indirect struct and SETUP_INDIRECT type were introduced in
+ protocol 2.15::
+
+ struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+ };
+
+ The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
+ SETUP_INDIRECT itself since making the setup_indirect a tree structure
+ could require a lot of stack space in something that needs to parse it
+ and stack space can be limited in boot contexts.
+
+ Let's give an example how to point to SETUP_E820_EXT data using setup_indirect.
+ In this case setup_data and setup_indirect will look like this::
+
+ struct setup_data {
+ __u64 next = 0 or <addr_of_next_setup_data_struct>;
+ __u32 type = SETUP_INDIRECT;
+ __u32 len = sizeof(setup_indirect);
+ __u8 data[sizeof(setup_indirect)] = struct setup_indirect {
+ __u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
+ __u32 reserved = 0;
+ __u64 len = <len_of_SETUP_E820_EXT_data>;
+ __u64 addr = <addr_of_SETUP_E820_EXT_data>;
+ }
+ }
+
+.. note::
+ SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
+ from SETUP_INDIRECT itself. So, this kind of objects cannot be provided
+ by the bootloaders.
+
+============ ============
+Field name: pref_address
+Type: read (reloc)
+Offset/size: 0x258/8
+Protocol: 2.10+
+============ ============
+
+ This field, if nonzero, represents a preferred load address for the
+ kernel. A relocating bootloader should attempt to load at this
+ address if possible.
+
+ A non-relocatable kernel will unconditionally move itself and to run
+ at this address.
+
+============ =======
+Field name: init_size
+Type: read
+Offset/size: 0x260/4
+============ =======
+
+ This field indicates the amount of linear contiguous memory starting
+ at the kernel runtime start address that the kernel needs before it
+ is capable of examining its memory map. This is not the same thing
+ as the total amount of memory the kernel needs to boot, but it can
+ be used by a relocating boot loader to help select a safe load
+ address for the kernel.
+
+ The kernel runtime start address is determined by the following algorithm::
+
+ if (relocatable_kernel)
+ runtime_start = align_up(load_address, kernel_alignment)
+ else
+ runtime_start = pref_address
+
+============ ===============
+Field name: handover_offset
+Type: read
+Offset/size: 0x264/4
+============ ===============
+
+ This field is the offset from the beginning of the kernel image to
+ the EFI handover protocol entry point. Boot loaders using the EFI
+ handover protocol to boot the kernel should jump to this offset.
+
+ See EFI HANDOVER PROTOCOL below for more details.
+
+============ ==================
+Field name: kernel_info_offset
+Type: read
+Offset/size: 0x268/4
+Protocol: 2.15+
+============ ==================
+
+ This field is the offset from the beginning of the kernel image to the
+ kernel_info. The kernel_info structure is embedded in the Linux image
+ in the uncompressed protected mode region.
+
+
+The kernel_info
+===============
+
+The relationships between the headers are analogous to the various data
+sections:
+
+ setup_header = .data
+ boot_params/setup_data = .bss
+
+What is missing from the above list? That's right:
+
+ kernel_info = .rodata
+
+We have been (ab)using .data for things that could go into .rodata or .bss for
+a long time, for lack of alternatives and -- especially early on -- inertia.
+Also, the BIOS stub is responsible for creating boot_params, so it isn't
+available to a BIOS-based loader (setup_data is, though).
+
+setup_header is permanently limited to 144 bytes due to the reach of the
+2-byte jump field, which doubles as a length field for the structure, combined
+with the size of the "hole" in struct boot_params that a protected-mode loader
+or the BIOS stub has to copy it into. It is currently 119 bytes long, which
+leaves us with 25 very precious bytes. This isn't something that can be fixed
+without revising the boot protocol entirely, breaking backwards compatibility.
+
+boot_params proper is limited to 4096 bytes, but can be arbitrarily extended
+by adding setup_data entries. It cannot be used to communicate properties of
+the kernel image, because it is .bss and has no image-provided content.
+
+kernel_info solves this by providing an extensible place for information about
+the kernel image. It is readonly, because the kernel cannot rely on a
+bootloader copying its contents anywhere, but that is OK; if it becomes
+necessary it can still contain data items that an enabled bootloader would be
+expected to copy into a setup_data chunk.
+
+All kernel_info data should be part of this structure. Fixed size data have to
+be put before kernel_info_var_len_data label. Variable size data have to be put
+after kernel_info_var_len_data label. Each chunk of variable size data has to
+be prefixed with header/magic and its size, e.g.::
+
+ kernel_info:
+ .ascii "LToP" /* Header, Linux top (structure). */
+ .long kernel_info_var_len_data - kernel_info
+ .long kernel_info_end - kernel_info
+ .long 0x01234567 /* Some fixed size data for the bootloaders. */
+ kernel_info_var_len_data:
+ example_struct: /* Some variable size data for the bootloaders. */
+ .ascii "0123" /* Header/Magic. */
+ .long example_struct_end - example_struct
+ .ascii "Struct"
+ .long 0x89012345
+ example_struct_end:
+ example_strings: /* Some variable size data for the bootloaders. */
+ .ascii "ABCD" /* Header/Magic. */
+ .long example_strings_end - example_strings
+ .asciz "String_0"
+ .asciz "String_1"
+ example_strings_end:
+ kernel_info_end:
+
+This way the kernel_info is self-contained blob.
+
+.. note::
+ Each variable size data header/magic can be any 4-character string,
+ without \0 at the end of the string, which does not collide with
+ existing variable length data headers/magics.
+
+
+Details of the kernel_info Fields
+=================================
+
+============ ========
+Field name: header
+Offset/size: 0x0000/4
+============ ========
+
+ Contains the magic number "LToP" (0x506f544c).
+
+============ ========
+Field name: size
+Offset/size: 0x0004/4
+============ ========
+
+ This field contains the size of the kernel_info including kernel_info.header.
+ It does not count kernel_info.kernel_info_var_len_data size. This field should be
+ used by the bootloaders to detect supported fixed size fields in the kernel_info
+ and beginning of kernel_info.kernel_info_var_len_data.
+
+============ ========
+Field name: size_total
+Offset/size: 0x0008/4
+============ ========
+
+ This field contains the size of the kernel_info including kernel_info.header
+ and kernel_info.kernel_info_var_len_data.
+
+============ ==============
+Field name: setup_type_max
+Offset/size: 0x000c/4
+============ ==============
+
+ This field contains maximal allowed type for setup_data and setup_indirect structs.
+
+
+The Image Checksum
+==================
+
+From boot protocol version 2.08 onwards the CRC-32 is calculated over
+the entire file using the characteristic polynomial 0x04C11DB7 and an
+initial remainder of 0xffffffff. The checksum is appended to the
+file; therefore the CRC of the file up to the limit specified in the
+syssize field of the header is always 0.
+
+
+The Kernel Command Line
+=======================
+
+The kernel command line has become an important way for the boot
+loader to communicate with the kernel. Some of its options are also
+relevant to the boot loader itself, see "special command line options"
+below.
+
+The kernel command line is a null-terminated string. The maximum
+length can be retrieved from the field cmdline_size. Before protocol
+version 2.06, the maximum was 255 characters. A string that is too
+long will be automatically truncated by the kernel.
+
+If the boot protocol version is 2.02 or later, the address of the
+kernel command line is given by the header field cmd_line_ptr (see
+above.) This address can be anywhere between the end of the setup
+heap and 0xA0000.
+
+If the protocol version is *not* 2.02 or higher, the kernel
+command line is entered using the following protocol:
+
+ - At offset 0x0020 (word), "cmd_line_magic", enter the magic
+ number 0xA33F.
+
+ - At offset 0x0022 (word), "cmd_line_offset", enter the offset
+ of the kernel command line (relative to the start of the
+ real-mode kernel).
+
+ - The kernel command line *must* be within the memory region
+ covered by setup_move_size, so you may need to adjust this
+ field.
+
+
+Memory Layout of The Real-Mode Code
+===================================
+
+The real-mode code requires a stack/heap to be set up, as well as
+memory allocated for the kernel command line. This needs to be done
+in the real-mode accessible memory in bottom megabyte.
+
+It should be noted that modern machines often have a sizable Extended
+BIOS Data Area (EBDA). As a result, it is advisable to use as little
+of the low megabyte as possible.
+
+Unfortunately, under the following circumstances the 0x90000 memory
+segment has to be used:
+
+ - When loading a zImage kernel ((loadflags & 0x01) == 0).
+ - When loading a 2.01 or earlier boot protocol kernel.
+
+.. note::
+ For the 2.00 and 2.01 boot protocols, the real-mode code
+ can be loaded at another address, but it is internally
+ relocated to 0x90000. For the "old" protocol, the
+ real-mode code must be loaded at 0x90000.
+
+When loading at 0x90000, avoid using memory above 0x9a000.
+
+For boot protocol 2.02 or higher, the command line does not have to be
+located in the same 64K segment as the real-mode setup code; it is
+thus permitted to give the stack/heap the full 64K segment and locate
+the command line above it.
+
+The kernel command line should not be located below the real-mode
+code, nor should it be located in high memory.
+
+
+Sample Boot Configuartion
+=========================
+
+As a sample configuration, assume the following layout of the real
+mode segment.
+
+ When loading below 0x90000, use the entire segment:
+
+ ============= ===================
+ 0x0000-0x7fff Real mode kernel
+ 0x8000-0xdfff Stack and heap
+ 0xe000-0xffff Kernel command line
+ ============= ===================
+
+ When loading at 0x90000 OR the protocol version is 2.01 or earlier:
+
+ ============= ===================
+ 0x0000-0x7fff Real mode kernel
+ 0x8000-0x97ff Stack and heap
+ 0x9800-0x9fff Kernel command line
+ ============= ===================
+
+Such a boot loader should enter the following fields in the header::
+
+ unsigned long base_ptr; /* base address for real-mode segment */
+
+ if ( setup_sects == 0 ) {
+ setup_sects = 4;
+ }
+
+ if ( protocol >= 0x0200 ) {
+ type_of_loader = <type code>;
+ if ( loading_initrd ) {
+ ramdisk_image = <initrd_address>;
+ ramdisk_size = <initrd_size>;
+ }
+
+ if ( protocol >= 0x0202 && loadflags & 0x01 )
+ heap_end = 0xe000;
+ else
+ heap_end = 0x9800;
+
+ if ( protocol >= 0x0201 ) {
+ heap_end_ptr = heap_end - 0x200;
+ loadflags |= 0x80; /* CAN_USE_HEAP */
+ }
+
+ if ( protocol >= 0x0202 ) {
+ cmd_line_ptr = base_ptr + heap_end;
+ strcpy(cmd_line_ptr, cmdline);
+ } else {
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+ setup_move_size = heap_end + strlen(cmdline)+1;
+ strcpy(base_ptr+cmd_line_offset, cmdline);
+ }
+ } else {
+ /* Very old kernel */
+
+ heap_end = 0x9800;
+
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+
+ /* A very old kernel MUST have its real-mode code
+ loaded at 0x90000 */
+
+ if ( base_ptr != 0x90000 ) {
+ /* Copy the real-mode kernel */
+ memcpy(0x90000, base_ptr, (setup_sects+1)*512);
+ base_ptr = 0x90000; /* Relocated */
+ }
+
+ strcpy(0x90000+cmd_line_offset, cmdline);
+
+ /* It is recommended to clear memory up to the 32K mark */
+ memset(0x90000 + (setup_sects+1)*512, 0,
+ (64-(setup_sects+1))*512);
+ }
+
+
+Loading The Rest of The Kernel
+==============================
+
+The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512
+in the kernel file (again, if setup_sects == 0 the real value is 4.)
+It should be loaded at address 0x10000 for Image/zImage kernels and
+0x100000 for bzImage kernels.
+
+The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
+bit (LOAD_HIGH) in the loadflags field is set::
+
+ is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
+ load_address = is_bzImage ? 0x100000 : 0x10000;
+
+Note that Image/zImage kernels can be up to 512K in size, and thus use
+the entire 0x10000-0x90000 range of memory. This means it is pretty
+much a requirement for these kernels to load the real-mode part at
+0x90000. bzImage kernels allow much more flexibility.
+
+Special Command Line Options
+============================
+
+If the command line provided by the boot loader is entered by the
+user, the user may expect the following command line options to work.
+They should normally not be deleted from the kernel command line even
+though not all of them are actually meaningful to the kernel. Boot
+loader authors who need additional command line options for the boot
+loader itself should get them registered in
+Documentation/admin-guide/kernel-parameters.rst to make sure they will not
+conflict with actual kernel options now or in the future.
+
+ vga=<mode>
+ <mode> here is either an integer (in C notation, either
+ decimal, octal, or hexadecimal) or one of the strings
+ "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask"
+ (meaning 0xFFFD). This value should be entered into the
+ vid_mode field, as it is used by the kernel before the command
+ line is parsed.
+
+ mem=<size>
+ <size> is an integer in C notation optionally followed by
+ (case insensitive) K, M, G, T, P or E (meaning << 10, << 20,
+ << 30, << 40, << 50 or << 60). This specifies the end of
+ memory to the kernel. This affects the possible placement of
+ an initrd, since an initrd should be placed near end of
+ memory. Note that this is an option to *both* the kernel and
+ the bootloader!
+
+ initrd=<file>
+ An initrd should be loaded. The meaning of <file> is
+ obviously bootloader-dependent, and some boot loaders
+ (e.g. LILO) do not have such a command.
+
+In addition, some boot loaders add the following options to the
+user-specified command line:
+
+ BOOT_IMAGE=<file>
+ The boot image which was loaded. Again, the meaning of <file>
+ is obviously bootloader-dependent.
+
+ auto
+ The kernel was booted without explicit user intervention.
+
+If these options are added by the boot loader, it is highly
+recommended that they are located *first*, before the user-specified
+or configuration-specified command line. Otherwise, "init=/bin/sh"
+gets confused by the "auto" option.
+
+
+Running the Kernel
+==================
+
+The kernel is started by jumping to the kernel entry point, which is
+located at *segment* offset 0x20 from the start of the real mode
+kernel. This means that if you loaded your real-mode kernel code at
+0x90000, the kernel entry point is 9020:0000.
+
+At entry, ds = es = ss should point to the start of the real-mode
+kernel code (0x9000 if the code is loaded at 0x90000), sp should be
+set up properly, normally pointing to the top of the heap, and
+interrupts should be disabled. Furthermore, to guard against bugs in
+the kernel, it is recommended that the boot loader sets fs = gs = ds =
+es = ss.
+
+In our example from above, we would do::
+
+ /* Note: in the case of the "old" kernel protocol, base_ptr must
+ be == 0x90000 at this point; see the previous sample code */
+
+ seg = base_ptr >> 4;
+
+ cli(); /* Enter with interrupts disabled! */
+
+ /* Set up the real-mode kernel stack */
+ _SS = seg;
+ _SP = heap_end;
+
+ _DS = _ES = _FS = _GS = seg;
+ jmp_far(seg+0x20, 0); /* Run the kernel */
+
+If your boot sector accesses a floppy drive, it is recommended to
+switch off the floppy motor before running the kernel, since the
+kernel boot leaves interrupts off and thus the motor will not be
+switched off, especially if the loaded kernel has the floppy driver as
+a demand-loaded module!
+
+
+Advanced Boot Loader Hooks
+==========================
+
+If the boot loader runs in a particularly hostile environment (such as
+LOADLIN, which runs under DOS) it may be impossible to follow the
+standard memory location requirements. Such a boot loader may use the
+following hooks that, if set, are invoked by the kernel at the
+appropriate time. The use of these hooks should probably be
+considered an absolutely last resort!
+
+IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and
+%edi across invocation.
+
+ realmode_swtch:
+ A 16-bit real mode far subroutine invoked immediately before
+ entering protected mode. The default routine disables NMI, so
+ your routine should probably do so, too.
+
+ code32_start:
+ A 32-bit flat-mode routine *jumped* to immediately after the
+ transition to protected mode, but before the kernel is
+ uncompressed. No segments, except CS, are guaranteed to be
+ set up (current kernels do, but older ones do not); you should
+ set them up to BOOT_DS (0x18) yourself.
+
+ After completing your hook, you should jump to the address
+ that was in this field before your boot loader overwrote it
+ (relocated, if appropriate.)
+
+
+32-bit Boot Protocol
+====================
+
+For machine with some new BIOS other than legacy BIOS, such as EFI,
+LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel
+based on legacy BIOS can not be used, so a 32-bit boot protocol needs
+to be defined.
+
+In 32-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+should be allocated and initialized to all zero. Then the setup header
+from offset 0x01f1 of kernel image on should be loaded into struct
+boot_params and examined. The end of setup header can be calculated as
+follow::
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as
+described in chapter Documentation/arch/x86/zero-page.rst.
+
+After setting up the struct boot_params, the boot loader can load the
+32/64-bit kernel in the same way as that of 16-bit boot protocol.
+
+In 32-bit boot protocol, the kernel is started by jumping to the
+32-bit kernel entry point, which is the start address of loaded
+32/64-bit kernel.
+
+At entry, the CPU must be in 32-bit protected mode with paging
+disabled; a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
+address of the struct boot_params; %ebp, %edi and %ebx must be zero.
+
+64-bit Boot Protocol
+====================
+
+For machine with 64bit cpus and 64bit kernel, we could use 64bit bootloader
+and we need a 64-bit boot protocol.
+
+In 64-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+could be allocated anywhere (even above 4G) and initialized to all zero.
+Then, the setup header at offset 0x01f1 of kernel image on should be
+loaded into struct boot_params and examined. The end of setup header
+can be calculated as follows::
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as described
+in chapter Documentation/arch/x86/zero-page.rst.
+
+After setting up the struct boot_params, the boot loader can load
+64-bit kernel in the same way as that of 16-bit boot protocol, but
+kernel could be loaded above 4G.
+
+In 64-bit boot protocol, the kernel is started by jumping to the
+64-bit kernel entry point, which is the start address of loaded
+64-bit kernel plus 0x200.
+
+At entry, the CPU must be in 64-bit mode with paging enabled.
+The range with setup_header.init_size from start address of loaded
+kernel and zero page and command line buffer get ident mapping;
+a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base
+address of the struct boot_params.
+
+EFI Handover Protocol (deprecated)
+==================================
+
+This protocol allows boot loaders to defer initialisation to the EFI
+boot stub. The boot loader is required to load the kernel/initrd(s)
+from the boot media and jump to the EFI handover protocol entry point
+which is hdr->handover_offset bytes from the beginning of
+startup_{32,64}.
+
+The boot loader MUST respect the kernel's PE/COFF metadata when it comes
+to section alignment, the memory footprint of the executable image beyond
+the size of the file itself, and any other aspect of the PE/COFF header
+that may affect correct operation of the image as a PE/COFF binary in the
+execution context provided by the EFI firmware.
+
+The function prototype for the handover entry point looks like this::
+
+ efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
+
+'handle' is the EFI image handle passed to the boot loader by the EFI
+firmware, 'table' is the EFI system table - these are the first two
+arguments of the "handoff state" as described in section 2.3 of the
+UEFI specification. 'bp' is the boot loader-allocated boot params.
+
+The boot loader *must* fill out the following fields in bp::
+
+ - hdr.cmd_line_ptr
+ - hdr.ramdisk_image (if applicable)
+ - hdr.ramdisk_size (if applicable)
+
+All other fields should be zero.
+
+NOTE: The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
+ entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
+ loading protocol (refer to [0] for an example of the bootloader side of
+ this), which removes the need for any knowledge on the part of the EFI
+ bootloader regarding the internal representation of boot_params or any
+ requirements/limitations regarding the placement of the command line
+ and ramdisk in memory, or the placement of the kernel image itself.
+
+[0] https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0
diff --git a/Documentation/arch/x86/booting-dt.rst b/Documentation/arch/x86/booting-dt.rst
new file mode 100644
index 0000000..b089ffd
--- /dev/null
+++ b/Documentation/arch/x86/booting-dt.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+DeviceTree Booting
+------------------
+
+ There is one single 32bit entry point to the kernel at code32_start,
+ the decompressor (the real mode entry point goes to the same 32bit
+ entry point once it switched into protected mode). That entry point
+ supports one calling convention which is documented in
+ Documentation/arch/x86/boot.rst
+ The physical pointer to the device-tree block is passed via setup_data
+ which requires at least boot protocol 2.09.
+ The type filed is defined as
+
+ #define SETUP_DTB 2
+
+ This device-tree is used as an extension to the "boot page". As such it
+ does not parse / consider data which is already covered by the boot
+ page. This includes memory size, reserved ranges, command line arguments
+ or initrd address. It simply holds information which can not be retrieved
+ otherwise like interrupt routing or a list of devices behind an I2C bus.
diff --git a/Documentation/arch/x86/buslock.rst b/Documentation/arch/x86/buslock.rst
new file mode 100644
index 0000000..31ec0ef
--- /dev/null
+++ b/Documentation/arch/x86/buslock.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+===============================
+Bus lock detection and handling
+===============================
+
+:Copyright: |copy| 2021 Intel Corporation
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+ - Tony Luck <tony.luck@intel.com>
+
+Problem
+=======
+
+A split lock is any atomic operation whose operand crosses two cache lines.
+Since the operand spans two cache lines and the operation must be atomic,
+the system locks the bus while the CPU accesses the two cache lines.
+
+A bus lock is acquired through either split locked access to writeback (WB)
+memory or any locked access to non-WB memory. This is typically thousands of
+cycles slower than an atomic operation within a cache line. It also disrupts
+performance on other cores and brings the whole system to its knees.
+
+Detection
+=========
+
+Intel processors may support either or both of the following hardware
+mechanisms to detect split locks and bus locks.
+
+#AC exception for split lock detection
+--------------------------------------
+
+Beginning with the Tremont Atom CPU split lock operations may raise an
+Alignment Check (#AC) exception when a split lock operation is attemped.
+
+#DB exception for bus lock detection
+------------------------------------
+
+Some CPUs have the ability to notify the kernel by an #DB trap after a user
+instruction acquires a bus lock and is executed. This allows the kernel to
+terminate the application or to enforce throttling.
+
+Software handling
+=================
+
+The kernel #AC and #DB handlers handle bus lock based on the kernel
+parameter "split_lock_detect". Here is a summary of different options:
+
++------------------+----------------------------+-----------------------+
+|split_lock_detect=|#AC for split lock |#DB for bus lock |
++------------------+----------------------------+-----------------------+
+|off |Do nothing |Do nothing |
++------------------+----------------------------+-----------------------+
+|warn |Kernel OOPs |Warn once per task and |
+|(default) |Warn once per task, add a |and continues to run. |
+| |delay, add synchronization | |
+| |to prevent more than one | |
+| |core from executing a | |
+| |split lock in parallel. | |
+| |sysctl split_lock_mitigate | |
+| |can be used to avoid the | |
+| |delay and synchronization | |
+| |When both features are | |
+| |supported, warn in #AC | |
++------------------+----------------------------+-----------------------+
+|fatal |Kernel OOPs |Send SIGBUS to user. |
+| |Send SIGBUS to user | |
+| |When both features are | |
+| |supported, fatal in #AC | |
++------------------+----------------------------+-----------------------+
+|ratelimit:N |Do nothing |Limit bus lock rate to |
+|(0 < N <= 1000) | |N bus locks per second |
+| | |system wide and warn on|
+| | |bus locks. |
++------------------+----------------------------+-----------------------+
+
+Usages
+======
+
+Detecting and handling bus lock may find usages in various areas:
+
+It is critical for real time system designers who build consolidated real
+time systems. These systems run hard real time code on some cores and run
+"untrusted" user processes on other cores. The hard real time cannot afford
+to have any bus lock from the untrusted processes to hurt real time
+performance. To date the designers have been unable to deploy these
+solutions as they have no way to prevent the "untrusted" user code from
+generating split lock and bus lock to block the hard real time code to
+access memory during bus locking.
+
+It's also useful for general computing to prevent guests or user
+applications from slowing down the overall system by executing instructions
+with bus lock.
+
+
+Guidance
+========
+off
+---
+
+Disable checking for split lock and bus lock. This option can be useful if
+there are legacy applications that trigger these events at a low rate so
+that mitigation is not needed.
+
+warn
+----
+
+A warning is emitted when a bus lock is detected which allows to identify
+the offending application. This is the default behavior.
+
+fatal
+-----
+
+In this case, the bus lock is not tolerated and the process is killed.
+
+ratelimit
+---------
+
+A system wide bus lock rate limit N is specified where 0 < N <= 1000. This
+allows a bus lock rate up to N bus locks per second. When the bus lock rate
+is exceeded then any task which is caught via the buslock #DB exception is
+throttled by enforced sleeps until the rate goes under the limit again.
+
+This is an effective mitigation in cases where a minimal impact can be
+tolerated, but an eventual Denial of Service attack has to be prevented. It
+allows to identify the offending processes and analyze whether they are
+malicious or just badly written.
+
+Selecting a rate limit of 1000 allows the bus to be locked for up to about
+seven million cycles each second (assuming 7000 cycles for each bus
+lock). On a 2 GHz processor that would be about 0.35% system slowdown.
diff --git a/Documentation/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst
similarity index 100%
rename from Documentation/x86/cpuinfo.rst
rename to Documentation/arch/x86/cpuinfo.rst
diff --git a/Documentation/x86/earlyprintk.rst b/Documentation/arch/x86/earlyprintk.rst
similarity index 100%
rename from Documentation/x86/earlyprintk.rst
rename to Documentation/arch/x86/earlyprintk.rst
diff --git a/Documentation/x86/elf_auxvec.rst b/Documentation/arch/x86/elf_auxvec.rst
similarity index 100%
rename from Documentation/x86/elf_auxvec.rst
rename to Documentation/arch/x86/elf_auxvec.rst
diff --git a/Documentation/x86/entry_64.rst b/Documentation/arch/x86/entry_64.rst
similarity index 100%
rename from Documentation/x86/entry_64.rst
rename to Documentation/arch/x86/entry_64.rst
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/arch/x86/exception-tables.rst
similarity index 100%
rename from Documentation/x86/exception-tables.rst
rename to Documentation/arch/x86/exception-tables.rst
diff --git a/Documentation/x86/features.rst b/Documentation/arch/x86/features.rst
similarity index 100%
rename from Documentation/x86/features.rst
rename to Documentation/arch/x86/features.rst
diff --git a/Documentation/x86/i386/IO-APIC.rst b/Documentation/arch/x86/i386/IO-APIC.rst
similarity index 100%
rename from Documentation/x86/i386/IO-APIC.rst
rename to Documentation/arch/x86/i386/IO-APIC.rst
diff --git a/Documentation/x86/i386/index.rst b/Documentation/arch/x86/i386/index.rst
similarity index 100%
rename from Documentation/x86/i386/index.rst
rename to Documentation/arch/x86/i386/index.rst
diff --git a/Documentation/x86/ifs.rst b/Documentation/arch/x86/ifs.rst
similarity index 100%
rename from Documentation/x86/ifs.rst
rename to Documentation/arch/x86/ifs.rst
diff --git a/Documentation/x86/index.rst b/Documentation/arch/x86/index.rst
similarity index 100%
rename from Documentation/x86/index.rst
rename to Documentation/arch/x86/index.rst
diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/arch/x86/intel-hfi.rst
similarity index 100%
rename from Documentation/x86/intel-hfi.rst
rename to Documentation/arch/x86/intel-hfi.rst
diff --git a/Documentation/x86/intel_txt.rst b/Documentation/arch/x86/intel_txt.rst
similarity index 100%
rename from Documentation/x86/intel_txt.rst
rename to Documentation/arch/x86/intel_txt.rst
diff --git a/Documentation/x86/iommu.rst b/Documentation/arch/x86/iommu.rst
similarity index 100%
rename from Documentation/x86/iommu.rst
rename to Documentation/arch/x86/iommu.rst
diff --git a/Documentation/arch/x86/kernel-stacks.rst b/Documentation/arch/x86/kernel-stacks.rst
new file mode 100644
index 0000000..738671a
--- /dev/null
+++ b/Documentation/arch/x86/kernel-stacks.rst
@@ -0,0 +1,152 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Kernel Stacks
+=============
+
+Kernel stacks on x86-64 bit
+===========================
+
+Most of the text from Keith Owens, hacked by AK
+
+x86_64 page size (PAGE_SIZE) is 4K.
+
+Like all other architectures, x86_64 has a kernel stack for every
+active thread. These thread stacks are THREAD_SIZE (4*PAGE_SIZE) big.
+These stacks contain useful data as long as a thread is alive or a
+zombie. While the thread is in user space the kernel stack is empty
+except for the thread_info structure at the bottom.
+
+In addition to the per thread stacks, there are specialized stacks
+associated with each CPU. These stacks are only used while the kernel
+is in control on that CPU; when a CPU returns to user space the
+specialized stacks contain no useful data. The main CPU stacks are:
+
+* Interrupt stack. IRQ_STACK_SIZE
+
+ Used for external hardware interrupts. If this is the first external
+ hardware interrupt (i.e. not a nested hardware interrupt) then the
+ kernel switches from the current task to the interrupt stack. Like
+ the split thread and interrupt stacks on i386, this gives more room
+ for kernel interrupt processing without having to increase the size
+ of every per thread stack.
+
+ The interrupt stack is also used when processing a softirq.
+
+Switching to the kernel interrupt stack is done by software based on a
+per CPU interrupt nest counter. This is needed because x86-64 "IST"
+hardware stacks cannot nest without races.
+
+x86_64 also has a feature which is not available on i386, the ability
+to automatically switch to a new stack for designated events such as
+double fault or NMI, which makes it easier to handle these unusual
+events on x86_64. This feature is called the Interrupt Stack Table
+(IST). There can be up to 7 IST entries per CPU. The IST code is an
+index into the Task State Segment (TSS). The IST entries in the TSS
+point to dedicated stacks; each stack can be a different size.
+
+An IST is selected by a non-zero value in the IST field of an
+interrupt-gate descriptor. When an interrupt occurs and the hardware
+loads such a descriptor, the hardware automatically sets the new stack
+pointer based on the IST value, then invokes the interrupt handler. If
+the interrupt came from user mode, then the interrupt handler prologue
+will switch back to the per-thread stack. If software wants to allow
+nested IST interrupts then the handler must adjust the IST values on
+entry to and exit from the interrupt handler. (This is occasionally
+done, e.g. for debug exceptions.)
+
+Events with different IST codes (i.e. with different stacks) can be
+nested. For example, a debug interrupt can safely be interrupted by an
+NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
+pointers on entry to and exit from all IST events, in theory allowing
+IST events with the same code to be nested. However in most cases, the
+stack size allocated to an IST assumes no nesting for the same code.
+If that assumption is ever broken then the stacks will become corrupt.
+
+The currently assigned IST stacks are:
+
+* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 8 - Double Fault Exception (#DF).
+
+ Invoked when handling one exception causes another exception. Happens
+ when the kernel is very confused (e.g. kernel stack pointer corrupt).
+ Using a separate stack allows the kernel to recover from it well enough
+ in many cases to still output an oops.
+
+* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for non-maskable interrupts (NMI).
+
+ NMI can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for NMI events avoids making
+ assumptions about the previous state of the kernel stack.
+
+* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for hardware debug interrupts (interrupt 1) and for software
+ debug interrupts (INT3).
+
+ When debugging a kernel, debug interrupts (both hardware and
+ software) can occur at any time. Using IST for these interrupts
+ avoids making assumptions about the previous state of the kernel
+ stack.
+
+ To handle nested #DB correctly there exist two instances of DB stacks. On
+ #DB entry the IST stackpointer for #DB is switched to the second instance
+ so a nested #DB starts from a clean stack. The nested #DB switches
+ the IST stackpointer to a guard hole to catch triple nesting.
+
+* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 18 - Machine Check Exception (#MC).
+
+ MCE can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for MCE events avoids making
+ assumptions about the previous state of the kernel stack.
+
+For more details see the Intel IA32 or AMD AMD64 architecture manuals.
+
+
+Printing backtraces on x86
+==========================
+
+The question about the '?' preceding function names in an x86 stacktrace
+keeps popping up, here's an indepth explanation. It helps if the reader
+stares at print_context_stack() and the whole machinery in and around
+arch/x86/kernel/dumpstack.c.
+
+Adapted from Ingo's mail, Message-ID: <20150521101614.GA10889@gmail.com>:
+
+We always scan the full kernel stack for return addresses stored on
+the kernel stack(s) [1]_, from stack top to stack bottom, and print out
+anything that 'looks like' a kernel text address.
+
+If it fits into the frame pointer chain, we print it without a question
+mark, knowing that it's part of the real backtrace.
+
+If the address does not fit into our expected frame pointer chain we
+still print it, but we print a '?'. It can mean two things:
+
+ - either the address is not part of the call chain: it's just stale
+ values on the kernel stack, from earlier function calls. This is
+ the common case.
+
+ - or it is part of the call chain, but the frame pointer was not set
+ up properly within the function, so we don't recognize it.
+
+This way we will always print out the real call chain (plus a few more
+entries), regardless of whether the frame pointer was set up correctly
+or not - but in most cases we'll get the call chain right as well. The
+entries printed are strictly in stack order, so you can deduce more
+information from that as well.
+
+The most important property of this method is that we _never_ lose
+information: we always strive to print _all_ addresses on the stack(s)
+that look like kernel text addresses, so if debug information is wrong,
+we still print out the real call chain as well - just with more question
+marks than ideal.
+
+.. [1] For things like IRQ and IST stacks, we also scan those stacks, in
+ the right order, and try to cross from one stack into another
+ reconstructing the call chain. This works most of the time.
diff --git a/Documentation/x86/mds.rst b/Documentation/arch/x86/mds.rst
similarity index 100%
rename from Documentation/x86/mds.rst
rename to Documentation/arch/x86/mds.rst
diff --git a/Documentation/x86/microcode.rst b/Documentation/arch/x86/microcode.rst
similarity index 100%
rename from Documentation/x86/microcode.rst
rename to Documentation/arch/x86/microcode.rst
diff --git a/Documentation/arch/x86/mtrr.rst b/Documentation/arch/x86/mtrr.rst
new file mode 100644
index 0000000..f65ef03
--- /dev/null
+++ b/Documentation/arch/x86/mtrr.rst
@@ -0,0 +1,354 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+MTRR (Memory Type Range Register) control
+=========================================
+
+:Authors: - Richard Gooch <rgooch@atnf.csiro.au> - 3 Jun 1999
+ - Luis R. Rodriguez <mcgrof@do-not-panic.com> - April 9, 2015
+
+
+Phasing out MTRR use
+====================
+
+MTRR use is replaced on modern x86 hardware with PAT. Direct MTRR use by
+drivers on Linux is now completely phased out, device drivers should use
+arch_phys_wc_add() in combination with ioremap_wc() to make MTRR effective on
+non-PAT systems while a no-op but equally effective on PAT enabled systems.
+
+Even if Linux does not use MTRRs directly, some x86 platform firmware may still
+set up MTRRs early before booting the OS. They do this as some platform
+firmware may still have implemented access to MTRRs which would be controlled
+and handled by the platform firmware directly. An example of platform use of
+MTRRs is through the use of SMI handlers, one case could be for fan control,
+the platform code would need uncachable access to some of its fan control
+registers. Such platform access does not need any Operating System MTRR code in
+place other than mtrr_type_lookup() to ensure any OS specific mapping requests
+are aligned with platform MTRR setup. If MTRRs are only set up by the platform
+firmware code though and the OS does not make any specific MTRR mapping
+requests mtrr_type_lookup() should always return MTRR_TYPE_INVALID.
+
+For details refer to Documentation/arch/x86/pat.rst.
+
+.. tip::
+ On Intel P6 family processors (Pentium Pro, Pentium II and later)
+ the Memory Type Range Registers (MTRRs) may be used to control
+ processor access to memory ranges. This is most useful when you have
+ a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+ allows bus write transfers to be combined into a larger transfer
+ before bursting over the PCI/AGP bus. This can increase performance
+ of image write operations 2.5 times or more.
+
+ The Cyrix 6x86, 6x86MX and M II processors have Address Range
+ Registers (ARRs) which provide a similar functionality to MTRRs. For
+ these, the ARRs are used to emulate the MTRRs.
+
+ The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+ MTRRs. These are supported. The AMD Athlon family provide 8 Intel
+ style MTRRs.
+
+ The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
+ are supported.
+
+ The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs.
+
+ The CONFIG_MTRR option creates a /proc/mtrr file which may be used
+ to manipulate your MTRRs. Typically the X server should use
+ this. This should have a reasonably generic interface so that
+ similar control registers on other processors can be easily
+ supported.
+
+There are two interfaces to /proc/mtrr: one is an ASCII interface
+which allows you to read and write. The other is an ioctl()
+interface. The ASCII interface is meant for administration. The
+ioctl() interface is meant for C programs (i.e. the X server). The
+interfaces are described below, with sample commands and C code.
+
+
+Reading MTRRs from the shell
+============================
+::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+ reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+
+Creating MTRRs from the C-shell::
+
+ # echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr
+
+or if you use bash::
+
+ # echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr
+
+And the result thereof::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+ reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+ reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1
+
+This is for video RAM at base address 0xf8000000 and size 4 megabytes. To
+find out your base address, you need to look at the output of your X
+server, which tells you where the linear framebuffer address is. A
+typical line that you may get is::
+
+ (--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000
+
+Note that you should only use the value from the X server, as it may
+move the framebuffer base address, so the only value you can trust is
+that reported by the X server.
+
+To find out the size of your framebuffer (what, you don't actually
+know?), the following line will tell you::
+
+ (--) S3: videoram: 4096k
+
+That's 4 megabytes, which is 0x400000 bytes (in hexadecimal).
+A patch is being written for XFree86 which will make this automatic:
+in other words the X server will manipulate /proc/mtrr using the
+ioctl() interface, so users won't have to do anything. If you use a
+commercial X server, lobby your vendor to add support for MTRRs.
+
+
+Creating overlapping MTRRs
+==========================
+::
+
+ %echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr
+ %echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr
+
+And the results::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1
+ reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1
+ reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1
+
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
+excluded from the beginning of the region because it is used for
+registers.
+
+NOTE: You can only create type=uncachable region, if the first
+region that you created is type=write-combining.
+
+
+Removing MTRRs from the C-shel
+==============================
+::
+
+ % echo "disable=2" >! /proc/mtrr
+
+or using bash::
+
+ % echo "disable=2" >| /proc/mtrr
+
+
+Reading MTRRs from a C program using ioctl()'s
+==============================================
+::
+
+ /* mtrr-show.c
+
+ Source file for mtrr-show (example program to show MTRRs using ioctl()'s)
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+ */
+
+ /*
+ This program will use an ioctl() on /proc/mtrr to show the current MTRR
+ settings. This is an alternative to reading /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+ */
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <errno.h>
+ #include <asm/mtrr.h>
+
+ #define TRUE 1
+ #define FALSE 0
+ #define ERRSTRING strerror (errno)
+
+ static char *mtrr_strings[MTRR_NUM_TYPES] =
+ {
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+ };
+
+ int main ()
+ {
+ int fd;
+ struct mtrr_gentry gentry;
+
+ if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (1);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (2);
+ }
+ for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0;
+ ++gentry.regnum)
+ {
+ if (gentry.size < 1)
+ {
+ fprintf (stderr, "Register: %u disabled\n", gentry.regnum);
+ continue;
+ }
+ fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n",
+ gentry.regnum, gentry.base, gentry.size,
+ mtrr_strings[gentry.type]);
+ }
+ if (errno == EINVAL) exit (0);
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (3);
+ } /* End Function main */
+
+
+Creating MTRRs from a C programme using ioctl()'s
+=================================================
+::
+
+ /* mtrr-add.c
+
+ Source file for mtrr-add (example programme to add an MTRRs using ioctl())
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+ */
+
+ /*
+ This programme will use an ioctl() on /proc/mtrr to add an entry. The first
+ available mtrr is used. This is an alternative to writing /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+ */
+ #include <stdio.h>
+ #include <string.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <errno.h>
+ #include <asm/mtrr.h>
+
+ #define TRUE 1
+ #define FALSE 0
+ #define ERRSTRING strerror (errno)
+
+ static char *mtrr_strings[MTRR_NUM_TYPES] =
+ {
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+ };
+
+ int main (int argc, char **argv)
+ {
+ int fd;
+ struct mtrr_sentry sentry;
+
+ if (argc != 4)
+ {
+ fprintf (stderr, "Usage:\tmtrr-add base size type\n");
+ exit (1);
+ }
+ sentry.base = strtoul (argv[1], NULL, 0);
+ sentry.size = strtoul (argv[2], NULL, 0);
+ for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type)
+ {
+ if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break;
+ }
+ if (sentry.type >= MTRR_NUM_TYPES)
+ {
+ fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]);
+ exit (2);
+ }
+ if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (3);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (4);
+ }
+ if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1)
+ {
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (5);
+ }
+ fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n");
+ sleep (5);
+ close (fd);
+ fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n",
+ stderr);
+ } /* End Function main */
diff --git a/Documentation/x86/orc-unwinder.rst b/Documentation/arch/x86/orc-unwinder.rst
similarity index 100%
rename from Documentation/x86/orc-unwinder.rst
rename to Documentation/arch/x86/orc-unwinder.rst
diff --git a/Documentation/x86/pat.rst b/Documentation/arch/x86/pat.rst
similarity index 100%
rename from Documentation/x86/pat.rst
rename to Documentation/arch/x86/pat.rst
diff --git a/Documentation/x86/pti.rst b/Documentation/arch/x86/pti.rst
similarity index 100%
rename from Documentation/x86/pti.rst
rename to Documentation/arch/x86/pti.rst
diff --git a/Documentation/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
similarity index 100%
rename from Documentation/x86/resctrl.rst
rename to Documentation/arch/x86/resctrl.rst
diff --git a/Documentation/x86/sgx.rst b/Documentation/arch/x86/sgx.rst
similarity index 100%
rename from Documentation/x86/sgx.rst
rename to Documentation/arch/x86/sgx.rst
diff --git a/Documentation/arch/x86/sva.rst b/Documentation/arch/x86/sva.rst
new file mode 100644
index 0000000..33cb050
--- /dev/null
+++ b/Documentation/arch/x86/sva.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Shared Virtual Addressing (SVA) with ENQCMD
+===========================================
+
+Background
+==========
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM).
+
+In addition to the convenience of using application virtual addresses
+by the device, it also doesn't require pinning pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handling
+application page-faults. For more information please refer to the PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. IOMMU is also
+required to support the PCIe features ATS and PRI. ATS allows devices
+to cache translations for virtual addresses. The IOMMU driver uses the
+mmu_notifier() support to keep the device TLB cache and the CPU cache in
+sync. When an ATS lookup fails for a virtual address, the device should
+use the PRI in order to request the virtual address to be paged into the
+CPU page tables. The device must use ATS again in order the fetch the
+translation before use.
+
+Shared Hardware Workqueues
+==========================
+
+Unlike Single Root I/O Virtualization (SR-IOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VM's). This allows better hardware utilization vs. hard
+partitioning resources that could result in under utilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware by SWQ interface, SIOV uses Process Address Space
+ID (PASID), which is a 20-bit number defined by the PCIe SIG.
+
+PASID value is encoded in all transactions from the device. This allows the
+IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
+Resource Identifier (RID) which is the Bus/Device/Function.
+
+
+ENQCMD
+======
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back if the
+command was accepted by hardware. This allows the submitter to know if the
+submission needs to be retried or other device specific mechanisms to
+implement fairness or ensure forward progress should be provided.
+
+ENQCMD is the glue that ensures applications can directly submit commands
+to the hardware and also permits hardware to be aware of application context
+to perform I/O operations via use of PASID.
+
+Process Address Space Tagging
+=============================
+
+A new thread-scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA-capable device, this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU-specific API
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+iommu_sva_bind_device(), which will do the following:
+
+- Allocate the PASID, and program the process page-table (%cr3 register) in the
+ PASID context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+ the device TLB in sync. For example, when a page-table entry is invalidated,
+ the IOMMU propagates the invalidation to the device TLB. This will force any
+ future access by the device to this virtual address to participate in
+ ATS. If the IOMMU responds with proper response that a page is not
+ present, the device would request the page to be paged in via the PCIe PRI
+ protocol before performing I/O.
+
+This MSR is managed with the XSAVE feature set as "supervisor state" to
+ensure the MSR is updated during context switch.
+
+PASID Management
+================
+
+The kernel must allocate a PASID on behalf of each process which will use
+ENQCMD and program it into the new MSR to communicate the process identity to
+platform hardware. ENQCMD uses the PASID stored in this MSR to tag requests
+from this process. When a user submits a work descriptor to a device using the
+ENQCMD instruction, the PASID field in the descriptor is auto-filled with the
+value from MSR_IA32_PASID. Requests for DMA from the device are also tagged
+with the same PASID. The platform IOMMU uses the PASID in the transaction to
+perform address translation. The IOMMU APIs setup the corresponding PASID
+entry in IOMMU with the process address used by the CPU (e.g. %cr3 register in
+x86).
+
+The MSR must be configured on each logical CPU before any application
+thread can interact with a device. Threads that belong to the same
+process share the same page tables, thus the same MSR value.
+
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as IOMMU_PASID_INVALID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device but finds no PASID for this process. Subsequent binds of the same, or
+other devices will share the same PASID.
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It's loaded to the
+IA32_PASID MSR lazily when a thread tries to submit a work descriptor
+to a device using the ENQCMD.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so will be
+able to use the PASID allocated to the process. The IA32_PASID is not
+preemptively initialized as the PASID value might not be allocated yet or
+the kernel does not know whether this thread is going to access the device
+and the cleared IA32_PASID MSR reduces context switch overhead by xstate
+init optimization. Since #GP faults have to be handled on any threads that
+were created before the PASID was assigned to the mm of the process, newly
+created threads might as well be treated in a consistent way.
+
+Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
+all threads in unbind, free the PASID lazily only on mm exit.
+
+If a process does a close(2) of the device file descriptor and munmap(2)
+of the device MMIO portal, then the driver will unbind the device. The
+PASID is still marked VALID in the PASID_MSR for any threads in the
+process that accessed the device. But this is harmless as without the
+MMIO portal they cannot submit new work to the device.
+
+Relationships
+=============
+
+ * Each process has many threads, but only one PASID.
+ * Devices have a limited number (~10's to 1000's) of hardware workqueues.
+ The device driver manages allocating hardware workqueues.
+ * A single mmap() maps a single hardware workqueue as a "portal" and
+ each portal maps down to a single workqueue.
+ * For each device with which a process interacts, there must be
+ one or more mmap()'d portals.
+ * Many threads within a process can share a single portal to access
+ a single device.
+ * Multiple processes can separately mmap() the same portal, in
+ which case they still share one device hardware workqueue.
+ * The single process-wide PASID is used by all threads to interact
+ with all devices. There is not, for instance, a PASID for each
+ thread or each thread<->device pair.
+
+FAQ
+===
+
+* What is SVA/SVM?
+
+Shared Virtual Addressing (SVA) permits I/O hardware and the processor to
+work in the same address space, i.e., to share it. Some call it Shared
+Virtual Memory (SVM), but Linux community wanted to avoid confusing it with
+POSIX Shared Memory and Secure Virtual Machines which were terms already in
+circulation.
+
+* What is a PASID?
+
+A Process Address Space ID (PASID) is a PCIe-defined Transaction Layer Packet
+(TLP) prefix. A PASID is a 20-bit number allocated and managed by the OS.
+PASID is included in all transactions between the platform and the device.
+
+* How are shared workqueues different?
+
+Traditionally, in order for userspace applications to interact with hardware,
+there is a separate hardware instance required per process. For example,
+consider doorbells as a mechanism of informing hardware about work to process.
+Each doorbell is required to be spaced 4k (or page-size) apart for process
+isolation. This requires hardware to provision that space and reserve it in
+MMIO. This doesn't scale as the number of threads becomes quite large. The
+hardware also manages the queue depth for Shared Work Queues (SWQ), and
+consumers don't need to track queue depth. If there is no space to accept
+a command, the device will return an error indicating retry.
+
+A user should check Deferrable Memory Write (DMWr) capability on the device
+and only submits ENQCMD when the device supports it. In the new DMWr PCIe
+terminology, devices need to support DMWr completer capability. In addition,
+it requires all switch ports to support DMWr routing and must be enabled by
+the PCIe subsystem, much like how PCIe atomic operations are managed for
+instance.
+
+SWQ allows hardware to provision just a single address in the device. When
+used with ENQCMD to submit work, the device can distinguish the process
+submitting the work since it will include the PASID assigned to that
+process. This helps the device scale to a large number of processes.
+
+* Is this the same as a user space device driver?
+
+Communicating with the device via the shared workqueue is much simpler
+than a full blown user space driver. The kernel driver does all the
+initialization of the hardware. User space only needs to worry about
+submitting work and processing completions.
+
+* Is this the same as SR-IOV?
+
+Single Root I/O Virtualization (SR-IOV) focuses on providing independent
+hardware interfaces for virtualizing hardware. Hence, it's required to be
+almost fully functional interface to software supporting the traditional
+BARs, space for interrupts via MSI-X, its own register layout.
+Virtual Functions (VFs) are assisted by the Physical Function (PF)
+driver.
+
+Scalable I/O Virtualization builds on the PASID concept to create device
+instances for virtualization. SIOV requires host software to assist in
+creating virtual devices; each virtual device is represented by a PASID
+along with the bus/device/function of the device. This allows device
+hardware to optimize device resource creation and can grow dynamically on
+demand. SR-IOV creation and management is very static in nature. Consult
+references below for more details.
+
+* Why not just create a virtual function for each app?
+
+Creating PCIe SR-IOV type Virtual Functions (VF) is expensive. VFs require
+duplicated hardware for PCI config space and interrupts such as MSI-X.
+Resources such as interrupts have to be hard partitioned between VFs at
+creation time, and cannot scale dynamically on demand. The VFs are not
+completely independent from the Physical Function (PF). Most VFs require
+some communication and assistance from the PF driver. SIOV, in contrast,
+creates a software-defined device where all the configuration and control
+aspects are mediated via the slow path. The work submission and completion
+happen without any mediation.
+
+* Does this support virtualization?
+
+ENQCMD can be used from within a guest VM. In these cases, the VMM helps
+with setting up a translation table to translate from Guest PASID to Host
+PASID. Please consult the ENQCMD instruction set reference for more
+details.
+
+* Does memory need to be pinned?
+
+When devices support SVA along with platform hardware such as IOMMU
+supporting such devices, there is no need to pin memory for DMA purposes.
+Devices that support SVA also support other PCIe features that remove the
+pinning requirement for memory.
+
+Device TLB support - Device requests the IOMMU to lookup an address before
+use via Address Translation Service (ATS) requests. If the mapping exists
+but there is no page allocated by the OS, IOMMU hardware returns that no
+mapping exists.
+
+Device requests the virtual address to be mapped via Page Request
+Interface (PRI). Once the OS has successfully completed the mapping, it
+returns the response back to the device. The device requests again for
+a translation and continues.
+
+IOMMU works with the OS in managing consistency of page-tables with the
+device. When removing pages, it interacts with the device to remove any
+device TLB entry that might have been cached before removing the mappings from
+the OS.
+
+References
+==========
+
+VT-D:
+https://01.org/blogs/ashokraj/2018/recent-enhancements-intel-virtualization-technology-directed-i/o-intel-vt-d
+
+SIOV:
+https://01.org/blogs/2019/assignable-interfaces-intel-scalable-i/o-virtualization-linux
+
+ENQCMD in ISE:
+https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
+
+DSA spec:
+https://software.intel.com/sites/default/files/341204-intel-data-streaming-accelerator-spec.pdf
diff --git a/Documentation/x86/tdx.rst b/Documentation/arch/x86/tdx.rst
similarity index 100%
rename from Documentation/x86/tdx.rst
rename to Documentation/arch/x86/tdx.rst
diff --git a/Documentation/x86/tlb.rst b/Documentation/arch/x86/tlb.rst
similarity index 100%
rename from Documentation/x86/tlb.rst
rename to Documentation/arch/x86/tlb.rst
diff --git a/Documentation/x86/topology.rst b/Documentation/arch/x86/topology.rst
similarity index 100%
rename from Documentation/x86/topology.rst
rename to Documentation/arch/x86/topology.rst
diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/arch/x86/tsx_async_abort.rst
similarity index 100%
rename from Documentation/x86/tsx_async_abort.rst
rename to Documentation/arch/x86/tsx_async_abort.rst
diff --git a/Documentation/x86/usb-legacy-support.rst b/Documentation/arch/x86/usb-legacy-support.rst
similarity index 100%
rename from Documentation/x86/usb-legacy-support.rst
rename to Documentation/arch/x86/usb-legacy-support.rst
diff --git a/Documentation/arch/x86/x86_64/5level-paging.rst b/Documentation/arch/x86/x86_64/5level-paging.rst
new file mode 100644
index 0000000..71f882f4
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/5level-paging.rst
@@ -0,0 +1,67 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+5-level paging
+==============
+
+Overview
+========
+Original x86-64 was limited by 4-level paging to 256 TiB of virtual address
+space and 64 TiB of physical address space. We are already bumping into
+this limit: some vendors offer servers with 64 TiB of memory today.
+
+To overcome the limitation upcoming hardware will introduce support for
+5-level paging. It is a straight-forward extension of the current page
+table structure adding one more layer of translation.
+
+It bumps the limits to 128 PiB of virtual address space and 4 PiB of
+physical address space. This "ought to be enough for anybody" ©.
+
+QEMU 2.9 and later support 5-level paging.
+
+Virtual memory layout for 5-level paging is described in
+Documentation/arch/x86/x86_64/mm.rst
+
+
+Enabling 5-level paging
+=======================
+CONFIG_X86_5LEVEL=y enables the feature.
+
+Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
+In this case additional page table level -- p4d -- will be folded at
+runtime.
+
+User-space and large virtual address space
+==========================================
+On x86, 5-level paging enables 56-bit userspace virtual address space.
+Not all user space is ready to handle wide addresses. It's known that
+at least some JIT compilers use higher bits in pointers to encode their
+information. It collides with valid pointers with 5-level paging and
+leads to crashes.
+
+To mitigate this, we are not going to allocate virtual address space
+above 47-bit by default.
+
+But userspace can ask for allocation from full address space by
+specifying hint address (with or without MAP_FIXED) above 47-bits.
+
+If hint address set above 47-bit, but MAP_FIXED is not specified, we try
+to look for unmapped area by specified address. If it's already
+occupied, we look for unmapped area in *full* address space, rather than
+from 47-bit window.
+
+A high hint address would only affect the allocation in question, but not
+any future mmap()s.
+
+Specifying high hint address on older kernel or on machine without 5-level
+paging support is safe. The hint will be ignored and kernel will fall back
+to allocation from 47-bit address space.
+
+This approach helps to easily make application's memory allocator aware
+about large address space without manually tracking allocated virtual
+address space.
+
+One important case we need to handle here is interaction with MPX.
+MPX (without MAWA extension) cannot handle addresses above 47-bit, so we
+need to make sure that MPX cannot be enabled we already have VMA above
+the boundary and forbid creating such VMAs once MPX is enabled.
diff --git a/Documentation/arch/x86/x86_64/boot-options.rst b/Documentation/arch/x86/x86_64/boot-options.rst
new file mode 100644
index 0000000..137432d
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/boot-options.rst
@@ -0,0 +1,319 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+AMD64 Specific Boot Options
+===========================
+
+There are many others (usually documented in driver documentation), but
+only the AMD64 specific ones are listed here.
+
+Machine check
+=============
+Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
+
+ mce=off
+ Disable machine check
+ mce=no_cmci
+ Disable CMCI(Corrected Machine Check Interrupt) that
+ Intel processor supports. Usually this disablement is
+ not recommended, but it might be handy if your hardware
+ is misbehaving.
+ Note that you'll get more problems without CMCI than with
+ due to the shared banks, i.e. you might get duplicated
+ error logs.
+ mce=dont_log_ce
+ Don't make logs for corrected errors. All events reported
+ as corrected are silently cleared by OS.
+ This option will be useful if you have no interest in any
+ of corrected errors.
+ mce=ignore_ce
+ Disable features for corrected errors, e.g. polling timer
+ and CMCI. All events reported as corrected are not cleared
+ by OS and remained in its error banks.
+ Usually this disablement is not recommended, however if
+ there is an agent checking/clearing corrected errors
+ (e.g. BIOS or hardware monitoring applications), conflicting
+ with OS's error handling, and you cannot deactivate the agent,
+ then this option will be a help.
+ mce=no_lmce
+ Do not opt-in to Local MCE delivery. Use legacy method
+ to broadcast MCEs.
+ mce=bootlog
+ Enable logging of machine checks left over from booting.
+ Disabled by default on AMD Fam10h and older because some BIOS
+ leave bogus ones.
+ If your BIOS doesn't do that it's a good idea to enable though
+ to make sure you log even machine check events that result
+ in a reboot. On Intel systems it is enabled by default.
+ mce=nobootlog
+ Disable boot machine check logging.
+ mce=monarchtimeout (number)
+ monarchtimeout:
+ Sets the time in us to wait for other CPUs on machine checks. 0
+ to disable.
+ mce=bios_cmci_threshold
+ Don't overwrite the bios-set CMCI threshold. This boot option
+ prevents Linux from overwriting the CMCI threshold set by the
+ bios. Without this option, Linux always sets the CMCI
+ threshold to 1. Enabling this may make memory predictive failure
+ analysis less effective if the bios sets thresholds for memory
+ errors since we will not see details for all errors.
+ mce=recovery
+ Force-enable recoverable machine check code paths
+
+ nomce (for compatibility with i386)
+ same as mce=off
+
+ Everything else is in sysfs now.
+
+APICs
+=====
+
+ apic
+ Use IO-APIC. Default
+
+ noapic
+ Don't use the IO-APIC.
+
+ disableapic
+ Don't use the local APIC
+
+ nolapic
+ Don't use the local APIC (alias for i386 compatibility)
+
+ pirq=...
+ See Documentation/arch/x86/i386/IO-APIC.rst
+
+ noapictimer
+ Don't set up the APIC timer
+
+ no_timer_check
+ Don't check the IO-APIC timer. This can work around
+ problems with incorrect timer initialization on some boards.
+
+ apicpmtimer
+ Do APIC timer calibration using the pmtimer. Implies
+ apicmaintimer. Useful when your PIT timer is totally broken.
+
+Timing
+======
+
+ notsc
+ Deprecated, use tsc=unstable instead.
+
+ nohpet
+ Don't use the HPET timer.
+
+Idle loop
+=========
+
+ idle=poll
+ Don't do power saving in the idle loop using HLT, but poll for rescheduling
+ event. This will make the CPUs eat a lot more power, but may be useful
+ to get slightly better performance in multiprocessor benchmarks. It also
+ makes some profiling using performance counters more accurate.
+ Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
+ CPUs) this option has no performance advantage over the normal idle loop.
+ It may also interact badly with hyperthreading.
+
+Rebooting
+=========
+
+ reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] | p[ci] [, [w]arm | [c]old]
+ bios
+ Use the CPU reboot vector for warm reset
+ warm
+ Don't set the cold reboot flag
+ cold
+ Set the cold reboot flag
+ triple
+ Force a triple fault (init)
+ kbd
+ Use the keyboard controller. cold reset (default)
+ acpi
+ Use the ACPI RESET_REG in the FADT. If ACPI is not configured or
+ the ACPI reset does not work, the reboot path attempts the reset
+ using the keyboard controller.
+ efi
+ Use efi reset_system runtime service. If EFI is not configured or
+ the EFI reset does not work, the reboot path attempts the reset using
+ the keyboard controller.
+ pci
+ Use a write to the PCI config space register 0xcf9 to trigger reboot.
+
+ Using warm reset will be much faster especially on big memory
+ systems because the BIOS will not go through the memory check.
+ Disadvantage is that not all hardware will be completely reinitialized
+ on reboot so there may be boot problems on some systems.
+
+ reboot=force
+ Don't stop other CPUs on reboot. This can make reboot more reliable
+ in some cases.
+
+ reboot=default
+ There are some built-in platform specific "quirks" - you may see:
+ "reboot: <name> series board detected. Selecting <type> for reboots."
+ In the case where you think the quirk is in error (e.g. you have
+ newer BIOS, or newer board) using this option will ignore the built-in
+ quirk table, and use the generic default reboot actions.
+
+NUMA
+====
+
+ numa=off
+ Only set up a single NUMA node spanning all memory.
+
+ numa=noacpi
+ Don't parse the SRAT table for NUMA setup
+
+ numa=nohmat
+ Don't parse the HMAT table for NUMA setup, or soft-reserved memory
+ partitioning.
+
+ numa=fake=<size>[MG]
+ If given as a memory unit, fills all system RAM with nodes of
+ size interleaved over physical nodes.
+
+ numa=fake=<N>
+ If given as an integer, fills all system RAM with N fake nodes
+ interleaved over physical nodes.
+
+ numa=fake=<N>U
+ If given as an integer followed by 'U', it will divide each
+ physical node into N emulated nodes.
+
+ACPI
+====
+
+ acpi=off
+ Don't enable ACPI
+ acpi=ht
+ Use ACPI boot table parsing, but don't enable ACPI interpreter
+ acpi=force
+ Force ACPI on (currently not needed)
+ acpi=strict
+ Disable out of spec ACPI workarounds.
+ acpi_sci={edge,level,high,low}
+ Set up ACPI SCI interrupt.
+ acpi=noirq
+ Don't route interrupts
+ acpi=nocmcff
+ Disable firmware first mode for corrected errors. This
+ disables parsing the HEST CMC error source to check if
+ firmware has set the FF flag. This may result in
+ duplicate corrected error reports.
+
+PCI
+===
+
+ pci=off
+ Don't use PCI
+ pci=conf1
+ Use conf1 access.
+ pci=conf2
+ Use conf2 access.
+ pci=rom
+ Assign ROMs.
+ pci=assign-busses
+ Assign busses
+ pci=irqmask=MASK
+ Set PCI interrupt mask to MASK
+ pci=lastbus=NUMBER
+ Scan up to NUMBER busses, no matter what the mptable says.
+ pci=noacpi
+ Don't use ACPI to set up PCI interrupt routing.
+
+IOMMU (input/output memory management unit)
+===========================================
+Multiple x86-64 PCI-DMA mapping implementations exist, for example:
+
+ 1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all
+ (e.g. because you have < 3 GB memory).
+ Kernel boot message: "PCI-DMA: Disabling IOMMU"
+
+ 2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
+ Kernel boot message: "PCI-DMA: using GART IOMMU"
+
+ 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
+ e.g. if there is no hardware IOMMU in the system and it is need because
+ you have >3GB memory or told the kernel to us it (iommu=soft))
+ Kernel boot message: "PCI-DMA: Using software bounce buffering
+ for IO (SWIOTLB)"
+
+::
+
+ iommu=[<size>][,noagp][,off][,force][,noforce]
+ [,memaper[=<order>]][,merge][,fullflush][,nomerge]
+ [,noaperture]
+
+General iommu options:
+
+ off
+ Don't initialize and use any kind of IOMMU.
+ noforce
+ Don't force hardware IOMMU usage when it is not needed. (default).
+ force
+ Force the use of the hardware IOMMU even when it is
+ not actually needed (e.g. because < 3 GB memory).
+ soft
+ Use software bounce buffering (SWIOTLB) (default for
+ Intel machines). This can be used to prevent the usage
+ of an available hardware IOMMU.
+
+iommu options only relevant to the AMD GART hardware IOMMU:
+
+ <size>
+ Set the size of the remapping area in bytes.
+ allowed
+ Overwrite iommu off workarounds for specific chipsets.
+ fullflush
+ Flush IOMMU on each allocation (default).
+ nofullflush
+ Don't use IOMMU fullflush.
+ memaper[=<order>]
+ Allocate an own aperture over RAM with size 32MB<<order.
+ (default: order=1, i.e. 64MB)
+ merge
+ Do scatter-gather (SG) merging. Implies "force" (experimental).
+ nomerge
+ Don't do scatter-gather (SG) merging.
+ noaperture
+ Ask the IOMMU not to touch the aperture for AGP.
+ noagp
+ Don't initialize the AGP driver and use full aperture.
+ panic
+ Always panic when IOMMU overflows.
+
+iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
+implementation:
+
+ swiotlb=<slots>[,force,noforce]
+ <slots>
+ Prereserve that many 2K slots for the software IO bounce buffering.
+ force
+ Force all IO through the software TLB.
+ noforce
+ Do not initialize the software TLB.
+
+
+Miscellaneous
+=============
+
+ nogbpages
+ Do not use GB pages for kernel direct mappings.
+ gbpages
+ Use GB pages for kernel direct mappings.
+
+
+AMD SEV (Secure Encrypted Virtualization)
+=========================================
+Options relating to AMD SEV, specified via the following format:
+
+::
+
+ sev=option1[,option2]
+
+The available options are:
+
+ debug
+ Enable debug messages.
diff --git a/Documentation/x86/x86_64/cpu-hotplug-spec.rst b/Documentation/arch/x86/x86_64/cpu-hotplug-spec.rst
similarity index 100%
rename from Documentation/x86/x86_64/cpu-hotplug-spec.rst
rename to Documentation/arch/x86/x86_64/cpu-hotplug-spec.rst
diff --git a/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
new file mode 100644
index 0000000..ba74617
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Fake NUMA For CPUSets
+=====================
+
+:Author: David Rientjes <rientjes@cs.washington.edu>
+
+Using numa=fake and CPUSets for Resource Management
+
+This document describes how the numa=fake x86_64 command-line option can be used
+in conjunction with cpusets for coarse memory management. Using this feature,
+you can create fake NUMA nodes that represent contiguous chunks of memory and
+assign them to cpusets and their attached tasks. This is a way of limiting the
+amount of system memory that are available to a certain class of tasks.
+
+For more information on the features of cpusets, see
+Documentation/admin-guide/cgroup-v1/cpusets.rst.
+There are a number of different configurations you can use for your needs. For
+more information on the numa=fake command line option and its various ways of
+configuring fake nodes, see Documentation/arch/x86/x86_64/boot-options.rst.
+
+For the purposes of this introduction, we'll assume a very primitive NUMA
+emulation setup of "numa=fake=4*512,". This will split our system memory into
+four equal chunks of 512M each that we can now use to assign to cpusets. As
+you become more familiar with using this combination for resource control,
+you'll determine a better setup to minimize the number of nodes you have to deal
+with.
+
+A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
+
+ Faking node 0 at 0000000000000000-0000000020000000 (512MB)
+ Faking node 1 at 0000000020000000-0000000040000000 (512MB)
+ Faking node 2 at 0000000040000000-0000000060000000 (512MB)
+ Faking node 3 at 0000000060000000-0000000080000000 (512MB)
+ ...
+ On node 0 totalpages: 130975
+ On node 1 totalpages: 131072
+ On node 2 totalpages: 131072
+ On node 3 totalpages: 131072
+
+Now following the instructions for mounting the cpusets filesystem from
+Documentation/admin-guide/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
+address spaces) to individual cpusets::
+
+ [root@xroads /]# mkdir exampleset
+ [root@xroads /]# mount -t cpuset none exampleset
+ [root@xroads /]# mkdir exampleset/ddset
+ [root@xroads /]# cd exampleset/ddset
+ [root@xroads /exampleset/ddset]# echo 0-1 > cpus
+ [root@xroads /exampleset/ddset]# echo 0-1 > mems
+
+Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
+memory allocations (1G).
+
+You can now assign tasks to these cpusets to limit the memory resources
+available to them according to the fake nodes assigned as mems::
+
+ [root@xroads /exampleset/ddset]# echo $$ > tasks
+ [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
+ [1] 13425
+
+Notice the difference between the system memory usage as reported by
+/proc/meminfo between the restricted cpuset case above and the unrestricted
+case (i.e. running the same 'dd' command without assigning it to a fake NUMA
+cpuset):
+
+ ======== ============ ==========
+ Name Unrestricted Restricted
+ ======== ============ ==========
+ MemTotal 3091900 kB 3091900 kB
+ MemFree 42113 kB 1513236 kB
+ ======== ============ ==========
+
+This allows for coarse memory management for the tasks you assign to particular
+cpusets. Since cpusets can form a hierarchy, you can create some pretty
+interesting combinations of use-cases for various classes of tasks for your
+memory management needs.
diff --git a/Documentation/x86/x86_64/fsgs.rst b/Documentation/arch/x86/x86_64/fsgs.rst
similarity index 100%
rename from Documentation/x86/x86_64/fsgs.rst
rename to Documentation/arch/x86/x86_64/fsgs.rst
diff --git a/Documentation/x86/x86_64/index.rst b/Documentation/arch/x86/x86_64/index.rst
similarity index 100%
rename from Documentation/x86/x86_64/index.rst
rename to Documentation/arch/x86/x86_64/index.rst
diff --git a/Documentation/x86/x86_64/machinecheck.rst b/Documentation/arch/x86/x86_64/machinecheck.rst
similarity index 100%
rename from Documentation/x86/x86_64/machinecheck.rst
rename to Documentation/arch/x86/x86_64/machinecheck.rst
diff --git a/Documentation/x86/x86_64/mm.rst b/Documentation/arch/x86/x86_64/mm.rst
similarity index 100%
rename from Documentation/x86/x86_64/mm.rst
rename to Documentation/arch/x86/x86_64/mm.rst
diff --git a/Documentation/x86/x86_64/uefi.rst b/Documentation/arch/x86/x86_64/uefi.rst
similarity index 100%
rename from Documentation/x86/x86_64/uefi.rst
rename to Documentation/arch/x86/x86_64/uefi.rst
diff --git a/Documentation/arch/x86/xstate.rst b/Documentation/arch/x86/xstate.rst
new file mode 100644
index 0000000..ae5c69e
--- /dev/null
+++ b/Documentation/arch/x86/xstate.rst
@@ -0,0 +1,174 @@
+Using XSTATE features in user space applications
+================================================
+
+The x86 architecture supports floating-point extensions which are
+enumerated via CPUID. Applications consult CPUID and use XGETBV to
+evaluate which features have been enabled by the kernel XCR0.
+
+Up to AVX-512 and PKRU states, these features are automatically enabled by
+the kernel if available. Features like AMX TILE_DATA (XSTATE component 18)
+are enabled by XCR0 as well, but the first use of related instruction is
+trapped by the kernel because by default the required large XSTATE buffers
+are not allocated automatically.
+
+The purpose for dynamic features
+--------------------------------
+
+Legacy userspace libraries often have hard-coded, static sizes for
+alternate signal stacks, often using MINSIGSTKSZ which is typically 2KB.
+That stack must be able to store at *least* the signal frame that the
+kernel sets up before jumping into the signal handler. That signal frame
+must include an XSAVE buffer defined by the CPU.
+
+However, that means that the size of signal stacks is dynamic, not static,
+because different CPUs have differently-sized XSAVE buffers. A compiled-in
+size of 2KB with existing applications is too small for new CPU features
+like AMX. Instead of universally requiring larger stack, with the dynamic
+enabling, the kernel can enforce userspace applications to have
+properly-sized altstacks.
+
+Using dynamically enabled XSTATE features in user space applications
+--------------------------------------------------------------------
+
+The kernel provides an arch_prctl(2) based mechanism for applications to
+request the usage of such features. The arch_prctl(2) options related to
+this are:
+
+-ARCH_GET_XCOMP_SUPP
+
+ arch_prctl(ARCH_GET_XCOMP_SUPP, &features);
+
+ ARCH_GET_XCOMP_SUPP stores the supported features in userspace storage of
+ type uint64_t. The second argument is a pointer to that storage.
+
+-ARCH_GET_XCOMP_PERM
+
+ arch_prctl(ARCH_GET_XCOMP_PERM, &features);
+
+ ARCH_GET_XCOMP_PERM stores the features for which the userspace process
+ has permission in userspace storage of type uint64_t. The second argument
+ is a pointer to that storage.
+
+-ARCH_REQ_XCOMP_PERM
+
+ arch_prctl(ARCH_REQ_XCOMP_PERM, feature_nr);
+
+ ARCH_REQ_XCOMP_PERM allows to request permission for a dynamically enabled
+ feature or a feature set. A feature set can be mapped to a facility, e.g.
+ AMX, and can require one or more XSTATE components to be enabled.
+
+ The feature argument is the number of the highest XSTATE component which
+ is required for a facility to work.
+
+When requesting permission for a feature, the kernel checks the
+availability. The kernel ensures that sigaltstacks in the process's tasks
+are large enough to accommodate the resulting large signal frame. It
+enforces this both during ARCH_REQ_XCOMP_SUPP and during any subsequent
+sigaltstack(2) calls. If an installed sigaltstack is smaller than the
+resulting sigframe size, ARCH_REQ_XCOMP_SUPP results in -ENOSUPP. Also,
+sigaltstack(2) results in -ENOMEM if the requested altstack is too small
+for the permitted features.
+
+Permission, when granted, is valid per process. Permissions are inherited
+on fork(2) and cleared on exec(3).
+
+The first use of an instruction related to a dynamically enabled feature is
+trapped by the kernel. The trap handler checks whether the process has
+permission to use the feature. If the process has no permission then the
+kernel sends SIGILL to the application. If the process has permission then
+the handler allocates a larger xstate buffer for the task so the large
+state can be context switched. In the unlikely cases that the allocation
+fails, the kernel sends SIGSEGV.
+
+AMX TILE_DATA enabling example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Below is the example of how userspace applications enable
+TILE_DATA dynamically:
+
+ 1. The application first needs to query the kernel for AMX
+ support::
+
+ #include <asm/prctl.h>
+ #include <sys/syscall.h>
+ #include <stdio.h>
+ #include <unistd.h>
+
+ #ifndef ARCH_GET_XCOMP_SUPP
+ #define ARCH_GET_XCOMP_SUPP 0x1021
+ #endif
+
+ #ifndef ARCH_XCOMP_TILECFG
+ #define ARCH_XCOMP_TILECFG 17
+ #endif
+
+ #ifndef ARCH_XCOMP_TILEDATA
+ #define ARCH_XCOMP_TILEDATA 18
+ #endif
+
+ #define MASK_XCOMP_TILE ((1 << ARCH_XCOMP_TILECFG) | \
+ (1 << ARCH_XCOMP_TILEDATA))
+
+ unsigned long features;
+ long rc;
+
+ ...
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+
+ if (!rc && (features & MASK_XCOMP_TILE) == MASK_XCOMP_TILE)
+ printf("AMX is available.\n");
+
+ 2. After that, determining support for AMX, an application must
+ explicitly ask permission to use it::
+
+ #ifndef ARCH_REQ_XCOMP_PERM
+ #define ARCH_REQ_XCOMP_PERM 0x1023
+ #endif
+
+ ...
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, ARCH_XCOMP_TILEDATA);
+
+ if (!rc)
+ printf("AMX is ready for use.\n");
+
+Note this example does not include the sigaltstack preparation.
+
+Dynamic features in signal frames
+---------------------------------
+
+Dynamcally enabled features are not written to the signal frame upon signal
+entry if the feature is in its initial configuration. This differs from
+non-dynamic features which are always written regardless of their
+configuration. Signal handlers can examine the XSAVE buffer's XSTATE_BV
+field to determine if a features was written.
+
+Dynamic features for virtual machines
+-------------------------------------
+
+The permission for the guest state component needs to be managed separately
+from the host, as they are exclusive to each other. A coupled of options
+are extended to control the guest permission:
+
+-ARCH_GET_XCOMP_GUEST_PERM
+
+ arch_prctl(ARCH_GET_XCOMP_GUEST_PERM, &features);
+
+ ARCH_GET_XCOMP_GUEST_PERM is a variant of ARCH_GET_XCOMP_PERM. So it
+ provides the same semantics and functionality but for the guest
+ components.
+
+-ARCH_REQ_XCOMP_GUEST_PERM
+
+ arch_prctl(ARCH_REQ_XCOMP_GUEST_PERM, feature_nr);
+
+ ARCH_REQ_XCOMP_GUEST_PERM is a variant of ARCH_REQ_XCOMP_PERM. It has the
+ same semantics for the guest permission. While providing a similar
+ functionality, this comes with a constraint. Permission is frozen when the
+ first VCPU is created. Any attempt to change permission after that point
+ is going to be rejected. So, the permission has to be requested before the
+ first VCPU creation.
+
+Note that some VMMs may have already established a set of supported state
+components. These options are not presumed to support any particular VMM.
diff --git a/Documentation/x86/zero-page.rst b/Documentation/arch/x86/zero-page.rst
similarity index 100%
rename from Documentation/x86/zero-page.rst
rename to Documentation/arch/x86/zero-page.rst
diff --git a/Documentation/xtensa/atomctl.rst b/Documentation/arch/xtensa/atomctl.rst
similarity index 100%
rename from Documentation/xtensa/atomctl.rst
rename to Documentation/arch/xtensa/atomctl.rst
diff --git a/Documentation/xtensa/booting.rst b/Documentation/arch/xtensa/booting.rst
similarity index 100%
rename from Documentation/xtensa/booting.rst
rename to Documentation/arch/xtensa/booting.rst
diff --git a/Documentation/xtensa/features.rst b/Documentation/arch/xtensa/features.rst
similarity index 100%
rename from Documentation/xtensa/features.rst
rename to Documentation/arch/xtensa/features.rst
diff --git a/Documentation/xtensa/index.rst b/Documentation/arch/xtensa/index.rst
similarity index 100%
rename from Documentation/xtensa/index.rst
rename to Documentation/arch/xtensa/index.rst
diff --git a/Documentation/xtensa/mmu.rst b/Documentation/arch/xtensa/mmu.rst
similarity index 100%
rename from Documentation/xtensa/mmu.rst
rename to Documentation/arch/xtensa/mmu.rst
diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst
index ae42fe8..fd43502 100644
--- a/Documentation/arm/index.rst
+++ b/Documentation/arm/index.rst
@@ -58,6 +58,7 @@
stm32/stm32f769-overview
stm32/stm32f429-overview
stm32/stm32mp13-overview
+ stm32/stm32mp151-overview
stm32/stm32mp157-overview
stm32/stm32-dma-mdma-chaining
@@ -69,11 +70,9 @@
spear/overview
- sti/stih416-overview
sti/stih407-overview
sti/stih418-overview
sti/overview
- sti/stih415-overview
vfp/release-notes
diff --git a/Documentation/arm/sti/overview.rst b/Documentation/arm/sti/overview.rst
index 7074361..ae16ace 100644
--- a/Documentation/arm/sti/overview.rst
+++ b/Documentation/arm/sti/overview.rst
@@ -7,22 +7,18 @@
The ST Microelectronics Multimedia and Application Processors range of
CortexA9 System-on-Chip are supported by the 'STi' platform of
- ARM Linux. Currently STiH415, STiH416 SOCs are supported with both
- B2000 and B2020 Reference boards.
+ ARM Linux. Currently STiH407, STiH410 and STiH418 are supported.
configuration
-------------
- A generic configuration is provided for both STiH415/416, and can be used as the
- default by::
-
- make stih41x_defconfig
+ The configuration for the STi platform is supported via the multi_v7_defconfig.
Layout
------
- All the files for multiple machine families (STiH415, STiH416, and STiG125)
+ All the files for multiple machine families (STiH407, STiH410, and STiH418)
are located in the platform code contained in arch/arm/mach-sti
There is a generic board board-dt.c in the mach folder which support
diff --git a/Documentation/arm/sti/stih415-overview.rst b/Documentation/arm/sti/stih415-overview.rst
deleted file mode 100644
index b67452d..0000000
--- a/Documentation/arm/sti/stih415-overview.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-================
-STiH415 Overview
-================
-
-Introduction
-------------
-
- The STiH415 is the next generation of HD, AVC set-top box processors
- for satellite, cable, terrestrial and IP-STB markets.
-
- Features:
-
- - ARM Cortex-A9 1.0 GHz, dual-core CPU
- - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih416-overview.rst b/Documentation/arm/sti/stih416-overview.rst
deleted file mode 100644
index 93f17d7..0000000
--- a/Documentation/arm/sti/stih416-overview.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-================
-STiH416 Overview
-================
-
-Introduction
-------------
-
- The STiH416 is the next generation of HD, AVC set-top box processors
- for satellite, cable, terrestrial and IP-STB markets.
-
- Features
- - ARM Cortex-A9 1.2 GHz dual core CPU
- - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/stm32/stm32mp151-overview.rst b/Documentation/arm/stm32/stm32mp151-overview.rst
new file mode 100644
index 0000000..f42a2ac3
--- /dev/null
+++ b/Documentation/arm/stm32/stm32mp151-overview.rst
@@ -0,0 +1,36 @@
+===================
+STM32MP151 Overview
+===================
+
+Introduction
+------------
+
+The STM32MP151 is a Cortex-A MPU aimed at various applications.
+It features:
+
+- Single Cortex-A7 application core
+- Standard memories interface support
+- Standard connectivity, widely inherited from the STM32 MCU family
+- Comprehensive security support
+
+More details:
+
+- Cortex-A7 core running up to @800MHz
+- FMC controller to connect SDRAM, NOR and NAND memories
+- QSPI
+- SD/MMC/SDIO support
+- Ethernet controller
+- ADC/DAC
+- USB EHCI/OHCI controllers
+- USB OTG
+- I2C, SPI busses support
+- Several general purpose timers
+- Serial Audio interface
+- LCD-TFT controller
+- DCMIPP
+- SPDIFRX
+- DFSDM
+
+:Authors:
+
+- Roan van Dijk <roan@protonic.nl>
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ec5f889..9e311bc4 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -172,6 +172,8 @@
+----------------+-----------------+-----------------+-----------------------------+
| NVIDIA | Carmel Core | N/A | NVIDIA_CARMEL_CNP_ERRATUM |
+----------------+-----------------+-----------------+-----------------------------+
+| NVIDIA | T241 GICv3/4.x | T241-FABRIC-4 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
+----------------+-----------------+-----------------+-----------------------------+
@@ -205,6 +207,9 @@
+----------------+-----------------+-----------------+-----------------------------+
| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 |
+----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index 1029531..9fea696 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -18,7 +18,6 @@
kyber-iosched
null_blk
pr
- request
stat
switching-sched
writeback_cache_control
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
index f9bf18e..90b7334 100644
--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@@ -270,8 +270,7 @@
encryption need to create their own blk_crypto_profile for their request_queue,
and expose whatever functionality they choose. When a layered device wants to
pass a clone of that request to another request_queue, blk-crypto will
-initialize and prepare the clone as necessary; see
-``blk_crypto_insert_cloned_request()``.
+initialize and prepare the clone as necessary.
Interaction between inline encryption and blk integrity
=======================================================
diff --git a/Documentation/block/request.rst b/Documentation/block/request.rst
deleted file mode 100644
index 747021e..0000000
--- a/Documentation/block/request.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-============================
-struct request documentation
-============================
-
-Jens Axboe <jens.axboe@oracle.com> 27/05/02
-
-
-.. FIXME:
- No idea about what does mean - seems just some noise, so comment it
-
- 1.0
- Index
-
- 2.0 Struct request members classification
-
- 2.1 struct request members explanation
-
- 3.0
-
-
- 2.0
-
-
-
-Short explanation of request members
-====================================
-
-Classification flags:
-
- = ====================
- D driver member
- B block layer member
- I I/O scheduler member
- = ====================
-
-Unless an entry contains a D classification, a device driver must not access
-this member. Some members may contain D classifications, but should only be
-access through certain macros or functions (eg ->flags).
-
-<linux/blkdev.h>
-
-=============================== ======= =======================================
-Member Flag Comment
-=============================== ======= =======================================
-struct list_head queuelist BI Organization on various internal
- queues
-
-``void *elevator_private`` I I/O scheduler private data
-
-unsigned char cmd[16] D Driver can use this for setting up
- a cdb before execution, see
- blk_queue_prep_rq
-
-unsigned long flags DBI Contains info about data direction,
- request type, etc.
-
-int rq_status D Request status bits
-
-kdev_t rq_dev DBI Target device
-
-int errors DB Error counts
-
-sector_t sector DBI Target location
-
-unsigned long hard_nr_sectors B Used to keep sector sane
-
-unsigned long nr_sectors DBI Total number of sectors in request
-
-unsigned long hard_nr_sectors B Used to keep nr_sectors sane
-
-unsigned short nr_phys_segments DB Number of physical scatter gather
- segments in a request
-
-unsigned short nr_hw_segments DB Number of hardware scatter gather
- segments in a request
-
-unsigned int current_nr_sectors DB Number of sectors in first segment
- of request
-
-unsigned int hard_cur_sectors B Used to keep current_nr_sectors sane
-
-int tag DB TCQ tag, if assigned
-
-``void *special`` D Free to be used by driver
-
-``char *buffer`` D Map of first segment, also see
- section on bouncing SECTION
-
-``struct completion *waiting`` D Can be used by driver to get signalled
- on request completion
-
-``struct bio *bio`` DBI First bio in request
-
-``struct bio *biotail`` DBI Last bio in request
-
-``struct request_queue *q`` DB Request queue this request belongs to
-
-``struct request_list *rl`` B Request list this request came from
-=============================== ======= =======================================
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index bfff0e7..38372a9 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -314,7 +314,7 @@
Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
values (when using BTF support for BPF maps). This allows to use helpers for
such objects on these fields inside map values. Users are also allowed to embed
-pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the
+pointers to some kernel types (with __kptr_untrusted and __kptr BTF tags). Will the
kernel preserve backwards compatibility for these features?
A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
@@ -324,7 +324,7 @@
the kernel will preserve backwards compatibility, as they are part of UAPI.
For kptrs, they are also part of UAPI, but only with respect to the kptr
-mechanism. The types that you can use with a __kptr and __kptr_ref tagged
+mechanism. The types that you can use with a __kptr_untrusted and __kptr tagged
pointer in your struct are NOT part of the UAPI contract. The supported types can
and will change across kernel releases. However, operations like accessing kptr
fields and bpf_kptr_xchg() helper will continue to be supported across kernel
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index b421d94..609b71f 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -128,7 +128,8 @@
net-next are both run by David S. Miller. From there, they will go
into the kernel mainline tree run by Linus Torvalds. To read up on the
process of net and net-next being merged into the mainline tree, see
-the :ref:`netdev-FAQ`
+the documentation on netdev subsystem at
+Documentation/process/maintainer-netdev.rst.
@@ -147,7 +148,8 @@
Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
---------------------------------------------------------------------------------
-A: The process is the very same as described in the :ref:`netdev-FAQ`,
+A: The process is the very same as described in the netdev subsystem
+documentation at Documentation/process/maintainer-netdev.rst,
so please read up on it. The subject line must indicate whether the
patch is a fix or rather "next-like" content in order to let the
maintainers know whether it is targeted at bpf or bpf-next.
@@ -206,8 +208,9 @@
Once the BPF pull request was accepted by David S. Miller, then
the patches end up in net or net-next tree, respectively, and
make their way from there further into mainline. Again, see the
-:ref:`netdev-FAQ` for additional information e.g. on how often they are
-merged to mainline.
+documentation for netdev subsystem at
+Documentation/process/maintainer-netdev.rst for additional information
+e.g. on how often they are merged to mainline.
Q: How long do I need to wait for feedback on my BPF patches?
-------------------------------------------------------------
@@ -230,7 +233,8 @@
-----------------------------------------------------------------
A: For the time when the merge window is open, bpf-next will not be
processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the :ref:`netdev-FAQ` about further details.
+so feel free to read up on the netdev docs at
+Documentation/process/maintainer-netdev.rst about further details.
During those two weeks of merge window, we might ask you to resend
your patch series once bpf-next is open again. Once Linus released
@@ -394,7 +398,8 @@
netdev@vger.kernel.org
The process in general is the same as on netdev itself, see also the
-:ref:`netdev-FAQ`.
+the documentation on networking subsystem at
+Documentation/process/maintainer-netdev.rst.
Q: Do you also backport to kernels not currently maintained as stable?
----------------------------------------------------------------------
@@ -410,7 +415,7 @@
What should I do?
A: The same rules apply as with netdev patch submissions in general, see
-the :ref:`netdev-FAQ`.
+the netdev docs at Documentation/process/maintainer-netdev.rst.
Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
ask the BPF maintainers to queue the patches instead. This can be done
@@ -684,7 +689,6 @@
.. Links
-.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
.. _selftests:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst
index 528fedd..2c872a1 100644
--- a/Documentation/bpf/clang-notes.rst
+++ b/Documentation/bpf/clang-notes.rst
@@ -20,6 +20,12 @@
For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with
``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included.
+Jump instructions
+=================
+
+If ``-O0`` is used, Clang will generate the ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d)
+instruction, which is not supported by the Linux kernel verifier.
+
Atomic operations
=================
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 24bef9c..41efd88 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -51,7 +51,7 @@
.. code-block:: c
struct cpumask_map_value {
- struct bpf_cpumask __kptr_ref * cpumask;
+ struct bpf_cpumask __kptr * cpumask;
};
struct array_map {
@@ -117,18 +117,13 @@
As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
a map, the reference can be removed from the map with bpf_kptr_xchg(), or
-opportunistically acquired with bpf_cpumask_kptr_get():
-
-.. kernel-doc:: kernel/bpf/cpumask.c
- :identifiers: bpf_cpumask_kptr_get
-
-Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
+opportunistically acquired using RCU:
.. code-block:: c
/* struct containing the struct bpf_cpumask kptr which is stored in the map. */
struct cpumasks_kfunc_map_value {
- struct bpf_cpumask __kptr_ref * bpf_cpumask;
+ struct bpf_cpumask __kptr * bpf_cpumask;
};
/* The map containing struct cpumasks_kfunc_map_value entries. */
@@ -144,7 +139,7 @@
/**
* A simple example tracepoint program showing how a
* struct bpf_cpumask * kptr that is stored in a map can
- * be acquired using the bpf_cpumask_kptr_get() kfunc.
+ * be passed to kfuncs using RCU protection.
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
@@ -158,26 +153,21 @@
if (!v)
return -ENOENT;
+ bpf_rcu_read_lock();
/* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
- kptr = bpf_cpumask_kptr_get(&v->cpumask);
- if (!kptr)
+ kptr = v->cpumask;
+ if (!kptr) {
/* If no bpf_cpumask was present in the map, it's because
* we're racing with another CPU that removed it with
* bpf_kptr_xchg() between the bpf_map_lookup_elem()
- * above, and our call to bpf_cpumask_kptr_get().
- * bpf_cpumask_kptr_get() internally safely handles this
- * race, and will return NULL if the cpumask is no longer
- * present in the map by the time we invoke the kfunc.
+ * above, and our load of the pointer from the map.
*/
+ bpf_rcu_read_unlock();
return -EBUSY;
+ }
- /* Free the reference we just took above. Note that the
- * original struct bpf_cpumask * kptr is still in the map. It will
- * be freed either at a later time if another context deletes
- * it from the map, or automatically by the BPF subsystem if
- * it's still present when the map is destroyed.
- */
- bpf_cpumask_release(kptr);
+ bpf_cpumask_setall(kptr);
+ bpf_rcu_read_unlock();
return 0;
}
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index af515de..492980e 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -11,7 +11,8 @@
=========================
For brevity, this document uses the type notion "u64", "u32", etc.
-to mean an unsigned integer whose width is the specified number of bits.
+to mean an unsigned integer whose width is the specified number of bits,
+and "s32", etc. to mean a signed integer of the specified number of bits.
Registers and calling convention
================================
@@ -38,14 +39,11 @@
* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
constant) value after the basic instruction for a total of 128 bits.
-The basic instruction encoding is as follows, where MSB and LSB mean the most significant
-bits and least significant bits, respectively:
+The fields conforming an encoded basic instruction are stored in the
+following order::
-============= ======= ======= ======= ============
-32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
-============= ======= ======= ======= ============
-imm offset src_reg dst_reg opcode
-============= ======= ======= ======= ============
+ opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF.
+ opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF.
**imm**
signed integer immediate value
@@ -63,6 +61,18 @@
**opcode**
operation to perform
+Note that the contents of multi-byte fields ('imm' and 'offset') are
+stored using big-endian byte ordering in big-endian BPF and
+little-endian byte ordering in little-endian BPF.
+
+For example::
+
+ opcode offset imm assembly
+ src_reg dst_reg
+ 07 0 1 00 00 44 33 22 11 r1 += 0x11223344 // little
+ dst_reg src_reg
+ 07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big
+
Note that most instructions do not use all of the fields.
Unused fields shall be cleared to zero.
@@ -72,18 +82,23 @@
using the same format but with opcode, dst_reg, src_reg, and offset all set to zero,
and imm containing the high 32 bits of the immediate value.
-================= ==================
-64 bits (MSB) 64 bits (LSB)
-================= ==================
-basic instruction pseudo instruction
-================= ==================
+This is depicted in the following figure::
+
+ basic_instruction
+ .-----------------------------.
+ | |
+ code:8 regs:8 offset:16 imm:32 unused:32 imm:32
+ | |
+ '--------------'
+ pseudo instruction
Thus the 64-bit immediate value is constructed as follows:
imm64 = (next_imm << 32) | imm
where 'next_imm' refers to the imm value of the pseudo instruction
-following the basic instruction.
+following the basic instruction. The unused bytes in the pseudo
+instruction are reserved and shall be cleared to zero.
Instruction classes
-------------------
@@ -228,28 +243,58 @@
otherwise identical operations.
The 'code' field encodes the operation as below:
-======== ===== ========================= ============
-code value description notes
-======== ===== ========================= ============
-BPF_JA 0x00 PC += off BPF_JMP only
-BPF_JEQ 0x10 PC += off if dst == src
-BPF_JGT 0x20 PC += off if dst > src unsigned
-BPF_JGE 0x30 PC += off if dst >= src unsigned
-BPF_JSET 0x40 PC += off if dst & src
-BPF_JNE 0x50 PC += off if dst != src
-BPF_JSGT 0x60 PC += off if dst > src signed
-BPF_JSGE 0x70 PC += off if dst >= src signed
-BPF_CALL 0x80 function call
-BPF_EXIT 0x90 function / program return BPF_JMP only
-BPF_JLT 0xa0 PC += off if dst < src unsigned
-BPF_JLE 0xb0 PC += off if dst <= src unsigned
-BPF_JSLT 0xc0 PC += off if dst < src signed
-BPF_JSLE 0xd0 PC += off if dst <= src signed
-======== ===== ========================= ============
+======== ===== === =========================================== =========================================
+code value src description notes
+======== ===== === =========================================== =========================================
+BPF_JA 0x0 0x0 PC += offset BPF_JMP only
+BPF_JEQ 0x1 any PC += offset if dst == src
+BPF_JGT 0x2 any PC += offset if dst > src unsigned
+BPF_JGE 0x3 any PC += offset if dst >= src unsigned
+BPF_JSET 0x4 any PC += offset if dst & src
+BPF_JNE 0x5 any PC += offset if dst != src
+BPF_JSGT 0x6 any PC += offset if dst > src signed
+BPF_JSGE 0x7 any PC += offset if dst >= src signed
+BPF_CALL 0x8 0x0 call helper function by address see `Helper functions`_
+BPF_CALL 0x8 0x1 call PC += offset see `Program-local functions`_
+BPF_CALL 0x8 0x2 call helper function by BTF ID see `Helper functions`_
+BPF_EXIT 0x9 0x0 return BPF_JMP only
+BPF_JLT 0xa any PC += offset if dst < src unsigned
+BPF_JLE 0xb any PC += offset if dst <= src unsigned
+BPF_JSLT 0xc any PC += offset if dst < src signed
+BPF_JSLE 0xd any PC += offset if dst <= src signed
+======== ===== === =========================================== =========================================
The eBPF program needs to store the return value into register R0 before doing a
-BPF_EXIT.
+``BPF_EXIT``.
+Example:
+
+``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means::
+
+ if (s32)dst s>= (s32)src goto +offset
+
+where 's>=' indicates a signed '>=' comparison.
+
+Helper functions
+~~~~~~~~~~~~~~~~
+
+Helper functions are a concept whereby BPF programs can call into a
+set of function calls exposed by the underlying platform.
+
+Historically, each helper function was identified by an address
+encoded in the imm field. The available helper functions may differ
+for each program type, but address values are unique across all program types.
+
+Platforms that support the BPF Type Format (BTF) support identifying
+a helper function by a BTF ID encoded in the imm field, where the BTF ID
+identifies the helper name and type.
+
+Program-local functions
+~~~~~~~~~~~~~~~~~~~~~~~
+Program-local functions are functions exposed by the same BPF program as the
+caller, and are referenced by offset from the call instruction, similar to
+``BPF_JA``. A ``BPF_EXIT`` within the program-local function will return to
+the caller.
Load and store instructions
===========================
@@ -371,14 +416,56 @@
-----------------------------
Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction
-encoding for an extra imm64 value.
+encoding defined in `Instruction encoding`_, and use the 'src' field of the
+basic instruction to hold an opcode subtype.
-There is currently only one such instruction.
+The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions
+with opcode subtypes in the 'src' field, using new terms such as "map"
+defined further below:
-``BPF_LD | BPF_DW | BPF_IMM`` means::
+========================= ====== === ========================================= =========== ==============
+opcode construction opcode src pseudocode imm type dst type
+========================= ====== === ========================================= =========== ==============
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer
+========================= ====== === ========================================= =========== ==============
- dst = imm64
+where
+* map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map (see `Maps`_)
+* map_by_idx(imm) means to convert a 32-bit index into an address of a map
+* map_val(map) gets the address of the first value in a given map
+* var_addr(imm) gets the address of a platform variable (see `Platform Variables`_) with a given id
+* code_addr(imm) gets the address of the instruction at a specified relative offset in number of (64-bit) instructions
+* the 'imm type' can be used by disassemblers for display
+* the 'dst type' can be used for verification and JIT compilation purposes
+
+Maps
+~~~~
+
+Maps are shared memory regions accessible by eBPF programs on some platforms.
+A map can have various semantics as defined in a separate document, and may or
+may not have a single contiguous memory region, but the 'map_val(map)' is
+currently only defined for maps that do have a single contiguous memory region.
+
+Each map can have a file descriptor (fd) if supported by the platform, where
+'map_by_fd(imm)' means to get the map with the specified file descriptor. Each
+BPF program can also be defined to use a set of maps associated with the
+program at load time, and 'map_by_idx(imm)' means to get the map with the given
+index in the set associated with the BPF program containing the instruction.
+
+Platform Variables
+~~~~~~~~~~~~~~~~~~
+
+Platform variables are memory regions, identified by integer ids, exposed by
+the runtime and accessible by BPF programs on some platforms. The
+'var_addr(imm)' operation means to get the address of the memory region
+identified by the given id.
Legacy BPF Packet access instructions
-------------------------------------
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index ca96ef3..ea2516374 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -100,6 +100,23 @@
size parameter, and the value of the constant matters for program safety, __k
suffix should be used.
+2.2.2 __uninit Annotation
+-------------------------
+
+This annotation is used to indicate that the argument will be treated as
+uninitialized.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit)
+ {
+ ...
+ }
+
+Here, the dynptr will be treated as an uninitialized dynptr. Without this
+annotation, the verifier will reject the program if the dynptr passed in is
+not initialized.
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
@@ -162,20 +179,12 @@
---------------------
The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
-passed in to it. There can be only one referenced pointer that can be passed in.
-All copies of the pointer being released are invalidated as a result of invoking
-kfunc with this flag.
+passed in to it. There can be only one referenced pointer that can be passed
+in. All copies of the pointer being released are invalidated as a result of
+invoking kfunc with this flag. KF_RELEASE kfuncs automatically receive the
+protection afforded by the KF_TRUSTED_ARGS flag described below.
-2.4.4 KF_KPTR_GET flag
-----------------------
-
-The KF_KPTR_GET flag is used to indicate that the kfunc takes the first argument
-as a pointer to kptr, safely increments the refcount of the object it points to,
-and returns a reference to the user. The rest of the arguments may be normal
-arguments of a kfunc. The KF_KPTR_GET flag should be used in conjunction with
-KF_ACQUIRE and KF_RET_NULL flags.
-
-2.4.5 KF_TRUSTED_ARGS flag
+2.4.4 KF_TRUSTED_ARGS flag
--------------------------
The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
@@ -187,7 +196,7 @@
There are two types of pointers to kernel objects which are considered "valid":
1. Pointers which are passed as tracepoint or struct_ops callback arguments.
-2. Pointers which were returned from a KF_ACQUIRE or KF_KPTR_GET kfunc.
+2. Pointers which were returned from a KF_ACQUIRE kfunc.
Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
@@ -214,13 +223,13 @@
2. Specify the type and name of the trusted nested field. This field must match
the field in the original type definition exactly.
-2.4.6 KF_SLEEPABLE flag
+2.4.5 KF_SLEEPABLE flag
-----------------------
The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only
be called by sleepable BPF programs (BPF_F_SLEEPABLE).
-2.4.7 KF_DESTRUCTIVE flag
+2.4.6 KF_DESTRUCTIVE flag
--------------------------
The KF_DESTRUCTIVE flag is used to indicate functions calling which is
@@ -229,18 +238,20 @@
calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
added later.
-2.4.8 KF_RCU flag
+2.4.7 KF_RCU flag
-----------------
-The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument.
-When used together with KF_ACQUIRE, it indicates the kfunc should have a
-single argument which must be a trusted argument or a MEM_RCU pointer.
-The argument may have reference count of 0 and the kfunc must take this
-into consideration.
+The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
+KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees
+that the objects are valid and there is no use-after-free. The pointers are not
+NULL, but the object's refcount could have reached zero. The kfuncs need to
+consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
+pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
+also be KF_RET_NULL.
.. _KF_deprecated_flag:
-2.4.9 KF_DEPRECATED flag
+2.4.8 KF_DEPRECATED flag
------------------------
The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
@@ -451,13 +462,50 @@
struct task_struct *acquired;
acquired = bpf_task_acquire(task);
+ if (acquired)
+ /*
+ * In a typical program you'd do something like store
+ * the task in a map, and the map will automatically
+ * release it later. Here, we release it manually.
+ */
+ bpf_task_release(acquired);
+ return 0;
+ }
- /*
- * In a typical program you'd do something like store
- * the task in a map, and the map will automatically
- * release it later. Here, we release it manually.
- */
- bpf_task_release(acquired);
+
+References acquired on ``struct task_struct *`` objects are RCU protected.
+Therefore, when in an RCU read region, you can obtain a pointer to a task
+embedded in a map value without having to acquire a reference:
+
+.. code-block:: c
+
+ #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+ private(TASK) static struct task_struct *global;
+
+ /**
+ * A trivial example showing how to access a task stored
+ * in a map using RCU.
+ */
+ SEC("tp_btf/task_newtask")
+ int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags)
+ {
+ struct task_struct *local_copy;
+
+ bpf_rcu_read_lock();
+ local_copy = global;
+ if (local_copy)
+ /*
+ * We could also pass local_copy to kfuncs or helper functions here,
+ * as we're guaranteed that local_copy will be valid until we exit
+ * the RCU read region below.
+ */
+ bpf_printk("Global task %s is valid", local_copy->comm);
+ else
+ bpf_printk("No global task found");
+ bpf_rcu_read_unlock();
+
+ /* At this point we can no longer reference local_copy. */
+
return 0;
}
@@ -515,81 +563,17 @@
----
-You may also acquire a reference to a ``struct cgroup`` kptr that's already
-stored in a map using bpf_cgroup_kptr_get():
-
-.. kernel-doc:: kernel/bpf/helpers.c
- :identifiers: bpf_cgroup_kptr_get
-
-Here's an example of how it can be used:
-
-.. code-block:: c
-
- /* struct containing the struct task_struct kptr which is actually stored in the map. */
- struct __cgroups_kfunc_map_value {
- struct cgroup __kptr_ref * cgroup;
- };
-
- /* The map containing struct __cgroups_kfunc_map_value entries. */
- struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, int);
- __type(value, struct __cgroups_kfunc_map_value);
- __uint(max_entries, 1);
- } __cgroups_kfunc_map SEC(".maps");
-
- /* ... */
-
- /**
- * A simple example tracepoint program showing how a
- * struct cgroup kptr that is stored in a map can
- * be acquired using the bpf_cgroup_kptr_get() kfunc.
- */
- SEC("tp_btf/cgroup_mkdir")
- int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path)
- {
- struct cgroup *kptr;
- struct __cgroups_kfunc_map_value *v;
- s32 id = cgrp->self.id;
-
- /* Assume a cgroup kptr was previously stored in the map. */
- v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id);
- if (!v)
- return -ENOENT;
-
- /* Acquire a reference to the cgroup kptr that's already stored in the map. */
- kptr = bpf_cgroup_kptr_get(&v->cgroup);
- if (!kptr)
- /* If no cgroup was present in the map, it's because
- * we're racing with another CPU that removed it with
- * bpf_kptr_xchg() between the bpf_map_lookup_elem()
- * above, and our call to bpf_cgroup_kptr_get().
- * bpf_cgroup_kptr_get() internally safely handles this
- * race, and will return NULL if the task is no longer
- * present in the map by the time we invoke the kfunc.
- */
- return -EBUSY;
-
- /* Free the reference we just took above. Note that the
- * original struct cgroup kptr is still in the map. It will
- * be freed either at a later time if another context deletes
- * it from the map, or automatically by the BPF subsystem if
- * it's still present when the map is destroyed.
- */
- bpf_cgroup_release(kptr);
-
- return 0;
- }
-
-----
-
-Another kfunc available for interacting with ``struct cgroup *`` objects is
-bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
-and return it as a cgroup kptr.
+Other kfuncs available for interacting with ``struct cgroup *`` objects are
+bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
+the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
+return a cgroup kptr.
.. kernel-doc:: kernel/bpf/helpers.c
:identifiers: bpf_cgroup_ancestor
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_from_id
+
Eventually, BPF should be updated to allow this to happen with a normal memory
load in the program itself. This is currently not possible without more work in
the verifier. bpf_cgroup_ancestor() can be used as follows:
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
index f9b3b25..7545a20 100644
--- a/Documentation/bpf/libbpf/index.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -2,23 +2,32 @@
.. _libbpf:
+======
libbpf
======
+If you are looking to develop BPF applications using the libbpf library, this
+directory contains important documentation that you should read.
+
+To get started, it is recommended to begin with the :doc:`libbpf Overview
+<libbpf_overview>` document, which provides a high-level understanding of the
+libbpf APIs and their usage. This will give you a solid foundation to start
+exploring and utilizing the various features of libbpf to develop your BPF
+applications.
+
.. toctree::
:maxdepth: 1
+ libbpf_overview
API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
program_types
libbpf_naming_convention
libbpf_build
-This is documentation for libbpf, a userspace library for loading and
-interacting with bpf programs.
-All general BPF questions, including kernel functionality, libbpf APIs and
-their application, should be sent to bpf@vger.kernel.org mailing list.
-You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
-mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
-Please search the archive before asking new questions. It very well might
-be that this was already addressed or answered before.
+All general BPF questions, including kernel functionality, libbpf APIs and their
+application, should be sent to bpf@vger.kernel.org mailing list. You can
+`subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the mailing list
+search its `archive <https://lore.kernel.org/bpf/>`_. Please search the archive
+before asking new questions. It may be that this was already addressed or
+answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst
new file mode 100644
index 0000000..f36a2d4
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_overview.rst
@@ -0,0 +1,228 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+libbpf Overview
+===============
+
+libbpf is a C-based library containing a BPF loader that takes compiled BPF
+object files and prepares and loads them into the Linux kernel. libbpf takes the
+heavy lifting of loading, verifying, and attaching BPF programs to various
+kernel hooks, allowing BPF application developers to focus only on BPF program
+correctness and performance.
+
+The following are the high-level features supported by libbpf:
+
+* Provides high-level and low-level APIs for user space programs to interact
+ with BPF programs. The low-level APIs wrap all the bpf system call
+ functionality, which is useful when users need more fine-grained control
+ over the interactions between user space and BPF programs.
+* Provides overall support for the BPF object skeleton generated by bpftool.
+ The skeleton file simplifies the process for the user space programs to access
+ global variables and work with BPF programs.
+* Provides BPF-side APIS, including BPF helper definitions, BPF maps support,
+ and tracing helpers, allowing developers to simplify BPF code writing.
+* Supports BPF CO-RE mechanism, enabling BPF developers to write portable
+ BPF programs that can be compiled once and run across different kernel
+ versions.
+
+This document will delve into the above concepts in detail, providing a deeper
+understanding of the capabilities and advantages of libbpf and how it can help
+you develop BPF applications efficiently.
+
+BPF App Lifecycle and libbpf APIs
+==================================
+
+A BPF application consists of one or more BPF programs (either cooperating or
+completely independent), BPF maps, and global variables. The global
+variables are shared between all BPF programs, which allows them to cooperate on
+a common set of data. libbpf provides APIs that user space programs can use to
+manipulate the BPF programs by triggering different phases of a BPF application
+lifecycle.
+
+The following section provides a brief overview of each phase in the BPF life
+cycle:
+
+* **Open phase**: In this phase, libbpf parses the BPF
+ object file and discovers BPF maps, BPF programs, and global variables. After
+ a BPF app is opened, user space apps can make additional adjustments
+ (setting BPF program types, if necessary; pre-setting initial values for
+ global variables, etc.) before all the entities are created and loaded.
+
+* **Load phase**: In the load phase, libbpf creates BPF
+ maps, resolves various relocations, and verifies and loads BPF programs into
+ the kernel. At this point, libbpf validates all the parts of a BPF application
+ and loads the BPF program into the kernel, but no BPF program has yet been
+ executed. After the load phase, it’s possible to set up the initial BPF map
+ state without racing with the BPF program code execution.
+
+* **Attachment phase**: In this phase, libbpf
+ attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes,
+ cgroup hooks, network packet processing pipeline, etc.). During this
+ phase, BPF programs perform useful work such as processing
+ packets, or updating BPF maps and global variables that can be read from user
+ space.
+
+* **Tear down phase**: In the tear down phase,
+ libbpf detaches BPF programs and unloads them from the kernel. BPF maps are
+ destroyed, and all the resources used by the BPF app are freed.
+
+BPF Object Skeleton File
+========================
+
+BPF skeleton is an alternative interface to libbpf APIs for working with BPF
+objects. Skeleton code abstract away generic libbpf APIs to significantly
+simplify code for manipulating BPF programs from user space. Skeleton code
+includes a bytecode representation of the BPF object file, simplifying the
+process of distributing your BPF code. With BPF bytecode embedded, there are no
+extra files to deploy along with your application binary.
+
+You can generate the skeleton header file ``(.skel.h)`` for a specific object
+file by passing the BPF object to the bpftool. The generated BPF skeleton
+provides the following custom functions that correspond to the BPF lifecycle,
+each of them prefixed with the specific object name:
+
+* ``<name>__open()`` – creates and opens BPF application (``<name>`` stands for
+ the specific bpf object name)
+* ``<name>__load()`` – instantiates, loads,and verifies BPF application parts
+* ``<name>__attach()`` – attaches all auto-attachable BPF programs (it’s
+ optional, you can have more control by using libbpf APIs directly)
+* ``<name>__destroy()`` – detaches all BPF programs and
+ frees up all used resources
+
+Using the skeleton code is the recommended way to work with bpf programs. Keep
+in mind, BPF skeleton provides access to the underlying BPF object, so whatever
+was possible to do with generic libbpf APIs is still possible even when the BPF
+skeleton is used. It's an additive convenience feature, with no syscalls, and no
+cumbersome code.
+
+Other Advantages of Using Skeleton File
+---------------------------------------
+
+* BPF skeleton provides an interface for user space programs to work with BPF
+ global variables. The skeleton code memory maps global variables as a struct
+ into user space. The struct interface allows user space programs to initialize
+ BPF programs before the BPF load phase and fetch and update data from user
+ space afterward.
+
+* The ``skel.h`` file reflects the object file structure by listing out the
+ available maps, programs, etc. BPF skeleton provides direct access to all the
+ BPF maps and BPF programs as struct fields. This eliminates the need for
+ string-based lookups with ``bpf_object_find_map_by_name()`` and
+ ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source
+ code and user-space code getting out of sync.
+
+* The embedded bytecode representation of the object file ensures that the
+ skeleton and the BPF object file are always in sync.
+
+BPF Helpers
+===========
+
+libbpf provides BPF-side APIs that BPF programs can use to interact with the
+system. The BPF helpers definition allows developers to use them in BPF code as
+any other plain C function. For example, there are helper functions to print
+debugging messages, get the time since the system was booted, interact with BPF
+maps, manipulate network packets, etc.
+
+For a complete description of what the helpers do, the arguments they take, and
+the return value, see the `bpf-helpers
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man page.
+
+BPF CO-RE (Compile Once – Run Everywhere)
+=========================================
+
+BPF programs work in the kernel space and have access to kernel memory and data
+structures. One limitation that BPF applications come across is the lack of
+portability across different kernel versions and configurations. `BCC
+<https://github.com/iovisor/bcc/>`_ is one of the solutions for BPF
+portability. However, it comes with runtime overhead and a large binary size
+from embedding the compiler with the application.
+
+libbpf steps up the BPF program portability by supporting the BPF CO-RE concept.
+BPF CO-RE brings together BTF type information, libbpf, and the compiler to
+produce a single executable binary that you can run on multiple kernel versions
+and configurations.
+
+To make BPF programs portable libbpf relies on the BTF type information of the
+running kernel. Kernel also exposes this self-describing authoritative BTF
+information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``.
+
+You can generate the BTF information for the running kernel with the following
+command:
+
+::
+
+ $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
+
+The command generates a ``vmlinux.h`` header file with all kernel types
+(:doc:`BTF types <../btf>`) that the running kernel uses. Including
+``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel
+headers.
+
+libbpf enables portability of BPF programs by looking at the BPF program’s
+recorded BTF type and relocation information and matching them to BTF
+information (vmlinux) provided by the running kernel. libbpf then resolves and
+matches all the types and fields, and updates necessary offsets and other
+relocatable data to ensure that BPF program’s logic functions correctly for a
+specific kernel on the host. BPF CO-RE concept thus eliminates overhead
+associated with BPF development and allows developers to write portable BPF
+applications without modifications and runtime source code compilation on the
+target machine.
+
+The following code snippet shows how to read the parent field of a kernel
+``task_struct`` using BPF CO-RE and libbf. The basic helper to read a field in a
+CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read
+``sz`` bytes from the field referenced by ``src`` into the memory pointed to by
+``dst``.
+
+.. code-block:: C
+ :emphasize-lines: 6
+
+ //...
+ struct task_struct *task = (void *)bpf_get_current_task();
+ struct task_struct *parent_task;
+ int err;
+
+ err = bpf_core_read(&parent_task, sizeof(void *), &task->parent);
+ if (err) {
+ /* handle error */
+ }
+
+ /* parent_task contains the value of task->parent pointer */
+
+In the code snippet, we first get a pointer to the current ``task_struct`` using
+``bpf_get_current_task()``. We then use ``bpf_core_read()`` to read the parent
+field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is
+just like ``bpf_probe_read_kernel()`` BPF helper, except it records information
+about the field that should be relocated on the target kernel. i.e, if the
+``parent`` field gets shifted to a different offset within
+``struct task_struct`` due to some new field added in front of it, libbpf will
+automatically adjust the actual offset to the proper value.
+
+Getting Started with libbpf
+===========================
+
+Check out the `libbpf-bootstrap <https://github.com/libbpf/libbpf-bootstrap>`_
+repository with simple examples of using libbpf to build various BPF
+applications.
+
+See also `libbpf API documentation
+<https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+libbpf and Rust
+===============
+
+If you are building BPF applications in Rust, it is recommended to use the
+`Libbpf-rs <https://github.com/libbpf/libbpf-rs>`_ library instead of bindgen
+bindings directly to libbpf. Libbpf-rs wraps libbpf functionality in
+Rust-idiomatic interfaces and provides libbpf-cargo plugin to handle BPF code
+compilation and skeleton generation. Using Libbpf-rs will make building user
+space part of the BPF application easier. Note that the BPF program themselves
+must still be written in plain C.
+
+Additional Documentation
+========================
+
+* `Program types and ELF Sections <https://libbpf.readthedocs.io/en/latest/program_types.html>`_
+* `API naming convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html>`_
+* `Building libbpf <https://libbpf.readthedocs.io/en/latest/libbpf_build.html>`_
+* `API documentation Convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html#api-documentation-convention>`_
diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst
index 956b0c8..508d009 100644
--- a/Documentation/bpf/linux-notes.rst
+++ b/Documentation/bpf/linux-notes.rst
@@ -12,6 +12,36 @@
``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively.
+Jump instructions
+=================
+
+``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function
+integer would be read from a specified register, is not currently supported
+by the verifier. Any programs with this instruction will fail to load
+until such support is added.
+
+Maps
+====
+
+Linux only supports the 'map_val(map)' operation on array maps with a single element.
+
+Linux uses an fd_array to store maps associated with a BPF program. Thus,
+map_by_idx(imm) uses the fd at that index in the array.
+
+Variables
+=========
+
+The following 64-bit immediate instruction specifies that a variable address,
+which corresponds to some integer stored in the 'imm' field, should be loaded:
+
+========================= ====== === ========================================= =========== ==============
+opcode construction opcode src pseudocode imm type dst type
+========================= ====== === ========================================= =========== ==============
+BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
+========================= ====== === ========================================= =========== ==============
+
+On Linux, this integer is a BTF ID.
+
Legacy BPF Packet access instructions
=====================================
diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst
index 4906ff0..6f069f3 100644
--- a/Documentation/bpf/maps.rst
+++ b/Documentation/bpf/maps.rst
@@ -11,9 +11,9 @@
`man-pages`_ for `bpf-helpers(7)`_.
BPF maps are accessed from user space via the ``bpf`` syscall, which provides
-commands to create maps, lookup elements, update elements and delete
-elements. More details of the BPF syscall are available in
-:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_.
+commands to create maps, lookup elements, update elements and delete elements.
+More details of the BPF syscall are available in `ebpf-syscall`_ and in the
+`man-pages`_ for `bpf(2)`_.
Map Types
=========
@@ -79,3 +79,4 @@
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
+.. _ebpf-syscall: https://docs.kernel.org/userspace-api/ebpf/syscall.html
diff --git a/Documentation/bpf/prog_lsm.rst b/Documentation/bpf/prog_lsm.rst
index 0dc3fb0..ad2be02 100644
--- a/Documentation/bpf/prog_lsm.rst
+++ b/Documentation/bpf/prog_lsm.rst
@@ -18,7 +18,7 @@
.. c:function:: int file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot);
Other LSM hooks which can be instrumented can be found in
-``include/linux/lsm_hooks.h``.
+``security/security.c``.
eBPF programs that use Documentation/bpf/btf.rst do not need to include kernel
headers for accessing information from the attached eBPF program's context.
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
index e87a878..e9b022d 100644
--- a/Documentation/cdrom/index.rst
+++ b/Documentation/cdrom/index.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-=====
-cdrom
-=====
+======
+CD-ROM
+======
.. toctree::
:maxdepth: 1
diff --git a/Documentation/conf.py b/Documentation/conf.py
index db16814..37314af 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -343,9 +343,10 @@
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['sphinx-static']
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-html_use_smartypants = False
+# If true, Docutils "smart quotes" will be used to convert quotes and dashes
+# to typographically correct entities. This will convert "--" to "—",
+# which is not always what we want, so disable it.
+smartquotes = False
# Custom sidebar templates, maps document names to template names.
# Note that the RTD theme ignores this
diff --git a/Documentation/core-api/asm-annotations.rst b/Documentation/core-api/asm-annotations.rst
index bc514ed..11c96d3 100644
--- a/Documentation/core-api/asm-annotations.rst
+++ b/Documentation/core-api/asm-annotations.rst
@@ -44,7 +44,7 @@
run to check and fix the object if needed. Currently, ``objtool`` can report
missing frame pointer setup/destruction in functions. It can also
automatically generate annotations for the ORC unwinder
-(Documentation/x86/orc-unwinder.rst)
+(Documentation/arch/x86/orc-unwinder.rst)
for most code. Both of these are especially important to support reliable
stack traces which are in turn necessary for kernel live patching
(Documentation/livepatch/livepatch.rst).
diff --git a/Documentation/core-api/dma-api-howto.rst b/Documentation/core-api/dma-api-howto.rst
index 8288468..72f6cdb 100644
--- a/Documentation/core-api/dma-api-howto.rst
+++ b/Documentation/core-api/dma-api-howto.rst
@@ -185,7 +185,7 @@
your device. For example, &pdev->dev is a pointer to the device struct of a
PCI device (pdev is a pointer to the PCI device struct of your device).
-These calls usually return zero to indicated your device can perform DMA
+These calls usually return zero to indicate your device can perform DMA
properly on the machine given the address mask you provided, but they might
return an error if the mask is too small to be supportable on the given
system. If it returns non-zero, your device cannot perform DMA properly on
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 62f9616..9b3f3e5 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -220,12 +220,30 @@
Module Support
==============
-Module Loading
---------------
+Kernel module auto-loading
+--------------------------
-.. kernel-doc:: kernel/kmod.c
+.. kernel-doc:: kernel/module/kmod.c
:export:
+Module debugging
+----------------
+
+.. kernel-doc:: kernel/module/stats.c
+ :doc: module debugging statistics overview
+
+dup_failed_modules - tracks duplicate failed modules
+****************************************************
+
+.. kernel-doc:: kernel/module/stats.c
+ :doc: dup_failed_modules - tracks duplicate failed modules
+
+module statistics debugfs counters
+**********************************
+
+.. kernel-doc:: kernel/module/stats.c
+ :doc: module statistics debugfs counters
+
Inter Module support
--------------------
diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst
index 5954ddf..1c58d88 100644
--- a/Documentation/core-api/memory-allocation.rst
+++ b/Documentation/core-api/memory-allocation.rst
@@ -170,7 +170,16 @@
After the cache is created kmem_cache_alloc() and its convenience
wrappers can allocate memory from that cache.
-When the allocated memory is no longer needed it must be freed. You can
-use kvfree() for the memory allocated with `kmalloc`, `vmalloc` and
-`kvmalloc`. The slab caches should be freed with kmem_cache_free(). And
-don't forget to destroy the cache with kmem_cache_destroy().
+When the allocated memory is no longer needed it must be freed.
+
+Objects allocated by `kmalloc` can be freed by `kfree` or `kvfree`. Objects
+allocated by `kmem_cache_alloc` can be freed with `kmem_cache_free`, `kfree`
+or `kvfree`, where the latter two might be more convenient thanks to not
+needing the kmem_cache pointer.
+
+The same rules apply to _bulk and _rcu flavors of freeing functions.
+
+Memory allocated by `vmalloc` can be freed with `vfree` or `kvfree`.
+Memory allocated by `kvmalloc` can be freed with `kvfree`.
+Caches created by `kmem_cache_create` should be freed with
+`kmem_cache_destroy` only after freeing all the allocated objects first.
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index dbe1aac..dfe7e75a 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -575,20 +575,26 @@
Helper macros cpumask_pr_args() and nodemask_pr_args() are available to ease
printing cpumask and nodemask.
-Flags bitfields such as page flags, gfp_flags
----------------------------------------------
+Flags bitfields such as page flags, page_type, gfp_flags
+--------------------------------------------------------
::
%pGp 0x17ffffc0002036(referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff)
+ %pGt 0xffffff7f(buddy)
%pGg GFP_USER|GFP_DMA32|GFP_NOWARN
%pGv read|exec|mayread|maywrite|mayexec|denywrite
For printing flags bitfields as a collection of symbolic constants that
would construct the value. The type of flags is given by the third
-character. Currently supported are [p]age flags, [v]ma_flags (both
-expect ``unsigned long *``) and [g]fp_flags (expects ``gfp_t *``). The flag
-names and print order depends on the particular type.
+character. Currently supported are:
+
+ - p - [p]age flags, expects value of type (``unsigned long *``)
+ - t - page [t]ype, expects value of type (``unsigned int *``)
+ - v - [v]ma_flags, expects value of type (``unsigned long *``)
+ - g - [g]fp_flags, expects value of type (``gfp_t *``)
+
+The flag names and print order depends on the particular type.
Note that this format should not be used directly in the
:c:func:`TP_printk()` part of a tracepoint. Instead, use the show_*_flags()
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index d83c9ab..6611434 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -1,42 +1,50 @@
-kcov: code coverage for fuzzing
+KCOV: code coverage for fuzzing
===============================
-kcov exposes kernel code coverage information in a form suitable for coverage-
-guided fuzzing (randomized testing). Coverage data of a running kernel is
-exported via the "kcov" debugfs file. Coverage collection is enabled on a task
-basis, and thus it can capture precise coverage of a single system call.
+KCOV collects and exposes kernel code coverage information in a form suitable
+for coverage-guided fuzzing. Coverage data of a running kernel is exported via
+the ``kcov`` debugfs file. Coverage collection is enabled on a task basis, and
+thus KCOV can capture precise coverage of a single system call.
-Note that kcov does not aim to collect as much coverage as possible. It aims
-to collect more or less stable coverage that is function of syscall inputs.
-To achieve this goal it does not collect coverage in soft/hard interrupts
-and instrumentation of some inherently non-deterministic parts of kernel is
-disabled (e.g. scheduler, locking).
+Note that KCOV does not aim to collect as much coverage as possible. It aims
+to collect more or less stable coverage that is a function of syscall inputs.
+To achieve this goal, it does not collect coverage in soft/hard interrupts
+(unless remove coverage collection is enabled, see below) and from some
+inherently non-deterministic parts of the kernel (e.g. scheduler, locking).
-kcov is also able to collect comparison operands from the instrumented code
-(this feature currently requires that the kernel is compiled with clang).
+Besides collecting code coverage, KCOV can also collect comparison operands.
+See the "Comparison operands collection" section for details.
+
+Besides collecting coverage data from syscall handlers, KCOV can also collect
+coverage for annotated parts of the kernel executing in background kernel
+tasks or soft interrupts. See the "Remote coverage collection" section for
+details.
Prerequisites
-------------
-Configure the kernel with::
+KCOV relies on compiler instrumentation and requires GCC 6.1.0 or later
+or any Clang version supported by the kernel.
+
+Collecting comparison operands is supported with GCC 8+ or with Clang.
+
+To enable KCOV, configure the kernel with::
CONFIG_KCOV=y
-CONFIG_KCOV requires gcc 6.1.0 or later.
-
-If the comparison operands need to be collected, set::
+To enable comparison operands collection, set::
CONFIG_KCOV_ENABLE_COMPARISONS=y
-Profiling data will only become accessible once debugfs has been mounted::
+Coverage data only becomes accessible once debugfs has been mounted::
mount -t debugfs none /sys/kernel/debug
Coverage collection
-------------------
-The following program demonstrates coverage collection from within a test
-program using kcov:
+The following program demonstrates how to use KCOV to collect coverage for a
+single syscall from within a test program:
.. code-block:: c
@@ -84,7 +92,7 @@
perror("ioctl"), exit(1);
/* Reset coverage from the tail of the ioctl() call. */
__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
- /* That's the target syscal call. */
+ /* Call the target syscall call. */
read(-1, NULL, 0);
/* Read number of PCs collected. */
n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
@@ -103,7 +111,7 @@
return 0;
}
-After piping through addr2line output of the program looks as follows::
+After piping through ``addr2line`` the output of the program looks as follows::
SyS_read
fs/read_write.c:562
@@ -121,12 +129,13 @@
fs/read_write.c:562
If a program needs to collect coverage from several threads (independently),
-it needs to open /sys/kernel/debug/kcov in each thread separately.
+it needs to open ``/sys/kernel/debug/kcov`` in each thread separately.
The interface is fine-grained to allow efficient forking of test processes.
-That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
-mmaps coverage buffer and then forks child processes in a loop. Child processes
-only need to enable coverage (disable happens automatically on thread end).
+That is, a parent process opens ``/sys/kernel/debug/kcov``, enables trace mode,
+mmaps coverage buffer, and then forks child processes in a loop. The child
+processes only need to enable coverage (it gets disabled automatically when
+a thread exits).
Comparison operands collection
------------------------------
@@ -205,52 +214,78 @@
return 0;
}
-Note that the kcov modes (coverage collection or comparison operands) are
-mutually exclusive.
+Note that the KCOV modes (collection of code coverage or comparison operands)
+are mutually exclusive.
Remote coverage collection
--------------------------
-With KCOV_ENABLE coverage is collected only for syscalls that are issued
-from the current process. With KCOV_REMOTE_ENABLE it's possible to collect
-coverage for arbitrary parts of the kernel code, provided that those parts
-are annotated with kcov_remote_start()/kcov_remote_stop().
+Besides collecting coverage data from handlers of syscalls issued from a
+userspace process, KCOV can also collect coverage for parts of the kernel
+executing in other contexts - so-called "remote" coverage.
-This allows to collect coverage from two types of kernel background
-threads: the global ones, that are spawned during kernel boot in a limited
-number of instances (e.g. one USB hub_event() worker thread is spawned per
-USB HCD); and the local ones, that are spawned when a user interacts with
-some kernel interface (e.g. vhost workers); as well as from soft
-interrupts.
+Using KCOV to collect remote coverage requires:
-To enable collecting coverage from a global background thread or from a
-softirq, a unique global handle must be assigned and passed to the
-corresponding kcov_remote_start() call. Then a userspace process can pass
-a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles
-array field of the kcov_remote_arg struct. This will attach the used kcov
-device to the code sections, that are referenced by those handles.
+1. Modifying kernel code to annotate the code section from where coverage
+ should be collected with ``kcov_remote_start`` and ``kcov_remote_stop``.
-Since there might be many local background threads spawned from different
-userspace processes, we can't use a single global handle per annotation.
-Instead, the userspace process passes a non-zero handle through the
-common_handle field of the kcov_remote_arg struct. This common handle gets
-saved to the kcov_handle field in the current task_struct and needs to be
-passed to the newly spawned threads via custom annotations. Those threads
-should in turn be annotated with kcov_remote_start()/kcov_remote_stop().
+2. Using ``KCOV_REMOTE_ENABLE`` instead of ``KCOV_ENABLE`` in the userspace
+ process that collects coverage.
-Internally kcov stores handles as u64 integers. The top byte of a handle
-is used to denote the id of a subsystem that this handle belongs to, and
-the lower 4 bytes are used to denote the id of a thread instance within
-that subsystem. A reserved value 0 is used as a subsystem id for common
-handles as they don't belong to a particular subsystem. The bytes 4-7 are
-currently reserved and must be zero. In the future the number of bytes
-used for the subsystem or handle ids might be increased.
+Both ``kcov_remote_start`` and ``kcov_remote_stop`` annotations and the
+``KCOV_REMOTE_ENABLE`` ioctl accept handles that identify particular coverage
+collection sections. The way a handle is used depends on the context where the
+matching code section executes.
-When a particular userspace process collects coverage via a common
-handle, kcov will collect coverage for each code section that is annotated
-to use the common handle obtained as kcov_handle from the current
-task_struct. However non common handles allow to collect coverage
-selectively from different subsystems.
+KCOV supports collecting remote coverage from the following contexts:
+
+1. Global kernel background tasks. These are the tasks that are spawned during
+ kernel boot in a limited number of instances (e.g. one USB ``hub_event``
+ worker is spawned per one USB HCD).
+
+2. Local kernel background tasks. These are spawned when a userspace process
+ interacts with some kernel interface and are usually killed when the process
+ exits (e.g. vhost workers).
+
+3. Soft interrupts.
+
+For #1 and #3, a unique global handle must be chosen and passed to the
+corresponding ``kcov_remote_start`` call. Then a userspace process must pass
+this handle to ``KCOV_REMOTE_ENABLE`` in the ``handles`` array field of the
+``kcov_remote_arg`` struct. This will attach the used KCOV device to the code
+section referenced by this handle. Multiple global handles identifying
+different code sections can be passed at once.
+
+For #2, the userspace process instead must pass a non-zero handle through the
+``common_handle`` field of the ``kcov_remote_arg`` struct. This common handle
+gets saved to the ``kcov_handle`` field in the current ``task_struct`` and
+needs to be passed to the newly spawned local tasks via custom kernel code
+modifications. Those tasks should in turn use the passed handle in their
+``kcov_remote_start`` and ``kcov_remote_stop`` annotations.
+
+KCOV follows a predefined format for both global and common handles. Each
+handle is a ``u64`` integer. Currently, only the one top and the lower 4 bytes
+are used. Bytes 4-7 are reserved and must be zero.
+
+For global handles, the top byte of the handle denotes the id of a subsystem
+this handle belongs to. For example, KCOV uses ``1`` as the USB subsystem id.
+The lower 4 bytes of a global handle denote the id of a task instance within
+that subsystem. For example, each ``hub_event`` worker uses the USB bus number
+as the task instance id.
+
+For common handles, a reserved value ``0`` is used as a subsystem id, as such
+handles don't belong to a particular subsystem. The lower 4 bytes of a common
+handle identify a collective instance of all local tasks spawned by the
+userspace process that passed a common handle to ``KCOV_REMOTE_ENABLE``.
+
+In practice, any value can be used for common handle instance id if coverage
+is only collected from a single userspace process on the system. However, if
+common handles are used by multiple processes, unique instance ids must be
+used for each process. One option is to use the process id as the common
+handle instance id.
+
+The following program demonstrates using KCOV to collect coverage from both
+local tasks spawned by the process and the global task that handles USB bus #1:
.. code-block:: c
diff --git a/Documentation/dev-tools/kmemleak.rst b/Documentation/dev-tools/kmemleak.rst
index 5483fd3..2cb00b5 100644
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -227,7 +227,7 @@
--------------------------
To check if you have all set up to use kmemleak, you can use the kmemleak-test
-module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
+module, a module that deliberately leaks memory. Set CONFIG_SAMPLE_KMEMLEAK
as module (it can't be used as built-in) and boot the kernel with kmemleak
enabled. Load the module and perform a scan with::
diff --git a/Documentation/devicetree/bindings/.yamllint b/Documentation/devicetree/bindings/.yamllint
index 214abd3..4abe9f0 100644
--- a/Documentation/devicetree/bindings/.yamllint
+++ b/Documentation/devicetree/bindings/.yamllint
@@ -19,7 +19,7 @@
colons: {max-spaces-before: 0, max-spaces-after: 1}
commas: {min-spaces-after: 1, max-spaces-after: 1}
comments:
- require-starting-space: false
+ require-starting-space: true
min-spaces-from-content: 1
comments-indentation: disable
document-start:
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
index b634d5b..274ee08 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -153,17 +153,27 @@
- description: Boards with the Amlogic Meson G12B A311D SoC
items:
- enum:
+ - bananapi,bpi-m2s
- khadas,vim3
- radxa,zero2
- const: amlogic,a311d
- const: amlogic,g12b
+ - description: Boards using the BPI-CM4 module with Amlogic Meson G12B A311D SoC
+ items:
+ - enum:
+ - bananapi,bpi-cm4io
+ - const: bananapi,bpi-cm4
+ - const: amlogic,a311d
+ - const: amlogic,g12b
+
- description: Boards with the Amlogic Meson G12B S922X SoC
items:
- enum:
- azw,gsking-x
- azw,gtking
- azw,gtking-pro
+ - bananapi,bpi-m2s
- hardkernel,odroid-go-ultra
- hardkernel,odroid-n2
- hardkernel,odroid-n2l
diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
index 1748f16..7dff32f 100644
--- a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
@@ -2,8 +2,8 @@
# Copyright 2019 BayLibre, SAS
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/amlogic/amlogic,meson-gx-ao-secure.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/amlogic/amlogic,meson-gx-ao-secure.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Amlogic Meson Firmware registers Interface
diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml
index eee7cda..09b27e9 100644
--- a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-mx-secbus2.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/amlogic/amlogic,meson-mx-secbus2.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/amlogic/amlogic,meson-mx-secbus2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Amlogic Meson8/Meson8b/Meson8m2 SECBUS2 register interface
diff --git a/Documentation/devicetree/bindings/arm/apple.yaml b/Documentation/devicetree/bindings/arm/apple.yaml
index da78c69..883fd67 100644
--- a/Documentation/devicetree/bindings/arm/apple.yaml
+++ b/Documentation/devicetree/bindings/arm/apple.yaml
@@ -19,6 +19,12 @@
- MacBook Air (M1, 2020)
- iMac (24-inch, M1, 2021)
+ Devices based on the "M2" SoC:
+
+ - MacBook Air (M2, 2022)
+ - MacBook Pro (13-inch, M2, 2022)
+ - Mac mini (M2, 2023)
+
And devices based on the "M1 Pro", "M1 Max" and "M1 Ultra" SoCs:
- MacBook Pro (14-inch, M1 Pro, 2021)
@@ -70,6 +76,15 @@
- const: apple,t8103
- const: apple,arm-platform
+ - description: Apple M2 SoC based platforms
+ items:
+ - enum:
+ - apple,j413 # MacBook Air (M2, 2022)
+ - apple,j473 # Mac mini (M2, 2023)
+ - apple,j493 # MacBook Pro (13-inch, M2, 2022)
+ - const: apple,t8112
+ - const: apple,arm-platform
+
- description: Apple M1 Pro SoC based platforms
items:
- enum:
diff --git a/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml b/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml
index 0dc957a..673277a 100644
--- a/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml
+++ b/Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml
@@ -23,6 +23,7 @@
items:
- enum:
- apple,t8103-pmgr
+ - apple,t8112-pmgr
- apple,t6000-pmgr
- const: apple,pmgr
- const: syscon
diff --git a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
index eec190a..09c319f 100644
--- a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
@@ -144,6 +144,7 @@
it is stricter and always has two compatibles.
type: object
$ref: '/schemas/simple-bus.yaml'
+ unevaluatedProperties: false
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
index b369b37..39e3c24 100644
--- a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
@@ -30,6 +30,7 @@
clocks:
type: object
+ additionalProperties: false
properties:
compatible:
@@ -47,6 +48,7 @@
reset:
type: object
+ additionalProperties: false
properties:
compatible:
@@ -63,6 +65,7 @@
pwm:
type: object
+ additionalProperties: false
properties:
compatible:
@@ -76,8 +79,6 @@
- compatible
- "#pwm-cells"
- additionalProperties: false
-
required:
- compatible
- mboxes
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index c145f6a..ff272e5 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -85,6 +85,8 @@
compatible:
enum:
+ - apple,avalanche
+ - apple,blizzard
- apple,icestorm
- apple,firestorm
- arm,arm710t
@@ -139,6 +141,7 @@
- arm,cortex-a77
- arm,cortex-a78
- arm,cortex-a78ae
+ - arm,cortex-a78c
- arm,cortex-a510
- arm,cortex-a710
- arm,cortex-a715
@@ -151,6 +154,7 @@
- arm,cortex-r5
- arm,cortex-r7
- arm,cortex-x1
+ - arm,cortex-x1c
- arm,cortex-x2
- arm,cortex-x3
- arm,neoverse-e1
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
index d4dc074..5d033570 100644
--- a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
@@ -28,7 +28,8 @@
maxItems: 1
description: |
This interrupt which is used to signal an event by the secure world
- software is expected to be edge-triggered.
+ software is expected to be either a per-cpu interrupt or an
+ edge-triggered peripheral interrupt.
method:
enum: [smc, hvc]
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 442ce8f..15d4110 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -300,6 +300,7 @@
- variscite,dt6customboard
- wand,imx6q-wandboard # Wandboard i.MX6 Quad Board
- ysoft,imx6q-yapp4-crux # i.MX6 Quad Y Soft IOTA Crux board
+ - ysoft,imx6q-yapp4-pegasus # i.MX6 Quad Y Soft IOTA Pegasus board
- zealz,imx6q-gk802 # Zealz GK802
- zii,imx6q-zii-rdu2 # ZII RDU2 Board
- const: fsl,imx6q
@@ -410,6 +411,7 @@
- prt,prtwd3 # Protonic WD3 board
- wand,imx6qp-wandboard # Wandboard i.MX6 QuadPlus Board
- ysoft,imx6qp-yapp4-crux-plus # i.MX6 Quad Plus Y Soft IOTA Crux+ board
+ - ysoft,imx6qp-yapp4-pegasus-plus # i.MX6 Quad Plus Y Soft IOTA Pegasus+ board
- zii,imx6qp-zii-rdu2 # ZII RDU2+ Board
- const: fsl,imx6qp
@@ -474,9 +476,11 @@
- udoo,imx6dl-udoo # Udoo i.MX6 Dual-lite Board
- vdl,lanmcu # Van der Laan LANMCU board
- wand,imx6dl-wandboard # Wandboard i.MX6 Dual Lite Board
- - ysoft,imx6dl-yapp4-draco # i.MX6 DualLite Y Soft IOTA Draco board
+ - ysoft,imx6dl-yapp4-draco # i.MX6 Solo Y Soft IOTA Draco board
- ysoft,imx6dl-yapp4-hydra # i.MX6 DualLite Y Soft IOTA Hydra board
+ - ysoft,imx6dl-yapp4-lynx # i.MX6 DualLite Y Soft IOTA Lynx board
- ysoft,imx6dl-yapp4-orion # i.MX6 DualLite Y Soft IOTA Orion board
+ - ysoft,imx6dl-yapp4-phoenix # i.MX6 DualLite Y Soft IOTA Phoenix board
- ysoft,imx6dl-yapp4-ursa # i.MX6 Solo Y Soft IOTA Ursa board
- const: fsl,imx6dl
@@ -581,6 +585,7 @@
- kobo,aura2
- kobo,tolino-shine2hd
- kobo,tolino-shine3
+ - kobo,tolino-vision
- kobo,tolino-vision5
- revotics,imx6sl-warp # Revotics WaRP Board
- const: fsl,imx6sl
@@ -702,6 +707,15 @@
- const: armadeus,imx6ull-opos6ul # OPOS6UL (i.MX6ULL) SoM
- const: fsl,imx6ull
+ - description: i.MX6ULL chargebyte Tarragon Boards
+ items:
+ - enum:
+ - chargebyte,imx6ull-tarragon-master
+ - chargebyte,imx6ull-tarragon-micro
+ - chargebyte,imx6ull-tarragon-slave
+ - chargebyte,imx6ull-tarragon-slavext
+ - const: fsl,imx6ull
+
- description: i.MX6ULL DHCOM SoM based Boards
items:
- enum:
@@ -1002,6 +1016,7 @@
items:
- enum:
- beacon,imx8mp-beacon-kit # i.MX8MP Beacon Development Kit
+ - dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC
- fsl,imx8mp-evk # i.MX8MP EVK Board
- gateworks,imx8mp-gw74xx # i.MX8MP Gateworks Board
- polyhex,imx8mp-debix # Polyhex Debix boards
@@ -1020,7 +1035,9 @@
- description: i.MX8MP DHCOM based Boards
items:
- - const: dh,imx8mp-dhcom-pdk2 # i.MX8MP DHCOM SoM on PDK2 board
+ - enum:
+ - dh,imx8mp-dhcom-pdk2 # i.MX8MP DHCOM SoM on PDK2 board
+ - dh,imx8mp-dhcom-pdk3 # i.MX8MP DHCOM SoM on PDK3 board
- const: dh,imx8mp-dhcom-som # i.MX8MP DHCOM SoM
- const: fsl,imx8mp
@@ -1119,6 +1136,25 @@
items:
- enum:
- fsl,imx8qm-mek # i.MX8QM MEK Board
+ - toradex,apalis-imx8 # Apalis iMX8 Modules
+ - toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules
+ - const: fsl,imx8qm
+
+ - description: i.MX8QM Boards with Toradex Apalis iMX8 Modules
+ items:
+ - enum:
+ - toradex,apalis-imx8-eval # Apalis iMX8 Module on Apalis Evaluation Board
+ - toradex,apalis-imx8-ixora-v1.1 # Apalis iMX8 Module on Ixora V1.1 Carrier Board
+ - const: toradex,apalis-imx8
+ - const: fsl,imx8qm
+
+ - description: i.MX8QM Boards with Toradex Apalis iMX8 V1.1 Modules
+ items:
+ - enum:
+ - toradex,apalis-imx8-v1.1-eval # Apalis iMX8 V1.1 Module on Apalis Eval. Board
+ - toradex,apalis-imx8-v1.1-ixora-v1.1 # Apalis iMX8 V1.1 Module on Ixora V1.1 C. Board
+ - toradex,apalis-imx8-v1.1-ixora-v1.2 # Apalis iMX8 V1.1 Module on Ixora V1.2 C. Board
+ - const: toradex,apalis-imx8-v1.1
- const: fsl,imx8qm
- description: i.MX8QXP based Boards
@@ -1135,10 +1171,13 @@
- fsl,imx8dxl-evk # i.MX8DXL EVK Board
- const: fsl,imx8dxl
- - description: i.MX8QXP Boards with Toradex Coilbri iMX8X Modules
+ - description: i.MX8QXP Boards with Toradex Colibri iMX8X Modules
items:
- enum:
+ - toradex,colibri-imx8x-aster # Colibri iMX8X Module on Aster Board
- toradex,colibri-imx8x-eval-v3 # Colibri iMX8X Module on Colibri Evaluation Board V3
+ - toradex,colibri-imx8x-iris # Colibri iMX8X Module on Iris Board
+ - toradex,colibri-imx8x-iris-v2 # Colibri iMX8X Module on Iris Board V2
- const: toradex,colibri-imx8x
- const: fsl,imx8qxp
diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.yaml b/Documentation/devicetree/bindings/arm/l2c2x0.yaml
deleted file mode 100644
index 6b8f4d4..0000000
--- a/Documentation/devicetree/bindings/arm/l2c2x0.yaml
+++ /dev/null
@@ -1,242 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/arm/l2c2x0.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: ARM L2 Cache Controller
-
-maintainers:
- - Rob Herring <robh@kernel.org>
-
-description: |+
- ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/
- PL220/PL310 and variants) based level 2 cache controller. All these various
- implementations of the L2 cache controller have compatible programming
- models (Note 1). Some of the properties that are just prefixed "cache-*" are
- taken from section 3.7.3 of the Devicetree Specification which can be found
- at:
- https://www.devicetree.org/specifications/
-
- Note 1: The description in this document doesn't apply to integrated L2
- cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These
- integrated L2 controllers are assumed to be all preconfigured by
- early secure boot code. Thus no need to deal with their configuration
- in the kernel at all.
-
-allOf:
- - $ref: /schemas/cache-controller.yaml#
-
-properties:
- compatible:
- oneOf:
- - enum:
- - arm,pl310-cache
- - arm,l220-cache
- - arm,l210-cache
- # DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
- - bcm,bcm11351-a2-pl310-cache
- # For Broadcom bcm11351 chipset where an
- # offset needs to be added to the address before passing down to the L2
- # cache controller
- - brcm,bcm11351-a2-pl310-cache
- # Marvell Controller designed to be
- # compatible with the ARM one, with system cache mode (meaning
- # maintenance operations on L1 are broadcasted to the L2 and L2
- # performs the same operation).
- - marvell,aurora-system-cache
- # Marvell Controller designed to be
- # compatible with the ARM one with outer cache mode.
- - marvell,aurora-outer-cache
- - items:
- # Marvell Tauros3 cache controller, compatible
- # with arm,pl310-cache controller.
- - const: marvell,tauros3-cache
- - const: arm,pl310-cache
-
- cache-level:
- const: 2
-
- cache-unified: true
- cache-size: true
- cache-sets: true
- cache-block-size: true
- cache-line-size: true
-
- reg:
- maxItems: 1
-
- arm,data-latency:
- description: Cycles of latency for Data RAM accesses. Specifies 3 cells of
- read, write and setup latencies. Minimum valid values are 1. Controllers
- without setup latency control should use a value of 0.
- $ref: /schemas/types.yaml#/definitions/uint32-array
- minItems: 2
- maxItems: 3
- items:
- minimum: 0
- maximum: 8
-
- arm,tag-latency:
- description: Cycles of latency for Tag RAM accesses. Specifies 3 cells of
- read, write and setup latencies. Controllers without setup latency control
- should use 0. Controllers without separate read and write Tag RAM latency
- values should only use the first cell.
- $ref: /schemas/types.yaml#/definitions/uint32-array
- minItems: 1
- maxItems: 3
- items:
- minimum: 0
- maximum: 8
-
- arm,dirty-latency:
- description: Cycles of latency for Dirty RAMs. This is a single cell.
- $ref: /schemas/types.yaml#/definitions/uint32
- minimum: 1
- maximum: 8
-
- arm,filter-ranges:
- description: <start length> Starting address and length of window to
- filter. Addresses in the filter window are directed to the M1 port. Other
- addresses will go to the M0 port.
- $ref: /schemas/types.yaml#/definitions/uint32-array
- items:
- minItems: 2
- maxItems: 2
-
- arm,io-coherent:
- description: indicates that the system is operating in an hardware
- I/O coherent mode. Valid only when the arm,pl310-cache compatible
- string is used.
- type: boolean
-
- interrupts:
- # Either a single combined interrupt or up to 9 individual interrupts
- minItems: 1
- maxItems: 9
-
- cache-id-part:
- description: cache id part number to be used if it is not present
- on hardware
- $ref: /schemas/types.yaml#/definitions/uint32
-
- wt-override:
- description: If present then L2 is forced to Write through mode
- type: boolean
-
- arm,double-linefill:
- description: Override double linefill enable setting. Enable if
- non-zero, disable if zero.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,double-linefill-incr:
- description: Override double linefill on INCR read. Enable
- if non-zero, disable if zero.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,double-linefill-wrap:
- description: Override double linefill on WRAP read. Enable
- if non-zero, disable if zero.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,prefetch-drop:
- description: Override prefetch drop enable setting. Enable if non-zero,
- disable if zero.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,prefetch-offset:
- description: Override prefetch offset value.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1, 2, 3, 4, 5, 6, 7, 15, 23, 31]
-
- arm,shared-override:
- description: The default behavior of the L220 or PL310 cache
- controllers with respect to the shareable attribute is to transform "normal
- memory non-cacheable transactions" into "cacheable no allocate" (for reads)
- or "write through no write allocate" (for writes).
- On systems where this may cause DMA buffer corruption, this property must
- be specified to indicate that such transforms are precluded.
- type: boolean
-
- arm,parity-enable:
- description: enable parity checking on the L2 cache (L220 or PL310).
- type: boolean
-
- arm,parity-disable:
- description: disable parity checking on the L2 cache (L220 or PL310).
- type: boolean
-
- marvell,ecc-enable:
- description: enable ECC protection on the L2 cache
- type: boolean
-
- arm,outer-sync-disable:
- description: disable the outer sync operation on the L2 cache.
- Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
- will randomly hang unless outer sync operations are disabled.
- type: boolean
-
- prefetch-data:
- description: |
- Data prefetch. Value: <0> (forcibly disable), <1>
- (forcibly enable), property absent (retain settings set by firmware)
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- prefetch-instr:
- description: |
- Instruction prefetch. Value: <0> (forcibly disable),
- <1> (forcibly enable), property absent (retain settings set by
- firmware)
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,dynamic-clock-gating:
- description: |
- L2 dynamic clock gating. Value: <0> (forcibly
- disable), <1> (forcibly enable), property absent (OS specific behavior,
- preferably retain firmware settings)
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,standby-mode:
- description: L2 standby mode enable. Value <0> (forcibly disable),
- <1> (forcibly enable), property absent (OS specific behavior,
- preferably retain firmware settings)
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [0, 1]
-
- arm,early-bresp-disable:
- description: Disable the CA9 optimization Early BRESP (PL310)
- type: boolean
-
- arm,full-line-zero-disable:
- description: Disable the CA9 optimization Full line of zero
- write (PL310)
- type: boolean
-
-required:
- - compatible
- - cache-unified
- - reg
-
-additionalProperties: false
-
-examples:
- - |
- cache-controller@fff12000 {
- compatible = "arm,pl310-cache";
- reg = <0xfff12000 0x1000>;
- arm,data-latency = <1 1 1>;
- arm,tag-latency = <2 2 2>;
- arm,filter-ranges = <0x80000000 0x8000000>;
- cache-unified;
- cache-level = <2>;
- interrupts = <45>;
- };
-
-...
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.yaml
index e997635..ea98043 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Infrastructure System Configuration Controller
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
index d141034..536f5a5 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mmsys.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mmsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek mmsys controller
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
index 9fbeb62..d89848a 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek PCIE Mirror Controller for MT7622
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
index 5c223cb..28ded09 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-wed.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-wed.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Wireless Ethernet Dispatch Controller for MT7622
@@ -20,6 +20,7 @@
items:
- enum:
- mediatek,mt7622-wed
+ - mediatek,mt7981-wed
- mediatek,mt7986-wed
- const: syscon
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml
index 96221f5..82f6446 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7986-wed-pcie.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7986-wed-pcie.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt7986-wed-pcie.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek PCIE WED Controller for MT7986
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-clock.yaml
index cf1002c..7cd14b16 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-clock.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-clock.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8186-clock.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt8186-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Functional Clock Controller for MT8186
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-sys-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-sys-clock.yaml
index 661047d..64c7694 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-sys-clock.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-sys-clock.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8186-sys-clock.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt8186-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek System Clock Controller for MT8186
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml
index b57cc2e..dff4c8e 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8192-clock.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt8192-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Functional Clock Controller for MT8192
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml
index 27f7917..8d608fd 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8192-sys-clock.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt8192-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek System Clock Controller for MT8192
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-clock.yaml
index d62d601..d17164b 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-clock.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-clock.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8195-clock.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt8195-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Functional Clock Controller for MT8195
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-sys-clock.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-sys-clock.yaml
index 95b6bdf..066c9b3 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-sys-clock.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-sys-clock.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt8195-sys-clock.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,mt8195-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek System Clock Controller for MT8195
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
index ef62cbb..26158d0 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,pericfg.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,pericfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Peripheral Configuration Controller
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
deleted file mode 100644
index d2c24c2..0000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-MediaTek SGMIISYS controller
-============================
-
-The MediaTek SGMIISYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
- - "mediatek,mt7622-sgmiisys", "syscon"
- - "mediatek,mt7629-sgmiisys", "syscon"
- - "mediatek,mt7981-sgmiisys_0", "syscon"
- - "mediatek,mt7981-sgmiisys_1", "syscon"
- - "mediatek,mt7986-sgmiisys_0", "syscon"
- - "mediatek,mt7986-sgmiisys_1", "syscon"
-- #clock-cells: Must be 1
-
-The SGMIISYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-sgmiisys: sgmiisys@1b128000 {
- compatible = "mediatek,mt7622-sgmiisys", "syscon";
- reg = <0 0x1b128000 0 0x1000>;
- #clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt
deleted file mode 100644
index 7f69636..0000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Krait Processor Sub-system (KPSS) Application Clock Controller (ACC)
-
-The KPSS ACC provides clock, power domain, and reset control to a Krait CPU.
-There is one ACC register region per CPU within the KPSS remapped region as
-well as an alias register region that remaps accesses to the ACC associated
-with the CPU accessing the region.
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: should be one of:
- "qcom,kpss-acc-v1"
- "qcom,kpss-acc-v2"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: the first element specifies the base address and size of
- the register region. An optional second element specifies
- the base address and size of the alias register region.
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the pll parents.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "pll8_vote", "pxo".
-
-- clock-output-names:
- Usage: optional
- Value type: <string>
- Definition: Name of the output clock. Typically acpuX_aux where X is a
- CPU number starting at 0.
-
-Example:
-
- clock-controller@2088000 {
- compatible = "qcom,kpss-acc-v2";
- reg = <0x02088000 0x1000>,
- <0x02008000 0x1000>;
- clocks = <&gcc PLL8_VOTE>, <&gcc PXO_SRC>;
- clock-names = "pll8_vote", "pxo";
- clock-output-names = "acpu0_aux";
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt
deleted file mode 100644
index e628758..0000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-gcc.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Krait Processor Sub-system (KPSS) Global Clock Controller (GCC)
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: should be one of the following. The generic compatible
- "qcom,kpss-gcc" should also be included.
- "qcom,kpss-gcc-ipq8064", "qcom,kpss-gcc"
- "qcom,kpss-gcc-apq8064", "qcom,kpss-gcc"
- "qcom,kpss-gcc-msm8974", "qcom,kpss-gcc"
- "qcom,kpss-gcc-msm8960", "qcom,kpss-gcc"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: base address and size of the register region
-
-- clocks:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: reference to the pll parents.
-
-- clock-names:
- Usage: required
- Value type: <stringlist>
- Definition: must be "pll8_vote", "pxo".
-
-- clock-output-names:
- Usage: required
- Value type: <string>
- Definition: Name of the output clock. Typically acpu_l2_aux indicating
- an L2 cache auxiliary clock.
-
-Example:
-
- l2cc: clock-controller@2011000 {
- compatible = "qcom,kpss-gcc-ipq8064", "qcom,kpss-gcc";
- reg = <0x2011000 0x1000>;
- clocks = <&gcc PLL8_VOTE>, <&gcc PXO_SRC>;
- clock-names = "pll8_vote", "pxo";
- clock-output-names = "acpu_l2_aux";
- };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml b/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml
deleted file mode 100644
index 38efcad..0000000
--- a/Documentation/devicetree/bindings/arm/msm/qcom,llcc.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/arm/msm/qcom,llcc.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Last Level Cache Controller
-
-maintainers:
- - Rishabh Bhatnagar <rishabhb@codeaurora.org>
- - Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
-
-description: |
- LLCC (Last Level Cache Controller) provides last level of cache memory in SoC,
- that can be shared by multiple clients. Clients here are different cores in the
- SoC, the idea is to minimize the local caches at the clients and migrate to
- common pool of memory. Cache memory is divided into partitions called slices
- which are assigned to clients. Clients can query the slice details, activate
- and deactivate them.
-
-properties:
- compatible:
- enum:
- - qcom,sc7180-llcc
- - qcom,sc7280-llcc
- - qcom,sc8180x-llcc
- - qcom,sc8280xp-llcc
- - qcom,sdm845-llcc
- - qcom,sm6350-llcc
- - qcom,sm8150-llcc
- - qcom,sm8250-llcc
- - qcom,sm8350-llcc
- - qcom,sm8450-llcc
- - qcom,sm8550-llcc
-
- reg:
- items:
- - description: LLCC base register region
- - description: LLCC broadcast base register region
-
- reg-names:
- items:
- - const: llcc_base
- - const: llcc_broadcast_base
-
- interrupts:
- maxItems: 1
-
-required:
- - compatible
- - reg
- - reg-names
-
-additionalProperties: false
-
-examples:
- - |
- #include <dt-bindings/interrupt-controller/arm-gic.h>
-
- system-cache-controller@1100000 {
- compatible = "qcom,sdm845-llcc";
- reg = <0x1100000 0x200000>, <0x1300000 0x50000> ;
- reg-names = "llcc_base", "llcc_broadcast_base";
- interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
- };
diff --git a/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml b/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml
index b6f57d7..84dc6b7 100644
--- a/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml
+++ b/Documentation/devicetree/bindings/arm/nvidia,tegra194-ccplex.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/nvidia,tegra194-ccplex.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/nvidia,tegra194-ccplex.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: NVIDIA Tegra194 CPU Complex
@@ -25,7 +25,7 @@
- nvidia,tegra194-ccplex
nvidia,bpmp:
- $ref: '/schemas/types.yaml#/definitions/phandle'
+ $ref: /schemas/types.yaml#/definitions/phandle
description: |
Specifies the bpmp node that needs to be queried to get
operating point data for all CPUs.
diff --git a/Documentation/devicetree/bindings/arm/oxnas.txt b/Documentation/devicetree/bindings/arm/oxnas.txt
deleted file mode 100644
index ac64e60..0000000
--- a/Documentation/devicetree/bindings/arm/oxnas.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Oxford Semiconductor OXNAS SoCs Family device tree bindings
--------------------------------------------
-
-Boards with the OX810SE SoC shall have the following properties:
- Required root node property:
- compatible: "oxsemi,ox810se"
-
-Boards with the OX820 SoC shall have the following properties:
- Required root node property:
- compatible: "oxsemi,ox820"
-
-Board compatible values:
- - "wd,mbwe" (OX810SE)
- - "cloudengines,pogoplugv3" (OX820)
diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index dbb6f3d..e14358b 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -20,6 +20,8 @@
items:
- enum:
- apm,potenza-pmu
+ - apple,avalanche-pmu
+ - apple,blizzard-pmu
- apple,firestorm-pmu
- apple,icestorm-pmu
- arm,armv8-pmuv3 # Only for s/w models
diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml
index 1bb24d4..d9dd256 100644
--- a/Documentation/devicetree/bindings/arm/qcom.yaml
+++ b/Documentation/devicetree/bindings/arm/qcom.yaml
@@ -30,8 +30,10 @@
apq8084
apq8096
ipq4018
+ ipq5332
ipq6018
ipq8074
+ ipq9574
mdm9615
msm8226
msm8916
@@ -45,7 +47,10 @@
msm8996
msm8998
qcs404
+ qcm2290
qdu1000
+ qrb2210
+ qrb4210
qru1000
sa8155p
sa8540p
@@ -80,6 +85,9 @@
The 'board' element must be one of the following strings:
adp
+ ap-al02-c7
+ ap-mi01.2
+ ap-mi01.6
cdp
cp01-c1
dragonboard
@@ -90,6 +98,7 @@
liquid
mtp
qrd
+ rb2
ride
sbc
x100
@@ -226,6 +235,7 @@
- thwc,uf896
- thwc,ufi001c
- wingtech,wt88047
+ - yiming,uz801-v3
- const: qcom,msm8916
- items:
@@ -322,6 +332,12 @@
- items:
- enum:
+ - qcom,ipq5332-ap-mi01.2
+ - qcom,ipq5332-ap-mi01.6
+ - const: qcom,ipq5332
+
+ - items:
+ - enum:
- mikrotik,rb3011
- qcom,ipq8064-ap148
- const: qcom,ipq8064
@@ -333,12 +349,24 @@
- qcom,ipq8074-hk10-c2
- const: qcom,ipq8074
+ - items:
+ - enum:
+ - qcom,ipq9574-ap-al02-c7
+ - const: qcom,ipq9574
+
- description: Sierra Wireless MangOH Green with WP8548 Module
items:
- const: swir,mangoh-green-wp8548
- const: swir,wp8548
- const: qcom,mdm9615
+ - description: Qualcomm Technologies, Inc. Robotics RB1
+ items:
+ - enum:
+ - qcom,qrb2210-rb1
+ - const: qcom,qrb2210
+ - const: qcom,qcm2290
+
- description: Qualcomm Technologies, Inc. Distributed Unit 1000 platform
items:
- enum:
@@ -850,6 +878,12 @@
- items:
- enum:
+ - qcom,qrb4210-rb2
+ - const: qcom,qrb4210
+ - const: qcom,sm4250
+
+ - items:
+ - enum:
- lenovo,j606f
- const: qcom,sm6115p
- const: qcom,sm6115
@@ -857,6 +891,7 @@
- items:
- enum:
- sony,pdx201
+ - xiaomi,laurel-sprout
- const: qcom,sm6125
- items:
@@ -913,6 +948,7 @@
- items:
- enum:
- qcom,sm8550-mtp
+ - qcom,sm8550-qrd
- const: qcom,sm8550
# Board compatibles go above
diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index 35f74ed..ec141c93 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -185,9 +185,11 @@
- const: firefly,rk3566-roc-pc
- const: rockchip,rk3566
- - description: FriendlyElec NanoPi R2S
+ - description: FriendlyElec NanoPi R2 series boards
items:
- - const: friendlyarm,nanopi-r2s
+ - enum:
+ - friendlyarm,nanopi-r2c
+ - friendlyarm,nanopi-r2s
- const: rockchip,rk3328
- description: FriendlyElec NanoPi4 series boards
@@ -201,6 +203,13 @@
- friendlyarm,nanopi-r4s-enterprise
- const: rockchip,rk3399
+ - description: FriendlyElec NanoPi R5 series boards
+ items:
+ - enum:
+ - friendlyarm,nanopi-r5c
+ - friendlyarm,nanopi-r5s
+ - const: rockchip,rk3568
+
- description: GeekBuying GeekBox
items:
- const: geekbuying,geekbox
@@ -533,6 +542,11 @@
- khadas,edge-v
- const: rockchip,rk3399
+ - description: Khadas Edge2 series boards
+ items:
+ - const: khadas,edge2
+ - const: rockchip,rk3588s
+
- description: Kobol Helios64
items:
- const: kobol,helios64
@@ -817,9 +831,11 @@
- const: tronsmart,orion-r68-meta
- const: rockchip,rk3368
- - description: Xunlong Orange Pi R1 Plus
+ - description: Xunlong Orange Pi R1 Plus / LTS
items:
- - const: xunlong,orangepi-r1-plus
+ - enum:
+ - xunlong,orangepi-r1-plus
+ - xunlong,orangepi-r1-plus-lts
- const: rockchip,rk3328
- description: Zkmagic A95X Z2
diff --git a/Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml b/Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml
deleted file mode 100644
index 6096c08..0000000
--- a/Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/arm/socionext/socionext,uniphier-system-cache.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: UniPhier outer cache controller
-
-description: |
- UniPhier ARM 32-bit SoCs are integrated with a full-custom outer cache
- controller system. All of them have a level 2 cache controller, and some
- have a level 3 cache controller as well.
-
-maintainers:
- - Masahiro Yamada <yamada.masahiro@socionext.com>
-
-properties:
- compatible:
- const: socionext,uniphier-system-cache
-
- reg:
- description: |
- should contain 3 regions: control register, revision register,
- operation register, in this order.
- maxItems: 3
-
- interrupts:
- description: |
- Interrupts can be used to notify the completion of cache operations.
- The number of interrupts should match to the number of CPU cores.
- The specified interrupts correspond to CPU0, CPU1, ... in this order.
- minItems: 1
- maxItems: 4
-
- cache-unified: true
-
- cache-size: true
-
- cache-sets: true
-
- cache-line-size: true
-
- cache-level:
- minimum: 2
- maximum: 3
-
- next-level-cache: true
-
-allOf:
- - $ref: /schemas/cache-controller.yaml#
-
-additionalProperties: false
-
-required:
- - compatible
- - reg
- - interrupts
- - cache-unified
- - cache-size
- - cache-sets
- - cache-line-size
- - cache-level
-
-examples:
- - |
- // System with L2.
- cache-controller@500c0000 {
- compatible = "socionext,uniphier-system-cache";
- reg = <0x500c0000 0x2000>, <0x503c0100 0x4>, <0x506c0000 0x400>;
- interrupts = <0 174 4>, <0 175 4>, <0 190 4>, <0 191 4>;
- cache-unified;
- cache-size = <0x140000>;
- cache-sets = <512>;
- cache-line-size = <128>;
- cache-level = <2>;
- };
- - |
- // System with L2 and L3.
- // L2 should specify the next level cache by 'next-level-cache'.
- l2: cache-controller@500c0000 {
- compatible = "socionext,uniphier-system-cache";
- reg = <0x500c0000 0x2000>, <0x503c0100 0x8>, <0x506c0000 0x400>;
- interrupts = <0 190 4>, <0 191 4>;
- cache-unified;
- cache-size = <0x200000>;
- cache-sets = <512>;
- cache-line-size = <128>;
- cache-level = <2>;
- next-level-cache = <&l3>;
- };
-
- l3: cache-controller@500c8000 {
- compatible = "socionext,uniphier-system-cache";
- reg = <0x500c8000 0x2000>, <0x503c8100 0x8>, <0x506c8000 0x400>;
- interrupts = <0 174 4>, <0 175 4>;
- cache-unified;
- cache-size = <0x200000>;
- cache-sets = <512>;
- cache-line-size = <256>;
- cache-level = <3>;
- };
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
index b2b156c..ad8e51a 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
@@ -20,6 +20,7 @@
- st,stm32-syscfg
- st,stm32-power-config
- st,stm32-tamp
+ - st,stm32f4-gcan
- const: syscon
- items:
- const: st,stm32-tamp
@@ -42,6 +43,7 @@
contains:
enum:
- st,stm32mp157-syscfg
+ - st,stm32f4-gcan
then:
required:
- clocks
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index 3ad1cd5..013821f 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -366,6 +366,12 @@
- const: lamobo,lamobo-r1
- const: allwinner,sun7i-a20
+ - description: Lctech Pi F1C200s
+ items:
+ - const: lctech,pi-f1c200s
+ - const: allwinner,suniv-f1c200s
+ - const: allwinner,suniv-f1c100s
+
- description: Libre Computer Board ALL-H3-CC H2+
items:
- const: libretech,all-h3-cc-h2-plus
@@ -807,6 +813,13 @@
- const: sinlinx,sina33
- const: allwinner,sun8i-a33
+ - description: SourceParts PopStick v1.1
+ items:
+ - const: sourceparts,popstick-v1.1
+ - const: sourceparts,popstick
+ - const: allwinner,suniv-f1c200s
+ - const: allwinner,suniv-f1c100s
+
- description: SL631 Action Camera with IMX179
items:
- const: allwinner,sl631-imx179
@@ -843,6 +856,11 @@
- const: wexler,tab7200
- const: allwinner,sun7i-a20
+ - description: MangoPi MQ-R board
+ items:
+ - const: widora,mangopi-mq-r-t113
+ - const: allwinner,sun8i-t113s
+
- description: WITS A31 Colombus Evaluation Board
items:
- const: wits,colombus
diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml
index 1f62253..0df41f5 100644
--- a/Documentation/devicetree/bindings/arm/tegra.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra.yaml
@@ -167,5 +167,14 @@
- const: nvidia,p3737-0000+p3701-0000
- const: nvidia,p3701-0000
- const: nvidia,tegra234
+ - description: Jetson Orin NX
+ items:
+ - const: nvidia,p3767-0000
+ - const: nvidia,tegra234
+ - description: Jetson Orin NX Engineering Reference Developer Kit
+ items:
+ - const: nvidia,p3768-0000+p3767-0000
+ - const: nvidia,p3767-0000
+ - const: nvidia,tegra234
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
index 6089a96..36dbd08 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra-ccplex-cluster.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra-ccplex-cluster.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: NVIDIA Tegra CPU COMPLEX CLUSTER area
@@ -29,7 +29,7 @@
maxItems: 1
nvidia,bpmp:
- $ref: '/schemas/types.yaml#/definitions/phandle'
+ $ref: /schemas/types.yaml#/definitions/phandle
description: |
Specifies the BPMP node that needs to be queried to get
operating point data for all CPUs.
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml
index 788a13f..5e0f1dc 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-axi2apb.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-axi2apb.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-axi2apb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: NVIDIA Tegra194 AXI2APB bridge
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
index dd3a477..d9c54c3 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-cbb.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-cbb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: NVIDIA Tegra194 CBB 1.0
@@ -64,13 +64,13 @@
- description: secure interrupt
nvidia,axi2apb:
- $ref: '/schemas/types.yaml#/definitions/phandle'
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Specifies the node having all axi2apb bridges which need to be checked
for any error logged in their status register.
nvidia,apbmisc:
- $ref: '/schemas/types.yaml#/definitions/phandle'
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
Specifies the apbmisc node which need to be used for reading the ERD
register.
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml
index 4a00593..89191cf 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml
@@ -234,6 +234,7 @@
patternProperties:
"^[a-z0-9]+$":
type: object
+ additionalProperties: false
properties:
clocks:
@@ -252,6 +253,9 @@
for controlling a power-gate.
See ../reset/reset.txt for more details.
+ power-domains:
+ maxItems: 1
+
'#power-domain-cells':
const: 0
description: Must be 0.
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
index 44184ee..fcdf031 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra234-cbb.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra234-cbb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: NVIDIA Tegra CBB 2.0
diff --git a/Documentation/devicetree/bindings/arm/ti/k3.yaml b/Documentation/devicetree/bindings/arm/ti/k3.yaml
index a60a406..e1183f9 100644
--- a/Documentation/devicetree/bindings/arm/ti/k3.yaml
+++ b/Documentation/devicetree/bindings/arm/ti/k3.yaml
@@ -28,7 +28,9 @@
- description: K3 AM625 SoC
items:
- enum:
+ - beagle,am625-beagleplay
- ti,am625-sk
+ - ti,am62-lp-sk
- const: ti,am625
- description: K3 AM642 SoC
diff --git a/Documentation/devicetree/bindings/ata/ahci-common.yaml b/Documentation/devicetree/bindings/ata/ahci-common.yaml
index 94d72ae..7fdf409 100644
--- a/Documentation/devicetree/bindings/ata/ahci-common.yaml
+++ b/Documentation/devicetree/bindings/ata/ahci-common.yaml
@@ -59,7 +59,7 @@
const: sata-phy
hba-cap:
- $ref: '/schemas/types.yaml#/definitions/uint32'
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Bitfield of the HBA generic platform capabilities like Staggered
Spin-up or Mechanical Presence Switch support. It can be used to
@@ -67,7 +67,7 @@
in case if the system firmware hasn't done it.
ports-implemented:
- $ref: '/schemas/types.yaml#/definitions/uint32'
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Mask that indicates which ports the HBA supports. Useful if PI is not
programmed by the BIOS, which is true for some embedded SoC's.
@@ -110,7 +110,7 @@
description: Power regulator for SATA port target device
hba-port-cap:
- $ref: '/schemas/types.yaml#/definitions/uint32'
+ $ref: /schemas/types.yaml#/definitions/uint32
description:
Bitfield of the HBA port-specific platform capabilities like Hot
plugging, eSATA, FIS-based Switching, etc (see AHCI specification
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.yaml b/Documentation/devicetree/bindings/ata/ahci-platform.yaml
index 7dc2a2e..3586171 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.yaml
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.yaml
@@ -30,12 +30,12 @@
- marvell,armada-3700-ahci
- marvell,armada-8k-ahci
- marvell,berlin2q-ahci
+ - socionext,uniphier-pro4-ahci
+ - socionext,uniphier-pxs2-ahci
+ - socionext,uniphier-pxs3-ahci
required:
- compatible
-allOf:
- - $ref: "ahci-common.yaml#"
-
properties:
compatible:
oneOf:
@@ -45,6 +45,9 @@
- marvell,armada-8k-ahci
- marvell,berlin2-ahci
- marvell,berlin2q-ahci
+ - socionext,uniphier-pro4-ahci
+ - socionext,uniphier-pxs2-ahci
+ - socionext,uniphier-pxs3-ahci
- const: generic-ahci
- enum:
- cavium,octeon-7130-ahci
@@ -74,7 +77,8 @@
maxItems: 1
resets:
- maxItems: 1
+ minItems: 1
+ maxItems: 3
patternProperties:
"^sata-port@[0-9a-f]+$":
@@ -91,6 +95,43 @@
- reg
- interrupts
+allOf:
+ - $ref: ahci-common.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: socionext,uniphier-pro4-ahci
+ then:
+ properties:
+ resets:
+ items:
+ - description: reset line for the parent
+ - description: reset line for the glue logic
+ - description: reset line for the controller
+ required:
+ - resets
+ else:
+ if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - socionext,uniphier-pxs2-ahci
+ - socionext,uniphier-pxs3-ahci
+ then:
+ properties:
+ resets:
+ items:
+ - description: reset for the glue logic
+ - description: reset for the controller
+ required:
+ - resets
+ else:
+ properties:
+ resets:
+ maxItems: 1
+
unevaluatedProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
index 9b31f86..71364c60 100644
--- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@@ -32,7 +32,7 @@
maxItems: 1
iommus:
- maxItems: 1
+ maxItems: 4
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
index c4e4a9e..fe09095 100644
--- a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/ata/renesas,rcar-sata.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/ata/renesas,rcar-sata.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Renesas R-Car Serial-ATA Interface
diff --git a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
index fc4873d..49304a1 100644
--- a/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
+++ b/Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml
@@ -10,7 +10,7 @@
- Robin van der Gracht <robin@protonic.nl>
allOf:
- - $ref: "/schemas/input/matrix-keymap.yaml#"
+ - $ref: /schemas/input/matrix-keymap.yaml#
properties:
compatible:
@@ -72,7 +72,7 @@
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/input/input.h>
#include <dt-bindings/leds/common.h>
- i2c1 {
+ i2c {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
index 85c4a97..9845a18 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
@@ -46,6 +46,7 @@
# All other properties should be child nodes with unit-address and 'reg'
"^[a-zA-Z][a-zA-Z0-9,+\\-._]{0,63}@[0-9a-fA-F]+$":
type: object
+ additionalProperties: true
properties:
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
index bee5f53..24c939f 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -45,6 +45,7 @@
patternProperties:
"^.*@[0-9a-fA-F]+$":
type: object
+ additionalProperties: true
properties:
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml b/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml
new file mode 100644
index 0000000..a8d40c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/microsoft,vmbus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microsoft Hyper-V VMBus
+
+maintainers:
+ - Saurabh Sengar <ssengar@linux.microsoft.com>
+
+description:
+ VMBus is a software bus that implement the protocols for communication
+ between the root or host OS and guest OSs (virtual machines).
+
+properties:
+ compatible:
+ const: microsoft,vmbus
+
+ ranges: true
+
+ '#address-cells':
+ const: 2
+
+ '#size-cells':
+ const: 1
+
+required:
+ - compatible
+ - ranges
+ - '#address-cells'
+ - '#size-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ bus {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges;
+
+ vmbus@ff0000000 {
+ compatible = "microsoft,vmbus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/palmbus.yaml b/Documentation/devicetree/bindings/bus/palmbus.yaml
index 30fa652..c36c1e9 100644
--- a/Documentation/devicetree/bindings/bus/palmbus.yaml
+++ b/Documentation/devicetree/bindings/bus/palmbus.yaml
@@ -36,6 +36,7 @@
# All other properties should be child nodes with unit-address and 'reg'
"@[0-9a-f]+$":
type: object
+ additionalProperties: true
properties:
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml b/Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml
new file mode 100644
index 0000000..7f62ffb
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/xlnx,versal-net-cdx.yaml
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/xlnx,versal-net-cdx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AMD CDX bus controller
+
+description: |
+ CDX bus controller for AMD devices is implemented to dynamically
+ detect CDX bus and devices using the firmware.
+ The CDX bus manages multiple FPGA based hardware devices, which
+ can support network, crypto or any other specialized type of
+ devices. These FPGA based devices can be added/modified dynamically
+ on run-time.
+
+ All devices on the CDX bus will have a unique streamid (for IOMMU)
+ and a unique device ID (for MSI) corresponding to a requestor ID
+ (one to one associated with the device). The streamid and deviceid
+ are used to configure SMMU and GIC-ITS respectively.
+
+ iommu-map property is used to define the set of stream ids
+ corresponding to each device and the associated IOMMU.
+
+ The MSI writes are accompanied by sideband data (Device ID).
+ The msi-map property is used to associate the devices with the
+ device ID as well as the associated ITS controller.
+
+ rproc property (xlnx,rproc) is used to identify the remote processor
+ with which APU (Application Processor Unit) interacts to find out
+ the bus and device configuration.
+
+maintainers:
+ - Nipun Gupta <nipun.gupta@amd.com>
+ - Nikhil Agarwal <nikhil.agarwal@amd.com>
+
+properties:
+ compatible:
+ const: xlnx,versal-net-cdx
+
+ iommu-map: true
+
+ msi-map: true
+
+ xlnx,rproc:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ phandle to the remoteproc_r5 rproc node using which APU interacts
+ with remote processor.
+
+ ranges: true
+
+ "#address-cells":
+ enum: [1, 2]
+
+ "#size-cells":
+ enum: [1, 2]
+
+required:
+ - compatible
+ - iommu-map
+ - msi-map
+ - xlnx,rproc
+ - ranges
+ - "#address-cells"
+ - "#size-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ cdx {
+ compatible = "xlnx,versal-net-cdx";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ /* define map for RIDs 250-259 */
+ iommu-map = <250 &smmu 250 10>;
+ /* define msi map for RIDs 250-259 */
+ msi-map = <250 &its 250 10>;
+ xlnx,rproc = <&remoteproc_r5>;
+ ranges;
+ };
diff --git a/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml b/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
new file mode 100644
index 0000000..ec4f367
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/baikal,bt1-l2-ctl.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/baikal,bt1-l2-ctl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 L2-cache Control Block
+
+maintainers:
+ - Serge Semin <fancer.lancer@gmail.com>
+
+description: |
+ By means of the System Controller Baikal-T1 SoC exposes a few settings to
+ tune the MIPS P5600 CM2 L2 cache performance up. In particular it's possible
+ to change the Tag, Data and Way-select RAM access latencies. Baikal-T1
+ L2-cache controller block is responsible for the tuning. Its DT node is
+ supposed to be a child of the system controller.
+
+properties:
+ compatible:
+ const: baikal,bt1-l2-ctl
+
+ reg:
+ maxItems: 1
+
+ baikal,l2-ws-latency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Cycles of latency for Way-select RAM accesses
+ default: 0
+ minimum: 0
+ maximum: 3
+
+ baikal,l2-tag-latency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Cycles of latency for Tag RAM accesses
+ default: 0
+ minimum: 0
+ maximum: 3
+
+ baikal,l2-data-latency:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Cycles of latency for Data RAM accesses
+ default: 1
+ minimum: 0
+ maximum: 3
+
+additionalProperties: false
+
+required:
+ - compatible
+
+examples:
+ - |
+ l2@1f04d028 {
+ compatible = "baikal,bt1-l2-ctl";
+ reg = <0x1f04d028 0x004>;
+
+ baikal,l2-ws-latency = <1>;
+ baikal,l2-tag-latency = <1>;
+ baikal,l2-data-latency = <2>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt b/Documentation/devicetree/bindings/cache/freescale-l2cache.txt
similarity index 100%
rename from Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
rename to Documentation/devicetree/bindings/cache/freescale-l2cache.txt
diff --git a/Documentation/devicetree/bindings/cache/l2c2x0.yaml b/Documentation/devicetree/bindings/cache/l2c2x0.yaml
new file mode 100644
index 0000000..d7840a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/l2c2x0.yaml
@@ -0,0 +1,242 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/l2c2x0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM L2 Cache Controller
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+description: |+
+ ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/
+ PL220/PL310 and variants) based level 2 cache controller. All these various
+ implementations of the L2 cache controller have compatible programming
+ models (Note 1). Some of the properties that are just prefixed "cache-*" are
+ taken from section 3.7.3 of the Devicetree Specification which can be found
+ at:
+ https://www.devicetree.org/specifications/
+
+ Note 1: The description in this document doesn't apply to integrated L2
+ cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These
+ integrated L2 controllers are assumed to be all preconfigured by
+ early secure boot code. Thus no need to deal with their configuration
+ in the kernel at all.
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - arm,pl310-cache
+ - arm,l220-cache
+ - arm,l210-cache
+ # DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
+ - bcm,bcm11351-a2-pl310-cache
+ # For Broadcom bcm11351 chipset where an
+ # offset needs to be added to the address before passing down to the L2
+ # cache controller
+ - brcm,bcm11351-a2-pl310-cache
+ # Marvell Controller designed to be
+ # compatible with the ARM one, with system cache mode (meaning
+ # maintenance operations on L1 are broadcasted to the L2 and L2
+ # performs the same operation).
+ - marvell,aurora-system-cache
+ # Marvell Controller designed to be
+ # compatible with the ARM one with outer cache mode.
+ - marvell,aurora-outer-cache
+ - items:
+ # Marvell Tauros3 cache controller, compatible
+ # with arm,pl310-cache controller.
+ - const: marvell,tauros3-cache
+ - const: arm,pl310-cache
+
+ cache-level:
+ const: 2
+
+ cache-unified: true
+ cache-size: true
+ cache-sets: true
+ cache-block-size: true
+ cache-line-size: true
+
+ reg:
+ maxItems: 1
+
+ arm,data-latency:
+ description: Cycles of latency for Data RAM accesses. Specifies 3 cells of
+ read, write and setup latencies. Minimum valid values are 1. Controllers
+ without setup latency control should use a value of 0.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 2
+ maxItems: 3
+ items:
+ minimum: 0
+ maximum: 8
+
+ arm,tag-latency:
+ description: Cycles of latency for Tag RAM accesses. Specifies 3 cells of
+ read, write and setup latencies. Controllers without setup latency control
+ should use 0. Controllers without separate read and write Tag RAM latency
+ values should only use the first cell.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 3
+ items:
+ minimum: 0
+ maximum: 8
+
+ arm,dirty-latency:
+ description: Cycles of latency for Dirty RAMs. This is a single cell.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 8
+
+ arm,filter-ranges:
+ description: <start length> Starting address and length of window to
+ filter. Addresses in the filter window are directed to the M1 port. Other
+ addresses will go to the M0 port.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ minItems: 2
+ maxItems: 2
+
+ arm,io-coherent:
+ description: indicates that the system is operating in an hardware
+ I/O coherent mode. Valid only when the arm,pl310-cache compatible
+ string is used.
+ type: boolean
+
+ interrupts:
+ # Either a single combined interrupt or up to 9 individual interrupts
+ minItems: 1
+ maxItems: 9
+
+ cache-id-part:
+ description: cache id part number to be used if it is not present
+ on hardware
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ wt-override:
+ description: If present then L2 is forced to Write through mode
+ type: boolean
+
+ arm,double-linefill:
+ description: Override double linefill enable setting. Enable if
+ non-zero, disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,double-linefill-incr:
+ description: Override double linefill on INCR read. Enable
+ if non-zero, disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,double-linefill-wrap:
+ description: Override double linefill on WRAP read. Enable
+ if non-zero, disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,prefetch-drop:
+ description: Override prefetch drop enable setting. Enable if non-zero,
+ disable if zero.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,prefetch-offset:
+ description: Override prefetch offset value.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3, 4, 5, 6, 7, 15, 23, 31]
+
+ arm,shared-override:
+ description: The default behavior of the L220 or PL310 cache
+ controllers with respect to the shareable attribute is to transform "normal
+ memory non-cacheable transactions" into "cacheable no allocate" (for reads)
+ or "write through no write allocate" (for writes).
+ On systems where this may cause DMA buffer corruption, this property must
+ be specified to indicate that such transforms are precluded.
+ type: boolean
+
+ arm,parity-enable:
+ description: enable parity checking on the L2 cache (L220 or PL310).
+ type: boolean
+
+ arm,parity-disable:
+ description: disable parity checking on the L2 cache (L220 or PL310).
+ type: boolean
+
+ marvell,ecc-enable:
+ description: enable ECC protection on the L2 cache
+ type: boolean
+
+ arm,outer-sync-disable:
+ description: disable the outer sync operation on the L2 cache.
+ Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that
+ will randomly hang unless outer sync operations are disabled.
+ type: boolean
+
+ prefetch-data:
+ description: |
+ Data prefetch. Value: <0> (forcibly disable), <1>
+ (forcibly enable), property absent (retain settings set by firmware)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ prefetch-instr:
+ description: |
+ Instruction prefetch. Value: <0> (forcibly disable),
+ <1> (forcibly enable), property absent (retain settings set by
+ firmware)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,dynamic-clock-gating:
+ description: |
+ L2 dynamic clock gating. Value: <0> (forcibly
+ disable), <1> (forcibly enable), property absent (OS specific behavior,
+ preferably retain firmware settings)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,standby-mode:
+ description: L2 standby mode enable. Value <0> (forcibly disable),
+ <1> (forcibly enable), property absent (OS specific behavior,
+ preferably retain firmware settings)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+
+ arm,early-bresp-disable:
+ description: Disable the CA9 optimization Early BRESP (PL310)
+ type: boolean
+
+ arm,full-line-zero-disable:
+ description: Disable the CA9 optimization Full line of zero
+ write (PL310)
+ type: boolean
+
+required:
+ - compatible
+ - cache-unified
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ cache-controller@fff12000 {
+ compatible = "arm,pl310-cache";
+ reg = <0xfff12000 0x1000>;
+ arm,data-latency = <1 1 1>;
+ arm,tag-latency = <2 2 2>;
+ arm,filter-ranges = <0x80000000 0x8000000>;
+ cache-unified;
+ cache-level = <2>;
+ interrupts = <45>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt b/Documentation/devicetree/bindings/cache/marvell,feroceon-cache.txt
similarity index 100%
rename from Documentation/devicetree/bindings/arm/mrvl/feroceon.txt
rename to Documentation/devicetree/bindings/cache/marvell,feroceon-cache.txt
diff --git a/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt b/Documentation/devicetree/bindings/cache/marvell,tauros2-cache.txt
similarity index 100%
rename from Documentation/devicetree/bindings/arm/mrvl/tauros2.txt
rename to Documentation/devicetree/bindings/cache/marvell,tauros2-cache.txt
diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
new file mode 100644
index 0000000..d8b9194
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/qcom,llcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Last Level Cache Controller
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ LLCC (Last Level Cache Controller) provides last level of cache memory in SoC,
+ that can be shared by multiple clients. Clients here are different cores in the
+ SoC, the idea is to minimize the local caches at the clients and migrate to
+ common pool of memory. Cache memory is divided into partitions called slices
+ which are assigned to clients. Clients can query the slice details, activate
+ and deactivate them.
+
+properties:
+ compatible:
+ enum:
+ - qcom,sc7180-llcc
+ - qcom,sc7280-llcc
+ - qcom,sc8180x-llcc
+ - qcom,sc8280xp-llcc
+ - qcom,sdm845-llcc
+ - qcom,sm6350-llcc
+ - qcom,sm7150-llcc
+ - qcom,sm8150-llcc
+ - qcom,sm8250-llcc
+ - qcom,sm8350-llcc
+ - qcom,sm8450-llcc
+ - qcom,sm8550-llcc
+
+ reg:
+ minItems: 2
+ maxItems: 9
+
+ reg-names:
+ minItems: 2
+ maxItems: 9
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7180-llcc
+ - qcom,sm6350-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc7280-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sc8180x-llcc
+ - qcom,sc8280xp-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC4 base register region
+ - description: LLCC5 base register region
+ - description: LLCC6 base register region
+ - description: LLCC7 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc4_base
+ - const: llcc5_base
+ - const: llcc6_base
+ - const: llcc7_base
+ - const: llcc_broadcast_base
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,sdm845-llcc
+ - qcom,sm8150-llcc
+ - qcom,sm8250-llcc
+ - qcom,sm8350-llcc
+ - qcom,sm8450-llcc
+ then:
+ properties:
+ reg:
+ items:
+ - description: LLCC0 base register region
+ - description: LLCC1 base register region
+ - description: LLCC2 base register region
+ - description: LLCC3 base register region
+ - description: LLCC broadcast base register region
+ reg-names:
+ items:
+ - const: llcc0_base
+ - const: llcc1_base
+ - const: llcc2_base
+ - const: llcc3_base
+ - const: llcc_broadcast_base
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ system-cache-controller@1100000 {
+ compatible = "qcom,sdm845-llcc";
+ reg = <0 0x01100000 0 0x50000>, <0 0x01180000 0 0x50000>,
+ <0 0x01200000 0 0x50000>, <0 0x01280000 0 0x50000>,
+ <0 0x01300000 0 0x50000>;
+ reg-names = "llcc0_base", "llcc1_base", "llcc2_base",
+ "llcc3_base", "llcc_broadcast_base";
+ interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
new file mode 100644
index 0000000..8a6a78e
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
@@ -0,0 +1,170 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) 2020 SiFive, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/sifive,ccache0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive Composable Cache Controller
+
+maintainers:
+ - Paul Walmsley <paul.walmsley@sifive.com>
+
+description:
+ The SiFive Composable Cache Controller is used to provide access to fast copies
+ of memory for masters in a Core Complex. The Composable Cache Controller also
+ acts as directory-based coherency manager.
+ All the properties in ePAPR/DeviceTree specification applies for this platform.
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sifive,ccache0
+ - sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
+
+ required:
+ - compatible
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - sifive,ccache0
+ - sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
+ - const: cache
+ - items:
+ - const: starfive,jh7110-ccache
+ - const: sifive,ccache0
+ - const: cache
+ - items:
+ - const: microchip,mpfs-ccache
+ - const: sifive,fu540-c000-ccache
+ - const: cache
+
+ cache-block-size:
+ const: 64
+
+ cache-level:
+ enum: [2, 3]
+
+ cache-sets:
+ enum: [1024, 2048]
+
+ cache-size:
+ const: 2097152
+
+ cache-unified: true
+
+ interrupts:
+ minItems: 3
+ items:
+ - description: DirError interrupt
+ - description: DataError interrupt
+ - description: DataFail interrupt
+ - description: DirFail interrupt
+
+ reg:
+ maxItems: 1
+
+ next-level-cache: true
+
+ memory-region:
+ maxItems: 1
+ description: |
+ The reference to the reserved-memory for the L2 Loosely Integrated Memory region.
+ The reserved memory node should be defined as per the bindings in reserved-memory.txt.
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sifive,fu740-c000-ccache
+ - starfive,jh7110-ccache
+ - microchip,mpfs-ccache
+
+ then:
+ properties:
+ interrupts:
+ description: |
+ Must contain entries for DirError, DataError, DataFail, DirFail signals.
+ minItems: 4
+
+ else:
+ properties:
+ interrupts:
+ description: |
+ Must contain entries for DirError, DataError and DataFail signals.
+ maxItems: 3
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sifive,fu740-c000-ccache
+ - starfive,jh7110-ccache
+
+ then:
+ properties:
+ cache-sets:
+ const: 2048
+
+ else:
+ properties:
+ cache-sets:
+ const: 1024
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: sifive,ccache0
+
+ then:
+ properties:
+ cache-level:
+ enum: [2, 3]
+
+ else:
+ properties:
+ cache-level:
+ const: 2
+
+additionalProperties: false
+
+required:
+ - compatible
+ - cache-block-size
+ - cache-level
+ - cache-sets
+ - cache-size
+ - cache-unified
+ - interrupts
+ - reg
+
+examples:
+ - |
+ cache-controller@2010000 {
+ compatible = "sifive,fu540-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <2097152>;
+ cache-unified;
+ reg = <0x2010000 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <1>,
+ <2>,
+ <3>;
+ next-level-cache = <&L25>;
+ memory-region = <&l2_lim>;
+ };
diff --git a/Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml b/Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml
new file mode 100644
index 0000000..3196263
--- /dev/null
+++ b/Documentation/devicetree/bindings/cache/socionext,uniphier-system-cache.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/socionext,uniphier-system-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: UniPhier outer cache controller
+
+description: |
+ UniPhier ARM 32-bit SoCs are integrated with a full-custom outer cache
+ controller system. All of them have a level 2 cache controller, and some
+ have a level 3 cache controller as well.
+
+maintainers:
+ - Masahiro Yamada <yamada.masahiro@socionext.com>
+
+properties:
+ compatible:
+ const: socionext,uniphier-system-cache
+
+ reg:
+ description: |
+ should contain 3 regions: control register, revision register,
+ operation register, in this order.
+ maxItems: 3
+
+ interrupts:
+ description: |
+ Interrupts can be used to notify the completion of cache operations.
+ The number of interrupts should match to the number of CPU cores.
+ The specified interrupts correspond to CPU0, CPU1, ... in this order.
+ minItems: 1
+ maxItems: 4
+
+ cache-unified: true
+
+ cache-size: true
+
+ cache-sets: true
+
+ cache-line-size: true
+
+ cache-level:
+ minimum: 2
+ maximum: 3
+
+ next-level-cache: true
+
+allOf:
+ - $ref: /schemas/cache-controller.yaml#
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - cache-unified
+ - cache-size
+ - cache-sets
+ - cache-line-size
+ - cache-level
+
+examples:
+ - |
+ // System with L2.
+ cache-controller@500c0000 {
+ compatible = "socionext,uniphier-system-cache";
+ reg = <0x500c0000 0x2000>, <0x503c0100 0x4>, <0x506c0000 0x400>;
+ interrupts = <0 174 4>, <0 175 4>, <0 190 4>, <0 191 4>;
+ cache-unified;
+ cache-size = <0x140000>;
+ cache-sets = <512>;
+ cache-line-size = <128>;
+ cache-level = <2>;
+ };
+ - |
+ // System with L2 and L3.
+ // L2 should specify the next level cache by 'next-level-cache'.
+ l2: cache-controller@500c0000 {
+ compatible = "socionext,uniphier-system-cache";
+ reg = <0x500c0000 0x2000>, <0x503c0100 0x8>, <0x506c0000 0x400>;
+ interrupts = <0 190 4>, <0 191 4>;
+ cache-unified;
+ cache-size = <0x200000>;
+ cache-sets = <512>;
+ cache-line-size = <128>;
+ cache-level = <2>;
+ next-level-cache = <&l3>;
+ };
+
+ l3: cache-controller@500c8000 {
+ compatible = "socionext,uniphier-system-cache";
+ reg = <0x500c8000 0x2000>, <0x503c8100 0x8>, <0x506c8000 0x400>;
+ interrupts = <0 174 4>, <0 175 4>;
+ cache-unified;
+ cache-size = <0x200000>;
+ cache-sets = <512>;
+ cache-line-size = <256>;
+ cache-level = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
index defcf1e1..3b0548c 100644
--- a/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
+++ b/Documentation/devicetree/bindings/chrome/google,cros-ec-typec.yaml
@@ -41,7 +41,7 @@
examples:
- |+
- spi0 {
+ spi {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/chrome/google,cros-kbd-led-backlight.yaml b/Documentation/devicetree/bindings/chrome/google,cros-kbd-led-backlight.yaml
index 40244d0..c94ab8f 100644
--- a/Documentation/devicetree/bindings/chrome/google,cros-kbd-led-backlight.yaml
+++ b/Documentation/devicetree/bindings/chrome/google,cros-kbd-led-backlight.yaml
@@ -20,7 +20,7 @@
examples:
- |
- spi0 {
+ spi {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/clock/apple,nco.yaml b/Documentation/devicetree/bindings/clock/apple,nco.yaml
index 74eab5c..8b8411d 100644
--- a/Documentation/devicetree/bindings/clock/apple,nco.yaml
+++ b/Documentation/devicetree/bindings/clock/apple,nco.yaml
@@ -23,6 +23,7 @@
- enum:
- apple,t6000-nco
- apple,t8103-nco
+ - apple,t8112-nco
- const: apple,nco
clocks:
diff --git a/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml b/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml
index 90eadf6..b5533f8 100644
--- a/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml
+++ b/Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml
@@ -81,11 +81,11 @@
maxItems: 1
lock-offset:
- $ref: '/schemas/types.yaml#/definitions/uint32'
+ $ref: /schemas/types.yaml#/definitions/uint32
description: Offset to the unlocking register for the oscillator
vco-offset:
- $ref: '/schemas/types.yaml#/definitions/uint32'
+ $ref: /schemas/types.yaml#/definitions/uint32
description: Offset to the VCO register for the oscillator
deprecated: true
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml b/Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml
new file mode 100644
index 0000000..199818b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm63268-timer-clocks.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/brcm,bcm63268-timer-clocks.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM63268 Timer Clock and Reset Device Tree Bindings
+
+maintainers:
+ - Álvaro Fernández Rojas <noltari@gmail.com>
+
+properties:
+ compatible:
+ const: brcm,bcm63268-timer-clocks
+
+ reg:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+ "#reset-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - "#clock-cells"
+ - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ timer_clk: clock-controller@100000ac {
+ compatible = "brcm,bcm63268-timer-clocks";
+ reg = <0x100000ac 0x4>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml b/Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml
new file mode 100644
index 0000000..ff96004
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/imx8mp-audiomix.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8MP AudioMIX Block Control Binding
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+
+description: |
+ NXP i.MX8M Plus AudioMIX is dedicated clock muxing and gating IP
+ used to control Audio related clock on the SoC.
+
+properties:
+ compatible:
+ const: fsl,imx8mp-audio-blk-ctrl
+
+ reg:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+ clocks:
+ minItems: 7
+ maxItems: 7
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: sai1
+ - const: sai2
+ - const: sai3
+ - const: sai5
+ - const: sai6
+ - const: sai7
+
+ '#clock-cells':
+ const: 1
+ description:
+ The clock consumer should specify the desired clock by having the clock
+ ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mp-clock.h
+ for the full list of i.MX8MP IMX8MP_CLK_AUDIOMIX_ clock IDs.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - power-domains
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ # Clock Control Module node:
+ - |
+ #include <dt-bindings/clock/imx8mp-clock.h>
+
+ clock-controller@30e20000 {
+ compatible = "fsl,imx8mp-audio-blk-ctrl";
+ reg = <0x30e20000 0x10000>;
+ #clock-cells = <1>;
+ clocks = <&clk IMX8MP_CLK_AUDIO_ROOT>,
+ <&clk IMX8MP_CLK_SAI1>,
+ <&clk IMX8MP_CLK_SAI2>,
+ <&clk IMX8MP_CLK_SAI3>,
+ <&clk IMX8MP_CLK_SAI5>,
+ <&clk IMX8MP_CLK_SAI6>,
+ <&clk IMX8MP_CLK_SAI7>;
+ clock-names = "ahb",
+ "sai1", "sai2", "sai3",
+ "sai5", "sai6", "sai7";
+ power-domains = <&pgc_audio>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml b/Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml
new file mode 100644
index 0000000..01561a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/loongson,ls1x-clk.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/loongson,ls1x-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1 Clock Controller
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - loongson,ls1b-clk
+ - loongson,ls1c-clk
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#clock-cells":
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ clkc: clock-controller@1fe78030 {
+ compatible = "loongson,ls1b-clk";
+ reg = <0x1fe78030 0x8>;
+
+ clocks = <&xtal>;
+ #clock-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml
index dae25db..372c1d7 100644
--- a/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml
+++ b/Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/clock/mediatek,apmixedsys.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/clock/mediatek,apmixedsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek AP Mixedsys Controller
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml
index cfd042a..d00327d 100644
--- a/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8186-fhctl.yaml
@@ -16,7 +16,12 @@
properties:
compatible:
- const: mediatek,mt8186-fhctl
+ enum:
+ - mediatek,mt6795-fhctl
+ - mediatek,mt8173-fhctl
+ - mediatek,mt8186-fhctl
+ - mediatek,mt8192-fhctl
+ - mediatek,mt8195-fhctl
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
new file mode 100644
index 0000000..d7214d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8188-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Functional Clock Controller for MT8188
+
+maintainers:
+ - Garmin Chang <garmin.chang@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The devices provide clock gate control in different IP blocks.
+
+properties:
+ compatible:
+ enum:
+ - mediatek,mt8188-adsp-audio26m
+ - mediatek,mt8188-camsys
+ - mediatek,mt8188-camsys-rawa
+ - mediatek,mt8188-camsys-rawb
+ - mediatek,mt8188-camsys-yuva
+ - mediatek,mt8188-camsys-yuvb
+ - mediatek,mt8188-ccusys
+ - mediatek,mt8188-imgsys
+ - mediatek,mt8188-imgsys-wpe1
+ - mediatek,mt8188-imgsys-wpe2
+ - mediatek,mt8188-imgsys-wpe3
+ - mediatek,mt8188-imgsys1-dip-nr
+ - mediatek,mt8188-imgsys1-dip-top
+ - mediatek,mt8188-imp-iic-wrap-c
+ - mediatek,mt8188-imp-iic-wrap-en
+ - mediatek,mt8188-imp-iic-wrap-w
+ - mediatek,mt8188-ipesys
+ - mediatek,mt8188-mfgcfg
+ - mediatek,mt8188-vdecsys
+ - mediatek,mt8188-vdecsys-soc
+ - mediatek,mt8188-vencsys
+ - mediatek,mt8188-vppsys0
+ - mediatek,mt8188-vppsys1
+ - mediatek,mt8188-wpesys
+ - mediatek,mt8188-wpesys-vpp0
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@11283000 {
+ compatible = "mediatek,mt8188-imp-iic-wrap-c";
+ reg = <0x11283000 0x1000>;
+ #clock-cells = <1>;
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml
new file mode 100644
index 0000000..4cf8d3a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt8188-sys-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek System Clock Controller for MT8188
+
+maintainers:
+ - Garmin Chang <garmin.chang@mediatek.com>
+
+description: |
+ The clock architecture in MediaTek like below
+ PLLs -->
+ dividers -->
+ muxes
+ -->
+ clock gate
+
+ The apmixedsys provides most of PLLs which generated from SoC 26m.
+ The topckgen provides dividers and muxes which provide the clock source to other IP blocks.
+ The infracfg_ao provides clock gate in peripheral and infrastructure IP blocks.
+ The mcusys provides mux control to select the clock source in AP MCU.
+ The device nodes also provide the system control capacity for configuration.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt8188-apmixedsys
+ - mediatek,mt8188-infracfg-ao
+ - mediatek,mt8188-pericfg-ao
+ - mediatek,mt8188-topckgen
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ clock-controller@10000000 {
+ compatible = "mediatek,mt8188-topckgen", "syscon";
+ reg = <0x10000000 0x1000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml b/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml
index 0fdf564..6d087de 100644
--- a/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml
+++ b/Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/clock/mediatek,topckgen.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/clock/mediatek,topckgen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
title: MediaTek Top Clock Generator Controller
diff --git a/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml b/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
index 525ebaa..659669b 100644
--- a/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,a53pll.yaml
@@ -16,6 +16,7 @@
properties:
compatible:
enum:
+ - qcom,ipq5332-a53pll
- qcom,ipq6018-a53pll
- qcom,ipq8074-a53pll
- qcom,msm8916-a53pll
@@ -45,14 +46,14 @@
additionalProperties: false
examples:
- #Example 1 - A53 PLL found on MSM8916 devices
+ # Example 1 - A53 PLL found on MSM8916 devices
- |
a53pll: clock@b016000 {
compatible = "qcom,msm8916-a53pll";
reg = <0xb016000 0x40>;
#clock-cells = <0>;
};
- #Example 2 - A53 PLL found on IPQ6018 devices
+ # Example 2 - A53 PLL found on IPQ6018 devices
- |
a53pll_ipq: clock-controller@b116000 {
compatible = "qcom,ipq6018-a53pll";
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml
new file mode 100644
index 0000000..6ebaef2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-ipq4019.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ4019
+
+maintainers:
+ - Stephen Boyd <sboyd@kernel.org>
+ - Taniya Das <tdas@codeaurora.org>
+ - Robert Marko <robert.markoo@sartura.hr>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ4019.
+
+ See also:: include/dt-bindings/clock/qcom,gcc-ipq4019.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,gcc-ipq4019
+
+ clocks:
+ items:
+ - description: board XO clock
+ - description: sleep clock
+
+ clock-names:
+ items:
+ - const: xo
+ - const: sleep_clk
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,gcc-ipq4019";
+ reg = <0x1800000 0x60000>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ #reset-cells = <1>;
+ clocks = <&xo>, <&sleep_clk>;
+ clock-names = "xo", "sleep_clk";
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml
index 6279a59..b914625 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml
@@ -4,20 +4,25 @@
$id: http://devicetree.org/schemas/clock/qcom,gcc-msm8909.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Qualcomm Global Clock & Reset Controller on MSM8909
+title: Qualcomm Global Clock & Reset Controller on MSM8909, MSM8917 and QM215
maintainers:
- Stephan Gerhold <stephan@gerhold.net>
description: |
Qualcomm global clock control module provides the clocks, resets and power
- domains on MSM8909.
+ domains on MSM8909, MSM8917 or QM215.
- See also:: include/dt-bindings/clock/qcom,gcc-msm8909.h
+ See also::
+ include/dt-bindings/clock/qcom,gcc-msm8909.h
+ include/dt-bindings/clock/qcom,gcc-msm8917.h
properties:
compatible:
- const: qcom,gcc-msm8909
+ enum:
+ - qcom,gcc-msm8909
+ - qcom,gcc-msm8917
+ - qcom,gcc-qm215
clocks:
items:
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-other.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-other.yaml
index 2e8acca6..ae01e77 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc-other.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc-other.yaml
@@ -15,7 +15,6 @@
domains.
See also::
- include/dt-bindings/clock/qcom,gcc-ipq4019.h
include/dt-bindings/clock/qcom,gcc-ipq6018.h
include/dt-bindings/reset/qcom,gcc-ipq6018.h
include/dt-bindings/clock/qcom,gcc-msm8953.h
@@ -29,7 +28,6 @@
properties:
compatible:
enum:
- - qcom,gcc-ipq4019
- qcom,gcc-ipq6018
- qcom,gcc-mdm9607
- qcom,gcc-msm8953
diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
index db53eb2..1e3dc9d 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
@@ -15,6 +15,7 @@
See also::
include/dt-bindings/clock/qcom,gpucc-sdm845.h
+ include/dt-bindings/clock/qcom,gpucc-sa8775p.h
include/dt-bindings/clock/qcom,gpucc-sc7180.h
include/dt-bindings/clock/qcom,gpucc-sc7280.h
include/dt-bindings/clock/qcom,gpucc-sc8280xp.h
@@ -27,6 +28,7 @@
compatible:
enum:
- qcom,sdm845-gpucc
+ - qcom,sa8775p-gpucc
- qcom,sc7180-gpucc
- qcom,sc7280-gpucc
- qcom,sc8180x-gpucc
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml
new file mode 100644
index 0000000..718fe06
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq5332-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ5332
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ5332.
+
+ See also:: include/dt-bindings/clock/qcom,gcc-ipq5332.h
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+properties:
+ compatible:
+ const: qcom,ipq5332-gcc
+
+ clocks:
+ items:
+ - description: Board XO clock source
+ - description: Sleep clock source
+ - description: PCIE 2lane PHY pipe clock source
+ - description: PCIE 2lane x1 PHY pipe clock source (For second lane)
+ - description: USB PCIE wrapper pipe clock source
+
+required:
+ - compatible
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,ipq5332-gcc";
+ reg = <0x01800000 0x80000>;
+ clocks = <&xo_board>,
+ <&sleep_clk>,
+ <&pcie_2lane_phy_pipe_clk>,
+ <&pcie_2lane_phy_pipe_clk_x1>,
+ <&usb_pcie_wrapper_pipe_clk>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ #reset-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml
new file mode 100644
index 0000000..afc68eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,ipq9574-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on IPQ9574
+
+maintainers:
+ - Anusha Rao <quic_anusha@quicinc.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on IPQ9574
+
+ See also::
+ include/dt-bindings/clock/qcom,ipq9574-gcc.h
+ include/dt-bindings/reset/qcom,ipq9574-gcc.h
+
+properties:
+ compatible:
+ const: qcom,ipq9574-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Sleep clock source
+ - description: Bias PLL ubi clock source
+ - description: PCIE30 PHY0 pipe clock source
+ - description: PCIE30 PHY1 pipe clock source
+ - description: PCIE30 PHY2 pipe clock source
+ - description: PCIE30 PHY3 pipe clock source
+ - description: USB3 PHY pipe clock source
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ clock-controller@1800000 {
+ compatible = "qcom,ipq9574-gcc";
+ reg = <0x01800000 0x80000>;
+ clocks = <&xo_board_clk>,
+ <&sleep_clk>,
+ <&bias_pll_ubi_nc_clk>,
+ <&pcie30_phy0_pipe_clk>,
+ <&pcie30_phy1_pipe_clk>,
+ <&pcie30_phy2_pipe_clk>,
+ <&pcie30_phy3_pipe_clk>,
+ <&usb3phy_0_cc_pipe_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml b/Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml
new file mode 100644
index 0000000..a466e4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,kpss-acc-v1.yaml
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,kpss-acc-v1.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Krait Processor Sub-system (KPSS) Application Clock Controller (ACC) v1
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description:
+ The KPSS ACC provides clock, power domain, and reset control to a Krait CPU.
+ There is one ACC register region per CPU within the KPSS remapped region as
+ well as an alias register region that remaps accesses to the ACC associated
+ with the CPU accessing the region. ACC v1 is currently used as a
+ clock-controller for enabling the cpu and hanling the aux clocks.
+
+properties:
+ compatible:
+ const: qcom,kpss-acc-v1
+
+ reg:
+ items:
+ - description: Base address and size of the register region
+ - description: Optional base address and size of the alias register region
+ minItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pll8_vote
+ - const: pxo
+
+ clock-output-names:
+ description: Name of the aux clock. Krait can have at most 4 cpu.
+ enum:
+ - acpu0_aux
+ - acpu1_aux
+ - acpu2_aux
+ - acpu3_aux
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - clock-output-names
+ - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-ipq806x.h>
+
+ clock-controller@2088000 {
+ compatible = "qcom,kpss-acc-v1";
+ reg = <0x02088000 0x1000>, <0x02008000 0x1000>;
+ clocks = <&gcc PLL8_VOTE>, <&pxo_board>;
+ clock-names = "pll8_vote", "pxo";
+ clock-output-names = "acpu0_aux";
+ #clock-cells = <0>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml
new file mode 100644
index 0000000..88b7672
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,kpss-gcc.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,kpss-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Krait Processor Sub-system (KPSS) Global Clock Controller (GCC)
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description:
+ Krait Processor Sub-system (KPSS) Global Clock Controller (GCC). Used
+ to control L2 mux (in the current implementation) and provide access
+ to the kpss-gcc registers.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - qcom,kpss-gcc-ipq8064
+ - qcom,kpss-gcc-apq8064
+ - qcom,kpss-gcc-msm8974
+ - qcom,kpss-gcc-msm8960
+ - qcom,kpss-gcc-msm8660
+ - qcom,kpss-gcc-mdm9615
+ - const: qcom,kpss-gcc
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 2
+ maxItems: 2
+
+ clock-names:
+ items:
+ - const: pll8_vote
+ - const: pxo
+
+ '#clock-cells':
+ const: 0
+
+required:
+ - compatible
+ - reg
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,kpss-gcc-ipq8064
+ - qcom,kpss-gcc-apq8064
+ - qcom,kpss-gcc-msm8974
+ - qcom,kpss-gcc-msm8960
+then:
+ required:
+ - clocks
+ - clock-names
+ - '#clock-cells'
+else:
+ properties:
+ clock: false
+ clock-names: false
+ '#clock-cells': false
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-ipq806x.h>
+
+ clock-controller@2011000 {
+ compatible = "qcom,kpss-gcc-ipq8064", "qcom,kpss-gcc", "syscon";
+ reg = <0x2011000 0x1000>;
+ clocks = <&gcc PLL8_VOTE>, <&pxo_board>;
+ clock-names = "pll8_vote", "pxo";
+ #clock-cells = <0>;
+ };
+
+ - |
+ clock-controller@2011000 {
+ compatible = "qcom,kpss-gcc-mdm9615", "qcom,kpss-gcc", "syscon";
+ reg = <0x02011000 0x1000>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml
index 2a95bf8..3665dd3 100644
--- a/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml
@@ -31,6 +31,7 @@
- qcom,rpmcc-msm8660
- qcom,rpmcc-msm8909
- qcom,rpmcc-msm8916
+ - qcom,rpmcc-msm8917
- qcom,rpmcc-msm8936
- qcom,rpmcc-msm8953
- qcom,rpmcc-msm8974
@@ -107,6 +108,7 @@
- qcom,rpmcc-mdm9607
- qcom,rpmcc-msm8226
- qcom,rpmcc-msm8916
+ - qcom,rpmcc-msm8917
- qcom,rpmcc-msm8936
- qcom,rpmcc-msm8953
- qcom,rpmcc-msm8974
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml
index 6151fde..97c6bd9 100644
--- a/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml
@@ -41,6 +41,12 @@
- const: qdsp6ss
- const: top_cc
+ qcom,adsp-pil-mode:
+ description:
+ Indicates if the LPASS would be brought out of reset using
+ remoteproc peripheral loader.
+ type: boolean
+
required:
- compatible
- reg
@@ -60,6 +66,7 @@
reg-names = "qdsp6ss", "top_cc";
clocks = <&gcc GCC_CFG_NOC_LPASS_CLK>;
clock-names = "iface";
+ qcom,adsp-pil-mode;
#clock-cells = <1>;
};
...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml
new file mode 100644
index 0000000..cf19f44
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6115-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM6115
+
+maintainers:
+ - Konrad Dybcio <konrad.dybcio@linaro.org>
+
+description: |
+ Qualcomm graphics clock control module provides clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also:: include/dt-bindings/clock/qcom,sm6115-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6115-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 main div source
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm6115.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,sm6115-gpucc";
+ reg = <0x05990000 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml
new file mode 100644
index 0000000..374a184
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6125-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM6125
+
+maintainers:
+ - Konrad Dybcio <konrad.dybcio@linaro.org>
+
+description: |
+ Qualcomm graphics clock control module provides clocks and power domains on
+ Qualcomm SoCs.
+
+ See also:: include/dt-bindings/clock/qcom,sm6125-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6125-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+
+ '#clock-cells':
+ const: 1
+
+ '#power-domain-cells':
+ const: 1
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#clock-cells'
+ - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,gcc-sm6125.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,sm6125-gpucc";
+ reg = <0x05990000 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>;
+ #clock-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml
new file mode 100644
index 0000000..b480ead
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm6375-gpucc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Graphics Clock & Reset Controller on SM6375
+
+maintainers:
+ - Konrad Dybcio <konrad.dybcio@linaro.org>
+
+description: |
+ Qualcomm graphics clock control module provides clocks, resets and power
+ domains on Qualcomm SoCs.
+
+ See also:: include/dt-bindings/clock/qcom,sm6375-gpucc.h
+
+properties:
+ compatible:
+ enum:
+ - qcom,sm6375-gpucc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: GPLL0 main branch source
+ - description: GPLL0 div branch source
+ - description: SNoC DVM GFX source
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,sm6375-gcc.h>
+ #include <dt-bindings/clock/qcom,rpmcc.h>
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ clock-controller@5990000 {
+ compatible = "qcom,sm6375-gpucc";
+ reg = <0 0x05990000 0 0x9000>;
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_CLK_SRC>,
+ <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>,
+ <&gcc GCC_GPU_SNOC_DVM_GFX_CLK>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml
new file mode 100644
index 0000000..0eb76d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sm7150-gcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller on SM7150
+
+maintainers:
+ - Bjorn Andersson <andersson@kernel.org>
+ - Danila Tikhonov <danila@jiaxyga.com>
+ - David Wronek <davidwronek@gmail.com>
+
+description: |
+ Qualcomm global clock control module provides the clocks, resets and power
+ domains on SM7150
+
+ See also:: include/dt-bindings/clock/qcom,sm7150-gcc.h
+
+properties:
+ compatible:
+ const: qcom,sm7150-gcc
+
+ clocks:
+ items:
+ - description: Board XO source
+ - description: Board XO Active-Only source
+ - description: Sleep clock source
+
+required:
+ - compatible
+ - clocks
+
+allOf:
+ - $ref: qcom,gcc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/qcom,rpmh.h>
+ clock-controller@100000 {
+ compatible = "qcom,sm7150-gcc";
+ reg = <0x00100000 0x001f0000>;
+ clocks = <&rpmhcc RPMH_CXO_CLK>,
+ <&rpmhcc RPMH_CXO_CLK_A>,
+ <&sleep_clk>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ #power-domain-cells = <1>;